diff options
183 files changed, 1653 insertions, 838 deletions
diff --git a/Documentation/networking/ppp_generic.txt b/Documentation/networking/ppp_generic.txt index 091d20273dcb..61daf4b39600 100644 --- a/Documentation/networking/ppp_generic.txt +++ b/Documentation/networking/ppp_generic.txt @@ -300,12 +300,6 @@ unattached instance are: The ioctl calls available on an instance of /dev/ppp attached to a channel are: -* PPPIOCDETACH detaches the instance from the channel. This ioctl is - deprecated since the same effect can be achieved by closing the - instance. In order to prevent possible races this ioctl will fail - with an EINVAL error if more than one file descriptor refers to this - instance (i.e. as a result of dup(), dup2() or fork()). - * PPPIOCCONNECT connects this channel to a PPP interface. The argument should point to an int containing the interface unit number. It will return an EINVAL error if the channel is already diff --git a/MAINTAINERS b/MAINTAINERS index 078fd80f664f..ca4afd68530c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2332,7 +2332,7 @@ F: drivers/gpio/gpio-ath79.c F: Documentation/devicetree/bindings/gpio/gpio-ath79.txt ATHEROS ATH GENERIC UTILITIES -M: "Luis R. Rodriguez" <mcgrof@do-not-panic.com> +M: Kalle Valo <kvalo@codeaurora.org> L: linux-wireless@vger.kernel.org S: Supported F: drivers/net/wireless/ath/* @@ -2347,7 +2347,7 @@ S: Maintained F: drivers/net/wireless/ath/ath5k/ ATHEROS ATH6KL WIRELESS DRIVER -M: Kalle Valo <kvalo@qca.qualcomm.com> +M: Kalle Valo <kvalo@codeaurora.org> L: linux-wireless@vger.kernel.org W: http://wireless.kernel.org/en/users/Drivers/ath6kl T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git @@ -5388,7 +5388,6 @@ S: Maintained F: drivers/iommu/exynos-iommu.c EZchip NPS platform support -M: Elad Kanfi <eladkan@mellanox.com> M: Vineet Gupta <vgupta@synopsys.com> S: Supported F: arch/arc/plat-eznps @@ -6504,9 +6503,15 @@ F: Documentation/networking/hinic.txt F: drivers/net/ethernet/huawei/hinic/ HUGETLB FILESYSTEM -M: Nadia Yvette Chambers <nyc@holomorphy.com> +M: Mike Kravetz <mike.kravetz@oracle.com> +L: linux-mm@kvack.org S: Maintained F: fs/hugetlbfs/ +F: mm/hugetlb.c +F: include/linux/hugetlb.h +F: Documentation/admin-guide/mm/hugetlbpage.rst +F: Documentation/vm/hugetlbfs_reserv.rst +F: Documentation/ABI/testing/sysfs-kernel-mm-hugepages HVA ST MEDIA DRIVER M: Jean-Christophe Trotin <jean-christophe.trotin@st.com> @@ -9021,7 +9026,6 @@ Q: http://patchwork.ozlabs.org/project/netdev/list/ F: drivers/net/ethernet/mellanox/mlx5/core/en_* MELLANOX ETHERNET INNOVA DRIVER -M: Ilan Tayari <ilant@mellanox.com> R: Boris Pismenny <borisp@mellanox.com> L: netdev@vger.kernel.org S: Supported @@ -9031,7 +9035,6 @@ F: drivers/net/ethernet/mellanox/mlx5/core/fpga/* F: include/linux/mlx5/mlx5_ifc_fpga.h MELLANOX ETHERNET INNOVA IPSEC DRIVER -M: Ilan Tayari <ilant@mellanox.com> R: Boris Pismenny <borisp@mellanox.com> L: netdev@vger.kernel.org S: Supported @@ -9087,7 +9090,6 @@ F: include/uapi/rdma/mlx4-abi.h MELLANOX MLX5 core VPI driver M: Saeed Mahameed <saeedm@mellanox.com> -M: Matan Barak <matanb@mellanox.com> M: Leon Romanovsky <leonro@mellanox.com> L: netdev@vger.kernel.org L: linux-rdma@vger.kernel.org @@ -9098,7 +9100,6 @@ F: drivers/net/ethernet/mellanox/mlx5/core/ F: include/linux/mlx5/ MELLANOX MLX5 IB driver -M: Matan Barak <matanb@mellanox.com> M: Leon Romanovsky <leonro@mellanox.com> L: linux-rdma@vger.kernel.org W: http://www.mellanox.com @@ -9832,7 +9833,6 @@ F: net/netfilter/xt_CONNSECMARK.c F: net/netfilter/xt_SECMARK.c NETWORKING [TLS] -M: Ilya Lesokhin <ilyal@mellanox.com> M: Aviad Yehezkel <aviadye@mellanox.com> M: Dave Watson <davejwatson@fb.com> L: netdev@vger.kernel.org @@ -11632,7 +11632,7 @@ S: Maintained F: drivers/media/tuners/qt1010* QUALCOMM ATHEROS ATH10K WIRELESS DRIVER -M: Kalle Valo <kvalo@qca.qualcomm.com> +M: Kalle Valo <kvalo@codeaurora.org> L: ath10k@lists.infradead.org W: http://wireless.kernel.org/en/users/Drivers/ath10k T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git @@ -11683,7 +11683,7 @@ S: Maintained F: drivers/media/platform/qcom/venus/ QUALCOMM WCN36XX WIRELESS DRIVER -M: Eugene Krasnikov <k.eugene.e@gmail.com> +M: Kalle Valo <kvalo@codeaurora.org> L: wcn36xx@lists.infradead.org W: http://wireless.kernel.org/en/users/Drivers/wcn36xx T: git git://github.com/KrasnikovEugene/wcn36xx.git diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index b2022885ced8..f19dc31288c8 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -211,6 +211,7 @@ config ALPHA_EIGER config ALPHA_JENSEN bool "Jensen" depends on BROKEN + select DMA_DIRECT_OPS help DEC PC 150 AXP (aka Jensen): This is a very old Digital system - one of the first-generation Alpha systems. A number of these systems diff --git a/arch/alpha/include/asm/dma-mapping.h b/arch/alpha/include/asm/dma-mapping.h index b78f61f20796..8beeafd4f68e 100644 --- a/arch/alpha/include/asm/dma-mapping.h +++ b/arch/alpha/include/asm/dma-mapping.h @@ -2,11 +2,15 @@ #ifndef _ALPHA_DMA_MAPPING_H #define _ALPHA_DMA_MAPPING_H -extern const struct dma_map_ops *dma_ops; +extern const struct dma_map_ops alpha_pci_ops; static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { - return dma_ops; +#ifdef CONFIG_ALPHA_JENSEN + return &dma_direct_ops; +#else + return &alpha_pci_ops; +#endif } #endif /* _ALPHA_DMA_MAPPING_H */ diff --git a/arch/alpha/kernel/io.c b/arch/alpha/kernel/io.c index 3e3d49c254c5..c025a3e5e357 100644 --- a/arch/alpha/kernel/io.c +++ b/arch/alpha/kernel/io.c @@ -37,20 +37,20 @@ unsigned int ioread32(void __iomem *addr) void iowrite8(u8 b, void __iomem *addr) { - IO_CONCAT(__IO_PREFIX,iowrite8)(b, addr); mb(); + IO_CONCAT(__IO_PREFIX,iowrite8)(b, addr); } void iowrite16(u16 b, void __iomem *addr) { - IO_CONCAT(__IO_PREFIX,iowrite16)(b, addr); mb(); + IO_CONCAT(__IO_PREFIX,iowrite16)(b, addr); } void iowrite32(u32 b, void __iomem *addr) { - IO_CONCAT(__IO_PREFIX,iowrite32)(b, addr); mb(); + IO_CONCAT(__IO_PREFIX,iowrite32)(b, addr); } EXPORT_SYMBOL(ioread8); @@ -176,26 +176,26 @@ u64 readq(const volatile void __iomem *addr) void writeb(u8 b, volatile void __iomem *addr) { - __raw_writeb(b, addr); mb(); + __raw_writeb(b, addr); } void writew(u16 b, volatile void __iomem *addr) { - __raw_writew(b, addr); mb(); + __raw_writew(b, addr); } void writel(u32 b, volatile void __iomem *addr) { - __raw_writel(b, addr); mb(); + __raw_writel(b, addr); } void writeq(u64 b, volatile void __iomem *addr) { - __raw_writeq(b, addr); mb(); + __raw_writeq(b, addr); } EXPORT_SYMBOL(readb); diff --git a/arch/alpha/kernel/pci-noop.c b/arch/alpha/kernel/pci-noop.c index b6ebb65127a8..c7c5879869d3 100644 --- a/arch/alpha/kernel/pci-noop.c +++ b/arch/alpha/kernel/pci-noop.c @@ -102,36 +102,3 @@ SYSCALL_DEFINE5(pciconfig_write, unsigned long, bus, unsigned long, dfn, else return -ENODEV; } - -static void *alpha_noop_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, - unsigned long attrs) -{ - void *ret; - - if (!dev || *dev->dma_mask >= 0xffffffffUL) - gfp &= ~GFP_DMA; - ret = (void *)__get_free_pages(gfp, get_order(size)); - if (ret) { - memset(ret, 0, size); - *dma_handle = virt_to_phys(ret); - } - return ret; -} - -static int alpha_noop_supported(struct device *dev, u64 mask) -{ - return mask < 0x00ffffffUL ? 0 : 1; -} - -const struct dma_map_ops alpha_noop_ops = { - .alloc = alpha_noop_alloc_coherent, - .free = dma_noop_free_coherent, - .map_page = dma_noop_map_page, - .map_sg = dma_noop_map_sg, - .mapping_error = dma_noop_mapping_error, - .dma_supported = alpha_noop_supported, -}; - -const struct dma_map_ops *dma_ops = &alpha_noop_ops; -EXPORT_SYMBOL(dma_ops); diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c index 83b34b9188ea..6923b0d9c1e1 100644 --- a/arch/alpha/kernel/pci_iommu.c +++ b/arch/alpha/kernel/pci_iommu.c @@ -950,6 +950,4 @@ const struct dma_map_ops alpha_pci_ops = { .mapping_error = alpha_pci_mapping_error, .dma_supported = alpha_pci_supported, }; - -const struct dma_map_ops *dma_ops = &alpha_pci_ops; -EXPORT_SYMBOL(dma_ops); +EXPORT_SYMBOL(alpha_pci_ops); diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 8c398fedbbb6..ada8eb206a90 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -466,12 +466,6 @@ void __init dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) void __init dma_contiguous_remap(void) { int i; - - if (!dma_mmu_remap_num) - return; - - /* call flush_cache_all() since CMA area would be large enough */ - flush_cache_all(); for (i = 0; i < dma_mmu_remap_num; i++) { phys_addr_t start = dma_mmu_remap[i].base; phys_addr_t end = start + dma_mmu_remap[i].size; @@ -504,15 +498,7 @@ void __init dma_contiguous_remap(void) flush_tlb_kernel_range(__phys_to_virt(start), __phys_to_virt(end)); - /* - * All the memory in CMA region will be on ZONE_MOVABLE. - * If that zone is considered as highmem, the memory in CMA - * region is also considered as highmem even if it's - * physical address belong to lowmem. In this case, - * re-mapping isn't required. - */ - if (!is_highmem_idx(ZONE_MOVABLE)) - iotable_init(&map, 1); + iotable_init(&map, 1); } } diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index 9ef0797380cb..f9b0b09153e0 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -117,7 +117,7 @@ static inline void atomic_and(int i, atomic_t *v) /* LSE atomics */ " mvn %w[i], %w[i]\n" " stclr %w[i], %[v]") - : [i] "+r" (w0), [v] "+Q" (v->counter) + : [i] "+&r" (w0), [v] "+Q" (v->counter) : "r" (x1) : __LL_SC_CLOBBERS); } @@ -135,7 +135,7 @@ static inline int atomic_fetch_and##name(int i, atomic_t *v) \ /* LSE atomics */ \ " mvn %w[i], %w[i]\n" \ " ldclr" #mb " %w[i], %w[i], %[v]") \ - : [i] "+r" (w0), [v] "+Q" (v->counter) \ + : [i] "+&r" (w0), [v] "+Q" (v->counter) \ : "r" (x1) \ : __LL_SC_CLOBBERS, ##cl); \ \ @@ -161,7 +161,7 @@ static inline void atomic_sub(int i, atomic_t *v) /* LSE atomics */ " neg %w[i], %w[i]\n" " stadd %w[i], %[v]") - : [i] "+r" (w0), [v] "+Q" (v->counter) + : [i] "+&r" (w0), [v] "+Q" (v->counter) : "r" (x1) : __LL_SC_CLOBBERS); } @@ -180,7 +180,7 @@ static inline int atomic_sub_return##name(int i, atomic_t *v) \ " neg %w[i], %w[i]\n" \ " ldadd" #mb " %w[i], w30, %[v]\n" \ " add %w[i], %w[i], w30") \ - : [i] "+r" (w0), [v] "+Q" (v->counter) \ + : [i] "+&r" (w0), [v] "+Q" (v->counter) \ : "r" (x1) \ : __LL_SC_CLOBBERS , ##cl); \ \ @@ -207,7 +207,7 @@ static inline int atomic_fetch_sub##name(int i, atomic_t *v) \ /* LSE atomics */ \ " neg %w[i], %w[i]\n" \ " ldadd" #mb " %w[i], %w[i], %[v]") \ - : [i] "+r" (w0), [v] "+Q" (v->counter) \ + : [i] "+&r" (w0), [v] "+Q" (v->counter) \ : "r" (x1) \ : __LL_SC_CLOBBERS, ##cl); \ \ @@ -314,7 +314,7 @@ static inline void atomic64_and(long i, atomic64_t *v) /* LSE atomics */ " mvn %[i], %[i]\n" " stclr %[i], %[v]") - : [i] "+r" (x0), [v] "+Q" (v->counter) + : [i] "+&r" (x0), [v] "+Q" (v->counter) : "r" (x1) : __LL_SC_CLOBBERS); } @@ -332,7 +332,7 @@ static inline long atomic64_fetch_and##name(long i, atomic64_t *v) \ /* LSE atomics */ \ " mvn %[i], %[i]\n" \ " ldclr" #mb " %[i], %[i], %[v]") \ - : [i] "+r" (x0), [v] "+Q" (v->counter) \ + : [i] "+&r" (x0), [v] "+Q" (v->counter) \ : "r" (x1) \ : __LL_SC_CLOBBERS, ##cl); \ \ @@ -358,7 +358,7 @@ static inline void atomic64_sub(long i, atomic64_t *v) /* LSE atomics */ " neg %[i], %[i]\n" " stadd %[i], %[v]") - : [i] "+r" (x0), [v] "+Q" (v->counter) + : [i] "+&r" (x0), [v] "+Q" (v->counter) : "r" (x1) : __LL_SC_CLOBBERS); } @@ -377,7 +377,7 @@ static inline long atomic64_sub_return##name(long i, atomic64_t *v) \ " neg %[i], %[i]\n" \ " ldadd" #mb " %[i], x30, %[v]\n" \ " add %[i], %[i], x30") \ - : [i] "+r" (x0), [v] "+Q" (v->counter) \ + : [i] "+&r" (x0), [v] "+Q" (v->counter) \ : "r" (x1) \ : __LL_SC_CLOBBERS, ##cl); \ \ @@ -404,7 +404,7 @@ static inline long atomic64_fetch_sub##name(long i, atomic64_t *v) \ /* LSE atomics */ \ " neg %[i], %[i]\n" \ " ldadd" #mb " %[i], %[i], %[v]") \ - : [i] "+r" (x0), [v] "+Q" (v->counter) \ + : [i] "+&r" (x0), [v] "+Q" (v->counter) \ : "r" (x1) \ : __LL_SC_CLOBBERS, ##cl); \ \ @@ -435,7 +435,7 @@ static inline long atomic64_dec_if_positive(atomic64_t *v) " sub x30, x30, %[ret]\n" " cbnz x30, 1b\n" "2:") - : [ret] "+r" (x0), [v] "+Q" (v->counter) + : [ret] "+&r" (x0), [v] "+Q" (v->counter) : : __LL_SC_CLOBBERS, "cc", "memory"); @@ -516,7 +516,7 @@ static inline long __cmpxchg_double##name(unsigned long old1, \ " eor %[old1], %[old1], %[oldval1]\n" \ " eor %[old2], %[old2], %[oldval2]\n" \ " orr %[old1], %[old1], %[old2]") \ - : [old1] "+r" (x0), [old2] "+r" (x1), \ + : [old1] "+&r" (x0), [old2] "+&r" (x1), \ [v] "+Q" (*(unsigned long *)ptr) \ : [new1] "r" (x2), [new2] "r" (x3), [ptr] "r" (x4), \ [oldval1] "r" (oldval1), [oldval2] "r" (oldval2) \ diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c index 66be504edb6c..d894a20b70b2 100644 --- a/arch/arm64/kernel/arm64ksyms.c +++ b/arch/arm64/kernel/arm64ksyms.c @@ -75,3 +75,11 @@ NOKPROBE_SYMBOL(_mcount); /* arm-smccc */ EXPORT_SYMBOL(__arm_smccc_smc); EXPORT_SYMBOL(__arm_smccc_hvc); + + /* tishift.S */ +extern long long __ashlti3(long long a, int b); +EXPORT_SYMBOL(__ashlti3); +extern long long __ashrti3(long long a, int b); +EXPORT_SYMBOL(__ashrti3); +extern long long __lshrti3(long long a, int b); +EXPORT_SYMBOL(__lshrti3); diff --git a/arch/arm64/lib/tishift.S b/arch/arm64/lib/tishift.S index d3db9b2cd479..0fdff97794de 100644 --- a/arch/arm64/lib/tishift.S +++ b/arch/arm64/lib/tishift.S @@ -1,17 +1,6 @@ -/* - * Copyright (C) 2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. + * Copyright (C) 2017-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */ #include <linux/linkage.h> diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 4165485e8b6e..2af3dd89bcdb 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -293,6 +293,57 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr, static void __do_user_fault(struct siginfo *info, unsigned int esr) { current->thread.fault_address = (unsigned long)info->si_addr; + + /* + * If the faulting address is in the kernel, we must sanitize the ESR. + * From userspace's point of view, kernel-only mappings don't exist + * at all, so we report them as level 0 translation faults. + * (This is not quite the way that "no mapping there at all" behaves: + * an alignment fault not caused by the memory type would take + * precedence over translation fault for a real access to empty + * space. Unfortunately we can't easily distinguish "alignment fault + * not caused by memory type" from "alignment fault caused by memory + * type", so we ignore this wrinkle and just return the translation + * fault.) + */ + if (current->thread.fault_address >= TASK_SIZE) { + switch (ESR_ELx_EC(esr)) { + case ESR_ELx_EC_DABT_LOW: + /* + * These bits provide only information about the + * faulting instruction, which userspace knows already. + * We explicitly clear bits which are architecturally + * RES0 in case they are given meanings in future. + * We always report the ESR as if the fault was taken + * to EL1 and so ISV and the bits in ISS[23:14] are + * clear. (In fact it always will be a fault to EL1.) + */ + esr &= ESR_ELx_EC_MASK | ESR_ELx_IL | + ESR_ELx_CM | ESR_ELx_WNR; + esr |= ESR_ELx_FSC_FAULT; + break; + case ESR_ELx_EC_IABT_LOW: + /* + * Claim a level 0 translation fault. + * All other bits are architecturally RES0 for faults + * reported with that DFSC value, so we clear them. + */ + esr &= ESR_ELx_EC_MASK | ESR_ELx_IL; + esr |= ESR_ELx_FSC_FAULT; + break; + default: + /* + * This should never happen (entry.S only brings us + * into this code for insn and data aborts from a lower + * exception level). Fail safe by not providing an ESR + * context record at all. + */ + WARN(1, "ESR 0x%x is not DABT or IABT from EL0\n", esr); + esr = 0; + break; + } + } + current->thread.fault_code = esr; arm64_force_sig_info(info, esr_to_fault_info(esr)->name, current); } diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 2dbb2c9f1ec1..493ff75670ff 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -933,13 +933,15 @@ int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot) { pgprot_t sect_prot = __pgprot(PUD_TYPE_SECT | pgprot_val(mk_sect_prot(prot))); + pud_t new_pud = pfn_pud(__phys_to_pfn(phys), sect_prot); - /* ioremap_page_range doesn't honour BBM */ - if (pud_present(READ_ONCE(*pudp))) + /* Only allow permission changes for now */ + if (!pgattr_change_is_safe(READ_ONCE(pud_val(*pudp)), + pud_val(new_pud))) return 0; BUG_ON(phys & ~PUD_MASK); - set_pud(pudp, pfn_pud(__phys_to_pfn(phys), sect_prot)); + set_pud(pudp, new_pud); return 1; } @@ -947,13 +949,15 @@ int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot) { pgprot_t sect_prot = __pgprot(PMD_TYPE_SECT | pgprot_val(mk_sect_prot(prot))); + pmd_t new_pmd = pfn_pmd(__phys_to_pfn(phys), sect_prot); - /* ioremap_page_range doesn't honour BBM */ - if (pmd_present(READ_ONCE(*pmdp))) + /* Only allow permission changes for now */ + if (!pgattr_change_is_safe(READ_ONCE(pmd_val(*pmdp)), + pmd_val(new_pmd))) return 0; BUG_ON(phys & ~PMD_MASK); - set_pmd(pmdp, pfn_pmd(__phys_to_pfn(phys), sect_prot)); + set_pmd(pmdp, new_pmd); return 1; } diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 471b2274fbeb..c40b4380951c 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -74,6 +74,27 @@ */ #define EX_R3 EX_DAR +#define STF_ENTRY_BARRIER_SLOT \ + STF_ENTRY_BARRIER_FIXUP_SECTION; \ + nop; \ + nop; \ + nop + +#define STF_EXIT_BARRIER_SLOT \ + STF_EXIT_BARRIER_FIXUP_SECTION; \ + nop; \ + nop; \ + nop; \ + nop; \ + nop; \ + nop + +/* + * r10 must be free to use, r13 must be paca + */ +#define INTERRUPT_TO_KERNEL \ + STF_ENTRY_BARRIER_SLOT + /* * Macros for annotating the expected destination of (h)rfid * @@ -90,16 +111,19 @@ rfid #define RFI_TO_USER \ + STF_EXIT_BARRIER_SLOT; \ RFI_FLUSH_SLOT; \ rfid; \ b rfi_flush_fallback #define RFI_TO_USER_OR_KERNEL \ + STF_EXIT_BARRIER_SLOT; \ RFI_FLUSH_SLOT; \ rfid; \ b rfi_flush_fallback #define RFI_TO_GUEST \ + STF_EXIT_BARRIER_SLOT; \ RFI_FLUSH_SLOT; \ rfid; \ b rfi_flush_fallback @@ -108,21 +132,25 @@ hrfid #define HRFI_TO_USER \ + STF_EXIT_BARRIER_SLOT; \ RFI_FLUSH_SLOT; \ hrfid; \ b hrfi_flush_fallback #define HRFI_TO_USER_OR_KERNEL \ + STF_EXIT_BARRIER_SLOT; \ RFI_FLUSH_SLOT; \ hrfid; \ b hrfi_flush_fallback #define HRFI_TO_GUEST \ + STF_EXIT_BARRIER_SLOT; \ RFI_FLUSH_SLOT; \ hrfid; \ b hrfi_flush_fallback #define HRFI_TO_UNKNOWN \ + STF_EXIT_BARRIER_SLOT; \ RFI_FLUSH_SLOT; \ hrfid; \ b hrfi_flush_fallback @@ -254,6 +282,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define __EXCEPTION_PROLOG_1_PRE(area) \ OPT_SAVE_REG_TO_PACA(area+EX_PPR, r9, CPU_FTR_HAS_PPR); \ OPT_SAVE_REG_TO_PACA(area+EX_CFAR, r10, CPU_FTR_CFAR); \ + INTERRUPT_TO_KERNEL; \ SAVE_CTR(r10, area); \ mfcr r9; diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h index 1e82eb3caabd..a9b64df34e2a 100644 --- a/arch/powerpc/include/asm/feature-fixups.h +++ b/arch/powerpc/include/asm/feature-fixups.h @@ -187,6 +187,22 @@ label##3: \ FTR_ENTRY_OFFSET label##1b-label##3b; \ .popsection; +#define STF_ENTRY_BARRIER_FIXUP_SECTION \ +953: \ + .pushsection __stf_entry_barrier_fixup,"a"; \ + .align 2; \ +954: \ + FTR_ENTRY_OFFSET 953b-954b; \ + .popsection; + +#define STF_EXIT_BARRIER_FIXUP_SECTION \ +955: \ + .pushsection __stf_exit_barrier_fixup,"a"; \ + .align 2; \ +956: \ + FTR_ENTRY_OFFSET 955b-956b; \ + .popsection; + #define RFI_FLUSH_FIXUP_SECTION \ 951: \ .pushsection __rfi_flush_fixup,"a"; \ @@ -199,6 +215,9 @@ label##3: \ #ifndef __ASSEMBLY__ #include <linux/types.h> +extern long stf_barrier_fallback; +extern long __start___stf_entry_barrier_fixup, __stop___stf_entry_barrier_fixup; +extern long __start___stf_exit_barrier_fixup, __stop___stf_exit_barrier_fixup; extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup; void apply_feature_fixups(void); diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 4c02a7378d06..e7377b73cfec 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -96,6 +96,7 @@ struct kvmppc_vcore { struct kvm_vcpu *runner; struct kvm *kvm; u64 tb_offset; /* guest timebase - host timebase */ + u64 tb_offset_applied; /* timebase offset currently in force */ ulong lpcr; u32 arch_compat; ulong pcr; diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h index fa4d2e1cf772..44989b22383c 100644 --- a/arch/powerpc/include/asm/security_features.h +++ b/arch/powerpc/include/asm/security_features.h @@ -12,6 +12,17 @@ extern unsigned long powerpc_security_features; extern bool rfi_flush; +/* These are bit flags */ +enum stf_barrier_type { + STF_BARRIER_NONE = 0x1, + STF_BARRIER_FALLBACK = 0x2, + STF_BARRIER_EIEIO = 0x4, + STF_BARRIER_SYNC_ORI = 0x8, +}; + +void setup_stf_barrier(void); +void do_stf_barrier_fixups(enum stf_barrier_type types); + static inline void security_ftr_set(unsigned long feature) { powerpc_security_features |= feature; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 6bee65f3cfd3..373dc1d6ef44 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -562,6 +562,7 @@ int main(void) OFFSET(VCORE_NAPPING_THREADS, kvmppc_vcore, napping_threads); OFFSET(VCORE_KVM, kvmppc_vcore, kvm); OFFSET(VCORE_TB_OFFSET, kvmppc_vcore, tb_offset); + OFFSET(VCORE_TB_OFFSET_APPL, kvmppc_vcore, tb_offset_applied); OFFSET(VCORE_LPCR, kvmppc_vcore, lpcr); OFFSET(VCORE_PCR, kvmppc_vcore, pcr); OFFSET(VCORE_DPDES, kvmppc_vcore, dpdes); diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index 3f30c994e931..458b928dbd84 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -28,6 +28,7 @@ _GLOBAL(__setup_cpu_power7) beqlr li r0,0 mtspr SPRN_LPID,r0 + mtspr SPRN_PCR,r0 mfspr r3,SPRN_LPCR li r4,(LPCR_LPES1 >> LPCR_LPES_SH) bl __init_LPCR_ISA206 @@ -41,6 +42,7 @@ _GLOBAL(__restore_cpu_power7) beqlr li r0,0 mtspr SPRN_LPID,r0 + mtspr SPRN_PCR,r0 mfspr r3,SPRN_LPCR li r4,(LPCR_LPES1 >> LPCR_LPES_SH) bl __init_LPCR_ISA206 @@ -57,6 +59,7 @@ _GLOBAL(__setup_cpu_power8) beqlr li r0,0 mtspr SPRN_LPID,r0 + mtspr SPRN_PCR,r0 mfspr r3,SPRN_LPCR ori r3, r3, LPCR_PECEDH li r4,0 /* LPES = 0 */ @@ -78,6 +81,7 @@ _GLOBAL(__restore_cpu_power8) beqlr li r0,0 mtspr SPRN_LPID,r0 + mtspr SPRN_PCR,r0 mfspr r3,SPRN_LPCR ori r3, r3, LPCR_PECEDH li r4,0 /* LPES = 0 */ @@ -99,6 +103,7 @@ _GLOBAL(__setup_cpu_power9) mtspr SPRN_PSSCR,r0 mtspr SPRN_LPID,r0 mtspr SPRN_PID,r0 + mtspr SPRN_PCR,r0 mfspr r3,SPRN_LPCR LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC) or r3, r3, r4 @@ -123,6 +128,7 @@ _GLOBAL(__restore_cpu_power9) mtspr SPRN_PSSCR,r0 mtspr SPRN_LPID,r0 mtspr SPRN_PID,r0 + mtspr SPRN_PCR,r0 mfspr r3,SPRN_LPCR LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC) or r3, r3, r4 diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 8ab51f6ca03a..c904477abaf3 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -101,6 +101,7 @@ static void __restore_cpu_cpufeatures(void) if (hv_mode) { mtspr(SPRN_LPID, 0); mtspr(SPRN_HFSCR, system_registers.hfscr); + mtspr(SPRN_PCR, 0); } mtspr(SPRN_FSCR, system_registers.fscr); diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index ae6a849db60b..f283958129f2 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -885,7 +885,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) #endif -EXC_REAL_MASKABLE(decrementer, 0x900, 0x80, IRQS_DISABLED) +EXC_REAL_OOL_MASKABLE(decrementer, 0x900, 0x80, IRQS_DISABLED) EXC_VIRT_MASKABLE(decrementer, 0x4900, 0x80, 0x900, IRQS_DISABLED) TRAMP_KVM(PACA_EXGEN, 0x900) EXC_COMMON_ASYNC(decrementer_common, 0x900, timer_interrupt) @@ -961,6 +961,7 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception) mtctr r13; \ GET_PACA(r13); \ std r10,PACA_EXGEN+EX_R10(r13); \ + INTERRUPT_TO_KERNEL; \ KVMTEST_PR(0xc00); /* uses r10, branch to do_kvm_0xc00_system_call */ \ HMT_MEDIUM; \ mfctr r9; @@ -969,7 +970,8 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception) #define SYSCALL_KVMTEST \ HMT_MEDIUM; \ mr r9,r13; \ - GET_PACA(r13); + GET_PACA(r13); \ + INTERRUPT_TO_KERNEL; #endif #define LOAD_SYSCALL_HANDLER(reg) \ @@ -1507,6 +1509,19 @@ masked_##_H##interrupt: \ b .; \ MASKED_DEC_HANDLER(_H) +TRAMP_REAL_BEGIN(stf_barrier_fallback) + std r9,PACA_EXRFI+EX_R9(r13) + std r10,PACA_EXRFI+EX_R10(r13) + sync + ld r9,PACA_EXRFI+EX_R9(r13) + ld r10,PACA_EXRFI+EX_R10(r13) + ori 31,31,0 + .rept 14 + b 1f +1: + .endr + blr + TRAMP_REAL_BEGIN(rfi_flush_fallback) SET_SCRATCH0(r13); GET_PACA(r13); diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index bab5a27ea805..b98a722da915 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -8,6 +8,7 @@ #include <linux/device.h> #include <linux/seq_buf.h> +#include <asm/debugfs.h> #include <asm/security_features.h> @@ -86,3 +87,151 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c return s.len; } + +/* + * Store-forwarding barrier support. + */ + +static enum stf_barrier_type stf_enabled_flush_types; +static bool no_stf_barrier; +bool stf_barrier; + +static int __init handle_no_stf_barrier(char *p) +{ + pr_info("stf-barrier: disabled on command line."); + no_stf_barrier = true; + return 0; +} + +early_param("no_stf_barrier", handle_no_stf_barrier); + +/* This is the generic flag used by other architectures */ +static int __init handle_ssbd(char *p) +{ + if (!p || strncmp(p, "auto", 5) == 0 || strncmp(p, "on", 2) == 0 ) { + /* Until firmware tells us, we have the barrier with auto */ + return 0; + } else if (strncmp(p, "off", 3) == 0) { + handle_no_stf_barrier(NULL); + return 0; + } else + return 1; + + return 0; +} +early_param("spec_store_bypass_disable", handle_ssbd); + +/* This is the generic flag used by other architectures */ +static int __init handle_no_ssbd(char *p) +{ + handle_no_stf_barrier(NULL); + return 0; +} +early_param("nospec_store_bypass_disable", handle_no_ssbd); + +static void stf_barrier_enable(bool enable) +{ + if (enable) + do_stf_barrier_fixups(stf_enabled_flush_types); + else + do_stf_barrier_fixups(STF_BARRIER_NONE); + + stf_barrier = enable; +} + +void setup_stf_barrier(void) +{ + enum stf_barrier_type type; + bool enable, hv; + + hv = cpu_has_feature(CPU_FTR_HVMODE); + + /* Default to fallback in case fw-features are not available */ + if (cpu_has_feature(CPU_FTR_ARCH_300)) + type = STF_BARRIER_EIEIO; + else if (cpu_has_feature(CPU_FTR_ARCH_207S)) + type = STF_BARRIER_SYNC_ORI; + else if (cpu_has_feature(CPU_FTR_ARCH_206)) + type = STF_BARRIER_FALLBACK; + else + type = STF_BARRIER_NONE; + + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && + (security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR) || + (security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) && hv)); + + if (type == STF_BARRIER_FALLBACK) { + pr_info("stf-barrier: fallback barrier available\n"); + } else if (type == STF_BARRIER_SYNC_ORI) { + pr_info("stf-barrier: hwsync barrier available\n"); + } else if (type == STF_BARRIER_EIEIO) { + pr_info("stf-barrier: eieio barrier available\n"); + } + + stf_enabled_flush_types = type; + + if (!no_stf_barrier) + stf_barrier_enable(enable); +} + +ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute *attr, char *buf) +{ + if (stf_barrier && stf_enabled_flush_types != STF_BARRIER_NONE) { + const char *type; + switch (stf_enabled_flush_types) { + case STF_BARRIER_EIEIO: + type = "eieio"; + break; + case STF_BARRIER_SYNC_ORI: + type = "hwsync"; + break; + case STF_BARRIER_FALLBACK: + type = "fallback"; + break; + default: + type = "unknown"; + } + return sprintf(buf, "Mitigation: Kernel entry/exit barrier (%s)\n", type); + } + + if (!security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) && + !security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR)) + return sprintf(buf, "Not affected\n"); + + return sprintf(buf, "Vulnerable\n"); +} + +#ifdef CONFIG_DEBUG_FS +static int stf_barrier_set(void *data, u64 val) +{ + bool enable; + + if (val == 1) + enable = true; + else if (val == 0) + enable = false; + else + return -EINVAL; + + /* Only do anything if we're changing state */ + if (enable != stf_barrier) + stf_barrier_enable(enable); + + return 0; +} + +static int stf_barrier_get(void *data, u64 *val) +{ + *val = stf_barrier ? 1 : 0; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_stf_barrier, stf_barrier_get, stf_barrier_set, "%llu\n"); + +static __init int stf_barrier_debugfs_init(void) +{ + debugfs_create_file("stf_barrier", 0600, powerpc_debugfs_root, NULL, &fops_stf_barrier); + return 0; +} +device_initcall(stf_barrier_debugfs_init); +#endif /* CONFIG_DEBUG_FS */ diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index c8af90ff49f0..b8d82678f8b4 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -134,6 +134,20 @@ SECTIONS #ifdef CONFIG_PPC64 . = ALIGN(8); + __stf_entry_barrier_fixup : AT(ADDR(__stf_entry_barrier_fixup) - LOAD_OFFSET) { + __start___stf_entry_barrier_fixup = .; + *(__stf_entry_barrier_fixup) + __stop___stf_entry_barrier_fixup = .; + } + + . = ALIGN(8); + __stf_exit_barrier_fixup : AT(ADDR(__stf_exit_barrier_fixup) - LOAD_OFFSET) { + __start___stf_exit_barrier_fixup = .; + *(__stf_exit_barrier_fixup) + __stop___stf_exit_barrier_fixup = .; + } + + . = ALIGN(8); __rfi_flush_fixup : AT(ADDR(__rfi_flush_fixup) - LOAD_OFFSET) { __start___rfi_flush_fixup = .; *(__rfi_flush_fixup) diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index a57eafec4dc2..361f42c8c73e 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -162,7 +162,7 @@ static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr, if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) asm volatile(PPC_TLBIE_5(%0, %1, 0, 0, 1) : : "r" (addr), "r" (kvm->arch.lpid) : "memory"); - asm volatile("ptesync": : :"memory"); + asm volatile("eieio ; tlbsync ; ptesync": : :"memory"); } static void kvmppc_radix_flush_pwc(struct kvm *kvm, unsigned long addr) @@ -173,7 +173,7 @@ static void kvmppc_radix_flush_pwc(struct kvm *kvm, unsigned long addr) /* RIC=1 PRS=0 R=1 IS=2 */ asm volatile(PPC_TLBIE_5(%0, %1, 1, 0, 1) : : "r" (rb), "r" (kvm->arch.lpid) : "memory"); - asm volatile("ptesync": : :"memory"); + asm volatile("eieio ; tlbsync ; ptesync": : :"memory"); } unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep, @@ -584,7 +584,7 @@ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); if (ptep && pte_present(*ptep)) { - old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT, 0, + old = kvmppc_radix_update_pte(kvm, ptep, ~0UL, 0, gpa, shift); kvmppc_radix_tlbie_page(kvm, gpa, shift); if ((old & _PAGE_DIRTY) && memslot->dirty_bitmap) { diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 4d07fca5121c..9963f65c212b 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2441,6 +2441,7 @@ static void init_vcore_to_run(struct kvmppc_vcore *vc) vc->in_guest = 0; vc->napping_threads = 0; vc->conferring_threads = 0; + vc->tb_offset_applied = 0; } static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index bd63fa8a08b5..07ca1b2a7966 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -692,6 +692,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) 22: ld r8,VCORE_TB_OFFSET(r5) cmpdi r8,0 beq 37f + std r8, VCORE_TB_OFFSET_APPL(r5) mftb r6 /* current host timebase */ add r8,r8,r6 mtspr SPRN_TBU40,r8 /* update upper 40 bits */ @@ -940,18 +941,6 @@ FTR_SECTION_ELSE ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) 8: - /* - * Set the decrementer to the guest decrementer. - */ - ld r8,VCPU_DEC_EXPIRES(r4) - /* r8 is a host timebase value here, convert to guest TB */ - ld r5,HSTATE_KVM_VCORE(r13) - ld r6,VCORE_TB_OFFSET(r5) - add r8,r8,r6 - mftb r7 - subf r3,r7,r8 - mtspr SPRN_DEC,r3 - ld r5, VCPU_SPRG0(r4) ld r6, VCPU_SPRG1(r4) ld r7, VCPU_SPRG2(r4) @@ -1005,6 +994,18 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) mtspr SPRN_LPCR,r8 isync + /* + * Set the decrementer to the guest decrementer. + */ + ld r8,VCPU_DEC_EXPIRES(r4) + /* r8 is a host timebase value here, convert to guest TB */ + ld r5,HSTATE_KVM_VCORE(r13) + ld r6,VCORE_TB_OFFSET_APPL(r5) + add r8,r8,r6 + mftb r7 + subf r3,r7,r8 + mtspr SPRN_DEC,r3 + /* Check if HDEC expires soon */ mfspr r3, SPRN_HDEC EXTEND_HDEC(r3) @@ -1597,8 +1598,27 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX) guest_bypass: stw r12, STACK_SLOT_TRAP(r1) - mr r3, r12 + + /* Save DEC */ + /* Do this before kvmhv_commence_exit so we know TB is guest TB */ + ld r3, HSTATE_KVM_VCORE(r13) + mfspr r5,SPRN_DEC + mftb r6 + /* On P9, if the guest has large decr enabled, don't sign extend */ +BEGIN_FTR_SECTION + ld r4, VCORE_LPCR(r3) + andis. r4, r4, LPCR_LD@h + bne 16f +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) + extsw r5,r5 +16: add r5,r5,r6 + /* r5 is a guest timebase value here, convert to host TB */ + ld r4,VCORE_TB_OFFSET_APPL(r3) + subf r5,r4,r5 + std r5,VCPU_DEC_EXPIRES(r9) + /* Increment exit count, poke other threads to exit */ + mr r3, r12 bl kvmhv_commence_exit nop ld r9, HSTATE_KVM_VCPU(r13) @@ -1639,23 +1659,6 @@ guest_bypass: mtspr SPRN_PURR,r3 mtspr SPRN_SPURR,r4 - /* Save DEC */ - ld r3, HSTATE_KVM_VCORE(r13) - mfspr r5,SPRN_DEC - mftb r6 - /* On P9, if the guest has large decr enabled, don't sign extend */ -BEGIN_FTR_SECTION - ld r4, VCORE_LPCR(r3) - andis. r4, r4, LPCR_LD@h - bne 16f -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) - extsw r5,r5 -16: add r5,r5,r6 - /* r5 is a guest timebase value here, convert to host TB */ - ld r4,VCORE_TB_OFFSET(r3) - subf r5,r4,r5 - std r5,VCPU_DEC_EXPIRES(r9) - BEGIN_FTR_SECTION b 8f END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) @@ -1905,6 +1908,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) cmpwi cr2, r0, 0 beq cr2, 4f + /* + * Radix: do eieio; tlbsync; ptesync sequence in case we + * interrupted the guest between a tlbie and a ptesync. + */ + eieio + tlbsync + ptesync + /* Radix: Handle the case where the guest used an illegal PID */ LOAD_REG_ADDR(r4, mmu_base_pid) lwz r3, VCPU_GUEST_PID(r9) @@ -2017,9 +2028,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 27: /* Subtract timebase offset from timebase */ - ld r8,VCORE_TB_OFFSET(r5) + ld r8, VCORE_TB_OFFSET_APPL(r5) cmpdi r8,0 beq 17f + li r0, 0 + std r0, VCORE_TB_OFFSET_APPL(r5) mftb r6 /* current guest timebase */ subf r8,r8,r6 mtspr SPRN_TBU40,r8 /* update upper 40 bits */ @@ -2700,7 +2713,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) add r3, r3, r5 ld r4, HSTATE_KVM_VCPU(r13) ld r5, HSTATE_KVM_VCORE(r13) - ld r6, VCORE_TB_OFFSET(r5) + ld r6, VCORE_TB_OFFSET_APPL(r5) subf r3, r6, r3 /* convert to host TB value */ std r3, VCPU_DEC_EXPIRES(r4) @@ -2799,7 +2812,7 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0) /* Restore guest decrementer */ ld r3, VCPU_DEC_EXPIRES(r4) ld r5, HSTATE_KVM_VCORE(r13) - ld r6, VCORE_TB_OFFSET(r5) + ld r6, VCORE_TB_OFFSET_APPL(r5) add r3, r3, r6 /* convert host TB to guest TB value */ mftb r7 subf r3, r7, r3 @@ -3606,12 +3619,9 @@ kvmppc_fix_pmao: */ kvmhv_start_timing: ld r5, HSTATE_KVM_VCORE(r13) - lbz r6, VCORE_IN_GUEST(r5) - cmpwi r6, 0 - beq 5f /* if in guest, need to */ - ld r6, VCORE_TB_OFFSET(r5) /* subtract timebase offset */ -5: mftb r5 - subf r5, r6, r5 + ld r6, VCORE_TB_OFFSET_APPL(r5) + mftb r5 + subf r5, r6, r5 /* subtract current timebase offset */ std r3, VCPU_CUR_ACTIVITY(r4) std r5, VCPU_ACTIVITY_START(r4) blr @@ -3622,15 +3632,12 @@ kvmhv_start_timing: */ kvmhv_accumulate_time: ld r5, HSTATE_KVM_VCORE(r13) - lbz r8, VCORE_IN_GUEST(r5) - cmpwi r8, 0 - beq 4f /* if in guest, need to */ - ld r8, VCORE_TB_OFFSET(r5) /* subtract timebase offset */ -4: ld r5, VCPU_CUR_ACTIVITY(r4) + ld r8, VCORE_TB_OFFSET_APPL(r5) + ld r5, VCPU_CUR_ACTIVITY(r4) ld r6, VCPU_ACTIVITY_START(r4) std r3, VCPU_CUR_ACTIVITY(r4) mftb r7 - subf r7, r8, r7 + subf r7, r8, r7 /* subtract current timebase offset */ std r7, VCPU_ACTIVITY_START(r4) cmpdi r5, 0 beqlr diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c index c7a5deadd1cc..99c3620b40d9 100644 --- a/arch/powerpc/kvm/book3s_xive_template.c +++ b/arch/powerpc/kvm/book3s_xive_template.c @@ -11,6 +11,9 @@ #define XGLUE(a,b) a##b #define GLUE(a,b) XGLUE(a,b) +/* Dummy interrupt used when taking interrupts out of a queue in H_CPPR */ +#define XICS_DUMMY 1 + static void GLUE(X_PFX,ack_pending)(struct kvmppc_xive_vcpu *xc) { u8 cppr; @@ -205,6 +208,10 @@ skip_ipi: goto skip_ipi; } + /* If it's the dummy interrupt, continue searching */ + if (hirq == XICS_DUMMY) + goto skip_ipi; + /* If fetching, update queue pointers */ if (scan_type == scan_fetch) { q->idx = idx; @@ -385,9 +392,76 @@ static void GLUE(X_PFX,push_pending_to_hw)(struct kvmppc_xive_vcpu *xc) __x_writeb(prio, __x_tima + TM_SPC_SET_OS_PENDING); } +static void GLUE(X_PFX,scan_for_rerouted_irqs)(struct kvmppc_xive *xive, + struct kvmppc_xive_vcpu *xc) +{ + unsigned int prio; + + /* For each priority that is now masked */ + for (prio = xc->cppr; prio < KVMPPC_XIVE_Q_COUNT; prio++) { + struct xive_q *q = &xc->queues[prio]; + struct kvmppc_xive_irq_state *state; + struct kvmppc_xive_src_block *sb; + u32 idx, toggle, entry, irq, hw_num; + struct xive_irq_data *xd; + __be32 *qpage; + u16 src; + + idx = q->idx; + toggle = q->toggle; + qpage = READ_ONCE(q->qpage); + if (!qpage) + continue; + + /* For each interrupt in the queue */ + for (;;) { + entry = be32_to_cpup(qpage + idx); + + /* No more ? */ + if ((entry >> 31) == toggle) + break; + irq = entry & 0x7fffffff; + + /* Skip dummies and IPIs */ + if (irq == XICS_DUMMY || irq == XICS_IPI) + goto next; + sb = kvmppc_xive_find_source(xive, irq, &src); + if (!sb) + goto next; + state = &sb->irq_state[src]; + + /* Has it been rerouted ? */ + if (xc->server_num == state->act_server) + goto next; + + /* + * Allright, it *has* been re-routed, kill it from + * the queue. + */ + qpage[idx] = cpu_to_be32((entry & 0x80000000) | XICS_DUMMY); + + /* Find the HW interrupt */ + kvmppc_xive_select_irq(state, &hw_num, &xd); + + /* If it's not an LSI, set PQ to 11 the EOI will force a resend */ + if (!(xd->flags & XIVE_IRQ_FLAG_LSI)) + GLUE(X_PFX,esb_load)(xd, XIVE_ESB_SET_PQ_11); + + /* EOI the source */ + GLUE(X_PFX,source_eoi)(hw_num, xd); + + next: + idx = (idx + 1) & q->msk; + if (idx == 0) + toggle ^= 1; + } + } +} + X_STATIC int GLUE(X_PFX,h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr) { struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; + struct kvmppc_xive *xive = vcpu->kvm->arch.xive; u8 old_cppr; pr_devel("H_CPPR(cppr=%ld)\n", cppr); @@ -407,14 +481,34 @@ X_STATIC int GLUE(X_PFX,h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr) */ smp_mb(); - /* - * We are masking less, we need to look for pending things - * to deliver and set VP pending bits accordingly to trigger - * a new interrupt otherwise we might miss MFRR changes for - * which we have optimized out sending an IPI signal. - */ - if (cppr > old_cppr) + if (cppr > old_cppr) { + /* + * We are masking less, we need to look for pending things + * to deliver and set VP pending bits accordingly to trigger + * a new interrupt otherwise we might miss MFRR changes for + * which we have optimized out sending an IPI signal. + */ GLUE(X_PFX,push_pending_to_hw)(xc); + } else { + /* + * We are masking more, we need to check the queue for any + * interrupt that has been routed to another CPU, take + * it out (replace it with the dummy) and retrigger it. + * + * This is necessary since those interrupts may otherwise + * never be processed, at least not until this CPU restores + * its CPPR. + * + * This is in theory racy vs. HW adding new interrupts to + * the queue. In practice this works because the interesting + * cases are when the guest has done a set_xive() to move the + * interrupt away, which flushes the xive, followed by the + * target CPU doing a H_CPPR. So any new interrupt coming into + * the queue must still be routed to us and isn't a source + * of concern. + */ + GLUE(X_PFX,scan_for_rerouted_irqs)(xive, xc); + } /* Apply new CPPR */ xc->hw_cppr = cppr; diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 288fe4f0db4e..e1bcdc32a851 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -23,6 +23,7 @@ #include <asm/page.h> #include <asm/sections.h> #include <asm/setup.h> +#include <asm/security_features.h> #include <asm/firmware.h> struct fixup_entry { @@ -117,6 +118,120 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end) } #ifdef CONFIG_PPC_BOOK3S_64 +void do_stf_entry_barrier_fixups(enum stf_barrier_type types) +{ + unsigned int instrs[3], *dest; + long *start, *end; + int i; + + start = PTRRELOC(&__start___stf_entry_barrier_fixup), + end = PTRRELOC(&__stop___stf_entry_barrier_fixup); + + instrs[0] = 0x60000000; /* nop */ + instrs[1] = 0x60000000; /* nop */ + instrs[2] = 0x60000000; /* nop */ + + i = 0; + if (types & STF_BARRIER_FALLBACK) { + instrs[i++] = 0x7d4802a6; /* mflr r10 */ + instrs[i++] = 0x60000000; /* branch patched below */ + instrs[i++] = 0x7d4803a6; /* mtlr r10 */ + } else if (types & STF_BARRIER_EIEIO) { + instrs[i++] = 0x7e0006ac; /* eieio + bit 6 hint */ + } else if (types & STF_BARRIER_SYNC_ORI) { + instrs[i++] = 0x7c0004ac; /* hwsync */ + instrs[i++] = 0xe94d0000; /* ld r10,0(r13) */ + instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ + } + + for (i = 0; start < end; start++, i++) { + dest = (void *)start + *start; + + pr_devel("patching dest %lx\n", (unsigned long)dest); + + patch_instruction(dest, instrs[0]); + + if (types & STF_BARRIER_FALLBACK) + patch_branch(dest + 1, (unsigned long)&stf_barrier_fallback, + BRANCH_SET_LINK); + else + patch_instruction(dest + 1, instrs[1]); + + patch_instruction(dest + 2, instrs[2]); + } + + printk(KERN_DEBUG "stf-barrier: patched %d entry locations (%s barrier)\n", i, + (types == STF_BARRIER_NONE) ? "no" : + (types == STF_BARRIER_FALLBACK) ? "fallback" : + (types == STF_BARRIER_EIEIO) ? "eieio" : + (types == (STF_BARRIER_SYNC_ORI)) ? "hwsync" + : "unknown"); +} + +void do_stf_exit_barrier_fixups(enum stf_barrier_type types) +{ + unsigned int instrs[6], *dest; + long *start, *end; + int i; + + start = PTRRELOC(&__start___stf_exit_barrier_fixup), + end = PTRRELOC(&__stop___stf_exit_barrier_fixup); + + instrs[0] = 0x60000000; /* nop */ + instrs[1] = 0x60000000; /* nop */ + instrs[2] = 0x60000000; /* nop */ + instrs[3] = 0x60000000; /* nop */ + instrs[4] = 0x60000000; /* nop */ + instrs[5] = 0x60000000; /* nop */ + + i = 0; + if (types & STF_BARRIER_FALLBACK || types & STF_BARRIER_SYNC_ORI) { + if (cpu_has_feature(CPU_FTR_HVMODE)) { + instrs[i++] = 0x7db14ba6; /* mtspr 0x131, r13 (HSPRG1) */ + instrs[i++] = 0x7db04aa6; /* mfspr r13, 0x130 (HSPRG0) */ + } else { + instrs[i++] = 0x7db243a6; /* mtsprg 2,r13 */ + instrs[i++] = 0x7db142a6; /* mfsprg r13,1 */ + } + instrs[i++] = 0x7c0004ac; /* hwsync */ + instrs[i++] = 0xe9ad0000; /* ld r13,0(r13) */ + instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ + if (cpu_has_feature(CPU_FTR_HVMODE)) { + instrs[i++] = 0x7db14aa6; /* mfspr r13, 0x131 (HSPRG1) */ + } else { + instrs[i++] = 0x7db242a6; /* mfsprg r13,2 */ + } + } else if (types & STF_BARRIER_EIEIO) { + instrs[i++] = 0x7e0006ac; /* eieio + bit 6 hint */ + } + + for (i = 0; start < end; start++, i++) { + dest = (void *)start + *start; + + pr_devel("patching dest %lx\n", (unsigned long)dest); + + patch_instruction(dest, instrs[0]); + patch_instruction(dest + 1, instrs[1]); + patch_instruction(dest + 2, instrs[2]); + patch_instruction(dest + 3, instrs[3]); + patch_instruction(dest + 4, instrs[4]); + patch_instruction(dest + 5, instrs[5]); + } + printk(KERN_DEBUG "stf-barrier: patched %d exit locations (%s barrier)\n", i, + (types == STF_BARRIER_NONE) ? "no" : + (types == STF_BARRIER_FALLBACK) ? "fallback" : + (types == STF_BARRIER_EIEIO) ? "eieio" : + (types == (STF_BARRIER_SYNC_ORI)) ? "hwsync" + : "unknown"); +} + + +void do_stf_barrier_fixups(enum stf_barrier_type types) +{ + do_stf_entry_barrier_fixups(types); + do_stf_exit_barrier_fixups(types); +} + void do_rfi_flush_fixups(enum l1d_flush_type types) { unsigned int instrs[3], *dest; diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index ef8c9ce53a61..a6648ec99ca7 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -131,6 +131,7 @@ static void __init pnv_setup_arch(void) set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); pnv_setup_rfi_flush(); + setup_stf_barrier(); /* Initialize SMP */ pnv_smp_init(); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index b55ad4286dc7..fdb32e056ef4 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -710,6 +710,7 @@ static void __init pSeries_setup_arch(void) fwnmi_init(); pseries_setup_rfi_flush(); + setup_stf_barrier(); /* By default, only probe PCI (can be overridden by rtas_pci) */ pci_add_flags(PCI_PROBE_ONLY); diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 8961e3970901..969882b54266 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -578,7 +578,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) gpa = READ_ONCE(scb_o->itdba) & ~0xffUL; if (gpa && (scb_s->ecb & ECB_TE)) { - if (!(gpa & ~0x1fffU)) { + if (!(gpa & ~0x1fffUL)) { rc = set_validity_icpt(scb_s, 0x0080U); goto unpin; } diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 598461e24be3..92bf2f2e7cdd 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -495,6 +495,11 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, entry->ecx &= ~F(PKU); entry->edx &= kvm_cpuid_7_0_edx_x86_features; cpuid_mask(&entry->edx, CPUID_7_EDX); + /* + * We emulate ARCH_CAPABILITIES in software even + * if the host doesn't support it. + */ + entry->edx |= F(ARCH_CAPABILITIES); } else { entry->ebx = 0; entry->ecx = 0; diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 5708e951a5c6..46ff64da44ca 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1260,14 +1260,18 @@ static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result) } } -static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu) +static int kvm_hv_hypercall_complete(struct kvm_vcpu *vcpu, u64 result) { - struct kvm_run *run = vcpu->run; - - kvm_hv_hypercall_set_result(vcpu, run->hyperv.u.hcall.result); + kvm_hv_hypercall_set_result(vcpu, result); + ++vcpu->stat.hypercalls; return kvm_skip_emulated_instruction(vcpu); } +static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu) +{ + return kvm_hv_hypercall_complete(vcpu, vcpu->run->hyperv.u.hcall.result); +} + static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, bool fast, u64 param) { struct eventfd_ctx *eventfd; @@ -1350,7 +1354,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) /* Hypercall continuation is not supported yet */ if (rep_cnt || rep_idx) { ret = HV_STATUS_INVALID_HYPERCALL_CODE; - goto set_result; + goto out; } switch (code) { @@ -1381,9 +1385,8 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) break; } -set_result: - kvm_hv_hypercall_set_result(vcpu, ret); - return 1; +out: + return kvm_hv_hypercall_complete(vcpu, ret); } void kvm_hv_init_vm(struct kvm *kvm) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index b74c9c1405b9..3773c4625114 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1522,11 +1522,23 @@ static bool set_target_expiration(struct kvm_lapic *apic) static void advance_periodic_target_expiration(struct kvm_lapic *apic) { - apic->lapic_timer.tscdeadline += - nsec_to_cycles(apic->vcpu, apic->lapic_timer.period); + ktime_t now = ktime_get(); + u64 tscl = rdtsc(); + ktime_t delta; + + /* + * Synchronize both deadlines to the same time source or + * differences in the periods (caused by differences in the + * underlying clocks or numerical approximation errors) will + * cause the two to drift apart over time as the errors + * accumulate. + */ apic->lapic_timer.target_expiration = ktime_add_ns(apic->lapic_timer.target_expiration, apic->lapic_timer.period); + delta = ktime_sub(apic->lapic_timer.target_expiration, now); + apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) + + nsec_to_cycles(apic->vcpu, delta); } static void start_sw_period(struct kvm_lapic *apic) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 22a183aac1c6..71e7cda6d014 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6671,11 +6671,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) unsigned long nr, a0, a1, a2, a3, ret; int op_64_bit; - if (kvm_hv_hypercall_enabled(vcpu->kvm)) { - if (!kvm_hv_hypercall(vcpu)) - return 0; - goto out; - } + if (kvm_hv_hypercall_enabled(vcpu->kvm)) + return kvm_hv_hypercall(vcpu); nr = kvm_register_read(vcpu, VCPU_REGS_RAX); a0 = kvm_register_read(vcpu, VCPU_REGS_RBX); @@ -6696,7 +6693,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) if (kvm_x86_ops->get_cpl(vcpu) != 0) { ret = -KVM_EPERM; - goto out_error; + goto out; } switch (nr) { @@ -6716,12 +6713,11 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) ret = -KVM_ENOSYS; break; } -out_error: +out: if (!op_64_bit) ret = (u32)ret; kvm_register_write(vcpu, VCPU_REGS_RAX, ret); -out: ++vcpu->stat.hypercalls; return kvm_skip_emulated_instruction(vcpu); } @@ -7980,6 +7976,7 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { struct msr_data apic_base_msr; int mmu_reset_needed = 0; + int cpuid_update_needed = 0; int pending_vec, max_bits, idx; struct desc_ptr dt; int ret = -EINVAL; @@ -8018,8 +8015,10 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) vcpu->arch.cr0 = sregs->cr0; mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4; + cpuid_update_needed |= ((kvm_read_cr4(vcpu) ^ sregs->cr4) & + (X86_CR4_OSXSAVE | X86_CR4_PKE)); kvm_x86_ops->set_cr4(vcpu, sregs->cr4); - if (sregs->cr4 & (X86_CR4_OSXSAVE | X86_CR4_PKE)) + if (cpuid_update_needed) kvm_update_cpuid(vcpu); idx = srcu_read_lock(&vcpu->kvm->srcu); diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 6389c88b3500..738fb22978dd 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -334,6 +334,7 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0x9c07), board_ahci_mobile }, /* Lynx LP RAID */ { PCI_VDEVICE(INTEL, 0x9c0e), board_ahci_mobile }, /* Lynx LP RAID */ { PCI_VDEVICE(INTEL, 0x9c0f), board_ahci_mobile }, /* Lynx LP RAID */ + { PCI_VDEVICE(INTEL, 0x9dd3), board_ahci_mobile }, /* Cannon Lake PCH-LP AHCI */ { PCI_VDEVICE(INTEL, 0x1f22), board_ahci }, /* Avoton AHCI */ { PCI_VDEVICE(INTEL, 0x1f23), board_ahci }, /* Avoton AHCI */ { PCI_VDEVICE(INTEL, 0x1f24), board_ahci }, /* Avoton RAID */ diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 68596bd4cf06..346b163f6e89 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4493,6 +4493,10 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { /* https://bugzilla.kernel.org/show_bug.cgi?id=15573 */ { "C300-CTFDDAC128MAG", "0001", ATA_HORKAGE_NONCQ, }, + /* Some Sandisk SSDs lock up hard with NCQ enabled. Reported on + SD7SN6S256G and SD8SN8U256G */ + { "SanDisk SD[78]SN*G", NULL, ATA_HORKAGE_NONCQ, }, + /* devices which puke on READ_NATIVE_MAX */ { "HDS724040KLSA80", "KFAOA20N", ATA_HORKAGE_BROKEN_HPA, }, { "WDC WD3200JD-00KLB0", "WD-WCAMR1130137", ATA_HORKAGE_BROKEN_HPA }, @@ -4549,13 +4553,16 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { ATA_HORKAGE_ZERO_AFTER_TRIM | ATA_HORKAGE_NOLPM, }, - /* This specific Samsung model/firmware-rev does not handle LPM well */ + /* These specific Samsung models/firmware-revs do not handle LPM well */ { "SAMSUNG MZMPC128HBFU-000MV", "CXM14M1Q", ATA_HORKAGE_NOLPM, }, + { "SAMSUNG SSD PM830 mSATA *", "CXM13D1Q", ATA_HORKAGE_NOLPM, }, /* Sandisk devices which are known to not handle LPM well */ { "SanDisk SD7UB3Q*G1001", NULL, ATA_HORKAGE_NOLPM, }, /* devices that don't properly handle queued TRIM commands */ + { "Micron_M500IT_*", "MU01", ATA_HORKAGE_NO_NCQ_TRIM | + ATA_HORKAGE_ZERO_AFTER_TRIM, }, { "Micron_M500_*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM, }, { "Crucial_CT*M500*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | diff --git a/drivers/base/node.c b/drivers/base/node.c index 7a3a580821e0..a5e821d09656 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -490,7 +490,8 @@ int unregister_mem_sect_under_nodes(struct memory_block *mem_blk, return 0; } -int link_mem_sections(int nid, unsigned long start_pfn, unsigned long nr_pages) +int link_mem_sections(int nid, unsigned long start_pfn, unsigned long nr_pages, + bool check_nid) { unsigned long end_pfn = start_pfn + nr_pages; unsigned long pfn; @@ -514,7 +515,7 @@ int link_mem_sections(int nid, unsigned long start_pfn, unsigned long nr_pages) mem_blk = find_memory_block_hinted(mem_sect, mem_blk); - ret = register_mem_sect_under_node(mem_blk, nid, true); + ret = register_mem_sect_under_node(mem_blk, nid, check_nid); if (!err) err = ret; diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 02a497e7c785..e5e067091572 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1923,10 +1923,8 @@ static int device_prepare(struct device *dev, pm_message_t state) dev->power.wakeup_path = false; - if (dev->power.no_pm_callbacks) { - ret = 1; /* Let device go direct_complete */ + if (dev->power.no_pm_callbacks) goto unlock; - } if (dev->pm_domain) callback = dev->pm_domain->ops.prepare; @@ -1960,7 +1958,8 @@ unlock: */ spin_lock_irq(&dev->power.lock); dev->power.direct_complete = state.event == PM_EVENT_SUSPEND && - pm_runtime_suspended(dev) && ret > 0 && + ((pm_runtime_suspended(dev) && ret > 0) || + dev->power.no_pm_callbacks) && !dev_pm_test_driver_flags(dev, DPM_FLAG_NEVER_SKIP); spin_unlock_irq(&dev->power.lock); return 0; diff --git a/drivers/bcma/driver_mips.c b/drivers/bcma/driver_mips.c index f040aba48d50..27e9686b6d3a 100644 --- a/drivers/bcma/driver_mips.c +++ b/drivers/bcma/driver_mips.c @@ -184,7 +184,7 @@ static void bcma_core_mips_print_irq(struct bcma_device *dev, unsigned int irq) { int i; static const char *irq_name[] = {"2(S)", "3", "4", "5", "6", "D", "I"}; - char interrupts[20]; + char interrupts[25]; char *ints = interrupts; for (i = 0; i < ARRAY_SIZE(irq_name); i++) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 5d4e31655d96..55cf554bc914 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1068,6 +1068,7 @@ static int loop_clr_fd(struct loop_device *lo) if (bdev) { bdput(bdev); invalidate_bdev(bdev); + bdev->bd_inode->i_mapping->wb_err = 0; } set_capacity(lo->lo_disk, 0); loop_sysfs_exit(lo); diff --git a/drivers/gpu/drm/rcar-du/rcar_lvds.c b/drivers/gpu/drm/rcar-du/rcar_lvds.c index 3d2d3bbd1342..155ad840f3c5 100644 --- a/drivers/gpu/drm/rcar-du/rcar_lvds.c +++ b/drivers/gpu/drm/rcar-du/rcar_lvds.c @@ -88,6 +88,9 @@ static int rcar_lvds_connector_atomic_check(struct drm_connector *connector, const struct drm_display_mode *panel_mode; struct drm_crtc_state *crtc_state; + if (!state->crtc) + return 0; + if (list_empty(&connector->modes)) { dev_dbg(lvds->dev, "connector: empty modes list\n"); return -EINVAL; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 70e1a8820a7c..8b770a8e02cd 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -1278,8 +1278,6 @@ static void vmw_master_drop(struct drm_device *dev, dev_priv->active_master = &dev_priv->fbdev_master; ttm_lock_set_kill(&dev_priv->fbdev_master.lock, false, SIGTERM); ttm_vt_unlock(&dev_priv->fbdev_master.lock); - - vmw_fb_refresh(dev_priv); } /** @@ -1483,7 +1481,6 @@ static int vmw_pm_freeze(struct device *kdev) vmw_kms_resume(dev); if (dev_priv->enable_fb) vmw_fb_on(dev_priv); - vmw_fb_refresh(dev_priv); return -EBUSY; } @@ -1523,8 +1520,6 @@ static int vmw_pm_restore(struct device *kdev) if (dev_priv->enable_fb) vmw_fb_on(dev_priv); - vmw_fb_refresh(dev_priv); - return 0; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index f34f368c1a2e..5fcbe1620d50 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -910,7 +910,6 @@ int vmw_fb_init(struct vmw_private *vmw_priv); int vmw_fb_close(struct vmw_private *dev_priv); int vmw_fb_off(struct vmw_private *vmw_priv); int vmw_fb_on(struct vmw_private *vmw_priv); -void vmw_fb_refresh(struct vmw_private *vmw_priv); /** * Kernel modesetting - vmwgfx_kms.c diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c index ba0cdb743c3e..54e300365a5c 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c @@ -866,21 +866,13 @@ int vmw_fb_on(struct vmw_private *vmw_priv) spin_lock_irqsave(&par->dirty.lock, flags); par->dirty.active = true; spin_unlock_irqrestore(&par->dirty.lock, flags); - - return 0; -} -/** - * vmw_fb_refresh - Refresh fb display - * - * @vmw_priv: Pointer to device private - * - * Call into kms to show the fbdev display(s). - */ -void vmw_fb_refresh(struct vmw_private *vmw_priv) -{ - if (!vmw_priv->fb_info) - return; + /* + * Need to reschedule a dirty update, because otherwise that's + * only done in dirty_mark() if the previous coalesced + * dirty region was empty. + */ + schedule_delayed_work(&par->local_work, 0); - vmw_fb_set_par(vmw_priv->fb_info); + return 0; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c index cdff99211602..21d746bdc922 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c @@ -329,8 +329,6 @@ int vmw_host_get_guestinfo(const char *guest_info_param, struct rpc_channel channel; char *msg, *reply = NULL; size_t reply_len = 0; - int ret = 0; - if (!vmw_msg_enabled) return -ENODEV; @@ -344,15 +342,14 @@ int vmw_host_get_guestinfo(const char *guest_info_param, return -ENOMEM; } - if (vmw_open_channel(&channel, RPCI_PROTOCOL_NUM) || - vmw_send_msg(&channel, msg) || - vmw_recv_msg(&channel, (void *) &reply, &reply_len) || - vmw_close_channel(&channel)) { - DRM_ERROR("Failed to get %s", guest_info_param); + if (vmw_open_channel(&channel, RPCI_PROTOCOL_NUM)) + goto out_open; - ret = -EINVAL; - } + if (vmw_send_msg(&channel, msg) || + vmw_recv_msg(&channel, (void *) &reply, &reply_len)) + goto out_msg; + vmw_close_channel(&channel); if (buffer && reply && reply_len > 0) { /* Remove reply code, which are the first 2 characters of * the reply @@ -369,7 +366,17 @@ int vmw_host_get_guestinfo(const char *guest_info_param, kfree(reply); kfree(msg); - return ret; + return 0; + +out_msg: + vmw_close_channel(&channel); + kfree(reply); +out_open: + *length = 0; + kfree(msg); + DRM_ERROR("Failed to get %s", guest_info_param); + + return -EINVAL; } @@ -400,15 +407,22 @@ int vmw_host_log(const char *log) return -ENOMEM; } - if (vmw_open_channel(&channel, RPCI_PROTOCOL_NUM) || - vmw_send_msg(&channel, msg) || - vmw_close_channel(&channel)) { - DRM_ERROR("Failed to send log\n"); + if (vmw_open_channel(&channel, RPCI_PROTOCOL_NUM)) + goto out_open; - ret = -EINVAL; - } + if (vmw_send_msg(&channel, msg)) + goto out_msg; + vmw_close_channel(&channel); kfree(msg); - return ret; + return 0; + +out_msg: + vmw_close_channel(&channel); +out_open: + kfree(msg); + DRM_ERROR("Failed to send log\n"); + + return -EINVAL; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.h b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.h index 557a033fb610..8545488aa0cf 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.h @@ -135,17 +135,24 @@ #else -/* In the 32-bit version of this macro, we use "m" because there is no - * more register left for bp +/* + * In the 32-bit version of this macro, we store bp in a memory location + * because we've ran out of registers. + * Now we can't reference that memory location while we've modified + * %esp or %ebp, so we first push it on the stack, just before we push + * %ebp, and then when we need it we read it from the stack where we + * just pushed it. */ #define VMW_PORT_HB_OUT(cmd, in_ecx, in_si, in_di, \ port_num, magic, bp, \ eax, ebx, ecx, edx, si, di) \ ({ \ - asm volatile ("push %%ebp;" \ - "mov %12, %%ebp;" \ + asm volatile ("push %12;" \ + "push %%ebp;" \ + "mov 0x04(%%esp), %%ebp;" \ "rep outsb;" \ - "pop %%ebp;" : \ + "pop %%ebp;" \ + "add $0x04, %%esp;" : \ "=a"(eax), \ "=b"(ebx), \ "=c"(ecx), \ @@ -167,10 +174,12 @@ port_num, magic, bp, \ eax, ebx, ecx, edx, si, di) \ ({ \ - asm volatile ("push %%ebp;" \ - "mov %12, %%ebp;" \ + asm volatile ("push %12;" \ + "push %%ebp;" \ + "mov 0x04(%%esp), %%ebp;" \ "rep insb;" \ - "pop %%ebp" : \ + "pop %%ebp;" \ + "add $0x04, %%esp;" : \ "=a"(eax), \ "=b"(ebx), \ "=c"(ecx), \ diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 9a4e899d94b3..2b6c9b516070 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -119,7 +119,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, umem->length = size; umem->address = addr; umem->page_shift = PAGE_SHIFT; - umem->pid = get_task_pid(current, PIDTYPE_PID); /* * We ask for writable memory if any of the following * access flags are set. "Local write" and "remote write" @@ -132,7 +131,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_MW_BIND)); if (access & IB_ACCESS_ON_DEMAND) { - put_pid(umem->pid); ret = ib_umem_odp_get(context, umem, access); if (ret) { kfree(umem); @@ -148,7 +146,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, page_list = (struct page **) __get_free_page(GFP_KERNEL); if (!page_list) { - put_pid(umem->pid); kfree(umem); return ERR_PTR(-ENOMEM); } @@ -231,7 +228,6 @@ out: if (ret < 0) { if (need_release) __ib_umem_release(context->device, umem, 0); - put_pid(umem->pid); kfree(umem); } else current->mm->pinned_vm = locked; @@ -274,8 +270,7 @@ void ib_umem_release(struct ib_umem *umem) __ib_umem_release(umem->context->device, umem, 1); - task = get_pid_task(umem->pid, PIDTYPE_PID); - put_pid(umem->pid); + task = get_pid_task(umem->context->tgid, PIDTYPE_PID); if (!task) goto out; mm = get_task_mm(task); diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index e90f2fd8dc16..1445918e3239 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -489,10 +489,10 @@ struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc) err_dereg_mem: dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, mhp->attr.pbl_addr, mhp->dereg_skb, mhp->wr_waitp); -err_free_wr_wait: - c4iw_put_wr_wait(mhp->wr_waitp); err_free_skb: kfree_skb(mhp->dereg_skb); +err_free_wr_wait: + c4iw_put_wr_wait(mhp->wr_waitp); err_free_mhp: kfree(mhp); return ERR_PTR(ret); diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index e6a60fa59f2b..e6bdd0c1e80a 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -5944,6 +5944,7 @@ static void is_sendctxt_err_int(struct hfi1_devdata *dd, u64 status; u32 sw_index; int i = 0; + unsigned long irq_flags; sw_index = dd->hw_to_sw[hw_context]; if (sw_index >= dd->num_send_contexts) { @@ -5953,10 +5954,12 @@ static void is_sendctxt_err_int(struct hfi1_devdata *dd, return; } sci = &dd->send_contexts[sw_index]; + spin_lock_irqsave(&dd->sc_lock, irq_flags); sc = sci->sc; if (!sc) { dd_dev_err(dd, "%s: context %u(%u): no sc?\n", __func__, sw_index, hw_context); + spin_unlock_irqrestore(&dd->sc_lock, irq_flags); return; } @@ -5978,6 +5981,7 @@ static void is_sendctxt_err_int(struct hfi1_devdata *dd, */ if (sc->type != SC_USER) queue_work(dd->pport->hfi1_wq, &sc->halt_work); + spin_unlock_irqrestore(&dd->sc_lock, irq_flags); /* * Update the counters for the corresponding status bits. diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 14734d0d0b76..3a485f50fede 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -377,6 +377,7 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev, hr_cq->set_ci_db = hr_cq->db.db_record; *hr_cq->set_ci_db = 0; + hr_cq->db_en = 1; } /* Init mmt table and write buff address to mtt table */ diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 47e1b6ac1e1a..8013d69c5ac4 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -722,6 +722,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev) free_mr->mr_free_pd = to_hr_pd(pd); free_mr->mr_free_pd->ibpd.device = &hr_dev->ib_dev; free_mr->mr_free_pd->ibpd.uobject = NULL; + free_mr->mr_free_pd->ibpd.__internal_mr = NULL; atomic_set(&free_mr->mr_free_pd->ibpd.usecnt, 0); attr.qp_access_flags = IB_ACCESS_REMOTE_WRITE; @@ -1036,7 +1037,7 @@ static void hns_roce_v1_mr_free_work_fn(struct work_struct *work) do { ret = hns_roce_v1_poll_cq(&mr_free_cq->ib_cq, ne, wc); - if (ret < 0) { + if (ret < 0 && hr_qp) { dev_err(dev, "(qp:0x%lx) starts, Poll cqe failed(%d) for mr 0x%x free! Remain %d cqe\n", hr_qp->qpn, ret, hr_mr->key, ne); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 25916e8522ed..1f0965bb64ee 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -142,8 +142,8 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, unsigned long flags; unsigned int ind; void *wqe = NULL; - u32 tmp_len = 0; bool loopback; + u32 tmp_len; int ret = 0; u8 *smac; int nreq; @@ -189,6 +189,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, owner_bit = ~(((qp->sq.head + nreq) >> ilog2(qp->sq.wqe_cnt)) & 0x1); + tmp_len = 0; /* Corresponding to the QP type, wqe process separately */ if (ibqp->qp_type == IB_QPT_GSI) { @@ -547,16 +548,20 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, } if (i < hr_qp->rq.max_gs) { - dseg[i].lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY); - dseg[i].addr = 0; + dseg->lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY); + dseg->addr = 0; } /* rq support inline data */ - sge_list = hr_qp->rq_inl_buf.wqe_list[ind].sg_list; - hr_qp->rq_inl_buf.wqe_list[ind].sge_cnt = (u32)wr->num_sge; - for (i = 0; i < wr->num_sge; i++) { - sge_list[i].addr = (void *)(u64)wr->sg_list[i].addr; - sge_list[i].len = wr->sg_list[i].length; + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) { + sge_list = hr_qp->rq_inl_buf.wqe_list[ind].sg_list; + hr_qp->rq_inl_buf.wqe_list[ind].sge_cnt = + (u32)wr->num_sge; + for (i = 0; i < wr->num_sge; i++) { + sge_list[i].addr = + (void *)(u64)wr->sg_list[i].addr; + sge_list[i].len = wr->sg_list[i].length; + } } hr_qp->rq.wrid[ind] = wr->wr_id; @@ -613,6 +618,8 @@ static void hns_roce_free_cmq_desc(struct hns_roce_dev *hr_dev, dma_unmap_single(hr_dev->dev, ring->desc_dma_addr, ring->desc_num * sizeof(struct hns_roce_cmq_desc), DMA_BIDIRECTIONAL); + + ring->desc_dma_addr = 0; kfree(ring->desc); } @@ -1081,6 +1088,7 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) if (ret) { dev_err(hr_dev->dev, "Configure global param fail, ret = %d.\n", ret); + return ret; } /* Get pf resource owned by every pf */ @@ -1372,6 +1380,8 @@ static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S, mr->type == MR_TYPE_MR ? 0 : 1); + roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_INNER_PA_VLD_S, + 1); mpt_entry->byte_12_mw_pa = cpu_to_le32(mpt_entry->byte_12_mw_pa); mpt_entry->len_l = cpu_to_le32(lower_32_bits(mr->size)); @@ -2169,6 +2179,7 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp, struct hns_roce_v2_qp_context *context, struct hns_roce_v2_qp_context *qpc_mask) { + struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); /* @@ -2281,7 +2292,8 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp, context->rq_db_record_addr = hr_qp->rdb.dma >> 32; qpc_mask->rq_db_record_addr = 0; - roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQIE_S, 1); + roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQIE_S, + (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) ? 1 : 0); roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQIE_S, 0); roce_set_field(context->byte_80_rnr_rx_cqn, V2_QPC_BYTE_80_RX_CQN_M, @@ -4703,6 +4715,8 @@ static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = { {0, } }; +MODULE_DEVICE_TABLE(pci, hns_roce_hw_v2_pci_tbl); + static int hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev, struct hnae3_handle *handle) { diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 9d48bc07a9e6..96fb6a9ed93c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -199,7 +199,7 @@ static int hns_roce_query_device(struct ib_device *ib_dev, memset(props, 0, sizeof(*props)); - props->sys_image_guid = cpu_to_be32(hr_dev->sys_image_guid); + props->sys_image_guid = cpu_to_be64(hr_dev->sys_image_guid); props->max_mr_size = (u64)(~(0ULL)); props->page_size_cap = hr_dev->caps.page_size_cap; props->vendor_id = hr_dev->vendor_id; diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index d4aad34c21e2..baaf906f7c2e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -660,6 +660,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, goto err_rq_sge_list; } *hr_qp->rdb.db_record = 0; + hr_qp->rdb_en = 1; } /* Allocate QP buf */ @@ -955,7 +956,14 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, } if (cur_state == new_state && cur_state == IB_QPS_RESET) { - ret = 0; + if (hr_dev->caps.min_wqes) { + ret = -EPERM; + dev_err(dev, "cur_state=%d new_state=%d\n", cur_state, + new_state); + } else { + ret = 0; + } + goto out; } diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h index d5d8c1be345a..2f2b4426ded7 100644 --- a/drivers/infiniband/hw/i40iw/i40iw.h +++ b/drivers/infiniband/hw/i40iw/i40iw.h @@ -207,6 +207,7 @@ struct i40iw_msix_vector { u32 irq; u32 cpu_affinity; u32 ceq_id; + cpumask_t mask; }; struct l2params_work { diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index 4cfa8f4647e2..f7c6fd9ff6e2 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -2093,7 +2093,7 @@ static int i40iw_addr_resolve_neigh_ipv6(struct i40iw_device *iwdev, if (netif_is_bond_slave(netdev)) netdev = netdev_master_upper_dev_get(netdev); - neigh = dst_neigh_lookup(dst, &dst_addr); + neigh = dst_neigh_lookup(dst, dst_addr.sin6_addr.in6_u.u6_addr32); rcu_read_lock(); if (neigh) { diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c index 6139836fb533..c9f62ca7643c 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_hw.c +++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c @@ -331,7 +331,7 @@ void i40iw_process_aeq(struct i40iw_device *iwdev) switch (info->ae_id) { case I40IW_AE_LLP_FIN_RECEIVED: if (qp->term_flags) - continue; + break; if (atomic_inc_return(&iwqp->close_timer_started) == 1) { iwqp->hw_tcp_state = I40IW_TCP_STATE_CLOSE_WAIT; if ((iwqp->hw_tcp_state == I40IW_TCP_STATE_CLOSE_WAIT) && @@ -360,7 +360,7 @@ void i40iw_process_aeq(struct i40iw_device *iwdev) break; case I40IW_AE_LLP_CONNECTION_RESET: if (atomic_read(&iwqp->close_timer_started)) - continue; + break; i40iw_cm_disconn(iwqp); break; case I40IW_AE_QP_SUSPEND_COMPLETE: diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c index 9cd0d3ef9057..05001e6da1f8 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_main.c +++ b/drivers/infiniband/hw/i40iw/i40iw_main.c @@ -687,7 +687,6 @@ static enum i40iw_status_code i40iw_configure_ceq_vector(struct i40iw_device *iw struct i40iw_msix_vector *msix_vec) { enum i40iw_status_code status; - cpumask_t mask; if (iwdev->msix_shared && !ceq_id) { tasklet_init(&iwdev->dpc_tasklet, i40iw_dpc, (unsigned long)iwdev); @@ -697,9 +696,9 @@ static enum i40iw_status_code i40iw_configure_ceq_vector(struct i40iw_device *iw status = request_irq(msix_vec->irq, i40iw_ceq_handler, 0, "CEQ", iwceq); } - cpumask_clear(&mask); - cpumask_set_cpu(msix_vec->cpu_affinity, &mask); - irq_set_affinity_hint(msix_vec->irq, &mask); + cpumask_clear(&msix_vec->mask); + cpumask_set_cpu(msix_vec->cpu_affinity, &msix_vec->mask); + irq_set_affinity_hint(msix_vec->irq, &msix_vec->mask); if (status) { i40iw_pr_err("ceq irq config fail\n"); diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 40e4f5ab2b46..68679ad4c6da 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -394,6 +394,7 @@ static struct i40iw_pbl *i40iw_get_pbl(unsigned long va, list_for_each_entry(iwpbl, pbl_list, list) { if (iwpbl->user_base == va) { + iwpbl->on_list = false; list_del(&iwpbl->list); return iwpbl; } @@ -614,6 +615,7 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, return ERR_PTR(-ENOMEM); iwqp = (struct i40iw_qp *)mem; + iwqp->allocated_buffer = mem; qp = &iwqp->sc_qp; qp->back_qp = (void *)iwqp; qp->push_idx = I40IW_INVALID_PUSH_PAGE_INDEX; @@ -642,7 +644,6 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, goto error; } - iwqp->allocated_buffer = mem; iwqp->iwdev = iwdev; iwqp->iwpd = iwpd; iwqp->ibqp.qp_num = qp_num; @@ -1898,6 +1899,7 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd, goto error; spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags); list_add_tail(&iwpbl->list, &ucontext->qp_reg_mem_list); + iwpbl->on_list = true; spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags); break; case IW_MEMREG_TYPE_CQ: @@ -1908,6 +1910,7 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd, spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags); list_add_tail(&iwpbl->list, &ucontext->cq_reg_mem_list); + iwpbl->on_list = true; spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags); break; case IW_MEMREG_TYPE_MEM: @@ -2045,14 +2048,18 @@ static void i40iw_del_memlist(struct i40iw_mr *iwmr, switch (iwmr->type) { case IW_MEMREG_TYPE_CQ: spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags); - if (!list_empty(&ucontext->cq_reg_mem_list)) + if (iwpbl->on_list) { + iwpbl->on_list = false; list_del(&iwpbl->list); + } spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags); break; case IW_MEMREG_TYPE_QP: spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags); - if (!list_empty(&ucontext->qp_reg_mem_list)) + if (iwpbl->on_list) { + iwpbl->on_list = false; list_del(&iwpbl->list); + } spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags); break; default: diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.h b/drivers/infiniband/hw/i40iw/i40iw_verbs.h index 9067443cd311..76cf173377ab 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.h +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.h @@ -78,6 +78,7 @@ struct i40iw_pbl { }; bool pbl_allocated; + bool on_list; u64 user_base; struct i40iw_pble_alloc pble_alloc; struct i40iw_mr *iwmr; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index b4d8ff8ab807..69716a7ea993 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2416,7 +2416,7 @@ static void set_proto(void *outer_c, void *outer_v, u8 mask, u8 val) MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val); } -static void set_flow_label(void *misc_c, void *misc_v, u8 mask, u8 val, +static void set_flow_label(void *misc_c, void *misc_v, u32 mask, u32 val, bool inner) { if (inner) { diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 87b7c1be2a11..2193dc1765fb 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -484,11 +484,6 @@ static int qp_has_rq(struct ib_qp_init_attr *attr) return 1; } -static int first_med_bfreg(void) -{ - return 1; -} - enum { /* this is the first blue flame register in the array of bfregs assigned * to a processes. Since we do not use it for blue flame but rather @@ -514,6 +509,12 @@ static int num_med_bfreg(struct mlx5_ib_dev *dev, return n >= 0 ? n : 0; } +static int first_med_bfreg(struct mlx5_ib_dev *dev, + struct mlx5_bfreg_info *bfregi) +{ + return num_med_bfreg(dev, bfregi) ? 1 : -ENOMEM; +} + static int first_hi_bfreg(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi) { @@ -541,10 +542,13 @@ static int alloc_high_class_bfreg(struct mlx5_ib_dev *dev, static int alloc_med_class_bfreg(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi) { - int minidx = first_med_bfreg(); + int minidx = first_med_bfreg(dev, bfregi); int i; - for (i = first_med_bfreg(); i < first_hi_bfreg(dev, bfregi); i++) { + if (minidx < 0) + return minidx; + + for (i = minidx; i < first_hi_bfreg(dev, bfregi); i++) { if (bfregi->count[i] < bfregi->count[minidx]) minidx = i; if (!bfregi->count[minidx]) diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 7d3763b2e01c..3f9afc02d166 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -401,49 +401,47 @@ int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) { struct qedr_ucontext *ucontext = get_qedr_ucontext(context); struct qedr_dev *dev = get_qedr_dev(context->device); - unsigned long vm_page = vma->vm_pgoff << PAGE_SHIFT; - u64 unmapped_db = dev->db_phys_addr; + unsigned long phys_addr = vma->vm_pgoff << PAGE_SHIFT; unsigned long len = (vma->vm_end - vma->vm_start); - int rc = 0; - bool found; + unsigned long dpi_start; + + dpi_start = dev->db_phys_addr + (ucontext->dpi * ucontext->dpi_size); DP_DEBUG(dev, QEDR_MSG_INIT, - "qedr_mmap called vm_page=0x%lx vm_pgoff=0x%lx unmapped_db=0x%llx db_size=%x, len=%lx\n", - vm_page, vma->vm_pgoff, unmapped_db, dev->db_size, len); - if (vma->vm_start & (PAGE_SIZE - 1)) { - DP_ERR(dev, "Vma_start not page aligned = %ld\n", - vma->vm_start); + "mmap invoked with vm_start=0x%pK, vm_end=0x%pK,vm_pgoff=0x%pK; dpi_start=0x%pK dpi_size=0x%x\n", + (void *)vma->vm_start, (void *)vma->vm_end, + (void *)vma->vm_pgoff, (void *)dpi_start, ucontext->dpi_size); + + if ((vma->vm_start & (PAGE_SIZE - 1)) || (len & (PAGE_SIZE - 1))) { + DP_ERR(dev, + "failed mmap, adrresses must be page aligned: start=0x%pK, end=0x%pK\n", + (void *)vma->vm_start, (void *)vma->vm_end); return -EINVAL; } - found = qedr_search_mmap(ucontext, vm_page, len); - if (!found) { - DP_ERR(dev, "Vma_pgoff not found in mapped array = %ld\n", + if (!qedr_search_mmap(ucontext, phys_addr, len)) { + DP_ERR(dev, "failed mmap, vm_pgoff=0x%lx is not authorized\n", vma->vm_pgoff); return -EINVAL; } - DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping doorbell bar\n"); - - if ((vm_page >= unmapped_db) && (vm_page <= (unmapped_db + - dev->db_size))) { - DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping doorbell bar\n"); - if (vma->vm_flags & VM_READ) { - DP_ERR(dev, "Trying to map doorbell bar for read\n"); - return -EPERM; - } - - vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + if (phys_addr < dpi_start || + ((phys_addr + len) > (dpi_start + ucontext->dpi_size))) { + DP_ERR(dev, + "failed mmap, pages are outside of dpi; page address=0x%pK, dpi_start=0x%pK, dpi_size=0x%x\n", + (void *)phys_addr, (void *)dpi_start, + ucontext->dpi_size); + return -EINVAL; + } - rc = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, - PAGE_SIZE, vma->vm_page_prot); - } else { - DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping chains\n"); - rc = remap_pfn_range(vma, vma->vm_start, - vma->vm_pgoff, len, vma->vm_page_prot); + if (vma->vm_flags & VM_READ) { + DP_ERR(dev, "failed mmap, cannot map doorbell bar for read\n"); + return -EINVAL; } - DP_DEBUG(dev, QEDR_MSG_INIT, "qedr_mmap return code: %d\n", rc); - return rc; + + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + return io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, len, + vma->vm_page_prot); } struct ib_pd *qedr_alloc_pd(struct ib_device *ibdev, diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 2cb52fd48cf1..73a00a1c06f6 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -761,7 +761,6 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, struct ib_send_wr *wr, unsigned int mask; unsigned int length = 0; int i; - int must_sched; while (wr) { mask = wr_opcode_mask(wr->opcode, qp); @@ -791,14 +790,7 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, struct ib_send_wr *wr, wr = wr->next; } - /* - * Must sched in case of GSI QP because ib_send_mad() hold irq lock, - * and the requester call ip_local_out_sk() that takes spin_lock_bh. - */ - must_sched = (qp_type(qp) == IB_QPT_GSI) || - (queue_count(qp->sq.queue) > 1); - - rxe_run_task(&qp->req.task, must_sched); + rxe_run_task(&qp->req.task, 1); if (unlikely(qp->req.state == QP_STATE_ERROR)) rxe_run_task(&qp->comp.task, 1); diff --git a/drivers/infiniband/ulp/srpt/Kconfig b/drivers/infiniband/ulp/srpt/Kconfig index fb8b7182f05e..25bf6955b6d0 100644 --- a/drivers/infiniband/ulp/srpt/Kconfig +++ b/drivers/infiniband/ulp/srpt/Kconfig @@ -1,6 +1,6 @@ config INFINIBAND_SRPT tristate "InfiniBand SCSI RDMA Protocol target support" - depends on INFINIBAND && INFINIBAND_ADDR_TRANS && TARGET_CORE + depends on INFINIBAND_ADDR_TRANS && TARGET_CORE ---help--- Support for the SCSI RDMA Protocol (SRP) Target driver. The diff --git a/drivers/isdn/hardware/eicon/diva.c b/drivers/isdn/hardware/eicon/diva.c index 944a7f338099..1b25d8bc153a 100644 --- a/drivers/isdn/hardware/eicon/diva.c +++ b/drivers/isdn/hardware/eicon/diva.c @@ -388,10 +388,10 @@ void divasa_xdi_driver_unload(void) ** Receive and process command from user mode utility */ void *diva_xdi_open_adapter(void *os_handle, const void __user *src, - int length, + int length, void *mptr, divas_xdi_copy_from_user_fn_t cp_fn) { - diva_xdi_um_cfg_cmd_t msg; + diva_xdi_um_cfg_cmd_t *msg = (diva_xdi_um_cfg_cmd_t *)mptr; diva_os_xdi_adapter_t *a = NULL; diva_os_spin_lock_magic_t old_irql; struct list_head *tmp; @@ -401,21 +401,21 @@ void *diva_xdi_open_adapter(void *os_handle, const void __user *src, length, sizeof(diva_xdi_um_cfg_cmd_t))) return NULL; } - if ((*cp_fn) (os_handle, &msg, src, sizeof(msg)) <= 0) { + if ((*cp_fn) (os_handle, msg, src, sizeof(*msg)) <= 0) { DBG_ERR(("A: A(?) open, write error")) return NULL; } diva_os_enter_spin_lock(&adapter_lock, &old_irql, "open_adapter"); list_for_each(tmp, &adapter_queue) { a = list_entry(tmp, diva_os_xdi_adapter_t, link); - if (a->controller == (int)msg.adapter) + if (a->controller == (int)msg->adapter) break; a = NULL; } diva_os_leave_spin_lock(&adapter_lock, &old_irql, "open_adapter"); if (!a) { - DBG_ERR(("A: A(%d) open, adapter not found", msg.adapter)) + DBG_ERR(("A: A(%d) open, adapter not found", msg->adapter)) } return (a); @@ -437,8 +437,10 @@ void diva_xdi_close_adapter(void *adapter, void *os_handle) int diva_xdi_write(void *adapter, void *os_handle, const void __user *src, - int length, divas_xdi_copy_from_user_fn_t cp_fn) + int length, void *mptr, + divas_xdi_copy_from_user_fn_t cp_fn) { + diva_xdi_um_cfg_cmd_t *msg = (diva_xdi_um_cfg_cmd_t *)mptr; diva_os_xdi_adapter_t *a = (diva_os_xdi_adapter_t *) adapter; void *data; @@ -459,7 +461,13 @@ diva_xdi_write(void *adapter, void *os_handle, const void __user *src, return (-2); } - length = (*cp_fn) (os_handle, data, src, length); + if (msg) { + *(diva_xdi_um_cfg_cmd_t *)data = *msg; + length = (*cp_fn) (os_handle, (char *)data + sizeof(*msg), + src + sizeof(*msg), length - sizeof(*msg)); + } else { + length = (*cp_fn) (os_handle, data, src, length); + } if (length > 0) { if ((*(a->interface.cmd_proc)) (a, (diva_xdi_um_cfg_cmd_t *) data, length)) { diff --git a/drivers/isdn/hardware/eicon/diva.h b/drivers/isdn/hardware/eicon/diva.h index b067032093a8..1ad76650fbf9 100644 --- a/drivers/isdn/hardware/eicon/diva.h +++ b/drivers/isdn/hardware/eicon/diva.h @@ -20,10 +20,11 @@ int diva_xdi_read(void *adapter, void *os_handle, void __user *dst, int max_length, divas_xdi_copy_to_user_fn_t cp_fn); int diva_xdi_write(void *adapter, void *os_handle, const void __user *src, - int length, divas_xdi_copy_from_user_fn_t cp_fn); + int length, void *msg, + divas_xdi_copy_from_user_fn_t cp_fn); void *diva_xdi_open_adapter(void *os_handle, const void __user *src, - int length, + int length, void *msg, divas_xdi_copy_from_user_fn_t cp_fn); void diva_xdi_close_adapter(void *adapter, void *os_handle); diff --git a/drivers/isdn/hardware/eicon/divasmain.c b/drivers/isdn/hardware/eicon/divasmain.c index b9980e84f9db..b6a3950b2564 100644 --- a/drivers/isdn/hardware/eicon/divasmain.c +++ b/drivers/isdn/hardware/eicon/divasmain.c @@ -591,19 +591,22 @@ static int divas_release(struct inode *inode, struct file *file) static ssize_t divas_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { + diva_xdi_um_cfg_cmd_t msg; int ret = -EINVAL; if (!file->private_data) { file->private_data = diva_xdi_open_adapter(file, buf, - count, + count, &msg, xdi_copy_from_user); - } - if (!file->private_data) { - return (-ENODEV); + if (!file->private_data) + return (-ENODEV); + ret = diva_xdi_write(file->private_data, file, + buf, count, &msg, xdi_copy_from_user); + } else { + ret = diva_xdi_write(file->private_data, file, + buf, count, NULL, xdi_copy_from_user); } - ret = diva_xdi_write(file->private_data, file, - buf, count, xdi_copy_from_user); switch (ret) { case -1: /* Message should be removed from rx mailbox first */ ret = -EBUSY; @@ -622,11 +625,12 @@ static ssize_t divas_write(struct file *file, const char __user *buf, static ssize_t divas_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { + diva_xdi_um_cfg_cmd_t msg; int ret = -EINVAL; if (!file->private_data) { file->private_data = diva_xdi_open_adapter(file, buf, - count, + count, &msg, xdi_copy_from_user); } if (!file->private_data) { diff --git a/drivers/mfd/cros_ec_spi.c b/drivers/mfd/cros_ec_spi.c index 1b52b8557034..2060d1483043 100644 --- a/drivers/mfd/cros_ec_spi.c +++ b/drivers/mfd/cros_ec_spi.c @@ -419,10 +419,25 @@ static int cros_ec_pkt_xfer_spi(struct cros_ec_device *ec_dev, /* Verify that EC can process command */ for (i = 0; i < len; i++) { rx_byte = rx_buf[i]; + /* + * Seeing the PAST_END, RX_BAD_DATA, or NOT_READY + * markers are all signs that the EC didn't fully + * receive our command. e.g., if the EC is flashing + * itself, it can't respond to any commands and instead + * clocks out EC_SPI_PAST_END from its SPI hardware + * buffer. Similar occurrences can happen if the AP is + * too slow to clock out data after asserting CS -- the + * EC will abort and fill its buffer with + * EC_SPI_RX_BAD_DATA. + * + * In all cases, these errors should be safe to retry. + * Report -EAGAIN and let the caller decide what to do + * about that. + */ if (rx_byte == EC_SPI_PAST_END || rx_byte == EC_SPI_RX_BAD_DATA || rx_byte == EC_SPI_NOT_READY) { - ret = -EREMOTEIO; + ret = -EAGAIN; break; } } @@ -431,7 +446,7 @@ static int cros_ec_pkt_xfer_spi(struct cros_ec_device *ec_dev, if (!ret) ret = cros_ec_spi_receive_packet(ec_dev, ec_msg->insize + sizeof(*response)); - else + else if (ret != -EAGAIN) dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret); final_ret = terminate_request(ec_dev); @@ -537,10 +552,11 @@ static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev, /* Verify that EC can process command */ for (i = 0; i < len; i++) { rx_byte = rx_buf[i]; + /* See comments in cros_ec_pkt_xfer_spi() */ if (rx_byte == EC_SPI_PAST_END || rx_byte == EC_SPI_RX_BAD_DATA || rx_byte == EC_SPI_NOT_READY) { - ret = -EREMOTEIO; + ret = -EAGAIN; break; } } @@ -549,7 +565,7 @@ static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev, if (!ret) ret = cros_ec_spi_receive_response(ec_dev, ec_msg->insize + EC_MSG_TX_PROTO_BYTES); - else + else if (ret != -EAGAIN) dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret); final_ret = terminate_request(ec_dev); diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 9e923cd1d80e..38a7586b00cc 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -2485,7 +2485,7 @@ static long mmc_rpmb_ioctl(struct file *filp, unsigned int cmd, break; } - return 0; + return ret; } #ifdef CONFIG_COMPAT diff --git a/drivers/mmc/host/sdhci-iproc.c b/drivers/mmc/host/sdhci-iproc.c index 0ef741bc515d..d0e83db42ae5 100644 --- a/drivers/mmc/host/sdhci-iproc.c +++ b/drivers/mmc/host/sdhci-iproc.c @@ -33,6 +33,8 @@ struct sdhci_iproc_host { const struct sdhci_iproc_data *data; u32 shadow_cmd; u32 shadow_blk; + bool is_cmd_shadowed; + bool is_blk_shadowed; }; #define REG_OFFSET_IN_BITS(reg) ((reg) << 3 & 0x18) @@ -48,8 +50,22 @@ static inline u32 sdhci_iproc_readl(struct sdhci_host *host, int reg) static u16 sdhci_iproc_readw(struct sdhci_host *host, int reg) { - u32 val = sdhci_iproc_readl(host, (reg & ~3)); - u16 word = val >> REG_OFFSET_IN_BITS(reg) & 0xffff; + struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); + struct sdhci_iproc_host *iproc_host = sdhci_pltfm_priv(pltfm_host); + u32 val; + u16 word; + + if ((reg == SDHCI_TRANSFER_MODE) && iproc_host->is_cmd_shadowed) { + /* Get the saved transfer mode */ + val = iproc_host->shadow_cmd; + } else if ((reg == SDHCI_BLOCK_SIZE || reg == SDHCI_BLOCK_COUNT) && + iproc_host->is_blk_shadowed) { + /* Get the saved block info */ + val = iproc_host->shadow_blk; + } else { + val = sdhci_iproc_readl(host, (reg & ~3)); + } + word = val >> REG_OFFSET_IN_BITS(reg) & 0xffff; return word; } @@ -105,13 +121,15 @@ static void sdhci_iproc_writew(struct sdhci_host *host, u16 val, int reg) if (reg == SDHCI_COMMAND) { /* Write the block now as we are issuing a command */ - if (iproc_host->shadow_blk != 0) { + if (iproc_host->is_blk_shadowed) { sdhci_iproc_writel(host, iproc_host->shadow_blk, SDHCI_BLOCK_SIZE); - iproc_host->shadow_blk = 0; + iproc_host->is_blk_shadowed = false; } oldval = iproc_host->shadow_cmd; - } else if (reg == SDHCI_BLOCK_SIZE || reg == SDHCI_BLOCK_COUNT) { + iproc_host->is_cmd_shadowed = false; + } else if ((reg == SDHCI_BLOCK_SIZE || reg == SDHCI_BLOCK_COUNT) && + iproc_host->is_blk_shadowed) { /* Block size and count are stored in shadow reg */ oldval = iproc_host->shadow_blk; } else { @@ -123,9 +141,11 @@ static void sdhci_iproc_writew(struct sdhci_host *host, u16 val, int reg) if (reg == SDHCI_TRANSFER_MODE) { /* Save the transfer mode until the command is issued */ iproc_host->shadow_cmd = newval; + iproc_host->is_cmd_shadowed = true; } else if (reg == SDHCI_BLOCK_SIZE || reg == SDHCI_BLOCK_COUNT) { /* Save the block info until the command is issued */ iproc_host->shadow_blk = newval; + iproc_host->is_blk_shadowed = true; } else { /* Command or other regular 32-bit write */ sdhci_iproc_writel(host, newval, reg & ~3); @@ -166,7 +186,7 @@ static const struct sdhci_ops sdhci_iproc_32only_ops = { static const struct sdhci_pltfm_data sdhci_iproc_cygnus_pltfm_data = { .quirks = SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK, - .quirks2 = SDHCI_QUIRK2_ACMD23_BROKEN, + .quirks2 = SDHCI_QUIRK2_ACMD23_BROKEN | SDHCI_QUIRK2_HOST_OFF_CARD_ON, .ops = &sdhci_iproc_32only_ops, }; @@ -206,7 +226,6 @@ static const struct sdhci_iproc_data iproc_data = { .caps1 = SDHCI_DRIVER_TYPE_C | SDHCI_DRIVER_TYPE_D | SDHCI_SUPPORT_DDR50, - .mmc_caps = MMC_CAP_1_8V_DDR, }; static const struct sdhci_pltfm_data sdhci_bcm2835_pltfm_data = { diff --git a/drivers/net/ethernet/amd/pcnet32.c b/drivers/net/ethernet/amd/pcnet32.c index a561705f232c..be198cc0b10c 100644 --- a/drivers/net/ethernet/amd/pcnet32.c +++ b/drivers/net/ethernet/amd/pcnet32.c @@ -1552,22 +1552,26 @@ pcnet32_probe_pci(struct pci_dev *pdev, const struct pci_device_id *ent) if (!ioaddr) { if (pcnet32_debug & NETIF_MSG_PROBE) pr_err("card has no PCI IO resources, aborting\n"); - return -ENODEV; + err = -ENODEV; + goto err_disable_dev; } err = pci_set_dma_mask(pdev, PCNET32_DMA_MASK); if (err) { if (pcnet32_debug & NETIF_MSG_PROBE) pr_err("architecture does not support 32bit PCI busmaster DMA\n"); - return err; + goto err_disable_dev; } if (!request_region(ioaddr, PCNET32_TOTAL_SIZE, "pcnet32_probe_pci")) { if (pcnet32_debug & NETIF_MSG_PROBE) pr_err("io address range already allocated\n"); - return -EBUSY; + err = -EBUSY; + goto err_disable_dev; } err = pcnet32_probe1(ioaddr, 1, pdev); + +err_disable_dev: if (err < 0) pci_disable_device(pdev); diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 81684acf52af..8a8b12b720ef 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -2747,11 +2747,11 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_master(pdev); /* Query PCI controller on system for DMA addressing - * limitation for the device. Try 64-bit first, and + * limitation for the device. Try 47-bit first, and * fail to 32-bit. */ - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(47)); if (err) { err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); if (err) { @@ -2765,10 +2765,10 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_out_release_regions; } } else { - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(47)); if (err) { dev_err(dev, "Unable to obtain %u-bit DMA " - "for consistent allocations, aborting\n", 64); + "for consistent allocations, aborting\n", 47); goto err_out_release_regions; } using_dac = 1; diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index d4604bc8eb5b..9d3eed46830d 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Fast Ethernet Controller (FEC) driver for Motorola MPC8xx. * Copyright (c) 1997 Dan Malek (dmalek@jlc.net) diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c index f81439796ac7..43d973215040 100644 --- a/drivers/net/ethernet/freescale/fec_ptp.c +++ b/drivers/net/ethernet/freescale/fec_ptp.c @@ -1,20 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Fast Ethernet Controller (ENET) PTP driver for MX6x. * * Copyright (C) 2012 Freescale Semiconductor, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 4bb4646a5f92..5ec1185808e5 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -796,9 +796,11 @@ static int ibmvnic_login(struct net_device *netdev) struct ibmvnic_adapter *adapter = netdev_priv(netdev); unsigned long timeout = msecs_to_jiffies(30000); int retry_count = 0; + bool retry; int rc; do { + retry = false; if (retry_count > IBMVNIC_MAX_QUEUES) { netdev_warn(netdev, "Login attempts exceeded\n"); return -1; @@ -822,6 +824,9 @@ static int ibmvnic_login(struct net_device *netdev) retry_count++; release_sub_crqs(adapter, 1); + retry = true; + netdev_dbg(netdev, + "Received partial success, retrying...\n"); adapter->init_done_rc = 0; reinit_completion(&adapter->init_done); send_cap_queries(adapter); @@ -849,7 +854,7 @@ static int ibmvnic_login(struct net_device *netdev) netdev_warn(netdev, "Adapter login failed\n"); return -1; } - } while (adapter->init_done_rc == PARTIALSUCCESS); + } while (retry); /* handle pending MAC address changes after successful login */ if (adapter->mac_change_pending) { @@ -2617,18 +2622,21 @@ static int enable_scrq_irq(struct ibmvnic_adapter *adapter, { struct device *dev = &adapter->vdev->dev; unsigned long rc; - u64 val; if (scrq->hw_irq > 0x100000000ULL) { dev_err(dev, "bad hw_irq = %lx\n", scrq->hw_irq); return 1; } - val = (0xff000000) | scrq->hw_irq; - rc = plpar_hcall_norets(H_EOI, val); - if (rc) - dev_err(dev, "H_EOI FAILED irq 0x%llx. rc=%ld\n", - val, rc); + if (adapter->resetting && + adapter->reset_reason == VNIC_RESET_MOBILITY) { + u64 val = (0xff000000) | scrq->hw_irq; + + rc = plpar_hcall_norets(H_EOI, val); + if (rc) + dev_err(dev, "H_EOI FAILED irq 0x%llx. rc=%ld\n", + val, rc); + } rc = plpar_hcall_norets(H_VIOCTL, adapter->vdev->unit_address, H_ENABLE_VIO_INTERRUPT, scrq->hw_irq, 0, 0); diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.c b/drivers/net/ethernet/mellanox/mlx4/icm.c index a822f7a56bc5..685337d58276 100644 --- a/drivers/net/ethernet/mellanox/mlx4/icm.c +++ b/drivers/net/ethernet/mellanox/mlx4/icm.c @@ -43,12 +43,12 @@ #include "fw.h" /* - * We allocate in as big chunks as we can, up to a maximum of 256 KB - * per chunk. + * We allocate in page size (default 4KB on many archs) chunks to avoid high + * order memory allocations in fragmented/high usage memory situation. */ enum { - MLX4_ICM_ALLOC_SIZE = 1 << 18, - MLX4_TABLE_CHUNK_SIZE = 1 << 18 + MLX4_ICM_ALLOC_SIZE = PAGE_SIZE, + MLX4_TABLE_CHUNK_SIZE = PAGE_SIZE, }; static void mlx4_free_icm_pages(struct mlx4_dev *dev, struct mlx4_icm_chunk *chunk) @@ -398,9 +398,11 @@ int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table, u64 size; obj_per_chunk = MLX4_TABLE_CHUNK_SIZE / obj_size; + if (WARN_ON(!obj_per_chunk)) + return -EINVAL; num_icm = (nobj + obj_per_chunk - 1) / obj_per_chunk; - table->icm = kcalloc(num_icm, sizeof(*table->icm), GFP_KERNEL); + table->icm = kvzalloc(num_icm * sizeof(*table->icm), GFP_KERNEL); if (!table->icm) return -ENOMEM; table->virt = virt; @@ -446,7 +448,7 @@ err: mlx4_free_icm(dev, table->icm[i], use_coherent); } - kfree(table->icm); + kvfree(table->icm); return -ENOMEM; } @@ -462,5 +464,5 @@ void mlx4_cleanup_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table) mlx4_free_icm(dev, table->icm[i], table->coherent); } - kfree(table->icm); + kvfree(table->icm); } diff --git a/drivers/net/ethernet/mellanox/mlx4/intf.c b/drivers/net/ethernet/mellanox/mlx4/intf.c index 2edcce98ab2d..65482f004e50 100644 --- a/drivers/net/ethernet/mellanox/mlx4/intf.c +++ b/drivers/net/ethernet/mellanox/mlx4/intf.c @@ -172,7 +172,7 @@ int mlx4_do_bond(struct mlx4_dev *dev, bool enable) list_add_tail(&dev_ctx->list, &priv->ctx_list); spin_unlock_irqrestore(&priv->ctx_lock, flags); - mlx4_dbg(dev, "Inrerface for protocol %d restarted with when bonded mode is %s\n", + mlx4_dbg(dev, "Interface for protocol %d restarted with bonded mode %s\n", dev_ctx->intf->protocol, enable ? "enabled" : "disabled"); } diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index 3aaf4bad6c5a..427e7a31862c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -393,11 +393,11 @@ struct mlx4_qp *mlx4_qp_lookup(struct mlx4_dev *dev, u32 qpn) struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table; struct mlx4_qp *qp; - spin_lock(&qp_table->lock); + spin_lock_irq(&qp_table->lock); qp = __mlx4_qp_lookup(dev, qpn); - spin_unlock(&qp_table->lock); + spin_unlock_irq(&qp_table->lock); return qp; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 176645762e49..1ff0b0e93804 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -615,6 +615,45 @@ static inline bool is_last_ethertype_ip(struct sk_buff *skb, int *network_depth) return (ethertype == htons(ETH_P_IP) || ethertype == htons(ETH_P_IPV6)); } +static __be32 mlx5e_get_fcs(struct sk_buff *skb) +{ + int last_frag_sz, bytes_in_prev, nr_frags; + u8 *fcs_p1, *fcs_p2; + skb_frag_t *last_frag; + __be32 fcs_bytes; + + if (!skb_is_nonlinear(skb)) + return *(__be32 *)(skb->data + skb->len - ETH_FCS_LEN); + + nr_frags = skb_shinfo(skb)->nr_frags; + last_frag = &skb_shinfo(skb)->frags[nr_frags - 1]; + last_frag_sz = skb_frag_size(last_frag); + + /* If all FCS data is in last frag */ + if (last_frag_sz >= ETH_FCS_LEN) + return *(__be32 *)(skb_frag_address(last_frag) + + last_frag_sz - ETH_FCS_LEN); + + fcs_p2 = (u8 *)skb_frag_address(last_frag); + bytes_in_prev = ETH_FCS_LEN - last_frag_sz; + + /* Find where the other part of the FCS is - Linear or another frag */ + if (nr_frags == 1) { + fcs_p1 = skb_tail_pointer(skb); + } else { + skb_frag_t *prev_frag = &skb_shinfo(skb)->frags[nr_frags - 2]; + + fcs_p1 = skb_frag_address(prev_frag) + + skb_frag_size(prev_frag); + } + fcs_p1 -= bytes_in_prev; + + memcpy(&fcs_bytes, fcs_p1, bytes_in_prev); + memcpy(((u8 *)&fcs_bytes) + bytes_in_prev, fcs_p2, last_frag_sz); + + return fcs_bytes; +} + static inline void mlx5e_handle_csum(struct net_device *netdev, struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq, @@ -643,6 +682,9 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, skb->csum = csum_partial(skb->data + ETH_HLEN, network_depth - ETH_HLEN, skb->csum); + if (unlikely(netdev->features & NETIF_F_RXFCS)) + skb->csum = csum_add(skb->csum, + (__force __wsum)mlx5e_get_fcs(skb)); rq->stats.csum_complete++; return; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c index 0f5da499a223..fad8c2e3804e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c @@ -237,19 +237,17 @@ static void *mlx5_fpga_ipsec_cmd_exec(struct mlx5_core_dev *mdev, context->buf.sg[0].data = &context->command; spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags); - list_add_tail(&context->list, &fdev->ipsec->pending_cmds); + res = mlx5_fpga_sbu_conn_sendmsg(fdev->ipsec->conn, &context->buf); + if (!res) + list_add_tail(&context->list, &fdev->ipsec->pending_cmds); spin_unlock_irqrestore(&fdev->ipsec->pending_cmds_lock, flags); - res = mlx5_fpga_sbu_conn_sendmsg(fdev->ipsec->conn, &context->buf); if (res) { - mlx5_fpga_warn(fdev, "Failure sending IPSec command: %d\n", - res); - spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags); - list_del(&context->list); - spin_unlock_irqrestore(&fdev->ipsec->pending_cmds_lock, flags); + mlx5_fpga_warn(fdev, "Failed to send IPSec command: %d\n", res); kfree(context); return ERR_PTR(res); } + /* Context will be freed by wait func after completion */ return context; } diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c index 00f41c145d4d..820b226d6ff8 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c +++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c @@ -77,7 +77,7 @@ #define ILT_CFG_REG(cli, reg) PSWRQ2_REG_ ## cli ## _ ## reg ## _RT_OFFSET /* ILT entry structure */ -#define ILT_ENTRY_PHY_ADDR_MASK 0x000FFFFFFFFFFFULL +#define ILT_ENTRY_PHY_ADDR_MASK (~0ULL >> 12) #define ILT_ENTRY_PHY_ADDR_SHIFT 0 #define ILT_ENTRY_VALID_MASK 0x1ULL #define ILT_ENTRY_VALID_SHIFT 52 diff --git a/drivers/net/phy/bcm-cygnus.c b/drivers/net/phy/bcm-cygnus.c index 6838129839ca..e757b09f1889 100644 --- a/drivers/net/phy/bcm-cygnus.c +++ b/drivers/net/phy/bcm-cygnus.c @@ -61,17 +61,17 @@ static int bcm_cygnus_afe_config(struct phy_device *phydev) return rc; /* make rcal=100, since rdb default is 000 */ - rc = bcm_phy_write_exp(phydev, MII_BRCM_CORE_EXPB1, 0x10); + rc = bcm_phy_write_exp_sel(phydev, MII_BRCM_CORE_EXPB1, 0x10); if (rc < 0) return rc; /* CORE_EXPB0, Reset R_CAL/RC_CAL Engine */ - rc = bcm_phy_write_exp(phydev, MII_BRCM_CORE_EXPB0, 0x10); + rc = bcm_phy_write_exp_sel(phydev, MII_BRCM_CORE_EXPB0, 0x10); if (rc < 0) return rc; /* CORE_EXPB0, Disable Reset R_CAL/RC_CAL Engine */ - rc = bcm_phy_write_exp(phydev, MII_BRCM_CORE_EXPB0, 0x00); + rc = bcm_phy_write_exp_sel(phydev, MII_BRCM_CORE_EXPB0, 0x00); return 0; } diff --git a/drivers/net/phy/bcm-phy-lib.c b/drivers/net/phy/bcm-phy-lib.c index 5ad130c3da43..d5e0833d69b9 100644 --- a/drivers/net/phy/bcm-phy-lib.c +++ b/drivers/net/phy/bcm-phy-lib.c @@ -56,7 +56,7 @@ int bcm54xx_auxctl_read(struct phy_device *phydev, u16 regnum) /* The register must be written to both the Shadow Register Select and * the Shadow Read Register Selector */ - phy_write(phydev, MII_BCM54XX_AUX_CTL, regnum | + phy_write(phydev, MII_BCM54XX_AUX_CTL, MII_BCM54XX_AUXCTL_SHDWSEL_MASK | regnum << MII_BCM54XX_AUXCTL_SHDWSEL_READ_SHIFT); return phy_read(phydev, MII_BCM54XX_AUX_CTL); } diff --git a/drivers/net/phy/bcm-phy-lib.h b/drivers/net/phy/bcm-phy-lib.h index 7c73808cbbde..81cceaa412fe 100644 --- a/drivers/net/phy/bcm-phy-lib.h +++ b/drivers/net/phy/bcm-phy-lib.h @@ -14,11 +14,18 @@ #ifndef _LINUX_BCM_PHY_LIB_H #define _LINUX_BCM_PHY_LIB_H +#include <linux/brcmphy.h> #include <linux/phy.h> int bcm_phy_write_exp(struct phy_device *phydev, u16 reg, u16 val); int bcm_phy_read_exp(struct phy_device *phydev, u16 reg); +static inline int bcm_phy_write_exp_sel(struct phy_device *phydev, + u16 reg, u16 val) +{ + return bcm_phy_write_exp(phydev, reg | MII_BCM54XX_EXP_SEL_ER, val); +} + int bcm54xx_auxctl_write(struct phy_device *phydev, u16 regnum, u16 val); int bcm54xx_auxctl_read(struct phy_device *phydev, u16 regnum); diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c index 29b1c88b55cc..01d2ff2f6241 100644 --- a/drivers/net/phy/bcm7xxx.c +++ b/drivers/net/phy/bcm7xxx.c @@ -65,10 +65,10 @@ struct bcm7xxx_phy_priv { static void r_rc_cal_reset(struct phy_device *phydev) { /* Reset R_CAL/RC_CAL Engine */ - bcm_phy_write_exp(phydev, 0x00b0, 0x0010); + bcm_phy_write_exp_sel(phydev, 0x00b0, 0x0010); /* Disable Reset R_AL/RC_CAL Engine */ - bcm_phy_write_exp(phydev, 0x00b0, 0x0000); + bcm_phy_write_exp_sel(phydev, 0x00b0, 0x0000); } static int bcm7xxx_28nm_b0_afe_config_init(struct phy_device *phydev) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index dc7c7ec43202..02ad03a2fab7 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -605,30 +605,13 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (cmd == PPPIOCDETACH) { /* - * We have to be careful here... if the file descriptor - * has been dup'd, we could have another process in the - * middle of a poll using the same file *, so we had - * better not free the interface data structures - - * instead we fail the ioctl. Even in this case, we - * shut down the interface if we are the owner of it. - * Actually, we should get rid of PPPIOCDETACH, userland - * (i.e. pppd) could achieve the same effect by closing - * this fd and reopening /dev/ppp. + * PPPIOCDETACH is no longer supported as it was heavily broken, + * and is only known to have been used by pppd older than + * ppp-2.4.2 (released November 2003). */ + pr_warn_once("%s (%d) used obsolete PPPIOCDETACH ioctl\n", + current->comm, current->pid); err = -EINVAL; - if (pf->kind == INTERFACE) { - ppp = PF_TO_PPP(pf); - rtnl_lock(); - if (file == ppp->owner) - unregister_netdevice(ppp->dev); - rtnl_unlock(); - } - if (atomic_long_read(&file->f_count) < 2) { - ppp_release(NULL, file); - err = 0; - } else - pr_warn("PPPIOCDETACH file->f_count=%ld\n", - atomic_long_read(&file->f_count)); goto out; } diff --git a/drivers/net/tun.c b/drivers/net/tun.c index d45ac37e1287..45d807796a18 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1423,6 +1423,13 @@ static void tun_net_init(struct net_device *dev) dev->max_mtu = MAX_MTU - dev->hard_header_len; } +static bool tun_sock_writeable(struct tun_struct *tun, struct tun_file *tfile) +{ + struct sock *sk = tfile->socket.sk; + + return (tun->dev->flags & IFF_UP) && sock_writeable(sk); +} + /* Character device part */ /* Poll */ @@ -1445,10 +1452,14 @@ static __poll_t tun_chr_poll(struct file *file, poll_table *wait) if (!ptr_ring_empty(&tfile->tx_ring)) mask |= EPOLLIN | EPOLLRDNORM; - if (tun->dev->flags & IFF_UP && - (sock_writeable(sk) || - (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags) && - sock_writeable(sk)))) + /* Make sure SOCKWQ_ASYNC_NOSPACE is set if not writable to + * guarantee EPOLLOUT to be raised by either here or + * tun_sock_write_space(). Then process could get notification + * after it writes to a down device and meets -EIO. + */ + if (tun_sock_writeable(tun, tfile) || + (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags) && + tun_sock_writeable(tun, tfile))) mask |= EPOLLOUT | EPOLLWRNORM; if (tun->dev->reg_state != NETREG_REGISTERED) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 770422e953f7..032e1ac10a30 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -707,6 +707,13 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, void *data; u32 act; + /* Transient failure which in theory could occur if + * in-flight packets from before XDP was enabled reach + * the receive path after XDP is loaded. + */ + if (unlikely(hdr->hdr.gso_type)) + goto err_xdp; + /* This happens when rx buffer size is underestimated * or headroom is not enough because of the buffer * was refilled before XDP is set. This should only @@ -727,14 +734,6 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, xdp_page = page; } - /* Transient failure which in theory could occur if - * in-flight packets from before XDP was enabled reach - * the receive path after XDP is loaded. In practice I - * was not able to create this condition. - */ - if (unlikely(hdr->hdr.gso_type)) - goto err_xdp; - /* Allow consuming headroom but reserve enough space to push * the descriptor on if we get an XDP_TX return code. */ @@ -775,7 +774,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, } *xdp_xmit = true; if (unlikely(xdp_page != page)) - goto err_xdp; + put_page(page); rcu_read_unlock(); goto xdp_xmit; case XDP_REDIRECT: @@ -787,7 +786,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, } *xdp_xmit = true; if (unlikely(xdp_page != page)) - goto err_xdp; + put_page(page); rcu_read_unlock(); goto xdp_xmit; default: @@ -875,7 +874,7 @@ err_xdp: rcu_read_unlock(); err_skb: put_page(page); - while (--num_buf) { + while (num_buf-- > 1) { buf = virtqueue_get_buf(rq->vq, &len); if (unlikely(!buf)) { pr_debug("%s: rx error: %d buffers missing\n", diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 4a017a0d71ea..920c23e542a5 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -3340,7 +3340,7 @@ out_err: static int hwsim_dump_radio_nl(struct sk_buff *skb, struct netlink_callback *cb) { - int last_idx = cb->args[0]; + int last_idx = cb->args[0] - 1; struct mac80211_hwsim_data *data = NULL; int res = 0; void *hdr; @@ -3368,7 +3368,7 @@ static int hwsim_dump_radio_nl(struct sk_buff *skb, last_idx = data->idx; } - cb->args[0] = last_idx; + cb->args[0] = last_idx + 1; /* list changed, but no new element sent, set interrupted flag */ if (skb->len == 0 && cb->prev_seq && cb->seq != cb->prev_seq) { diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig index 88a8b5916624..dbb7464c018c 100644 --- a/drivers/nvme/host/Kconfig +++ b/drivers/nvme/host/Kconfig @@ -27,7 +27,7 @@ config NVME_FABRICS config NVME_RDMA tristate "NVM Express over Fabrics RDMA host driver" - depends on INFINIBAND && INFINIBAND_ADDR_TRANS && BLOCK + depends on INFINIBAND_ADDR_TRANS && BLOCK select NVME_CORE select NVME_FABRICS select SG_POOL diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig index 3c7b61ddb0d1..7595664ee753 100644 --- a/drivers/nvme/target/Kconfig +++ b/drivers/nvme/target/Kconfig @@ -27,7 +27,7 @@ config NVME_TARGET_LOOP config NVME_TARGET_RDMA tristate "NVMe over Fabrics RDMA target support" - depends on INFINIBAND && INFINIBAND_ADDR_TRANS + depends on INFINIBAND_ADDR_TRANS depends on NVME_TARGET select SGL_ALLOC help diff --git a/drivers/platform/chrome/cros_ec_proto.c b/drivers/platform/chrome/cros_ec_proto.c index e7bbdf947bbc..8350ca2311c7 100644 --- a/drivers/platform/chrome/cros_ec_proto.c +++ b/drivers/platform/chrome/cros_ec_proto.c @@ -91,6 +91,8 @@ static int send_command(struct cros_ec_device *ec_dev, usleep_range(10000, 11000); ret = (*xfer_fxn)(ec_dev, status_msg); + if (ret == -EAGAIN) + continue; if (ret < 0) break; diff --git a/drivers/s390/scsi/zfcp_dbf.c b/drivers/s390/scsi/zfcp_dbf.c index a8b831000b2d..18c4f933e8b9 100644 --- a/drivers/s390/scsi/zfcp_dbf.c +++ b/drivers/s390/scsi/zfcp_dbf.c @@ -4,7 +4,7 @@ * * Debug traces for zfcp. * - * Copyright IBM Corp. 2002, 2017 + * Copyright IBM Corp. 2002, 2018 */ #define KMSG_COMPONENT "zfcp" @@ -308,6 +308,27 @@ void zfcp_dbf_rec_trig(char *tag, struct zfcp_adapter *adapter, spin_unlock_irqrestore(&dbf->rec_lock, flags); } +/** + * zfcp_dbf_rec_trig_lock - trace event related to triggered recovery with lock + * @tag: identifier for event + * @adapter: adapter on which the erp_action should run + * @port: remote port involved in the erp_action + * @sdev: scsi device involved in the erp_action + * @want: wanted erp_action + * @need: required erp_action + * + * The adapter->erp_lock must not be held. + */ +void zfcp_dbf_rec_trig_lock(char *tag, struct zfcp_adapter *adapter, + struct zfcp_port *port, struct scsi_device *sdev, + u8 want, u8 need) +{ + unsigned long flags; + + read_lock_irqsave(&adapter->erp_lock, flags); + zfcp_dbf_rec_trig(tag, adapter, port, sdev, want, need); + read_unlock_irqrestore(&adapter->erp_lock, flags); +} /** * zfcp_dbf_rec_run_lvl - trace event related to running recovery diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h index bf8ea4df2bb8..e5eed8aac0ce 100644 --- a/drivers/s390/scsi/zfcp_ext.h +++ b/drivers/s390/scsi/zfcp_ext.h @@ -4,7 +4,7 @@ * * External function declarations. * - * Copyright IBM Corp. 2002, 2016 + * Copyright IBM Corp. 2002, 2018 */ #ifndef ZFCP_EXT_H @@ -35,6 +35,9 @@ extern int zfcp_dbf_adapter_register(struct zfcp_adapter *); extern void zfcp_dbf_adapter_unregister(struct zfcp_adapter *); extern void zfcp_dbf_rec_trig(char *, struct zfcp_adapter *, struct zfcp_port *, struct scsi_device *, u8, u8); +extern void zfcp_dbf_rec_trig_lock(char *tag, struct zfcp_adapter *adapter, + struct zfcp_port *port, + struct scsi_device *sdev, u8 want, u8 need); extern void zfcp_dbf_rec_run(char *, struct zfcp_erp_action *); extern void zfcp_dbf_rec_run_lvl(int level, char *tag, struct zfcp_erp_action *erp); diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c index 4d2ba5682493..22f9562f415c 100644 --- a/drivers/s390/scsi/zfcp_scsi.c +++ b/drivers/s390/scsi/zfcp_scsi.c @@ -4,7 +4,7 @@ * * Interface to Linux SCSI midlayer. * - * Copyright IBM Corp. 2002, 2017 + * Copyright IBM Corp. 2002, 2018 */ #define KMSG_COMPONENT "zfcp" @@ -618,9 +618,9 @@ static void zfcp_scsi_rport_register(struct zfcp_port *port) ids.port_id = port->d_id; ids.roles = FC_RPORT_ROLE_FCP_TARGET; - zfcp_dbf_rec_trig("scpaddy", port->adapter, port, NULL, - ZFCP_PSEUDO_ERP_ACTION_RPORT_ADD, - ZFCP_PSEUDO_ERP_ACTION_RPORT_ADD); + zfcp_dbf_rec_trig_lock("scpaddy", port->adapter, port, NULL, + ZFCP_PSEUDO_ERP_ACTION_RPORT_ADD, + ZFCP_PSEUDO_ERP_ACTION_RPORT_ADD); rport = fc_remote_port_add(port->adapter->scsi_host, 0, &ids); if (!rport) { dev_err(&port->adapter->ccw_device->dev, @@ -642,9 +642,9 @@ static void zfcp_scsi_rport_block(struct zfcp_port *port) struct fc_rport *rport = port->rport; if (rport) { - zfcp_dbf_rec_trig("scpdely", port->adapter, port, NULL, - ZFCP_PSEUDO_ERP_ACTION_RPORT_DEL, - ZFCP_PSEUDO_ERP_ACTION_RPORT_DEL); + zfcp_dbf_rec_trig_lock("scpdely", port->adapter, port, NULL, + ZFCP_PSEUDO_ERP_ACTION_RPORT_DEL, + ZFCP_PSEUDO_ERP_ACTION_RPORT_DEL); fc_remote_port_delete(rport); port->rport = NULL; } diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile index e29f9b8fd66d..56c940394729 100644 --- a/drivers/scsi/Makefile +++ b/drivers/scsi/Makefile @@ -182,7 +182,7 @@ zalon7xx-objs := zalon.o ncr53c8xx.o NCR_Q720_mod-objs := NCR_Q720.o ncr53c8xx.o # Files generated that shall be removed upon make clean -clean-files := 53c700_d.h 53c700_u.h +clean-files := 53c700_d.h 53c700_u.h scsi_devinfo_tbl.c $(obj)/53c700.o $(MODVERDIR)/$(obj)/53c700.ver: $(obj)/53c700_d.h diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index c198b96368dd..5c40d809830f 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -1894,7 +1894,7 @@ retry: num = (rem_sz > scatter_elem_sz_prev) ? scatter_elem_sz_prev : rem_sz; - schp->pages[k] = alloc_pages(gfp_mask, order); + schp->pages[k] = alloc_pages(gfp_mask | __GFP_ZERO, order); if (!schp->pages[k]) goto out; diff --git a/drivers/scsi/sr_ioctl.c b/drivers/scsi/sr_ioctl.c index 2a21f2d48592..35fab1e18adc 100644 --- a/drivers/scsi/sr_ioctl.c +++ b/drivers/scsi/sr_ioctl.c @@ -188,9 +188,13 @@ int sr_do_ioctl(Scsi_CD *cd, struct packet_command *cgc) struct scsi_device *SDev; struct scsi_sense_hdr sshdr; int result, err = 0, retries = 0; + unsigned char sense_buffer[SCSI_SENSE_BUFFERSIZE], *senseptr = NULL; SDev = cd->device; + if (cgc->sense) + senseptr = sense_buffer; + retry: if (!scsi_block_when_processing_errors(SDev)) { err = -ENODEV; @@ -198,10 +202,12 @@ int sr_do_ioctl(Scsi_CD *cd, struct packet_command *cgc) } result = scsi_execute(SDev, cgc->cmd, cgc->data_direction, - cgc->buffer, cgc->buflen, - (unsigned char *)cgc->sense, &sshdr, + cgc->buffer, cgc->buflen, senseptr, &sshdr, cgc->timeout, IOCTL_RETRIES, 0, 0, NULL); + if (cgc->sense) + memcpy(cgc->sense, sense_buffer, sizeof(*cgc->sense)); + /* Minimal error checking. Ignore cases we know about, and report the rest. */ if (driver_byte(result) != 0) { switch (sshdr.sense_key) { diff --git a/drivers/ssb/Kconfig b/drivers/ssb/Kconfig index 9371651d8017..c574dd210500 100644 --- a/drivers/ssb/Kconfig +++ b/drivers/ssb/Kconfig @@ -117,7 +117,7 @@ config SSB_SERIAL config SSB_DRIVER_PCICORE_POSSIBLE bool - depends on SSB_PCIHOST && SSB = y + depends on SSB_PCIHOST default y config SSB_DRIVER_PCICORE @@ -131,7 +131,7 @@ config SSB_DRIVER_PCICORE config SSB_PCICORE_HOSTMODE bool "Hostmode support for SSB PCI core" - depends on SSB_DRIVER_PCICORE && SSB_DRIVER_MIPS + depends on SSB_DRIVER_PCICORE && SSB_DRIVER_MIPS && SSB = y help PCIcore hostmode operation (external PCI bus). diff --git a/drivers/staging/lustre/lnet/Kconfig b/drivers/staging/lustre/lnet/Kconfig index ad049e6f24e4..f3b1ad4bd3dc 100644 --- a/drivers/staging/lustre/lnet/Kconfig +++ b/drivers/staging/lustre/lnet/Kconfig @@ -34,7 +34,7 @@ config LNET_SELFTEST config LNET_XPRT_IB tristate "LNET infiniband support" - depends on LNET && PCI && INFINIBAND && INFINIBAND_ADDR_TRANS + depends on LNET && PCI && INFINIBAND_ADDR_TRANS default LNET && INFINIBAND help This option allows the LNET users to use infiniband as an diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 4ad89ea71a70..4f26bdc3d1dc 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -2121,6 +2121,8 @@ static ssize_t tcmu_qfull_time_out_store(struct config_item *item, if (val >= 0) { udev->qfull_time_out = val * MSEC_PER_SEC; + } else if (val == -1) { + udev->qfull_time_out = val; } else { printk(KERN_ERR "Invalid qfull timeout value %d\n", val); return -EINVAL; diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index f3bd8e941224..f0be5f35ab28 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -981,6 +981,7 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev, { int ret = 0; + mutex_lock(&dev->mutex); vhost_dev_lock_vqs(dev); switch (msg->type) { case VHOST_IOTLB_UPDATE: @@ -1016,6 +1017,8 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev, } vhost_dev_unlock_vqs(dev); + mutex_unlock(&dev->mutex); + return ret; } ssize_t vhost_chr_write_iter(struct vhost_dev *dev, diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index e1c60899fdbc..a6f9ba85dc4b 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -351,7 +351,7 @@ xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, * physical address */ phys = xen_bus_to_phys(dev_addr); - if (((dev_addr + size - 1 > dma_mask)) || + if (((dev_addr + size - 1 <= dma_mask)) || range_straddles_page_boundary(phys, size)) xen_destroy_contiguous_region(phys, order); diff --git a/fs/affs/namei.c b/fs/affs/namei.c index d8aa0ae3d037..41c5749f4db7 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c @@ -201,14 +201,16 @@ affs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) struct super_block *sb = dir->i_sb; struct buffer_head *bh; struct inode *inode = NULL; + struct dentry *res; pr_debug("%s(\"%pd\")\n", __func__, dentry); affs_lock_dir(dir); bh = affs_find_entry(dir, dentry); - affs_unlock_dir(dir); - if (IS_ERR(bh)) + if (IS_ERR(bh)) { + affs_unlock_dir(dir); return ERR_CAST(bh); + } if (bh) { u32 ino = bh->b_blocknr; @@ -222,11 +224,12 @@ affs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) } affs_brelse(bh); inode = affs_iget(sb, ino); - if (IS_ERR(inode)) - return ERR_CAST(inode); } - d_add(dentry, inode); - return NULL; + res = d_splice_alias(inode, dentry); + if (!IS_ERR_OR_NULL(res)) + res->d_fsdata = dentry->d_fsdata; + affs_unlock_dir(dir); + return res; } int @@ -1078,8 +1078,8 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id) ctx = rcu_dereference(table->table[id]); if (ctx && ctx->user_id == ctx_id) { - percpu_ref_get(&ctx->users); - ret = ctx; + if (percpu_ref_tryget_live(&ctx->users)) + ret = ctx; } out: rcu_read_unlock(); diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index af2832aaeec5..4700b4534439 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -198,23 +198,16 @@ befs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) if (ret == BEFS_BT_NOT_FOUND) { befs_debug(sb, "<--- %s %pd not found", __func__, dentry); - d_add(dentry, NULL); - return ERR_PTR(-ENOENT); - + inode = NULL; } else if (ret != BEFS_OK || offset == 0) { befs_error(sb, "<--- %s Error", __func__); - return ERR_PTR(-ENODATA); + inode = ERR_PTR(-ENODATA); + } else { + inode = befs_iget(dir->i_sb, (ino_t) offset); } - - inode = befs_iget(dir->i_sb, (ino_t) offset); - if (IS_ERR(inode)) - return ERR_CAST(inode); - - d_add(dentry, inode); - befs_debug(sb, "<--- %s", __func__); - return NULL; + return d_splice_alias(inode, dentry); } static int diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8e604e7071f1..0b86cf10cf2a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6586,8 +6586,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, goto out_unlock_inode; } else { btrfs_update_inode(trans, root, inode); - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); } out_unlock: @@ -6663,8 +6662,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, goto out_unlock_inode; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); out_unlock: btrfs_end_transaction(trans); @@ -6809,12 +6807,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) if (err) goto out_fail_inode; - d_instantiate(dentry, inode); - /* - * mkdir is special. We're unlocking after we call d_instantiate - * to avoid a race with nfsd calling d_instantiate. - */ - unlock_new_inode(inode); + d_instantiate_new(dentry, inode); drop_on_err = 0; out_fail: @@ -9124,7 +9117,8 @@ static int btrfs_truncate(struct inode *inode, bool skip_writeback) BTRFS_EXTENT_DATA_KEY); trans->block_rsv = &fs_info->trans_block_rsv; if (ret != -ENOSPC && ret != -EAGAIN) { - err = ret; + if (ret < 0) + err = ret; break; } @@ -10257,8 +10251,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, goto out_unlock_inode; } - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); out_unlock: btrfs_end_transaction(trans); diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index 0daa1e3fe0df..ab0bbe93b398 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -572,6 +572,11 @@ lookup_again: if (ret < 0) goto create_error; + if (unlikely(d_unhashed(next))) { + dput(next); + inode_unlock(d_inode(dir)); + goto lookup_again; + } ASSERT(d_backing_inode(next)); _debug("mkdir -> %p{%p{ino=%lu}}", @@ -764,6 +769,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, /* search the current directory for the element name */ inode_lock(d_inode(dir)); +retry: start = jiffies; subdir = lookup_one_len(dirname, dir, strlen(dirname)); cachefiles_hist(cachefiles_lookup_histogram, start); @@ -793,6 +799,10 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, if (ret < 0) goto mkdir_error; + if (unlikely(d_unhashed(subdir))) { + dput(subdir); + goto retry; + } ASSERT(d_backing_inode(subdir)); _debug("mkdir -> %p{%p{ino=%lu}}", diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index 5f132d59dfc2..d61e2de8d0eb 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig @@ -197,7 +197,7 @@ config CIFS_SMB311 config CIFS_SMB_DIRECT bool "SMB Direct support (Experimental)" - depends on CIFS=m && INFINIBAND && INFINIBAND_ADDR_TRANS || CIFS=y && INFINIBAND=y && INFINIBAND_ADDR_TRANS=y + depends on CIFS=m && INFINIBAND_ADDR_TRANS || CIFS=y && INFINIBAND_ADDR_TRANS=y help Enables SMB Direct experimental support for SMB 3.0, 3.02 and 3.1.1. SMB Direct allows transferring SMB packets over RDMA. If unsure, diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index 017b0ab19bc4..124b093d14e5 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -492,7 +492,7 @@ static void cramfs_kill_sb(struct super_block *sb) { struct cramfs_sb_info *sbi = CRAMFS_SB(sb); - if (IS_ENABLED(CCONFIG_CRAMFS_MTD) && sb->s_mtd) { + if (IS_ENABLED(CONFIG_CRAMFS_MTD) && sb->s_mtd) { if (sbi && sbi->mtd_point_size) mtd_unpoint(sb->s_mtd, 0, sbi->mtd_point_size); kill_mtd_super(sb); diff --git a/fs/dcache.c b/fs/dcache.c index 86d2de63461e..2acfc69878f5 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1899,6 +1899,28 @@ void d_instantiate(struct dentry *entry, struct inode * inode) } EXPORT_SYMBOL(d_instantiate); +/* + * This should be equivalent to d_instantiate() + unlock_new_inode(), + * with lockdep-related part of unlock_new_inode() done before + * anything else. Use that instead of open-coding d_instantiate()/ + * unlock_new_inode() combinations. + */ +void d_instantiate_new(struct dentry *entry, struct inode *inode) +{ + BUG_ON(!hlist_unhashed(&entry->d_u.d_alias)); + BUG_ON(!inode); + lockdep_annotate_inode_mutex_key(inode); + security_d_instantiate(entry, inode); + spin_lock(&inode->i_lock); + __d_instantiate(entry, inode); + WARN_ON(!(inode->i_state & I_NEW)); + inode->i_state &= ~I_NEW; + smp_mb(); + wake_up_bit(&inode->i_state, __I_NEW); + spin_unlock(&inode->i_lock); +} +EXPORT_SYMBOL(d_instantiate_new); + /** * d_instantiate_no_diralias - instantiate a non-aliased dentry * @entry: dentry to complete diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 97d17eaeba07..49121e5a8de2 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -283,8 +283,7 @@ ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry, iget_failed(ecryptfs_inode); goto out; } - unlock_new_inode(ecryptfs_inode); - d_instantiate(ecryptfs_dentry, ecryptfs_inode); + d_instantiate_new(ecryptfs_dentry, ecryptfs_inode); out: return rc; } diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 1e01fabef130..71635909df3b 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -1264,21 +1264,11 @@ do_indirects: static void ext2_truncate_blocks(struct inode *inode, loff_t offset) { - /* - * XXX: it seems like a bug here that we don't allow - * IS_APPEND inode to have blocks-past-i_size trimmed off. - * review and fix this. - * - * Also would be nice to be able to handle IO errors and such, - * but that's probably too much to ask. - */ if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) return; if (ext2_inode_is_fast_symlink(inode)) return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) - return; dax_sem_down_write(EXT2_I(inode)); __ext2_truncate_blocks(inode, offset); diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 55f7caadb093..152453a91877 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -41,8 +41,7 @@ static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode) { int err = ext2_add_link(dentry, inode); if (!err) { - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); return 0; } inode_dec_link_count(inode); @@ -255,8 +254,7 @@ static int ext2_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) if (err) goto out_fail; - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); out: return err; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index b1f21e3a0763..4a09063ce1d2 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2411,8 +2411,7 @@ static int ext4_add_nondir(handle_t *handle, int err = ext4_add_entry(handle, dentry, inode); if (!err) { ext4_mark_inode_dirty(handle, inode); - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); return 0; } drop_nlink(inode); @@ -2651,8 +2650,7 @@ out_clear_inode: err = ext4_mark_inode_dirty(handle, dir); if (err) goto out_clear_inode; - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); if (IS_DIRSYNC(dir)) ext4_handle_sync(handle); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index d5098efe577c..75e37fd720b2 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -294,8 +294,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, alloc_nid_done(sbi, ino); - d_instantiate(dentry, inode); - unlock_new_inode(inode); + d_instantiate_new(dentry, inode); if (IS_DIRSYNC(dir)) f2fs_sync_fs(sbi->sb, 1); @@ -597,8 +596,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, err = page_symlink(inode, disk_link.name, disk_link.len); err_out: - d_instantiate(dentry, inode); - unlock_new_inode(inode); + d_instantiate_new(dentry, inode); /* * Let's flush symlink data in order to avoid broken symlink as much as @@ -661,8 +659,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) alloc_nid_done(sbi, inode->i_ino); - d_instantiate(dentry, inode); - unlock_new_inode(inode); + d_instantiate_new(dentry, inode); if (IS_DIRSYNC(dir)) f2fs_sync_fs(sbi->sb, 1); @@ -713,8 +710,7 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, alloc_nid_done(sbi, inode->i_ino); - d_instantiate(dentry, inode); - unlock_new_inode(inode); + d_instantiate_new(dentry, inode); if (IS_DIRSYNC(dir)) f2fs_sync_fs(sbi->sb, 1); diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index 0a754f38462e..e5a6deb38e1e 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c @@ -209,8 +209,7 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry, __func__, inode->i_ino, inode->i_mode, inode->i_nlink, f->inocache->pino_nlink, inode->i_mapping->nrpages); - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); return 0; fail: @@ -430,8 +429,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char mutex_unlock(&dir_f->sem); jffs2_complete_reservation(c); - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); return 0; fail: @@ -575,8 +573,7 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, umode_t mode mutex_unlock(&dir_f->sem); jffs2_complete_reservation(c); - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); return 0; fail: @@ -747,8 +744,7 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, umode_t mode mutex_unlock(&dir_f->sem); jffs2_complete_reservation(c); - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); return 0; fail: diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index b41596d71858..56c3fcbfe80e 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -178,8 +178,7 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, umode_t mode, unlock_new_inode(ip); iput(ip); } else { - unlock_new_inode(ip); - d_instantiate(dentry, ip); + d_instantiate_new(dentry, ip); } out2: @@ -313,8 +312,7 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode) unlock_new_inode(ip); iput(ip); } else { - unlock_new_inode(ip); - d_instantiate(dentry, ip); + d_instantiate_new(dentry, ip); } out2: @@ -1059,8 +1057,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, unlock_new_inode(ip); iput(ip); } else { - unlock_new_inode(ip); - d_instantiate(dentry, ip); + d_instantiate_new(dentry, ip); } out2: @@ -1447,8 +1444,7 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry, unlock_new_inode(ip); iput(ip); } else { - unlock_new_inode(ip); - d_instantiate(dentry, ip); + d_instantiate_new(dentry, ip); } out1: diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index 26dd9a50f383..ff2716f9322e 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -316,6 +316,7 @@ struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags, info->root = root; info->ns = ns; + INIT_LIST_HEAD(&info->node); sb = sget_userns(fs_type, kernfs_test_super, kernfs_set_super, flags, &init_user_ns, info); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 2410b093a2e6..b0555d7d8200 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1201,6 +1201,28 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, break; case S_IFDIR: host_err = vfs_mkdir(dirp, dchild, iap->ia_mode); + if (!host_err && unlikely(d_unhashed(dchild))) { + struct dentry *d; + d = lookup_one_len(dchild->d_name.name, + dchild->d_parent, + dchild->d_name.len); + if (IS_ERR(d)) { + host_err = PTR_ERR(d); + break; + } + if (unlikely(d_is_negative(d))) { + dput(d); + err = nfserr_serverfault; + goto out; + } + dput(resfhp->fh_dentry); + resfhp->fh_dentry = dget(d); + err = fh_update(resfhp); + dput(dchild); + dchild = d; + if (err) + goto out; + } break; case S_IFCHR: case S_IFBLK: diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index 1a2894aa0194..dd52d3f82e8d 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -46,8 +46,7 @@ static inline int nilfs_add_nondir(struct dentry *dentry, struct inode *inode) int err = nilfs_add_link(dentry, inode); if (!err) { - d_instantiate(dentry, inode); - unlock_new_inode(inode); + d_instantiate_new(dentry, inode); return 0; } inode_dec_link_count(inode); @@ -243,8 +242,7 @@ static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) goto out_fail; nilfs_mark_inode_dirty(inode); - d_instantiate(dentry, inode); - unlock_new_inode(inode); + d_instantiate_new(dentry, inode); out: if (!err) err = nilfs_transaction_commit(dir->i_sb); diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 91a8889abf9b..ea8c551bcd7e 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -570,16 +570,7 @@ static struct bio *o2hb_setup_one_bio(struct o2hb_region *reg, current_page, vec_len, vec_start); len = bio_add_page(bio, page, vec_len, vec_start); - if (len != vec_len) { - mlog(ML_ERROR, "Adding page[%d] to bio failed, " - "page %p, len %d, vec_len %u, vec_start %u, " - "bi_sector %llu\n", current_page, page, len, - vec_len, vec_start, - (unsigned long long)bio->bi_iter.bi_sector); - bio_put(bio); - bio = ERR_PTR(-EIO); - return bio; - } + if (len != vec_len) break; cs += vec_len / (PAGE_SIZE/spp); vec_start = 0; diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c index 6e3134e6d98a..1b5707c44c3f 100644 --- a/fs/orangefs/namei.c +++ b/fs/orangefs/namei.c @@ -75,8 +75,7 @@ static int orangefs_create(struct inode *dir, get_khandle_from_ino(inode), dentry); - d_instantiate(dentry, inode); - unlock_new_inode(inode); + d_instantiate_new(dentry, inode); orangefs_set_timeout(dentry); ORANGEFS_I(inode)->getattr_time = jiffies - 1; ORANGEFS_I(inode)->getattr_mask = STATX_BASIC_STATS; @@ -332,8 +331,7 @@ static int orangefs_symlink(struct inode *dir, "Assigned symlink inode new number of %pU\n", get_khandle_from_ino(inode)); - d_instantiate(dentry, inode); - unlock_new_inode(inode); + d_instantiate_new(dentry, inode); orangefs_set_timeout(dentry); ORANGEFS_I(inode)->getattr_time = jiffies - 1; ORANGEFS_I(inode)->getattr_mask = STATX_BASIC_STATS; @@ -402,8 +400,7 @@ static int orangefs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode "Assigned dir inode new number of %pU\n", get_khandle_from_ino(inode)); - d_instantiate(dentry, inode); - unlock_new_inode(inode); + d_instantiate_new(dentry, inode); orangefs_set_timeout(dentry); ORANGEFS_I(inode)->getattr_time = jiffies - 1; ORANGEFS_I(inode)->getattr_mask = STATX_BASIC_STATS; diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index bd39a998843d..5089dac02660 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -687,8 +687,7 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, umode_t mod reiserfs_update_inode_transaction(inode); reiserfs_update_inode_transaction(dir); - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); retval = journal_end(&th); out_failed: @@ -771,8 +770,7 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode goto out_failed; } - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); retval = journal_end(&th); out_failed: @@ -871,8 +869,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode /* the above add_entry did not update dir's stat data */ reiserfs_update_sd(&th, dir); - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); retval = journal_end(&th); out_failed: reiserfs_write_unlock(dir->i_sb); @@ -1187,8 +1184,7 @@ static int reiserfs_symlink(struct inode *parent_dir, goto out_failed; } - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); retval = journal_end(&th); out_failed: reiserfs_write_unlock(parent_dir->i_sb); diff --git a/fs/seq_file.c b/fs/seq_file.c index c6c27f1f9c98..4cc090b50cc5 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -709,11 +709,6 @@ void seq_put_decimal_ull_width(struct seq_file *m, const char *delimiter, if (m->count + width >= m->size) goto overflow; - if (num < 10) { - m->buf[m->count++] = num + '0'; - return; - } - len = num_to_str(m->buf + m->count, m->size - m->count, num, width); if (!len) goto overflow; diff --git a/fs/super.c b/fs/super.c index 122c402049a2..4b5b562176d0 100644 --- a/fs/super.c +++ b/fs/super.c @@ -121,13 +121,23 @@ static unsigned long super_cache_count(struct shrinker *shrink, sb = container_of(shrink, struct super_block, s_shrink); /* - * Don't call trylock_super as it is a potential - * scalability bottleneck. The counts could get updated - * between super_cache_count and super_cache_scan anyway. - * Call to super_cache_count with shrinker_rwsem held - * ensures the safety of call to list_lru_shrink_count() and - * s_op->nr_cached_objects(). + * We don't call trylock_super() here as it is a scalability bottleneck, + * so we're exposed to partial setup state. The shrinker rwsem does not + * protect filesystem operations backing list_lru_shrink_count() or + * s_op->nr_cached_objects(). Counts can change between + * super_cache_count and super_cache_scan, so we really don't need locks + * here. + * + * However, if we are currently mounting the superblock, the underlying + * filesystem might be in a state of partial construction and hence it + * is dangerous to access it. trylock_super() uses a SB_BORN check to + * avoid this situation, so do the same here. The memory barrier is + * matched with the one in mount_fs() as we don't hold locks here. */ + if (!(sb->s_flags & SB_BORN)) + return 0; + smp_rmb(); + if (sb->s_op && sb->s_op->nr_cached_objects) total_objects = sb->s_op->nr_cached_objects(sb, sc); @@ -1272,6 +1282,14 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data) sb = root->d_sb; BUG_ON(!sb); WARN_ON(!sb->s_bdi); + + /* + * Write barrier is for super_cache_count(). We place it before setting + * SB_BORN as the data dependency between the two functions is the + * superblock structure contents that we just set up, not the SB_BORN + * flag. + */ + smp_wmb(); sb->s_flags |= SB_BORN; error = security_sb_kern_mount(sb, flags, secdata); diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index b428d317ae92..92682fcc41f6 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -25,7 +25,7 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type, { struct dentry *root; void *ns; - bool new_sb; + bool new_sb = false; if (!(flags & SB_KERNMOUNT)) { if (!kobj_ns_current_may_mount(KOBJ_NS_TYPE_NET)) @@ -35,9 +35,9 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type, ns = kobj_ns_grab_current(KOBJ_NS_TYPE_NET); root = kernfs_mount_ns(fs_type, flags, sysfs_root, SYSFS_MAGIC, &new_sb, ns); - if (IS_ERR(root) || !new_sb) + if (!new_sb) kobj_ns_drop(KOBJ_NS_TYPE_NET, ns); - else if (new_sb) + else if (!IS_ERR(root)) root->d_sb->s_iflags |= SB_I_USERNS_VISIBLE; return root; diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 0458dd47e105..c586026508db 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -622,8 +622,7 @@ static int udf_add_nondir(struct dentry *dentry, struct inode *inode) if (fibh.sbh != fibh.ebh) brelse(fibh.ebh); brelse(fibh.sbh); - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); return 0; } @@ -733,8 +732,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) inc_nlink(dir); dir->i_ctime = dir->i_mtime = current_time(dir); mark_inode_dirty(dir); - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); if (fibh.sbh != fibh.ebh) brelse(fibh.ebh); brelse(fibh.sbh); diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index 32545cd00ceb..d5f43ba76c59 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -39,8 +39,7 @@ static inline int ufs_add_nondir(struct dentry *dentry, struct inode *inode) { int err = ufs_add_link(dentry, inode); if (!err) { - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); return 0; } inode_dec_link_count(inode); @@ -193,8 +192,7 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) if (err) goto out_fail; - unlock_new_inode(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); return 0; out_fail: diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 65cfc2f59db9..df36b1b08af0 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -142,7 +142,7 @@ struct bpf_verifier_state_list { struct bpf_insn_aux_data { union { enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ - struct bpf_map *map_ptr; /* pointer for call insn into lookup_elem */ + unsigned long map_state; /* pointer/poison value for maps */ s32 call_imm; /* saved imm field of call insn */ }; int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 94acbde17bb1..66c6e17e61e5 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -224,6 +224,7 @@ extern seqlock_t rename_lock; * These are the low-level FS interfaces to the dcache.. */ extern void d_instantiate(struct dentry *, struct inode *); +extern void d_instantiate_new(struct dentry *, struct inode *); extern struct dentry * d_instantiate_unique(struct dentry *, struct inode *); extern struct dentry * d_instantiate_anon(struct dentry *, struct inode *); extern int d_instantiate_no_diralias(struct dentry *, struct inode *); diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 1a4582b44d32..fc5ab85278d5 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -464,7 +464,7 @@ static inline struct page * __alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order) { VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES); - VM_WARN_ON(!node_online(nid)); + VM_WARN_ON((gfp_mask & __GFP_THISNODE) && !node_online(nid)); return __alloc_pages(gfp_mask, order, nid); } diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index e0e49b5b1ee1..2b0265265c28 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -216,6 +216,9 @@ void put_online_mems(void); void mem_hotplug_begin(void); void mem_hotplug_done(void); +extern void set_zone_contiguous(struct zone *zone); +extern void clear_zone_contiguous(struct zone *zone); + #else /* ! CONFIG_MEMORY_HOTPLUG */ #define pfn_to_online_page(pfn) \ ({ \ diff --git a/include/linux/mm.h b/include/linux/mm.h index c6fa9a255dbf..02a616e2f17d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2109,7 +2109,6 @@ extern void setup_per_cpu_pageset(void); extern void zone_pcp_update(struct zone *zone); extern void zone_pcp_reset(struct zone *zone); -extern void setup_zone_pageset(struct zone *zone); /* page_alloc.c */ extern int min_free_kbytes; diff --git a/include/linux/node.h b/include/linux/node.h index 41f171861dcc..6d336e38d155 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -32,9 +32,11 @@ extern struct node *node_devices[]; typedef void (*node_registration_func_t)(struct node *); #if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_NUMA) -extern int link_mem_sections(int nid, unsigned long start_pfn, unsigned long nr_pages); +extern int link_mem_sections(int nid, unsigned long start_pfn, + unsigned long nr_pages, bool check_nid); #else -static inline int link_mem_sections(int nid, unsigned long start_pfn, unsigned long nr_pages) +static inline int link_mem_sections(int nid, unsigned long start_pfn, + unsigned long nr_pages, bool check_nid) { return 0; } @@ -57,7 +59,7 @@ static inline int register_one_node(int nid) if (error) return error; /* link memory sections under this node */ - error = link_mem_sections(nid, pgdat->node_start_pfn, pgdat->node_spanned_pages); + error = link_mem_sections(nid, pgdat->node_start_pfn, pgdat->node_spanned_pages, true); } return error; diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 28b996d63490..35498e613ff5 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -103,6 +103,8 @@ void sctp_addr_wq_mgmt(struct net *, struct sctp_sockaddr_entry *, int); /* * sctp/socket.c */ +int sctp_inet_connect(struct socket *sock, struct sockaddr *uaddr, + int addr_len, int flags); int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb); int sctp_inet_listen(struct socket *sock, int backlog); void sctp_write_space(struct sock *sk); diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h index 23159dd5be18..a1fd63871d17 100644 --- a/include/rdma/ib_umem.h +++ b/include/rdma/ib_umem.h @@ -48,7 +48,6 @@ struct ib_umem { int writable; int hugetlb; struct work_struct work; - struct pid *pid; struct mm_struct *mm; unsigned long diff; struct ib_umem_odp *odp_data; diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index 4a4201d997a7..095383a4bd1a 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -411,13 +411,13 @@ static inline int uverbs_attr_get_enum_id(const struct uverbs_attr_bundle *attrs static inline void *uverbs_attr_get_obj(const struct uverbs_attr_bundle *attrs_bundle, u16 idx) { - struct ib_uobject *uobj = - uverbs_attr_get(attrs_bundle, idx)->obj_attr.uobject; + const struct uverbs_attr *attr; - if (IS_ERR(uobj)) - return uobj; + attr = uverbs_attr_get(attrs_bundle, idx); + if (IS_ERR(attr)) + return ERR_CAST(attr); - return uobj->object; + return attr->obj_attr.uobject->object; } static inline int uverbs_copy_to(const struct uverbs_attr_bundle *attrs_bundle, diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index bc01e06bc716..0be866c91f62 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -435,7 +435,9 @@ TRACE_EVENT(sched_pi_setprio, memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); __entry->pid = tsk->pid; __entry->oldprio = tsk->prio; - __entry->newprio = pi_task ? pi_task->prio : tsk->prio; + __entry->newprio = pi_task ? + min(tsk->normal_prio, pi_task->prio) : + tsk->normal_prio; /* XXX SCHED_DEADLINE bits missing */ ), diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 9c3630146cec..271b93783d28 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2698,7 +2698,7 @@ enum nl80211_attrs { #define NL80211_ATTR_KEYS NL80211_ATTR_KEYS #define NL80211_ATTR_FEATURE_FLAGS NL80211_ATTR_FEATURE_FLAGS -#define NL80211_WIPHY_NAME_MAXLEN 128 +#define NL80211_WIPHY_NAME_MAXLEN 64 #define NL80211_MAX_SUPP_RATES 32 #define NL80211_MAX_SUPP_HT_RATES 77 diff --git a/include/uapi/linux/ppp-ioctl.h b/include/uapi/linux/ppp-ioctl.h index b19a9c249b15..784c2e3e572e 100644 --- a/include/uapi/linux/ppp-ioctl.h +++ b/include/uapi/linux/ppp-ioctl.h @@ -106,7 +106,7 @@ struct pppol2tp_ioc_stats { #define PPPIOCGIDLE _IOR('t', 63, struct ppp_idle) /* get idle time */ #define PPPIOCNEWUNIT _IOWR('t', 62, int) /* create new ppp unit */ #define PPPIOCATTACH _IOW('t', 61, int) /* attach to ppp unit */ -#define PPPIOCDETACH _IOW('t', 60, int) /* detach from ppp unit/chan */ +#define PPPIOCDETACH _IOW('t', 60, int) /* obsolete, do not use */ #define PPPIOCSMRRU _IOW('t', 59, int) /* set multilink MRU */ #define PPPIOCCONNECT _IOW('t', 58, int) /* connect channel to unit */ #define PPPIOCDISCONN _IO('t', 57) /* disconnect channel */ diff --git a/init/main.c b/init/main.c index fd37315835b4..3b4ada11ed52 100644 --- a/init/main.c +++ b/init/main.c @@ -91,6 +91,7 @@ #include <linux/cache.h> #include <linux/rodata_test.h> #include <linux/jump_label.h> +#include <linux/mem_encrypt.h> #include <asm/io.h> #include <asm/bugs.h> diff --git a/ipc/shm.c b/ipc/shm.c index 3cf48988d68c..d73269381ec7 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -1363,14 +1363,17 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, if (addr) { if (addr & (shmlba - 1)) { - /* - * Round down to the nearest multiple of shmlba. - * For sane do_mmap_pgoff() parameters, avoid - * round downs that trigger nil-page and MAP_FIXED. - */ - if ((shmflg & SHM_RND) && addr >= shmlba) - addr &= ~(shmlba - 1); - else + if (shmflg & SHM_RND) { + addr &= ~(shmlba - 1); /* round down */ + + /* + * Ensure that the round-down is non-nil + * when remapping. This can happen for + * cases when addr < shmlba. + */ + if (!addr && (shmflg & SHM_REMAP)) + goto out; + } else #ifndef __ARCH_FORCE_SHMLBA if (addr & ~PAGE_MASK) #endif diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 2ce967a63ede..1904e814f282 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -156,7 +156,29 @@ struct bpf_verifier_stack_elem { #define BPF_COMPLEXITY_LIMIT_INSNS 131072 #define BPF_COMPLEXITY_LIMIT_STACK 1024 -#define BPF_MAP_PTR_POISON ((void *)0xeB9F + POISON_POINTER_DELTA) +#define BPF_MAP_PTR_UNPRIV 1UL +#define BPF_MAP_PTR_POISON ((void *)((0xeB9FUL << 1) + \ + POISON_POINTER_DELTA)) +#define BPF_MAP_PTR(X) ((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV)) + +static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux) +{ + return BPF_MAP_PTR(aux->map_state) == BPF_MAP_PTR_POISON; +} + +static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux) +{ + return aux->map_state & BPF_MAP_PTR_UNPRIV; +} + +static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux, + const struct bpf_map *map, bool unpriv) +{ + BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV); + unpriv |= bpf_map_ptr_unpriv(aux); + aux->map_state = (unsigned long)map | + (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL); +} struct bpf_call_arg_meta { struct bpf_map *map_ptr; @@ -2358,6 +2380,29 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) return 0; } +static int +record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, + int func_id, int insn_idx) +{ + struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx]; + + if (func_id != BPF_FUNC_tail_call && + func_id != BPF_FUNC_map_lookup_elem) + return 0; + if (meta->map_ptr == NULL) { + verbose(env, "kernel subsystem misconfigured verifier\n"); + return -EINVAL; + } + + if (!BPF_MAP_PTR(aux->map_state)) + bpf_map_ptr_store(aux, meta->map_ptr, + meta->map_ptr->unpriv_array); + else if (BPF_MAP_PTR(aux->map_state) != meta->map_ptr) + bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON, + meta->map_ptr->unpriv_array); + return 0; +} + static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx) { const struct bpf_func_proto *fn = NULL; @@ -2412,13 +2457,6 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta); if (err) return err; - if (func_id == BPF_FUNC_tail_call) { - if (meta.map_ptr == NULL) { - verbose(env, "verifier bug\n"); - return -EINVAL; - } - env->insn_aux_data[insn_idx].map_ptr = meta.map_ptr; - } err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta); if (err) return err; @@ -2429,6 +2467,10 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn if (err) return err; + err = record_func_map(env, &meta, func_id, insn_idx); + if (err) + return err; + /* Mark slots with STACK_MISC in case of raw mode, stack offset * is inferred from register state. */ @@ -2453,8 +2495,6 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn } else if (fn->ret_type == RET_VOID) { regs[BPF_REG_0].type = NOT_INIT; } else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL) { - struct bpf_insn_aux_data *insn_aux; - regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL; /* There is no offset yet applied, variable or fixed */ mark_reg_known_zero(env, regs, BPF_REG_0); @@ -2470,11 +2510,6 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn } regs[BPF_REG_0].map_ptr = meta.map_ptr; regs[BPF_REG_0].id = ++env->id_gen; - insn_aux = &env->insn_aux_data[insn_idx]; - if (!insn_aux->map_ptr) - insn_aux->map_ptr = meta.map_ptr; - else if (insn_aux->map_ptr != meta.map_ptr) - insn_aux->map_ptr = BPF_MAP_PTR_POISON; } else { verbose(env, "unknown return type %d of func %s#%d\n", fn->ret_type, func_id_name(func_id), func_id); @@ -5470,6 +5505,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) struct bpf_insn *insn = prog->insnsi; const struct bpf_func_proto *fn; const int insn_cnt = prog->len; + struct bpf_insn_aux_data *aux; struct bpf_insn insn_buf[16]; struct bpf_prog *new_prog; struct bpf_map *map_ptr; @@ -5544,19 +5580,22 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) insn->imm = 0; insn->code = BPF_JMP | BPF_TAIL_CALL; + aux = &env->insn_aux_data[i + delta]; + if (!bpf_map_ptr_unpriv(aux)) + continue; + /* instead of changing every JIT dealing with tail_call * emit two extra insns: * if (index >= max_entries) goto out; * index &= array->index_mask; * to avoid out-of-bounds cpu speculation */ - map_ptr = env->insn_aux_data[i + delta].map_ptr; - if (map_ptr == BPF_MAP_PTR_POISON) { + if (bpf_map_ptr_poisoned(aux)) { verbose(env, "tail_call abusing map_ptr\n"); return -EINVAL; } - if (!map_ptr->unpriv_array) - continue; + + map_ptr = BPF_MAP_PTR(aux->map_state); insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3, map_ptr->max_entries, 2); insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3, @@ -5580,9 +5619,12 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) */ if (prog->jit_requested && BITS_PER_LONG == 64 && insn->imm == BPF_FUNC_map_lookup_elem) { - map_ptr = env->insn_aux_data[i + delta].map_ptr; - if (map_ptr == BPF_MAP_PTR_POISON || - !map_ptr->ops->map_gen_lookup) + aux = &env->insn_aux_data[i + delta]; + if (bpf_map_ptr_poisoned(aux)) + goto patch_call_imm; + + map_ptr = BPF_MAP_PTR(aux->map_state); + if (!map_ptr->ops->map_gen_lookup) goto patch_call_imm; cnt = map_ptr->ops->map_gen_lookup(map_ptr, insn_buf); diff --git a/kernel/kthread.c b/kernel/kthread.c index 2017a39ab490..481951bf091d 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -193,7 +193,7 @@ EXPORT_SYMBOL_GPL(kthread_parkme); void kthread_park_complete(struct task_struct *k) { - complete(&to_kthread(k)->parked); + complete_all(&to_kthread(k)->parked); } static int kthread(void *_create) @@ -459,6 +459,7 @@ void kthread_unpark(struct task_struct *k) if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags)) __kthread_bind(k, kthread->cpu, TASK_PARKED); + reinit_completion(&kthread->parked); clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags); wake_up_state(k, TASK_PARKED); } @@ -483,9 +484,6 @@ int kthread_park(struct task_struct *k) if (WARN_ON(k->flags & PF_EXITING)) return -ENOSYS; - if (WARN_ON_ONCE(test_bit(KTHREAD_SHOULD_PARK, &kthread->flags))) - return -EBUSY; - set_bit(KTHREAD_SHOULD_PARK, &kthread->flags); if (k != current) { wake_up_process(k); diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index 64cc564f5255..61a1125c1ae4 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -1708,7 +1708,7 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att rcu_read_unlock(); if (rq && sched_debug_enabled) { - pr_info("span: %*pbl (max cpu_capacity = %lu)\n", + pr_info("root domain span: %*pbl (max cpu_capacity = %lu)\n", cpumask_pr_args(cpu_map), rq->rd->max_cpu_capacity); } diff --git a/kernel/sys.c b/kernel/sys.c index b0eee418ee0d..d1b2b8d934bb 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -71,6 +71,9 @@ #include <asm/io.h> #include <asm/unistd.h> +/* Hardening for Spectre-v1 */ +#include <linux/nospec.h> + #include "uid16.h" #ifndef SET_UNALIGN_CTL @@ -1453,6 +1456,7 @@ SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource, if (resource >= RLIM_NLIMITS) return -EINVAL; + resource = array_index_nospec(resource, RLIM_NLIMITS); task_lock(current->group_leader); x = current->signal->rlim[resource]; task_unlock(current->group_leader); @@ -1472,6 +1476,7 @@ COMPAT_SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource, if (resource >= RLIM_NLIMITS) return -EINVAL; + resource = array_index_nospec(resource, RLIM_NLIMITS); task_lock(current->group_leader); r = current->signal->rlim[resource]; task_unlock(current->group_leader); diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 970212670b6a..fdae394172fa 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1012,7 +1012,7 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i) } EXPORT_SYMBOL(iov_iter_gap_alignment); -static inline size_t __pipe_get_pages(struct iov_iter *i, +static inline ssize_t __pipe_get_pages(struct iov_iter *i, size_t maxsize, struct page **pages, int idx, @@ -1102,7 +1102,7 @@ static ssize_t pipe_get_pages_alloc(struct iov_iter *i, size_t *start) { struct page **p; - size_t n; + ssize_t n; int idx; int npages; diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 43e0cbedc3a0..a9e41aed6de4 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -2034,10 +2034,12 @@ void *radix_tree_delete_item(struct radix_tree_root *root, unsigned long index, void *item) { struct radix_tree_node *node = NULL; - void __rcu **slot; + void __rcu **slot = NULL; void *entry; entry = __radix_tree_lookup(root, index, &node, &slot); + if (!slot) + return NULL; if (!entry && (!is_idr(root) || node_tag_get(root, node, IDR_FREE, get_slot_offset(node, slot)))) return NULL; @@ -39,7 +39,6 @@ #include <trace/events/cma.h> #include "cma.h" -#include "internal.h" struct cma cma_areas[MAX_CMA_AREAS]; unsigned cma_area_count; @@ -110,25 +109,23 @@ static int __init cma_activate_area(struct cma *cma) if (!cma->bitmap) return -ENOMEM; + WARN_ON_ONCE(!pfn_valid(pfn)); + zone = page_zone(pfn_to_page(pfn)); + do { unsigned j; base_pfn = pfn; - if (!pfn_valid(base_pfn)) - goto err; - - zone = page_zone(pfn_to_page(base_pfn)); for (j = pageblock_nr_pages; j; --j, pfn++) { - if (!pfn_valid(pfn)) - goto err; - + WARN_ON_ONCE(!pfn_valid(pfn)); /* - * In init_cma_reserved_pageblock(), present_pages - * is adjusted with assumption that all pages in - * the pageblock come from a single zone. + * alloc_contig_range requires the pfn range + * specified to be in the same zone. Make this + * simple by forcing the entire CMA resv range + * to be in the same zone. */ if (page_zone(pfn_to_page(pfn)) != zone) - goto err; + goto not_in_zone; } init_cma_reserved_pageblock(pfn_to_page(base_pfn)); } while (--i); @@ -142,7 +139,7 @@ static int __init cma_activate_area(struct cma *cma) return 0; -err: +not_in_zone: pr_err("CMA area %s could not be activated\n", cma->name); kfree(cma->bitmap); cma->count = 0; @@ -152,41 +149,6 @@ err: static int __init cma_init_reserved_areas(void) { int i; - struct zone *zone; - pg_data_t *pgdat; - - if (!cma_area_count) - return 0; - - for_each_online_pgdat(pgdat) { - unsigned long start_pfn = UINT_MAX, end_pfn = 0; - - zone = &pgdat->node_zones[ZONE_MOVABLE]; - - /* - * In this case, we cannot adjust the zone range - * since it is now maximum node span and we don't - * know original zone range. - */ - if (populated_zone(zone)) - continue; - - for (i = 0; i < cma_area_count; i++) { - if (pfn_to_nid(cma_areas[i].base_pfn) != - pgdat->node_id) - continue; - - start_pfn = min(start_pfn, cma_areas[i].base_pfn); - end_pfn = max(end_pfn, cma_areas[i].base_pfn + - cma_areas[i].count); - } - - if (!end_pfn) - continue; - - zone->zone_start_pfn = start_pfn; - zone->spanned_pages = end_pfn - start_pfn; - } for (i = 0; i < cma_area_count; i++) { int ret = cma_activate_area(&cma_areas[i]); @@ -195,32 +157,9 @@ static int __init cma_init_reserved_areas(void) return ret; } - /* - * Reserved pages for ZONE_MOVABLE are now activated and - * this would change ZONE_MOVABLE's managed page counter and - * the other zones' present counter. We need to re-calculate - * various zone information that depends on this initialization. - */ - build_all_zonelists(NULL); - for_each_populated_zone(zone) { - if (zone_idx(zone) == ZONE_MOVABLE) { - zone_pcp_reset(zone); - setup_zone_pageset(zone); - } else - zone_pcp_update(zone); - - set_zone_contiguous(zone); - } - - /* - * We need to re-init per zone wmark by calling - * init_per_zone_wmark_min() but doesn't call here because it is - * registered on core_initcall and it will be called later than us. - */ - return 0; } -pure_initcall(cma_init_reserved_areas); +core_initcall(cma_init_reserved_areas); /** * cma_init_reserved_mem() - create custom contiguous area from reserved memory diff --git a/mm/compaction.c b/mm/compaction.c index 028b7210a669..29bd1df18b98 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -1450,12 +1450,14 @@ static enum compact_result __compaction_suitable(struct zone *zone, int order, * if compaction succeeds. * For costly orders, we require low watermark instead of min for * compaction to proceed to increase its chances. + * ALLOC_CMA is used, as pages in CMA pageblocks are considered + * suitable migration targets */ watermark = (order > PAGE_ALLOC_COSTLY_ORDER) ? low_wmark_pages(zone) : min_wmark_pages(zone); watermark += compact_gap(order); if (!__zone_watermark_ok(zone, 0, watermark, classzone_idx, - 0, wmark_target)) + ALLOC_CMA, wmark_target)) return COMPACT_SKIPPED; return COMPACT_CONTINUE; diff --git a/mm/internal.h b/mm/internal.h index 62d8c34e63d5..502d14189794 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -168,9 +168,6 @@ extern void post_alloc_hook(struct page *page, unsigned int order, gfp_t gfp_flags); extern int user_min_free_kbytes; -extern void set_zone_contiguous(struct zone *zone); -extern void clear_zone_contiguous(struct zone *zone); - #if defined CONFIG_COMPACTION || defined CONFIG_CMA /* @@ -498,6 +495,7 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone, #define ALLOC_HARDER 0x10 /* try to alloc harder */ #define ALLOC_HIGH 0x20 /* __GFP_HIGH set */ #define ALLOC_CPUSET 0x40 /* check for correct cpuset */ +#define ALLOC_CMA 0x80 /* allow allocations from CMA areas */ enum ttu_flags; struct tlbflush_unmap_batch; diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index bc0e68f7dc75..f185455b3406 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -792,6 +792,40 @@ DEFINE_ASAN_SET_SHADOW(f5); DEFINE_ASAN_SET_SHADOW(f8); #ifdef CONFIG_MEMORY_HOTPLUG +static bool shadow_mapped(unsigned long addr) +{ + pgd_t *pgd = pgd_offset_k(addr); + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + if (pgd_none(*pgd)) + return false; + p4d = p4d_offset(pgd, addr); + if (p4d_none(*p4d)) + return false; + pud = pud_offset(p4d, addr); + if (pud_none(*pud)) + return false; + + /* + * We can't use pud_large() or pud_huge(), the first one is + * arch-specific, the last one depends on HUGETLB_PAGE. So let's abuse + * pud_bad(), if pud is bad then it's bad because it's huge. + */ + if (pud_bad(*pud)) + return true; + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) + return false; + + if (pmd_bad(*pmd)) + return true; + pte = pte_offset_kernel(pmd, addr); + return !pte_none(*pte); +} + static int __meminit kasan_mem_notifier(struct notifier_block *nb, unsigned long action, void *data) { @@ -813,6 +847,14 @@ static int __meminit kasan_mem_notifier(struct notifier_block *nb, case MEM_GOING_ONLINE: { void *ret; + /* + * If shadow is mapped already than it must have been mapped + * during the boot. This could happen if we onlining previously + * offlined memory. + */ + if (shadow_mapped(shadow_start)) + return NOTIFY_OK; + ret = __vmalloc_node_range(shadow_size, PAGE_SIZE, shadow_start, shadow_end, GFP_KERNEL, PAGE_KERNEL, VM_NO_GUARD, @@ -824,8 +866,26 @@ static int __meminit kasan_mem_notifier(struct notifier_block *nb, kmemleak_ignore(ret); return NOTIFY_OK; } - case MEM_OFFLINE: - vfree((void *)shadow_start); + case MEM_CANCEL_ONLINE: + case MEM_OFFLINE: { + struct vm_struct *vm; + + /* + * shadow_start was either mapped during boot by kasan_init() + * or during memory online by __vmalloc_node_range(). + * In the latter case we can use vfree() to free shadow. + * Non-NULL result of the find_vm_area() will tell us if + * that was the second case. + * + * Currently it's not possible to free shadow mapped + * during boot by kasan_init(). It's because the code + * to do that hasn't been written yet. So we'll just + * leak the memory. + */ + vm = find_vm_area((void *)shadow_start); + if (vm) + vfree((void *)shadow_start); + } } return NOTIFY_OK; @@ -838,5 +898,5 @@ static int __init kasan_memhotplug_init(void) return 0; } -module_init(kasan_memhotplug_init); +core_initcall(kasan_memhotplug_init); #endif diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index f74826cdceea..25982467800b 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1158,7 +1158,7 @@ int __ref add_memory_resource(int nid, struct resource *res, bool online) * nodes have to go through register_node. * TODO clean up this mess. */ - ret = link_mem_sections(nid, start_pfn, nr_pages); + ret = link_mem_sections(nid, start_pfn, nr_pages, false); register_fail: /* * If sysfs file of new node can't create, cpu on the node diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 905db9d7962f..22320ea27489 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1743,38 +1743,16 @@ void __init page_alloc_init_late(void) } #ifdef CONFIG_CMA -static void __init adjust_present_page_count(struct page *page, long count) -{ - struct zone *zone = page_zone(page); - - /* We don't need to hold a lock since it is boot-up process */ - zone->present_pages += count; -} - /* Free whole pageblock and set its migration type to MIGRATE_CMA. */ void __init init_cma_reserved_pageblock(struct page *page) { unsigned i = pageblock_nr_pages; - unsigned long pfn = page_to_pfn(page); struct page *p = page; - int nid = page_to_nid(page); - - /* - * ZONE_MOVABLE will steal present pages from other zones by - * changing page links so page_zone() is changed. Before that, - * we need to adjust previous zone's page count first. - */ - adjust_present_page_count(page, -pageblock_nr_pages); do { __ClearPageReserved(p); set_page_count(p, 0); - - /* Steal pages from other zones */ - set_page_links(p, ZONE_MOVABLE, nid, pfn); - } while (++p, ++pfn, --i); - - adjust_present_page_count(page, pageblock_nr_pages); + } while (++p, --i); set_pageblock_migratetype(page, MIGRATE_CMA); @@ -2889,7 +2867,7 @@ int __isolate_free_page(struct page *page, unsigned int order) * exists. */ watermark = min_wmark_pages(zone) + (1UL << order); - if (!zone_watermark_ok(zone, 0, watermark, 0, 0)) + if (!zone_watermark_ok(zone, 0, watermark, 0, ALLOC_CMA)) return 0; __mod_zone_freepage_state(zone, -(1UL << order), mt); @@ -3165,6 +3143,12 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, } +#ifdef CONFIG_CMA + /* If allocation can't use CMA areas don't use free CMA pages */ + if (!(alloc_flags & ALLOC_CMA)) + free_pages -= zone_page_state(z, NR_FREE_CMA_PAGES); +#endif + /* * Check watermarks for an order-0 allocation request. If these * are not met, then a high-order request also cannot go ahead @@ -3191,8 +3175,10 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, } #ifdef CONFIG_CMA - if (!list_empty(&area->free_list[MIGRATE_CMA])) + if ((alloc_flags & ALLOC_CMA) && + !list_empty(&area->free_list[MIGRATE_CMA])) { return true; + } #endif if (alloc_harder && !list_empty(&area->free_list[MIGRATE_HIGHATOMIC])) @@ -3212,6 +3198,13 @@ static inline bool zone_watermark_fast(struct zone *z, unsigned int order, unsigned long mark, int classzone_idx, unsigned int alloc_flags) { long free_pages = zone_page_state(z, NR_FREE_PAGES); + long cma_pages = 0; + +#ifdef CONFIG_CMA + /* If allocation can't use CMA areas don't use free CMA pages */ + if (!(alloc_flags & ALLOC_CMA)) + cma_pages = zone_page_state(z, NR_FREE_CMA_PAGES); +#endif /* * Fast check for order-0 only. If this fails then the reserves @@ -3220,7 +3213,7 @@ static inline bool zone_watermark_fast(struct zone *z, unsigned int order, * the caller is !atomic then it'll uselessly search the free * list. That corner case is then slower but it is harmless. */ - if (!order && free_pages > mark + z->lowmem_reserve[classzone_idx]) + if (!order && (free_pages - cma_pages) > mark + z->lowmem_reserve[classzone_idx]) return true; return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags, @@ -3856,6 +3849,10 @@ gfp_to_alloc_flags(gfp_t gfp_mask) } else if (unlikely(rt_task(current)) && !in_interrupt()) alloc_flags |= ALLOC_HARDER; +#ifdef CONFIG_CMA + if (gfpflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE) + alloc_flags |= ALLOC_CMA; +#endif return alloc_flags; } @@ -4322,6 +4319,9 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order, if (should_fail_alloc_page(gfp_mask, order)) return false; + if (IS_ENABLED(CONFIG_CMA) && ac->migratetype == MIGRATE_MOVABLE) + *alloc_flags |= ALLOC_CMA; + return true; } @@ -6204,7 +6204,6 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat) { enum zone_type j; int nid = pgdat->node_id; - unsigned long node_end_pfn = 0; pgdat_resize_init(pgdat); #ifdef CONFIG_NUMA_BALANCING @@ -6232,13 +6231,9 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat) struct zone *zone = pgdat->node_zones + j; unsigned long size, realsize, freesize, memmap_pages; unsigned long zone_start_pfn = zone->zone_start_pfn; - unsigned long movable_size = 0; size = zone->spanned_pages; realsize = freesize = zone->present_pages; - if (zone_end_pfn(zone) > node_end_pfn) - node_end_pfn = zone_end_pfn(zone); - /* * Adjust freesize so that it accounts for how much memory @@ -6287,30 +6282,12 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat) zone_seqlock_init(zone); zone_pcp_init(zone); - /* - * The size of the CMA area is unknown now so we need to - * prepare the memory for the usemap at maximum. - */ - if (IS_ENABLED(CONFIG_CMA) && j == ZONE_MOVABLE && - pgdat->node_spanned_pages) { - movable_size = node_end_pfn - pgdat->node_start_pfn; - } - - if (!size && !movable_size) + if (!size) continue; set_pageblock_order(); - if (movable_size) { - zone->zone_start_pfn = pgdat->node_start_pfn; - zone->spanned_pages = movable_size; - setup_usemap(pgdat, zone, - pgdat->node_start_pfn, movable_size); - init_currently_empty_zone(zone, - pgdat->node_start_pfn, movable_size); - } else { - setup_usemap(pgdat, zone, zone_start_pfn, size); - init_currently_empty_zone(zone, zone_start_pfn, size); - } + setup_usemap(pgdat, zone, zone_start_pfn, size); + init_currently_empty_zone(zone, zone_start_pfn, size); memmap_init(size, nid, j, zone_start_pfn); } } @@ -7621,11 +7598,12 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count, unsigned long pfn, iter, found; /* - * For avoiding noise data, lru_add_drain_all() should be called - * If ZONE_MOVABLE, the zone never contains unmovable pages + * TODO we could make this much more efficient by not checking every + * page in the range if we know all of them are in MOVABLE_ZONE and + * that the movable zone guarantees that pages are migratable but + * the later is not the case right now unfortunatelly. E.g. movablecore + * can still lead to having bootmem allocations in zone_movable. */ - if (zone_idx(zone) == ZONE_MOVABLE) - return false; /* * CMA allocations (alloc_contig_range) really need to mark isolate @@ -7646,7 +7624,7 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count, page = pfn_to_page(check); if (PageReserved(page)) - return true; + goto unmovable; /* * Hugepages are not in LRU lists, but they're movable. @@ -7696,9 +7674,12 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count, * page at boot. */ if (found > count) - return true; + goto unmovable; } return false; +unmovable: + WARN_ON_ONCE(zone_idx(zone) == ZONE_MOVABLE); + return true; } bool is_pageblock_removable_nolock(struct page *page) @@ -7951,7 +7932,7 @@ void free_contig_range(unsigned long pfn, unsigned nr_pages) } #endif -#if defined CONFIG_MEMORY_HOTPLUG || defined CONFIG_CMA +#ifdef CONFIG_MEMORY_HOTPLUG /* * The zone indicated has a new number of managed_pages; batch sizes and percpu * page high values need to be recalulated. diff --git a/mm/swapfile.c b/mm/swapfile.c index cc2cf04d9018..78a015fcec3b 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -3112,6 +3112,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) unsigned long *frontswap_map = NULL; struct page *page = NULL; struct inode *inode = NULL; + bool inced_nr_rotate_swap = false; if (swap_flags & ~SWAP_FLAGS_VALID) return -EINVAL; @@ -3215,8 +3216,10 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) cluster = per_cpu_ptr(p->percpu_cluster, cpu); cluster_set_null(&cluster->index); } - } else + } else { atomic_inc(&nr_rotate_swap); + inced_nr_rotate_swap = true; + } error = swap_cgroup_swapon(p->type, maxpages); if (error) @@ -3307,6 +3310,8 @@ bad_swap: vfree(swap_map); kvfree(cluster_info); kvfree(frontswap_map); + if (inced_nr_rotate_swap) + atomic_dec(&nr_rotate_swap); if (swap_file) { if (inode && S_ISREG(inode->i_mode)) { inode_unlock(inode); diff --git a/net/9p/Kconfig b/net/9p/Kconfig index e6014e0e51f7..46c39f7da444 100644 --- a/net/9p/Kconfig +++ b/net/9p/Kconfig @@ -32,7 +32,7 @@ config NET_9P_XEN config NET_9P_RDMA - depends on INET && INFINIBAND && INFINIBAND_ADDR_TRANS + depends on INET && INFINIBAND_ADDR_TRANS tristate "9P RDMA Transport (Experimental)" help This builds support for an RDMA transport. diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index a11d3d89f012..a35f597e8c8b 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -1536,7 +1536,7 @@ out: if (!ret && primary_if) *primary_if = hard_iface; - else + else if (hard_iface) batadv_hardif_put(hard_iface); return ret; diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 0225616d5771..3986551397ca 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -862,7 +862,7 @@ batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node, struct batadv_orig_node_vlan *vlan; u8 *tt_change_ptr; - rcu_read_lock(); + spin_lock_bh(&orig_node->vlan_list_lock); hlist_for_each_entry_rcu(vlan, &orig_node->vlan_list, list) { num_vlan++; num_entries += atomic_read(&vlan->tt.num_entries); @@ -900,7 +900,7 @@ batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node, *tt_change = (struct batadv_tvlv_tt_change *)tt_change_ptr; out: - rcu_read_unlock(); + spin_unlock_bh(&orig_node->vlan_list_lock); return tvlv_len; } @@ -931,15 +931,20 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, struct batadv_tvlv_tt_vlan_data *tt_vlan; struct batadv_softif_vlan *vlan; u16 num_vlan = 0; - u16 num_entries = 0; + u16 vlan_entries = 0; + u16 total_entries = 0; u16 tvlv_len; u8 *tt_change_ptr; int change_offset; - rcu_read_lock(); + spin_lock_bh(&bat_priv->softif_vlan_list_lock); hlist_for_each_entry_rcu(vlan, &bat_priv->softif_vlan_list, list) { + vlan_entries = atomic_read(&vlan->tt.num_entries); + if (vlan_entries < 1) + continue; + num_vlan++; - num_entries += atomic_read(&vlan->tt.num_entries); + total_entries += vlan_entries; } change_offset = sizeof(**tt_data); @@ -947,7 +952,7 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, /* if tt_len is negative, allocate the space needed by the full table */ if (*tt_len < 0) - *tt_len = batadv_tt_len(num_entries); + *tt_len = batadv_tt_len(total_entries); tvlv_len = *tt_len; tvlv_len += change_offset; @@ -964,6 +969,10 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, tt_vlan = (struct batadv_tvlv_tt_vlan_data *)(*tt_data + 1); hlist_for_each_entry_rcu(vlan, &bat_priv->softif_vlan_list, list) { + vlan_entries = atomic_read(&vlan->tt.num_entries); + if (vlan_entries < 1) + continue; + tt_vlan->vid = htons(vlan->vid); tt_vlan->crc = htonl(vlan->tt.crc); @@ -974,7 +983,7 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, *tt_change = (struct batadv_tvlv_tt_change *)tt_change_ptr; out: - rcu_read_unlock(); + spin_unlock_bh(&bat_priv->softif_vlan_list_lock); return tvlv_len; } @@ -1538,6 +1547,8 @@ batadv_tt_global_orig_entry_find(const struct batadv_tt_global_entry *entry, * handled by a given originator * @entry: the TT global entry to check * @orig_node: the originator to search in the list + * @flags: a pointer to store TT flags for the given @entry received + * from @orig_node * * find out if an orig_node is already in the list of a tt_global_entry. * @@ -1545,7 +1556,8 @@ batadv_tt_global_orig_entry_find(const struct batadv_tt_global_entry *entry, */ static bool batadv_tt_global_entry_has_orig(const struct batadv_tt_global_entry *entry, - const struct batadv_orig_node *orig_node) + const struct batadv_orig_node *orig_node, + u8 *flags) { struct batadv_tt_orig_list_entry *orig_entry; bool found = false; @@ -1553,6 +1565,10 @@ batadv_tt_global_entry_has_orig(const struct batadv_tt_global_entry *entry, orig_entry = batadv_tt_global_orig_entry_find(entry, orig_node); if (orig_entry) { found = true; + + if (flags) + *flags = orig_entry->flags; + batadv_tt_orig_list_entry_put(orig_entry); } @@ -1731,7 +1747,7 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv, if (!(common->flags & BATADV_TT_CLIENT_TEMP)) goto out; if (batadv_tt_global_entry_has_orig(tt_global_entry, - orig_node)) + orig_node, NULL)) goto out_remove; batadv_tt_global_del_orig_list(tt_global_entry); goto add_orig_entry; @@ -2880,23 +2896,46 @@ unlock: } /** - * batadv_tt_local_valid() - verify that given tt entry is a valid one + * batadv_tt_local_valid() - verify local tt entry and get flags * @entry_ptr: to be checked local tt entry * @data_ptr: not used but definition required to satisfy the callback prototype + * @flags: a pointer to store TT flags for this client to + * + * Checks the validity of the given local TT entry. If it is, then the provided + * flags pointer is updated. * * Return: true if the entry is a valid, false otherwise. */ -static bool batadv_tt_local_valid(const void *entry_ptr, const void *data_ptr) +static bool batadv_tt_local_valid(const void *entry_ptr, + const void *data_ptr, + u8 *flags) { const struct batadv_tt_common_entry *tt_common_entry = entry_ptr; if (tt_common_entry->flags & BATADV_TT_CLIENT_NEW) return false; + + if (flags) + *flags = tt_common_entry->flags; + return true; } +/** + * batadv_tt_global_valid() - verify global tt entry and get flags + * @entry_ptr: to be checked global tt entry + * @data_ptr: an orig_node object (may be NULL) + * @flags: a pointer to store TT flags for this client to + * + * Checks the validity of the given global TT entry. If it is, then the provided + * flags pointer is updated either with the common (summed) TT flags if data_ptr + * is NULL or the specific, per originator TT flags otherwise. + * + * Return: true if the entry is a valid, false otherwise. + */ static bool batadv_tt_global_valid(const void *entry_ptr, - const void *data_ptr) + const void *data_ptr, + u8 *flags) { const struct batadv_tt_common_entry *tt_common_entry = entry_ptr; const struct batadv_tt_global_entry *tt_global_entry; @@ -2910,7 +2949,8 @@ static bool batadv_tt_global_valid(const void *entry_ptr, struct batadv_tt_global_entry, common); - return batadv_tt_global_entry_has_orig(tt_global_entry, orig_node); + return batadv_tt_global_entry_has_orig(tt_global_entry, orig_node, + flags); } /** @@ -2920,25 +2960,34 @@ static bool batadv_tt_global_valid(const void *entry_ptr, * @hash: hash table containing the tt entries * @tt_len: expected tvlv tt data buffer length in number of bytes * @tvlv_buff: pointer to the buffer to fill with the TT data - * @valid_cb: function to filter tt change entries + * @valid_cb: function to filter tt change entries and to return TT flags * @cb_data: data passed to the filter function as argument + * + * Fills the tvlv buff with the tt entries from the specified hash. If valid_cb + * is not provided then this becomes a no-op. */ static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv, struct batadv_hashtable *hash, void *tvlv_buff, u16 tt_len, bool (*valid_cb)(const void *, - const void *), + const void *, + u8 *flags), void *cb_data) { struct batadv_tt_common_entry *tt_common_entry; struct batadv_tvlv_tt_change *tt_change; struct hlist_head *head; u16 tt_tot, tt_num_entries = 0; + u8 flags; + bool ret; u32 i; tt_tot = batadv_tt_entries(tt_len); tt_change = (struct batadv_tvlv_tt_change *)tvlv_buff; + if (!valid_cb) + return; + rcu_read_lock(); for (i = 0; i < hash->size; i++) { head = &hash->table[i]; @@ -2948,11 +2997,12 @@ static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv, if (tt_tot == tt_num_entries) break; - if ((valid_cb) && (!valid_cb(tt_common_entry, cb_data))) + ret = valid_cb(tt_common_entry, cb_data, &flags); + if (!ret) continue; ether_addr_copy(tt_change->addr, tt_common_entry->addr); - tt_change->flags = tt_common_entry->flags; + tt_change->flags = flags; tt_change->vid = htons(tt_common_entry->vid); memset(tt_change->reserved, 0, sizeof(tt_change->reserved)); diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 84cd4e3fd01b..0d56e36a6db7 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -283,9 +283,7 @@ int dccp_disconnect(struct sock *sk, int flags) dccp_clear_xmit_timers(sk); ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk); - ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk); dp->dccps_hc_rx_ccid = NULL; - dp->dccps_hc_tx_ccid = NULL; __skb_queue_purge(&sk->sk_receive_queue); __skb_queue_purge(&sk->sk_write_queue); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 4d622112bf95..e66172aaf241 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -649,6 +649,7 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = { [RTA_ENCAP] = { .type = NLA_NESTED }, [RTA_UID] = { .type = NLA_U32 }, [RTA_MARK] = { .type = NLA_U32 }, + [RTA_TABLE] = { .type = NLA_U32 }, }; static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 5ad2d8ed3a3f..57bbb060faaf 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -505,8 +505,6 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) int err; int copied; - WARN_ON_ONCE(sk->sk_family == AF_INET6); - err = -EAGAIN; skb = sock_dequeue_err_skb(sk); if (!skb) diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c index 4fe97723b53f..30221701614c 100644 --- a/net/ipv4/ipmr_base.c +++ b/net/ipv4/ipmr_base.c @@ -43,7 +43,10 @@ mr_table_alloc(struct net *net, u32 id, write_pnet(&mrt->net, net); mrt->ops = *ops; - rhltable_init(&mrt->mfc_hash, mrt->ops.rht_params); + if (rhltable_init(&mrt->mfc_hash, mrt->ops.rht_params)) { + kfree(mrt); + return NULL; + } INIT_LIST_HEAD(&mrt->mfc_cache_list); INIT_LIST_HEAD(&mrt->mfc_unres_queue); diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 0f6c9ca59062..5b5b0f95ffd1 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -401,7 +401,7 @@ u32 mesh_plink_deactivate(struct sta_info *sta) static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, - struct ieee802_11_elems *elems, bool insert) + struct ieee802_11_elems *elems) { struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband; @@ -447,7 +447,7 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata, sta->sta.bandwidth = IEEE80211_STA_RX_BW_20; } - if (insert) + if (!test_sta_flag(sta, WLAN_STA_RATE_CONTROL)) rate_control_rate_init(sta); else rate_control_rate_update(local, sband, sta, changed); @@ -551,7 +551,7 @@ mesh_sta_info_get(struct ieee80211_sub_if_data *sdata, rcu_read_lock(); sta = sta_info_get(sdata, addr); if (sta) { - mesh_sta_info_init(sdata, sta, elems, false); + mesh_sta_info_init(sdata, sta, elems); } else { rcu_read_unlock(); /* can't run atomic */ @@ -561,7 +561,7 @@ mesh_sta_info_get(struct ieee80211_sub_if_data *sdata, return NULL; } - mesh_sta_info_init(sdata, sta, elems, true); + mesh_sta_info_init(sdata, sta, elems); if (sta_info_insert_rcu(sta)) return NULL; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index e9422fe45179..acb7b86574cd 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2911,7 +2911,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) if (unlikely(offset < 0)) goto out_free; } else if (reserve) { - skb_push(skb, reserve); + skb_reserve(skb, -reserve); } /* Returns -EFAULT on error */ diff --git a/net/rds/Kconfig b/net/rds/Kconfig index bffde4b46c5d..1a31502ee7db 100644 --- a/net/rds/Kconfig +++ b/net/rds/Kconfig @@ -8,7 +8,7 @@ config RDS config RDS_RDMA tristate "RDS over Infiniband" - depends on RDS && INFINIBAND && INFINIBAND_ADDR_TRANS + depends on RDS && INFINIBAND_ADDR_TRANS ---help--- Allow RDS to use Infiniband as a transport. This transport supports RDMA operations. diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 963e4bf0aab8..a57e112d9b3e 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1588,7 +1588,7 @@ int tc_setup_cb_call(struct tcf_block *block, struct tcf_exts *exts, return ret; ok_count = ret; - if (!exts) + if (!exts || ok_count) return ok_count; ret = tc_exts_setup_cb_egdev_call(exts, type, type_data, err_stop); if (ret < 0) diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 42247110d842..0cd2e764f47f 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -1006,7 +1006,7 @@ static const struct proto_ops inet6_seqpacket_ops = { .owner = THIS_MODULE, .release = inet6_release, .bind = inet6_bind, - .connect = inet_dgram_connect, + .connect = sctp_inet_connect, .socketpair = sock_no_socketpair, .accept = inet_accept, .getname = sctp_getname, diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index d685f8456762..6bf0a9971888 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1012,7 +1012,7 @@ static const struct proto_ops inet_seqpacket_ops = { .owner = THIS_MODULE, .release = inet_release, /* Needs to be wrapped... */ .bind = inet_bind, - .connect = inet_dgram_connect, + .connect = sctp_inet_connect, .socketpair = sock_no_socketpair, .accept = inet_accept, .getname = inet_getname, /* Semantics are different. */ diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 80835ac26d2c..ae7e7c606f72 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1086,7 +1086,7 @@ out: */ static int __sctp_connect(struct sock *sk, struct sockaddr *kaddrs, - int addrs_size, + int addrs_size, int flags, sctp_assoc_t *assoc_id) { struct net *net = sock_net(sk); @@ -1104,7 +1104,6 @@ static int __sctp_connect(struct sock *sk, union sctp_addr *sa_addr = NULL; void *addr_buf; unsigned short port; - unsigned int f_flags = 0; sp = sctp_sk(sk); ep = sp->ep; @@ -1254,13 +1253,7 @@ static int __sctp_connect(struct sock *sk, sp->pf->to_sk_daddr(sa_addr, sk); sk->sk_err = 0; - /* in-kernel sockets don't generally have a file allocated to them - * if all they do is call sock_create_kern(). - */ - if (sk->sk_socket->file) - f_flags = sk->sk_socket->file->f_flags; - - timeo = sock_sndtimeo(sk, f_flags & O_NONBLOCK); + timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); if (assoc_id) *assoc_id = asoc->assoc_id; @@ -1348,7 +1341,7 @@ static int __sctp_setsockopt_connectx(struct sock *sk, sctp_assoc_t *assoc_id) { struct sockaddr *kaddrs; - int err = 0; + int err = 0, flags = 0; pr_debug("%s: sk:%p addrs:%p addrs_size:%d\n", __func__, sk, addrs, addrs_size); @@ -1367,7 +1360,13 @@ static int __sctp_setsockopt_connectx(struct sock *sk, if (err) goto out_free; - err = __sctp_connect(sk, kaddrs, addrs_size, assoc_id); + /* in-kernel sockets don't generally have a file allocated to them + * if all they do is call sock_create_kern(). + */ + if (sk->sk_socket->file) + flags = sk->sk_socket->file->f_flags; + + err = __sctp_connect(sk, kaddrs, addrs_size, flags, assoc_id); out_free: kvfree(kaddrs); @@ -4397,16 +4396,26 @@ out_nounlock: * len: the size of the address. */ static int sctp_connect(struct sock *sk, struct sockaddr *addr, - int addr_len) + int addr_len, int flags) { - int err = 0; + struct inet_sock *inet = inet_sk(sk); struct sctp_af *af; + int err = 0; lock_sock(sk); pr_debug("%s: sk:%p, sockaddr:%p, addr_len:%d\n", __func__, sk, addr, addr_len); + /* We may need to bind the socket. */ + if (!inet->inet_num) { + if (sk->sk_prot->get_port(sk, 0)) { + release_sock(sk); + return -EAGAIN; + } + inet->inet_sport = htons(inet->inet_num); + } + /* Validate addr_len before calling common connect/connectx routine. */ af = sctp_get_af_specific(addr->sa_family); if (!af || addr_len < af->sockaddr_len) { @@ -4415,13 +4424,25 @@ static int sctp_connect(struct sock *sk, struct sockaddr *addr, /* Pass correct addr len to common routine (so it knows there * is only one address being passed. */ - err = __sctp_connect(sk, addr, af->sockaddr_len, NULL); + err = __sctp_connect(sk, addr, af->sockaddr_len, flags, NULL); } release_sock(sk); return err; } +int sctp_inet_connect(struct socket *sock, struct sockaddr *uaddr, + int addr_len, int flags) +{ + if (addr_len < sizeof(uaddr->sa_family)) + return -EINVAL; + + if (uaddr->sa_family == AF_UNSPEC) + return -EOPNOTSUPP; + + return sctp_connect(sock->sk, uaddr, addr_len, flags); +} + /* FIXME: Write comments. */ static int sctp_disconnect(struct sock *sk, int flags) { @@ -8724,7 +8745,6 @@ struct proto sctp_prot = { .name = "SCTP", .owner = THIS_MODULE, .close = sctp_close, - .connect = sctp_connect, .disconnect = sctp_disconnect, .accept = sctp_accept, .ioctl = sctp_ioctl, @@ -8767,7 +8787,6 @@ struct proto sctpv6_prot = { .name = "SCTPv6", .owner = THIS_MODULE, .close = sctp_close, - .connect = sctp_connect, .disconnect = sctp_disconnect, .accept = sctp_accept, .ioctl = sctp_ioctl, diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig index ac09ca803296..6358e5271070 100644 --- a/net/sunrpc/Kconfig +++ b/net/sunrpc/Kconfig @@ -50,7 +50,7 @@ config SUNRPC_DEBUG config SUNRPC_XPRT_RDMA tristate "RPC-over-RDMA transport" - depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS + depends on SUNRPC && INFINIBAND_ADDR_TRANS default SUNRPC && INFINIBAND select SG_POOL help diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index a052693c2e85..7c5135a92d76 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -15555,7 +15555,8 @@ void cfg80211_ft_event(struct net_device *netdev, if (!ft_event->target_ap) return; - msg = nlmsg_new(100 + ft_event->ric_ies_len, GFP_KERNEL); + msg = nlmsg_new(100 + ft_event->ies_len + ft_event->ric_ies_len, + GFP_KERNEL); if (!msg) return; diff --git a/net/wireless/reg.c b/net/wireless/reg.c index ac3e12c32aa3..5fcec5c94eb7 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -916,6 +916,9 @@ int reg_query_regdb_wmm(char *alpha2, int freq, u32 *dbptr, const struct fwdb_header *hdr = regdb; const struct fwdb_country *country; + if (!regdb) + return -ENODATA; + if (IS_ERR(regdb)) return PTR_ERR(regdb); diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index e16d6713f236..2d42eb9cd1a5 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -5041,7 +5041,7 @@ sub process { $tmp_stmt =~ s/\b(typeof|__typeof__|__builtin\w+|typecheck\s*\(\s*$Type\s*,|\#+)\s*\(*\s*$arg\s*\)*\b//g; $tmp_stmt =~ s/\#+\s*$arg\b//g; $tmp_stmt =~ s/\b$arg\s*\#\#//g; - my $use_cnt = $tmp_stmt =~ s/\b$arg\b//g; + my $use_cnt = () = $tmp_stmt =~ /\b$arg\b/g; if ($use_cnt > 1) { CHK("MACRO_ARG_REUSE", "Macro argument reuse '$arg' - possible side-effects?\n" . "$herectx"); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index be5817df0a9d..179dd20bec0a 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -1568,8 +1568,15 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent /* Called from d_instantiate or d_splice_alias. */ dentry = dget(opt_dentry); } else { - /* Called from selinux_complete_init, try to find a dentry. */ + /* + * Called from selinux_complete_init, try to find a dentry. + * Some filesystems really want a connected one, so try + * that first. We could split SECURITY_FS_USE_XATTR in + * two, depending upon that... + */ dentry = d_find_alias(inode); + if (!dentry) + dentry = d_find_any_alias(inode); } if (!dentry) { /* @@ -1674,14 +1681,19 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent if ((sbsec->flags & SE_SBGENFS) && !S_ISLNK(inode->i_mode)) { /* We must have a dentry to determine the label on * procfs inodes */ - if (opt_dentry) + if (opt_dentry) { /* Called from d_instantiate or * d_splice_alias. */ dentry = dget(opt_dentry); - else + } else { /* Called from selinux_complete_init, try to - * find a dentry. */ + * find a dentry. Some filesystems really want + * a connected one, so try that first. + */ dentry = d_find_alias(inode); + if (!dentry) + dentry = d_find_any_alias(inode); + } /* * This can be hit on boot when a file is accessed * before the policy is loaded. When we load policy we diff --git a/sound/core/timer.c b/sound/core/timer.c index dc87728c5b74..0ddcae495838 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -592,7 +592,7 @@ static int snd_timer_stop1(struct snd_timer_instance *timeri, bool stop) else timeri->flags |= SNDRV_TIMER_IFLG_PAUSED; snd_timer_notify1(timeri, stop ? SNDRV_TIMER_EVENT_STOP : - SNDRV_TIMER_EVENT_CONTINUE); + SNDRV_TIMER_EVENT_PAUSE); unlock: spin_unlock_irqrestore(&timer->lock, flags); return result; @@ -614,7 +614,7 @@ static int snd_timer_stop_slave(struct snd_timer_instance *timeri, bool stop) list_del_init(&timeri->ack_list); list_del_init(&timeri->active_list); snd_timer_notify1(timeri, stop ? SNDRV_TIMER_EVENT_STOP : - SNDRV_TIMER_EVENT_CONTINUE); + SNDRV_TIMER_EVENT_PAUSE); spin_unlock(&timeri->timer->lock); } spin_unlock_irqrestore(&slave_active_lock, flags); diff --git a/sound/pci/hda/hda_local.h b/sound/pci/hda/hda_local.h index 321e78baa63c..9bd935216c18 100644 --- a/sound/pci/hda/hda_local.h +++ b/sound/pci/hda/hda_local.h @@ -622,8 +622,10 @@ snd_hda_check_power_state(struct hda_codec *codec, hda_nid_t nid, { return snd_hdac_check_power_state(&codec->core, nid, target_state); } -static inline bool snd_hda_sync_power_state(struct hda_codec *codec, - hda_nid_t nid, unsigned int target_state) + +static inline unsigned int snd_hda_sync_power_state(struct hda_codec *codec, + hda_nid_t nid, + unsigned int target_state) { return snd_hdac_sync_power_state(&codec->core, nid, target_state); } diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c index 6c645eb77d42..ee820fcc29b0 100644 --- a/tools/testing/radix-tree/idr-test.c +++ b/tools/testing/radix-tree/idr-test.c @@ -252,6 +252,13 @@ void idr_checks(void) idr_remove(&idr, 3); idr_remove(&idr, 0); + assert(idr_alloc(&idr, DUMMY_PTR, 0, 0, GFP_KERNEL) == 0); + idr_remove(&idr, 1); + for (i = 1; i < RADIX_TREE_MAP_SIZE; i++) + assert(idr_alloc(&idr, DUMMY_PTR, 0, 0, GFP_KERNEL) == i); + idr_remove(&idr, 1 << 30); + idr_destroy(&idr); + for (i = INT_MAX - 3UL; i < INT_MAX + 1UL; i++) { struct item *item = item_create(i, 0); assert(idr_alloc(&idr, item, i, i + 10, GFP_KERNEL) == i); diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 983dd25d49f4..1eefe211a4a8 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -5,3 +5,5 @@ CONFIG_BPF_EVENTS=y CONFIG_TEST_BPF=m CONFIG_CGROUP_BPF=y CONFIG_NETDEVSIM=m +CONFIG_NET_CLS_ACT=y +CONFIG_NET_SCH_INGRESS=y diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 6a75a3ea44ad..7ba089b33e8b 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -7,3 +7,8 @@ CONFIG_NET_L3_MASTER_DEV=y CONFIG_IPV6=y CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_VETH=y +CONFIG_INET_XFRM_MODE_TUNNEL=y +CONFIG_NET_IPVTI=y +CONFIG_INET6_XFRM_MODE_TUNNEL=y +CONFIG_IPV6_VTI=y +CONFIG_DUMMY=y diff --git a/tools/testing/selftests/net/reuseport_bpf_numa.c b/tools/testing/selftests/net/reuseport_bpf_numa.c index 365c32e84189..c9f478b40996 100644 --- a/tools/testing/selftests/net/reuseport_bpf_numa.c +++ b/tools/testing/selftests/net/reuseport_bpf_numa.c @@ -23,6 +23,8 @@ #include <unistd.h> #include <numa.h> +#include "../kselftest.h" + static const int PORT = 8888; static void build_rcv_group(int *rcv_fd, size_t len, int family, int proto) @@ -229,7 +231,7 @@ int main(void) int *rcv_fd, nodes; if (numa_available() < 0) - error(1, errno, "no numa api support"); + ksft_exit_skip("no numa api support\n"); nodes = numa_max_node() + 1; |