73 files changed, 774 insertions, 400 deletions
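The first hunk below updates .mailmap, which git uses to fold older author identities into a canonical one. Two entry forms appear: "Proper Name <canonical@email> <commit@email>" matches commits by e-mail alone, while "Proper Name <canonical@email> Commit Name <commit@email>" matches only commits whose recorded name and e-mail both agree. A minimal illustration of the effect (the mailmap lines are from this diff; the described history is hypothetical):

    # .mailmap
    Marek Behún <kabel@kernel.org> <marek.behun@nic.cz>
    Marek Behún <kabel@kernel.org> Marek Behun <marek.behun@nic.cz>

    # A commit authored as "Marek Behun <marek.behun@nic.cz>" is then shown
    # by git log and git shortlog as "Marek Behún <kabel@kernel.org>".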
@@ -212,6 +212,8 @@ Manivannan Sadhasivam <mani@kernel.org> <manivannanece23@gmail.com> Manivannan Sadhasivam <mani@kernel.org> <manivannan.sadhasivam@linaro.org> Marcin Nowakowski <marcin.nowakowski@mips.com> <marcin.nowakowski@imgtec.com> Marc Zyngier <maz@kernel.org> <marc.zyngier@arm.com> +Marek Behún <kabel@kernel.org> <marek.behun@nic.cz> +Marek Behún <kabel@kernel.org> Marek Behun <marek.behun@nic.cz> Mark Brown <broonie@sirena.org.uk> Mark Starovoytov <mstarovo@pm.me> <mstarovoitov@marvell.com> Mark Yao <markyao0591@gmail.com> <mark.yao@rock-chips.com> diff --git a/MAINTAINERS b/MAINTAINERS index 8c5ee008301a..0cce91cd5624 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1816,7 +1816,7 @@ F: drivers/pinctrl/pinctrl-gemini.c F: drivers/rtc/rtc-ftrtc010.c ARM/CZ.NIC TURRIS SUPPORT -M: Marek Behun <kabel@kernel.org> +M: Marek Behún <kabel@kernel.org> S: Maintained W: https://www.turris.cz/ F: Documentation/ABI/testing/debugfs-moxtet @@ -7354,7 +7354,6 @@ F: drivers/net/ethernet/freescale/fs_enet/ F: include/linux/fs_enet_pd.h FREESCALE SOC SOUND DRIVERS -M: Timur Tabi <timur@kernel.org> M: Nicolin Chen <nicoleotsuka@gmail.com> M: Xiubo Li <Xiubo.Lee@gmail.com> R: Fabio Estevam <festevam@gmail.com> @@ -10946,7 +10945,7 @@ F: include/linux/mv643xx.h MARVELL MV88X3310 PHY DRIVER M: Russell King <linux@armlinux.org.uk> -M: Marek Behun <marek.behun@nic.cz> +M: Marek Behún <kabel@kernel.org> L: netdev@vger.kernel.org S: Maintained F: drivers/net/phy/marvell10g.c diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 1a5edf562e85..73ca7797b92f 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -545,9 +545,11 @@ void notrace cpu_init(void) * In Thumb-2, msr with an immediate value is not allowed. */ #ifdef CONFIG_THUMB2_KERNEL -#define PLC "r" +#define PLC_l "l" +#define PLC_r "r" #else -#define PLC "I" +#define PLC_l "I" +#define PLC_r "I" #endif /* @@ -569,15 +571,15 @@ void notrace cpu_init(void) "msr cpsr_c, %9" : : "r" (stk), - PLC (PSR_F_BIT | PSR_I_BIT | IRQ_MODE), + PLC_r (PSR_F_BIT | PSR_I_BIT | IRQ_MODE), "I" (offsetof(struct stack, irq[0])), - PLC (PSR_F_BIT | PSR_I_BIT | ABT_MODE), + PLC_r (PSR_F_BIT | PSR_I_BIT | ABT_MODE), "I" (offsetof(struct stack, abt[0])), - PLC (PSR_F_BIT | PSR_I_BIT | UND_MODE), + PLC_r (PSR_F_BIT | PSR_I_BIT | UND_MODE), "I" (offsetof(struct stack, und[0])), - PLC (PSR_F_BIT | PSR_I_BIT | FIQ_MODE), + PLC_r (PSR_F_BIT | PSR_I_BIT | FIQ_MODE), "I" (offsetof(struct stack, fiq[0])), - PLC (PSR_F_BIT | PSR_I_BIT | SVC_MODE) + PLC_l (PSR_F_BIT | PSR_I_BIT | SVC_MODE) : "r14"); #endif } diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c index 813b6e93dc83..c8841f476e91 100644 --- a/arch/s390/kvm/pv.c +++ b/arch/s390/kvm/pv.c @@ -140,7 +140,12 @@ static int kvm_s390_pv_alloc_vm(struct kvm *kvm) /* Allocate variable storage */ vlen = ALIGN(virt * ((npages * PAGE_SIZE) / HPAGE_SIZE), PAGE_SIZE); vlen += uv_info.guest_virt_base_stor_len; - kvm->arch.pv.stor_var = vzalloc(vlen); + /* + * The Create Secure Configuration Ultravisor Call does not support + * using large pages for the virtual memory area. + * This is a hardware limitation. + */ + kvm->arch.pv.stor_var = vmalloc_no_huge(vlen); if (!kvm->arch.pv.stor_var) goto out_err; return 0; diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 7b2542b13ebd..04bce95bc7e3 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -130,8 +130,8 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs) /* User code screwed up. 
*/ regs->ax = -EFAULT; - instrumentation_end(); local_irq_disable(); + instrumentation_end(); irqentry_exit_to_user_mode(regs); return false; } @@ -269,15 +269,16 @@ __visible noinstr void xen_pv_evtchn_do_upcall(struct pt_regs *regs) irqentry_state_t state = irqentry_enter(regs); bool inhcall; + instrumentation_begin(); run_sysvec_on_irqstack_cond(__xen_pv_evtchn_do_upcall, regs); inhcall = get_and_clear_inhcall(); if (inhcall && !WARN_ON_ONCE(state.exit_rcu)) { - instrumentation_begin(); irqentry_exit_cond_resched(); instrumentation_end(); restore_inhcall(inhcall); } else { + instrumentation_end(); irqentry_exit(regs, state); } } diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 4409d2cccfda..e8453de7a964 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -731,7 +731,8 @@ void reserve_lbr_buffers(void) if (!kmem_cache || cpuc->lbr_xsave) continue; - cpuc->lbr_xsave = kmem_cache_alloc_node(kmem_cache, GFP_KERNEL, + cpuc->lbr_xsave = kmem_cache_alloc_node(kmem_cache, + GFP_KERNEL | __GFP_ZERO, cpu_to_node(cpu)); } } diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index fdee23ea4e17..16bf4d4a8159 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -204,6 +204,14 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu) asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state.fxsave)); } +static inline void fxsave(struct fxregs_state *fx) +{ + if (IS_ENABLED(CONFIG_X86_32)) + asm volatile( "fxsave %[fx]" : [fx] "=m" (*fx)); + else + asm volatile("fxsaveq %[fx]" : [fx] "=m" (*fx)); +} + /* These macros all use (%edi)/(%rdi) as the single memory argument. */ #define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27" #define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37" @@ -272,28 +280,6 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu) * This function is called only during boot time when x86 caps are not set * up and alternative can not be used yet. */ -static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate) -{ - u64 mask = xfeatures_mask_all; - u32 lmask = mask; - u32 hmask = mask >> 32; - int err; - - WARN_ON(system_state != SYSTEM_BOOTING); - - if (boot_cpu_has(X86_FEATURE_XSAVES)) - XSTATE_OP(XSAVES, xstate, lmask, hmask, err); - else - XSTATE_OP(XSAVE, xstate, lmask, hmask, err); - - /* We should never fault when copying to a kernel buffer: */ - WARN_ON_FPU(err); -} - -/* - * This function is called only during boot time when x86 caps are not set - * up and alternative can not be used yet. - */ static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate) { u64 mask = -1; diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h index ca840fec7776..4bde0dc66100 100644 --- a/arch/x86/include/asm/page_64.h +++ b/arch/x86/include/asm/page_64.h @@ -75,7 +75,7 @@ void copy_page(void *to, void *from); * * With page table isolation enabled, we map the LDT in ... 
[stay tuned] */ -static inline unsigned long task_size_max(void) +static __always_inline unsigned long task_size_max(void) { unsigned long ret; diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index ec3ae3054792..b7b92cdf3add 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -221,28 +221,18 @@ sanitize_restored_user_xstate(union fpregs_state *state, if (use_xsave()) { /* - * Note: we don't need to zero the reserved bits in the - * xstate_header here because we either didn't copy them at all, - * or we checked earlier that they aren't set. + * Clear all feature bits which are not set in + * user_xfeatures and clear all extended features + * for fx_only mode. */ + u64 mask = fx_only ? XFEATURE_MASK_FPSSE : user_xfeatures; /* - * 'user_xfeatures' might have bits clear which are - * set in header->xfeatures. This represents features that - * were in init state prior to a signal delivery, and need - * to be reset back to the init state. Clear any user - * feature bits which are set in the kernel buffer to get - * them back to the init state. - * - * Supervisor state is unchanged by input from userspace. - * Ensure supervisor state bits stay set and supervisor - * state is not modified. + * Supervisor state has to be preserved. The sigframe + * restore can only modify user features, i.e. @mask + * cannot contain them. */ - if (fx_only) - header->xfeatures = XFEATURE_MASK_FPSSE; - else - header->xfeatures &= user_xfeatures | - xfeatures_mask_supervisor(); + header->xfeatures &= mask | xfeatures_mask_supervisor(); } if (use_fxsr()) { diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index d0eef963aad1..1cadb2faf740 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -441,12 +441,35 @@ static void __init print_xstate_offset_size(void) } /* + * All supported features have either init state all zeros or are + * handled in setup_init_fpu_buf() individually. This is an explicit + * feature list and does not use XFEATURE_MASK*SUPPORTED to catch + * newly added supported features at build time and make people + * actually look at the init state for the new feature. + */ +#define XFEATURES_INIT_FPSTATE_HANDLED \ (XFEATURE_MASK_FP | \ XFEATURE_MASK_SSE | \ XFEATURE_MASK_YMM | \ XFEATURE_MASK_OPMASK | \ XFEATURE_MASK_ZMM_Hi256 | \ XFEATURE_MASK_Hi16_ZMM | \ XFEATURE_MASK_PKRU | \ XFEATURE_MASK_BNDREGS | \ XFEATURE_MASK_BNDCSR | \ XFEATURE_MASK_PASID) + +/* * setup the xstate image representing the init state */ static void __init setup_init_fpu_buf(void) { static int on_boot_cpu __initdata = 1; + BUILD_BUG_ON((XFEATURE_MASK_USER_SUPPORTED | + XFEATURE_MASK_SUPERVISOR_SUPPORTED) != + XFEATURES_INIT_FPSTATE_HANDLED); + WARN_ON_FPU(!on_boot_cpu); on_boot_cpu = 0; @@ -466,10 +489,22 @@ static void __init setup_init_fpu_buf(void) copy_kernel_to_xregs_booting(&init_fpstate.xsave); /* - * Dump the init state again. This is to identify the init state - * of any feature which is not represented by all zero's. + * All components are now in init state. Read the state back so + * that init_fpstate contains all non-zero init state. This only + * works with XSAVE, but not with XSAVEOPT and XSAVES because + * those use the init optimization which skips writing data for + * components in init state. + * + * XSAVE could be used, but that would require reshuffling the + * data when XSAVES is available because XSAVES uses xstate + * compaction.
But doing so is a pointless exercise because most + * components have an all zeros init state except for the legacy + * ones (FP and SSE). Those can be saved with FXSAVE into the + * legacy area. Adding new features requires ensuring that the init + * state is all zeroes or, if not, adding the necessary handling + * here. */ - copy_xregs_to_kernel_booting(&init_fpstate.xsave); + fxsave(&init_fpstate.fxsave); } static int xfeature_uncompacted_offset(int xfeature_nr) diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 4d32cb06ffd5..ec9922cba30a 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -58,12 +58,16 @@ SYM_FUNC_START_NOALIGN(__x86_indirect_alt_call_\reg) 2: .skip 5-(2b-1b), 0x90 SYM_FUNC_END(__x86_indirect_alt_call_\reg) +STACK_FRAME_NON_STANDARD(__x86_indirect_alt_call_\reg) + SYM_FUNC_START_NOALIGN(__x86_indirect_alt_jmp_\reg) ANNOTATE_RETPOLINE_SAFE 1: jmp *%\reg 2: .skip 5-(2b-1b), 0x90 SYM_FUNC_END(__x86_indirect_alt_jmp_\reg) +STACK_FRAME_NON_STANDARD(__x86_indirect_alt_jmp_\reg) + .endm /* diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index e87699aa2dc8..03149422dce2 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -592,8 +592,10 @@ DEFINE_IDTENTRY_RAW(xenpv_exc_debug) DEFINE_IDTENTRY_RAW(exc_xen_unknown_trap) { /* This should never happen and there is no way to handle it. */ + instrumentation_begin(); pr_err("Unknown trap in Xen PV mode."); BUG(); + instrumentation_end(); } #ifdef CONFIG_X86_MCE diff --git a/drivers/base/swnode.c b/drivers/base/swnode.c index 3cc11b813f28..d1f1a8240120 100644 --- a/drivers/base/swnode.c +++ b/drivers/base/swnode.c @@ -1045,7 +1045,15 @@ int device_add_software_node(struct device *dev, const struct software_node *nod } set_secondary_fwnode(dev, &swnode->fwnode); - software_node_notify(dev, KOBJ_ADD); + + /* + * If the device has been fully registered by the time this function is + * called, software_node_notify() must be called separately so that the + * symlinks get created and the reference count of the node is kept in + * balance. + */ + if (device_is_registered(dev)) + software_node_notify(dev, KOBJ_ADD); return 0; } @@ -1065,7 +1073,8 @@ void device_remove_software_node(struct device *dev) if (!swnode) return; - software_node_notify(dev, KOBJ_REMOVE); + if (device_is_registered(dev)) + software_node_notify(dev, KOBJ_REMOVE); set_secondary_fwnode(dev, NULL); kobject_put(&swnode->kobj); } @@ -1119,8 +1128,7 @@ int software_node_notify(struct device *dev, unsigned long action) switch (action) { case KOBJ_ADD: - ret = sysfs_create_link_nowarn(&dev->kobj, &swnode->kobj, - "software_node"); + ret = sysfs_create_link(&dev->kobj, &swnode->kobj, "software_node"); if (ret) break; diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 1dd0ec6727fd..3c69b785cb79 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -1383,6 +1383,7 @@ config GPIO_TPS68470 config GPIO_TQMX86 tristate "TQ-Systems QTMX86 GPIO" depends on MFD_TQMX86 || COMPILE_TEST + depends on HAS_IOPORT_MAP select GPIOLIB_IRQCHIP help This driver supports GPIO on the TQMX86 IO controller. @@ -1450,6 +1451,7 @@ menu "PCI GPIO expanders" config GPIO_AMD8111 tristate "AMD 8111 GPIO driver" depends on X86 || COMPILE_TEST + depends on HAS_IOPORT_MAP help The AMD 8111 south bridge contains 32 GPIO pins which can be used.
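The drivers/base/swnode.c hunks above key the KOBJ_ADD/KOBJ_REMOVE notification on device_is_registered(), so the "software_node" symlink is created and the node reference taken exactly once whether the node is added before or after device registration. A minimal sketch of a caller, assuming the in-tree device_add_software_node() API; the driver function, node and property names are invented for illustration:

    #include <linux/device.h>
    #include <linux/property.h>

    /* Invented example node carrying a single u32 property. */
    static const struct property_entry demo_props[] = {
            PROPERTY_ENTRY_U32("demo-delay-ms", 10),
            { }
    };

    static const struct software_node demo_node = {
            .properties = demo_props,
    };

    static int demo_attach(struct device *dev)
    {
            /*
             * Called before device_add(dev): the node is only linked here,
             * and the KOBJ_ADD notification sent at registration time
             * creates the sysfs symlink and takes the node reference.
             *
             * Called after device_add(dev): device_is_registered(dev) is
             * true, so device_add_software_node() now sends the
             * notification itself instead of skipping it.
             */
            return device_add_software_node(dev, &demo_node);
    }

Either ordering ends up with one symlink and one reference, which is the balance the checks above preserve.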
diff --git a/drivers/gpio/gpio-mxc.c b/drivers/gpio/gpio-mxc.c index 157106e1e438..b9fdf05d7669 100644 --- a/drivers/gpio/gpio-mxc.c +++ b/drivers/gpio/gpio-mxc.c @@ -334,7 +334,7 @@ static int mxc_gpio_init_gc(struct mxc_gpio_port *port, int irq_base) ct->chip.irq_unmask = irq_gc_mask_set_bit; ct->chip.irq_set_type = gpio_set_irq_type; ct->chip.irq_set_wake = gpio_set_wake_irq; - ct->chip.flags = IRQCHIP_MASK_ON_SUSPEND; + ct->chip.flags = IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND; ct->regs.ack = GPIO_ISR; ct->regs.mask = GPIO_IMR; diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c index 1631727bf0da..c7b5446d01fd 100644 --- a/drivers/gpio/gpiolib-cdev.c +++ b/drivers/gpio/gpiolib-cdev.c @@ -1880,6 +1880,7 @@ static void gpio_v2_line_info_changed_to_v1( struct gpio_v2_line_info_changed *lic_v2, struct gpioline_info_changed *lic_v1) { + memset(lic_v1, 0, sizeof(*lic_v1)); gpio_v2_line_info_to_v1(&lic_v2->info, &lic_v1->info); lic_v1->timestamp = lic_v2->timestamp_ns; lic_v1->event_type = lic_v2->event_type; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index c13985fb35be..2a4cd7d377bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -1047,11 +1047,12 @@ int amdgpu_display_gem_fb_init(struct drm_device *dev, rfb->base.obj[0] = obj; drm_helper_mode_fill_fb_struct(dev, &rfb->base, mode_cmd); - ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs); + + ret = amdgpu_display_framebuffer_init(dev, rfb, mode_cmd, obj); if (ret) goto err; - ret = amdgpu_display_framebuffer_init(dev, rfb, mode_cmd, obj); + ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs); if (ret) goto err; @@ -1071,9 +1072,6 @@ int amdgpu_display_gem_fb_verify_and_init( rfb->base.obj[0] = obj; drm_helper_mode_fill_fb_struct(dev, &rfb->base, mode_cmd); - ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs); - if (ret) - goto err; /* Verify that the modifier is supported. 
*/ if (!drm_any_plane_has_format(dev, mode_cmd->pixel_format, mode_cmd->modifier[0])) { @@ -1092,6 +1090,10 @@ int amdgpu_display_gem_fb_verify_and_init( if (ret) goto err; + ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs); + if (ret) + goto err; + return 0; err: drm_dbg_kms(dev, "Failed to verify and init gem fb: %d\n", ret); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index baa980a477d9..37ec59365080 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -214,9 +214,21 @@ static int amdgpu_dma_buf_pin(struct dma_buf_attachment *attach) { struct drm_gem_object *obj = attach->dmabuf->priv; struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); + int r; /* pin buffer into GTT */ - return amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT); + r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT); + if (r) + return r; + + if (bo->tbo.moving) { + r = dma_fence_wait(bo->tbo.moving, true); + if (r) { + amdgpu_bo_unpin(bo); + return r; + } + } + return 0; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 327b1f8213a8..0597aeb5f0e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -6871,12 +6871,8 @@ static int gfx_v10_0_kiq_init_register(struct amdgpu_ring *ring) if (ring->use_doorbell) { WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, (adev->doorbell_index.kiq * 2) << 2); - /* If GC has entered CGPG, ringing doorbell > first page doesn't - * wakeup GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to workaround - * this issue. - */ WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, - (adev->doorbell.size - 4)); + (adev->doorbell_index.userqueue_end * 2) << 2); } WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index c09225d065c2..516467e962b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3673,12 +3673,8 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) if (ring->use_doorbell) { WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, (adev->doorbell_index.kiq * 2) << 2); - /* If GC has entered CGPG, ringing doorbell > first page doesn't - * wakeup GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to workaround - * this issue. 
- */ WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, - (adev->doorbell.size - 4)); + (adev->doorbell_index.userqueue_end * 2) << 2); } WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c index 05ad75d155e8..cfe4fc69277e 100644 --- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c +++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c @@ -232,7 +232,6 @@ static void atmel_hlcdc_crtc_atomic_enable(struct drm_crtc *c, pm_runtime_put_sync(dev->dev); - drm_crtc_vblank_on(c); } #define ATMEL_HLCDC_RGB444_OUTPUT BIT(0) @@ -344,7 +343,16 @@ static int atmel_hlcdc_crtc_atomic_check(struct drm_crtc *c, static void atmel_hlcdc_crtc_atomic_begin(struct drm_crtc *c, struct drm_atomic_state *state) { + drm_crtc_vblank_on(c); +} + +static void atmel_hlcdc_crtc_atomic_flush(struct drm_crtc *c, + struct drm_atomic_state *state) +{ struct atmel_hlcdc_crtc *crtc = drm_crtc_to_atmel_hlcdc_crtc(c); + unsigned long flags; + + spin_lock_irqsave(&c->dev->event_lock, flags); if (c->state->event) { c->state->event->pipe = drm_crtc_index(c); @@ -354,12 +362,7 @@ static void atmel_hlcdc_crtc_atomic_begin(struct drm_crtc *c, crtc->event = c->state->event; c->state->event = NULL; } -} - -static void atmel_hlcdc_crtc_atomic_flush(struct drm_crtc *crtc, - struct drm_atomic_state *state) -{ - /* TODO: write common plane control register if available */ + spin_unlock_irqrestore(&c->dev->event_lock, flags); } static const struct drm_crtc_helper_funcs lcdc_crtc_helper_funcs = { diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c index 65af56e47129..f09b6dd8754c 100644 --- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c +++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c @@ -593,6 +593,7 @@ static int atmel_hlcdc_dc_modeset_init(struct drm_device *dev) dev->mode_config.max_width = dc->desc->max_width; dev->mode_config.max_height = dc->desc->max_height; dev->mode_config.funcs = &mode_config_funcs; + dev->mode_config.async_page_flip = true; return 0; } diff --git a/drivers/gpu/drm/kmb/kmb_drv.c b/drivers/gpu/drm/kmb/kmb_drv.c index f64e06e1067d..96ea1a2c11dd 100644 --- a/drivers/gpu/drm/kmb/kmb_drv.c +++ b/drivers/gpu/drm/kmb/kmb_drv.c @@ -137,6 +137,7 @@ static int kmb_hw_init(struct drm_device *drm, unsigned long flags) /* Allocate LCD interrupt resources */ irq_lcd = platform_get_irq(pdev, 0); if (irq_lcd < 0) { + ret = irq_lcd; drm_err(&kmb->drm, "irq_lcd not found"); goto setup_fail; } diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 3e09df0472ce..170aba99a110 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -546,7 +546,7 @@ nouveau_bo_sync_for_device(struct nouveau_bo *nvbo) struct ttm_tt *ttm_dma = (struct ttm_tt *)nvbo->bo.ttm; int i, j; - if (!ttm_dma) + if (!ttm_dma || !ttm_dma->dma_address) return; if (!ttm_dma->pages) { NV_DEBUG(drm, "ttm_dma 0x%p: pages NULL\n", ttm_dma); @@ -582,7 +582,7 @@ nouveau_bo_sync_for_cpu(struct nouveau_bo *nvbo) struct ttm_tt *ttm_dma = (struct ttm_tt *)nvbo->bo.ttm; int i, j; - if (!ttm_dma) + if (!ttm_dma || !ttm_dma->dma_address) return; if (!ttm_dma->pages) { NV_DEBUG(drm, "ttm_dma 0x%p: pages NULL\n", ttm_dma); diff --git a/drivers/gpu/drm/nouveau/nouveau_prime.c b/drivers/gpu/drm/nouveau/nouveau_prime.c index 347488685f74..60019d0532fc 100644 --- a/drivers/gpu/drm/nouveau/nouveau_prime.c +++ 
b/drivers/gpu/drm/nouveau/nouveau_prime.c @@ -93,7 +93,22 @@ int nouveau_gem_prime_pin(struct drm_gem_object *obj) if (ret) return -EINVAL; - return 0; + ret = ttm_bo_reserve(&nvbo->bo, false, false, NULL); + if (ret) + goto error; + + if (nvbo->bo.moving) + ret = dma_fence_wait(nvbo->bo.moving, true); + + ttm_bo_unreserve(&nvbo->bo); + if (ret) + goto error; + + return ret; + +error: + nouveau_bo_unpin(nvbo); + return ret; } void nouveau_gem_prime_unpin(struct drm_gem_object *obj) diff --git a/drivers/gpu/drm/panel/panel-samsung-ld9040.c b/drivers/gpu/drm/panel/panel-samsung-ld9040.c index f484147fc3a6..c4b388850a13 100644 --- a/drivers/gpu/drm/panel/panel-samsung-ld9040.c +++ b/drivers/gpu/drm/panel/panel-samsung-ld9040.c @@ -383,6 +383,7 @@ MODULE_DEVICE_TABLE(spi, ld9040_ids); static struct spi_driver ld9040_driver = { .probe = ld9040_probe, .remove = ld9040_remove, + .id_table = ld9040_ids, .driver = { .name = "panel-samsung-ld9040", .of_match_table = ld9040_of_match, diff --git a/drivers/gpu/drm/radeon/radeon_prime.c b/drivers/gpu/drm/radeon/radeon_prime.c index 42a87948e28c..4a90807351e7 100644 --- a/drivers/gpu/drm/radeon/radeon_prime.c +++ b/drivers/gpu/drm/radeon/radeon_prime.c @@ -77,9 +77,19 @@ int radeon_gem_prime_pin(struct drm_gem_object *obj) /* pin buffer into GTT */ ret = radeon_bo_pin(bo, RADEON_GEM_DOMAIN_GTT, NULL); - if (likely(ret == 0)) - bo->prime_shared_count++; - + if (unlikely(ret)) + goto error; + + if (bo->tbo.moving) { + ret = dma_fence_wait(bo->tbo.moving, false); + if (unlikely(ret)) { + radeon_bo_unpin(bo); + goto error; + } + } + + bo->prime_shared_count++; +error: radeon_bo_unreserve(bo); return ret; } diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index 1fda574579af..8106b5634fe1 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -159,6 +159,8 @@ vc4_hdmi_connector_detect(struct drm_connector *connector, bool force) struct vc4_hdmi *vc4_hdmi = connector_to_vc4_hdmi(connector); bool connected = false; + WARN_ON(pm_runtime_resume_and_get(&vc4_hdmi->pdev->dev)); + if (vc4_hdmi->hpd_gpio) { if (gpio_get_value_cansleep(vc4_hdmi->hpd_gpio) ^ vc4_hdmi->hpd_active_low) @@ -180,10 +182,12 @@ vc4_hdmi_connector_detect(struct drm_connector *connector, bool force) } } + pm_runtime_put(&vc4_hdmi->pdev->dev); return connector_status_connected; } cec_phys_addr_invalidate(vc4_hdmi->cec_adap); + pm_runtime_put(&vc4_hdmi->pdev->dev); return connector_status_disconnected; } @@ -473,7 +477,6 @@ static void vc4_hdmi_encoder_post_crtc_powerdown(struct drm_encoder *encoder, HDMI_READ(HDMI_VID_CTL) & ~VC4_HD_VID_CTL_ENABLE); clk_disable_unprepare(vc4_hdmi->pixel_bvb_clock); - clk_disable_unprepare(vc4_hdmi->hsm_clock); clk_disable_unprepare(vc4_hdmi->pixel_clock); ret = pm_runtime_put(&vc4_hdmi->pdev->dev); @@ -784,13 +787,6 @@ static void vc4_hdmi_encoder_pre_crtc_configure(struct drm_encoder *encoder, return; } - ret = clk_prepare_enable(vc4_hdmi->hsm_clock); - if (ret) { - DRM_ERROR("Failed to turn on HSM clock: %d\n", ret); - clk_disable_unprepare(vc4_hdmi->pixel_clock); - return; - } - vc4_hdmi_cec_update_clk_div(vc4_hdmi); /* @@ -801,7 +797,6 @@ static void vc4_hdmi_encoder_pre_crtc_configure(struct drm_encoder *encoder, (hsm_rate > VC4_HSM_MID_CLOCK ? 
150000000 : 75000000)); if (ret) { DRM_ERROR("Failed to set pixel bvb clock rate: %d\n", ret); - clk_disable_unprepare(vc4_hdmi->hsm_clock); clk_disable_unprepare(vc4_hdmi->pixel_clock); return; } @@ -809,7 +804,6 @@ static void vc4_hdmi_encoder_pre_crtc_configure(struct drm_encoder *encoder, ret = clk_prepare_enable(vc4_hdmi->pixel_bvb_clock); if (ret) { DRM_ERROR("Failed to turn on pixel bvb clock: %d\n", ret); - clk_disable_unprepare(vc4_hdmi->hsm_clock); clk_disable_unprepare(vc4_hdmi->pixel_clock); return; } @@ -1929,6 +1923,29 @@ static int vc5_hdmi_init_resources(struct vc4_hdmi *vc4_hdmi) return 0; } +#ifdef CONFIG_PM +static int vc4_hdmi_runtime_suspend(struct device *dev) +{ + struct vc4_hdmi *vc4_hdmi = dev_get_drvdata(dev); + + clk_disable_unprepare(vc4_hdmi->hsm_clock); + + return 0; +} + +static int vc4_hdmi_runtime_resume(struct device *dev) +{ + struct vc4_hdmi *vc4_hdmi = dev_get_drvdata(dev); + int ret; + + ret = clk_prepare_enable(vc4_hdmi->hsm_clock); + if (ret) + return ret; + + return 0; +} +#endif + static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data) { const struct vc4_hdmi_variant *variant = of_device_get_match_data(dev); @@ -2165,11 +2182,18 @@ static const struct of_device_id vc4_hdmi_dt_match[] = { {} }; +static const struct dev_pm_ops vc4_hdmi_pm_ops = { + SET_RUNTIME_PM_OPS(vc4_hdmi_runtime_suspend, + vc4_hdmi_runtime_resume, + NULL) +}; + struct platform_driver vc4_hdmi_driver = { .probe = vc4_hdmi_dev_probe, .remove = vc4_hdmi_dev_remove, .driver = { .name = "vc4_hdmi", .of_match_table = vc4_hdmi_dt_match, + .pm = &vc4_hdmi_pm_ops, }, }; diff --git a/drivers/i2c/busses/i2c-cp2615.c b/drivers/i2c/busses/i2c-cp2615.c index 78cfecd1ea76..3ded28632e4c 100644 --- a/drivers/i2c/busses/i2c-cp2615.c +++ b/drivers/i2c/busses/i2c-cp2615.c @@ -138,17 +138,23 @@ cp2615_i2c_send(struct usb_interface *usbif, struct cp2615_i2c_transfer *i2c_w) static int cp2615_i2c_recv(struct usb_interface *usbif, unsigned char tag, void *buf) { - struct cp2615_iop_msg *msg = kzalloc(sizeof(*msg), GFP_KERNEL); - struct cp2615_i2c_transfer_result *i2c_r = (struct cp2615_i2c_transfer_result *)&msg->data; struct usb_device *usbdev = interface_to_usbdev(usbif); - int res = usb_bulk_msg(usbdev, usb_rcvbulkpipe(usbdev, IOP_EP_IN), - msg, sizeof(struct cp2615_iop_msg), NULL, 0); + struct cp2615_iop_msg *msg; + struct cp2615_i2c_transfer_result *i2c_r; + int res; + + msg = kzalloc(sizeof(*msg), GFP_KERNEL); + if (!msg) + return -ENOMEM; + res = usb_bulk_msg(usbdev, usb_rcvbulkpipe(usbdev, IOP_EP_IN), msg, + sizeof(struct cp2615_iop_msg), NULL, 0); if (res < 0) { kfree(msg); return res; } + i2c_r = (struct cp2615_i2c_transfer_result *)&msg->data; if (msg->msg != htons(iop_I2cTransferResult) || i2c_r->tag != tag) { kfree(msg); return -EIO; diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index f9e1c2ceaac0..04a1e38f2a6f 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -978,6 +978,9 @@ static s32 i801_access(struct i2c_adapter *adap, u16 addr, } out: + /* Unlock the SMBus device for use by BIOS/ACPI */ + outb_p(SMBHSTSTS_INUSE_STS, SMBHSTSTS(priv)); + pm_runtime_mark_last_busy(&priv->pci_dev->dev); pm_runtime_put_autosuspend(&priv->pci_dev->dev); mutex_unlock(&priv->acpi_lock); diff --git a/drivers/i2c/busses/i2c-robotfuzz-osif.c b/drivers/i2c/busses/i2c-robotfuzz-osif.c index a39f7d092797..66dfa211e736 100644 --- a/drivers/i2c/busses/i2c-robotfuzz-osif.c +++ b/drivers/i2c/busses/i2c-robotfuzz-osif.c @@ -83,7 +83,7 
@@ static int osif_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, } } - ret = osif_usb_read(adapter, OSIFI2C_STOP, 0, 0, NULL, 0); + ret = osif_usb_write(adapter, OSIFI2C_STOP, 0, 0, NULL, 0); if (ret) { dev_err(&adapter->dev, "failure sending STOP\n"); return -EREMOTEIO; @@ -153,7 +153,7 @@ static int osif_probe(struct usb_interface *interface, * Set bus frequency. The frequency is: * 120,000,000 / ( 16 + 2 * div * 4^prescale). * Using dev = 52, prescale = 0 give 100KHz */ - ret = osif_usb_read(&priv->adapter, OSIFI2C_SET_BIT_RATE, 52, 0, + ret = osif_usb_write(&priv->adapter, OSIFI2C_SET_BIT_RATE, 52, 0, NULL, 0); if (ret) { dev_err(&interface->dev, "failure sending bit rate"); diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c index 6ef38a8ee95c..cb64fe649390 100644 --- a/drivers/i2c/i2c-dev.c +++ b/drivers/i2c/i2c-dev.c @@ -526,7 +526,7 @@ static long compat_i2cdev_ioctl(struct file *file, unsigned int cmd, unsigned lo return put_user(funcs, (compat_ulong_t __user *)arg); case I2C_RDWR: { struct i2c_rdwr_ioctl_data32 rdwr_arg; - struct i2c_msg32 *p; + struct i2c_msg32 __user *p; struct i2c_msg *rdwr_pa; int i; diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c index 016a6106151a..3f28eb4d17fe 100644 --- a/drivers/mmc/host/meson-gx-mmc.c +++ b/drivers/mmc/host/meson-gx-mmc.c @@ -165,6 +165,7 @@ struct meson_host { unsigned int bounce_buf_size; void *bounce_buf; + void __iomem *bounce_iomem_buf; dma_addr_t bounce_dma_addr; struct sd_emmc_desc *descs; dma_addr_t descs_dma_addr; @@ -745,6 +746,47 @@ static void meson_mmc_desc_chain_transfer(struct mmc_host *mmc, u32 cmd_cfg) writel(start, host->regs + SD_EMMC_START); } +/* local sg copy to buffer version with _to/fromio usage for dram_access_quirk */ +static void meson_mmc_copy_buffer(struct meson_host *host, struct mmc_data *data, + size_t buflen, bool to_buffer) +{ + unsigned int sg_flags = SG_MITER_ATOMIC; + struct scatterlist *sgl = data->sg; + unsigned int nents = data->sg_len; + struct sg_mapping_iter miter; + unsigned int offset = 0; + + if (to_buffer) + sg_flags |= SG_MITER_FROM_SG; + else + sg_flags |= SG_MITER_TO_SG; + + sg_miter_start(&miter, sgl, nents, sg_flags); + + while ((offset < buflen) && sg_miter_next(&miter)) { + unsigned int len; + + len = min(miter.length, buflen - offset); + + /* When dram_access_quirk, the bounce buffer is an iomem mapping */ + if (host->dram_access_quirk) { + if (to_buffer) + memcpy_toio(host->bounce_iomem_buf + offset, miter.addr, len); + else + memcpy_fromio(miter.addr, host->bounce_iomem_buf + offset, len); + } else { + if (to_buffer) + memcpy(host->bounce_buf + offset, miter.addr, len); + else + memcpy(miter.addr, host->bounce_buf + offset, len); + } + + offset += len; + } + + sg_miter_stop(&miter); +} + static void meson_mmc_start_cmd(struct mmc_host *mmc, struct mmc_command *cmd) { struct meson_host *host = mmc_priv(mmc); @@ -788,8 +830,7 @@ static void meson_mmc_start_cmd(struct mmc_host *mmc, struct mmc_command *cmd) if (data->flags & MMC_DATA_WRITE) { cmd_cfg |= CMD_CFG_DATA_WR; WARN_ON(xfer_bytes > host->bounce_buf_size); - sg_copy_to_buffer(data->sg, data->sg_len, - host->bounce_buf, xfer_bytes); + meson_mmc_copy_buffer(host, data, xfer_bytes, true); dma_wmb(); } @@ -958,8 +999,7 @@ static irqreturn_t meson_mmc_irq_thread(int irq, void *dev_id) if (meson_mmc_bounce_buf_read(data)) { xfer_bytes = data->blksz * data->blocks; WARN_ON(xfer_bytes > host->bounce_buf_size); - sg_copy_from_buffer(data->sg, data->sg_len, - host->bounce_buf,
xfer_bytes); + meson_mmc_copy_buffer(host, data, xfer_bytes, false); } next_cmd = meson_mmc_get_next_command(cmd); @@ -1179,7 +1219,7 @@ static int meson_mmc_probe(struct platform_device *pdev) * instead of the DDR memory */ host->bounce_buf_size = SD_EMMC_SRAM_DATA_BUF_LEN; - host->bounce_buf = host->regs + SD_EMMC_SRAM_DATA_BUF_OFF; + host->bounce_iomem_buf = host->regs + SD_EMMC_SRAM_DATA_BUF_OFF; host->bounce_dma_addr = res->start + SD_EMMC_SRAM_DATA_BUF_OFF; } else { /* data bounce buffer */ diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index b717680377a9..8d4ebe095d0c 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1900,11 +1900,21 @@ static int pci_enable_device_flags(struct pci_dev *dev, unsigned long flags) int err; int i, bars = 0; - if (atomic_inc_return(&dev->enable_cnt) > 1) { - pci_update_current_state(dev, dev->current_state); - return 0; /* already enabled */ + /* + * Power state could be unknown at this point, either due to a fresh + * boot or a device removal call. So get the current power state + * so that things like MSI message writing will behave as expected + * (e.g. if the device really is in D0 at enable time). + */ + if (dev->pm_cap) { + u16 pmcsr; + pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr); + dev->current_state = (pmcsr & PCI_PM_CTRL_STATE_MASK); } + if (atomic_inc_return(&dev->enable_cnt) > 1) + return 0; /* already enabled */ + bridge = pci_upstream_bridge(dev); if (bridge) pci_enable_bridge(bridge); diff --git a/drivers/pinctrl/pinctrl-microchip-sgpio.c b/drivers/pinctrl/pinctrl-microchip-sgpio.c index c12fa57ebd12..165cb7a59715 100644 --- a/drivers/pinctrl/pinctrl-microchip-sgpio.c +++ b/drivers/pinctrl/pinctrl-microchip-sgpio.c @@ -845,8 +845,10 @@ static int microchip_sgpio_probe(struct platform_device *pdev) i = 0; device_for_each_child_node(dev, fwnode) { ret = microchip_sgpio_register_bank(dev, priv, fwnode, i++); - if (ret) + if (ret) { + fwnode_handle_put(fwnode); return ret; + } } if (priv->in.gpio.ngpio != priv->out.gpio.ngpio) { diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c index ad9eb5ed8e81..c14d12d54cc5 100644 --- a/drivers/pinctrl/stm32/pinctrl-stm32.c +++ b/drivers/pinctrl/stm32/pinctrl-stm32.c @@ -1224,7 +1224,7 @@ static int stm32_gpiolib_register_bank(struct stm32_pinctrl *pctl, struct device *dev = pctl->dev; struct resource res; int npins = STM32_GPIO_PINS_PER_BANK; - int bank_nr, err; + int bank_nr, err, i = 0; if (!IS_ERR(bank->rstc)) reset_control_deassert(bank->rstc); @@ -1246,9 +1246,14 @@ static int stm32_gpiolib_register_bank(struct stm32_pinctrl *pctl, of_property_read_string(np, "st,bank-name", &bank->gpio_chip.label); - if (!of_parse_phandle_with_fixed_args(np, "gpio-ranges", 3, 0, &args)) { + if (!of_parse_phandle_with_fixed_args(np, "gpio-ranges", 3, i, &args)) { bank_nr = args.args[1] / STM32_GPIO_PINS_PER_BANK; bank->gpio_chip.base = args.args[1]; + + npins = args.args[2]; + while (!of_parse_phandle_with_fixed_args(np, "gpio-ranges", 3, + ++i, &args)) + npins += args.args[2]; } else { bank_nr = pctl->nbanks; bank->gpio_chip.base = bank_nr * STM32_GPIO_PINS_PER_BANK; diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index cb3c37d1e009..a2c3d9ad9ee4 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1387,6 +1387,22 @@ static void sd_uninit_command(struct scsi_cmnd *SCpnt) } } +static bool sd_need_revalidate(struct block_device *bdev, + struct scsi_disk *sdkp) +{ + if (sdkp->device->removable || sdkp->write_prot) { + if 
(bdev_check_media_change(bdev)) + return true; + } + + /* + * Force a full rescan after ioctl(BLKRRPART). While the disk state has + * nothing to do with partitions, BLKRRPART is used to force a full + * revalidate after things like a format for historical reasons. + */ + return test_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state); +} + /** * sd_open - open a scsi disk device * @bdev: Block device of the scsi disk to open @@ -1423,10 +1439,8 @@ static int sd_open(struct block_device *bdev, fmode_t mode) if (!scsi_block_when_processing_errors(sdev)) goto error_out; - if (sdev->removable || sdkp->write_prot) { - if (bdev_check_media_change(bdev)) - sd_revalidate_disk(bdev->bd_disk); - } + if (sd_need_revalidate(bdev, sdkp)) + sd_revalidate_disk(bdev->bd_disk); /* * If the drive is empty, just let the open fail. diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index e4633b84c556..7815ed642d43 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -220,6 +220,8 @@ static unsigned int sr_get_events(struct scsi_device *sdev) return DISK_EVENT_EJECT_REQUEST; else if (med->media_event_code == 2) return DISK_EVENT_MEDIA_CHANGE; + else if (med->media_event_code == 3) + return DISK_EVENT_EJECT_REQUEST; return 0; } diff --git a/drivers/spi/spi-nxp-fspi.c b/drivers/spi/spi-nxp-fspi.c index 6e6c2403944d..a66fa97046ee 100644 --- a/drivers/spi/spi-nxp-fspi.c +++ b/drivers/spi/spi-nxp-fspi.c @@ -1124,12 +1124,6 @@ static int nxp_fspi_probe(struct platform_device *pdev) goto err_put_ctrl; } - /* Clear potential interrupts */ - reg = fspi_readl(f, f->iobase + FSPI_INTR); - if (reg) - fspi_writel(f, reg, f->iobase + FSPI_INTR); - - /* find the resources - controller memory mapped space */ if (is_acpi_node(f->dev->fwnode)) res = platform_get_resource(pdev, IORESOURCE_MEM, 1); @@ -1167,6 +1161,11 @@ static int nxp_fspi_probe(struct platform_device *pdev) } } + /* Clear potential interrupts */ + reg = fspi_readl(f, f->iobase + FSPI_INTR); + if (reg) + fspi_writel(f, reg, f->iobase + FSPI_INTR); + /* find the irq */ ret = platform_get_irq(pdev, 0); if (ret < 0) diff --git a/drivers/spi/spi-tegra20-slink.c b/drivers/spi/spi-tegra20-slink.c index f7c832fd4003..6a726c95ac7a 100644 --- a/drivers/spi/spi-tegra20-slink.c +++ b/drivers/spi/spi-tegra20-slink.c @@ -1118,6 +1118,11 @@ static int tegra_slink_probe(struct platform_device *pdev) pm_runtime_put_noidle(&pdev->dev); goto exit_pm_disable; } + + reset_control_assert(tspi->rst); + udelay(2); + reset_control_deassert(tspi->rst); + tspi->def_command_reg = SLINK_M_S; tspi->def_command2_reg = SLINK_CS_ACTIVE_BETWEEN; tegra_slink_writel(tspi, tspi->def_command_reg, SLINK_COMMAND); diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 7bbfd58958bc..d7e361fb0548 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -642,6 +642,9 @@ static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious) } info->eoi_time = 0; + + /* is_active hasn't been reset yet, do it now. */ + smp_store_release(&info->is_active, 0); do_unmask(info, EVT_MASK_REASON_EOI_PENDING); } @@ -811,6 +814,7 @@ static void xen_evtchn_close(evtchn_port_t port) BUG(); } +/* Not called for lateeoi events. */ static void event_handler_exit(struct irq_info *info) { smp_store_release(&info->is_active, 0); @@ -1883,7 +1887,12 @@ static void lateeoi_ack_dynirq(struct irq_data *data) if (VALID_EVTCHN(evtchn)) { do_mask(info, EVT_MASK_REASON_EOI_PENDING); - event_handler_exit(info); + /* + * Don't call event_handler_exit(). 
+ * Need to keep is_active non-zero in order to ignore re-raised + * events after cpu affinity changes while a lateeoi is pending. + */ + clear_evtchn(evtchn); } } diff --git a/fs/afs/write.c b/fs/afs/write.c index e9ccaa3baf2e..3104b62c2082 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -118,6 +118,15 @@ int afs_write_end(struct file *file, struct address_space *mapping, _enter("{%llx:%llu},{%lx}", vnode->fid.vid, vnode->fid.vnode, page->index); + if (!PageUptodate(page)) { + if (copied < len) { + copied = 0; + goto out; + } + + SetPageUptodate(page); + } + if (copied == 0) goto out; @@ -132,8 +141,6 @@ int afs_write_end(struct file *file, struct address_space *mapping, write_sequnlock(&vnode->cb_lock); } - ASSERT(PageUptodate(page)); - if (PagePrivate(page)) { priv = page_private(page); f = afs_page_dirty_from(page, priv); diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 5624fae7a603..9ba79b6531fb 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -668,14 +668,13 @@ out: * Handle lookups for the hidden .snap directory. */ struct dentry *ceph_handle_snapdir(struct ceph_mds_request *req, - struct dentry *dentry, int err) + struct dentry *dentry) { struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); struct inode *parent = d_inode(dentry->d_parent); /* we hold i_mutex */ /* .snap dir? */ - if (err == -ENOENT && - ceph_snap(parent) == CEPH_NOSNAP && + if (ceph_snap(parent) == CEPH_NOSNAP && strcmp(dentry->d_name.name, fsc->mount_options->snapdir_name) == 0) { struct dentry *res; struct inode *inode = ceph_get_snapdir(parent); @@ -742,7 +741,6 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); struct ceph_mds_request *req; - struct dentry *res; int op; int mask; int err; @@ -793,12 +791,16 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, req->r_parent = dir; set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); err = ceph_mdsc_do_request(mdsc, NULL, req); - res = ceph_handle_snapdir(req, dentry, err); - if (IS_ERR(res)) { - err = PTR_ERR(res); - } else { - dentry = res; - err = 0; + if (err == -ENOENT) { + struct dentry *res; + + res = ceph_handle_snapdir(req, dentry); + if (IS_ERR(res)) { + err = PTR_ERR(res); + } else { + dentry = res; + err = 0; + } } dentry = ceph_finish_lookup(req, dentry, err); ceph_mdsc_put_request(req); /* will dput(dentry) */ diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 77fc037d5beb..d51af3698032 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -578,6 +578,7 @@ static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry, struct ceph_inode_info *ci = ceph_inode(dir); struct inode *inode; struct timespec64 now; + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); struct ceph_vino vino = { .ino = req->r_deleg_ino, .snap = CEPH_NOSNAP }; @@ -615,8 +616,10 @@ static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry, ceph_file_layout_to_legacy(lo, &in.layout); + down_read(&mdsc->snap_rwsem); ret = ceph_fill_inode(inode, NULL, &iinfo, NULL, req->r_session, req->r_fmode, NULL); + up_read(&mdsc->snap_rwsem); if (ret) { dout("%s failed to fill inode: %d\n", __func__, ret); ceph_dir_clear_complete(dir); @@ -739,14 +742,16 @@ retry: err = ceph_mdsc_do_request(mdsc, (flags & (O_CREAT|O_TRUNC)) ? 
dir : NULL, req); - dentry = ceph_handle_snapdir(req, dentry, err); - if (IS_ERR(dentry)) { - err = PTR_ERR(dentry); - goto out_req; + if (err == -ENOENT) { + dentry = ceph_handle_snapdir(req, dentry); + if (IS_ERR(dentry)) { + err = PTR_ERR(dentry); + goto out_req; + } + err = 0; } - err = 0; - if ((flags & O_CREAT) && !req->r_reply_info.head->is_dentry) + if (!err && (flags & O_CREAT) && !req->r_reply_info.head->is_dentry) err = ceph_handle_notrace_create(dir, dentry); if (d_in_lookup(dentry)) { diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index e1c63adb196d..df0c8a724609 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -777,6 +777,8 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page, umode_t mode = le32_to_cpu(info->mode); dev_t rdev = le32_to_cpu(info->rdev); + lockdep_assert_held(&mdsc->snap_rwsem); + dout("%s %p ino %llx.%llx v %llu had %llu\n", __func__, inode, ceph_vinop(inode), le64_to_cpu(info->version), ci->i_version); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index db80d89556b1..839e6b0239ee 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -1218,7 +1218,7 @@ extern const struct dentry_operations ceph_dentry_ops; extern loff_t ceph_make_fpos(unsigned high, unsigned off, bool hash_order); extern int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry); extern struct dentry *ceph_handle_snapdir(struct ceph_mds_request *req, - struct dentry *dentry, int err); + struct dentry *dentry); extern struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, struct dentry *dentry, int err); diff --git a/fs/netfs/read_helper.c b/fs/netfs/read_helper.c index 725614625ed4..0b6cd3b8734c 100644 --- a/fs/netfs/read_helper.c +++ b/fs/netfs/read_helper.c @@ -1011,12 +1011,42 @@ out: } EXPORT_SYMBOL(netfs_readpage); -static void netfs_clear_thp(struct page *page) +/** + * netfs_skip_page_read - prep a page for writing without reading first + * @page: page being prepared + * @pos: starting position for the write + * @len: length of write + * + * In some cases, write_begin doesn't need to read at all: + * - full page write + * - write that lies in a page that is completely beyond EOF + * - write that covers the page from start to EOF or beyond it + * + * If any of these criteria are met, then zero out the unwritten parts + * of the page and return true. Otherwise, return false.
+ */ +static bool netfs_skip_page_read(struct page *page, loff_t pos, size_t len) { - unsigned int i; + struct inode *inode = page->mapping->host; + loff_t i_size = i_size_read(inode); + size_t offset = offset_in_thp(page, pos); + + /* Full page write */ + if (offset == 0 && len >= thp_size(page)) + return true; + + /* pos beyond last page in the file */ + if (pos - offset >= i_size) + goto zero_out; + + /* Write that covers from the start of the page to EOF or beyond */ + if (offset == 0 && (pos + len) >= i_size) + goto zero_out; - for (i = 0; i < thp_nr_pages(page); i++) - clear_highpage(page + i); + return false; +zero_out: + zero_user_segments(page, 0, offset, offset + len, thp_size(page)); + return true; } /** @@ -1024,7 +1054,7 @@ static void netfs_clear_thp(struct page *page) * @file: The file to read from * @mapping: The mapping to read from * @pos: File position at which the write will begin - * @len: The length of the write in this page + * @len: The length of the write (may extend beyond the end of the page chosen) * @flags: AOP_* flags * @_page: Where to put the resultant page * @_fsdata: Place for the netfs to store a cookie @@ -1061,8 +1091,6 @@ int netfs_write_begin(struct file *file, struct address_space *mapping, struct inode *inode = file_inode(file); unsigned int debug_index = 0; pgoff_t index = pos >> PAGE_SHIFT; - int pos_in_page = pos & ~PAGE_MASK; - loff_t size; int ret; DEFINE_READAHEAD(ractl, file, NULL, mapping, index); @@ -1090,13 +1118,8 @@ retry: * within the cache granule containing the EOF, in which case we need * to preload the granule. */ - size = i_size_read(inode); if (!ops->is_cache_enabled(inode) && - ((pos_in_page == 0 && len == thp_size(page)) || - (pos >= size) || - (pos_in_page == 0 && (pos + len) >= size))) { - netfs_clear_thp(page); - SetPageUptodate(page); + netfs_skip_page_read(page, pos, len)) { netfs_stat(&netfs_n_rh_write_zskip); goto have_page_no_wait; } diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c index 303d71430bdd..9c6c0e2e5880 100644 --- a/fs/nilfs2/sysfs.c +++ b/fs/nilfs2/sysfs.c @@ -1053,6 +1053,7 @@ void nilfs_sysfs_delete_device_group(struct the_nilfs *nilfs) nilfs_sysfs_delete_superblock_group(nilfs); nilfs_sysfs_delete_segctor_group(nilfs); kobject_del(&nilfs->ns_dev_kobj); + kobject_put(&nilfs->ns_dev_kobj); kfree(nilfs->ns_dev_subgroups); } diff --git a/include/linux/ceph/auth.h b/include/linux/ceph/auth.h index 71b5d481c653..6b138fa97db8 100644 --- a/include/linux/ceph/auth.h +++ b/include/linux/ceph/auth.h @@ -50,7 +50,7 @@ struct ceph_auth_client_ops { * another request. 
*/ int (*build_request)(struct ceph_auth_client *ac, void *buf, void *end); - int (*handle_reply)(struct ceph_auth_client *ac, int result, + int (*handle_reply)(struct ceph_auth_client *ac, u64 global_id, void *buf, void *end, u8 *session_key, int *session_key_len, u8 *con_secret, int *con_secret_len); @@ -104,6 +104,8 @@ struct ceph_auth_client { struct mutex mutex; }; +void ceph_auth_set_global_id(struct ceph_auth_client *ac, u64 global_id); + struct ceph_auth_client *ceph_auth_init(const char *name, const struct ceph_crypto_key *key, const int *con_modes); diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h index 2915f56ad421..edb5c186b0b7 100644 --- a/include/linux/debug_locks.h +++ b/include/linux/debug_locks.h @@ -27,8 +27,10 @@ extern int debug_locks_off(void); int __ret = 0; \ \ if (!oops_in_progress && unlikely(c)) { \ + instrumentation_begin(); \ if (debug_locks_off() && !debug_locks_silent) \ WARN(1, "DEBUG_LOCKS_WARN_ON(%s)", #c); \ + instrumentation_end(); \ __ret = 1; \ } \ __ret; \ diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 6504346a1947..3c0117656745 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -741,17 +741,6 @@ static inline int hstate_index(struct hstate *h) return h - hstates; } -pgoff_t __basepage_index(struct page *page); - -/* Return page->index in PAGE_SIZE units */ -static inline pgoff_t basepage_index(struct page *page) -{ - if (!PageCompound(page)) - return page->index; - - return __basepage_index(page); -} - extern int dissolve_free_huge_page(struct page *page); extern int dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn); @@ -988,11 +977,6 @@ static inline int hstate_index(struct hstate *h) return 0; } -static inline pgoff_t basepage_index(struct page *page) -{ - return page->index; -} - static inline int dissolve_free_huge_page(struct page *page) { return 0; diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index e89df447fae3..0f1b34dbf3a2 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -516,7 +516,7 @@ static inline struct page *read_mapping_page(struct address_space *mapping, } /* - * Get index of the page with in radix-tree + * Get index of the page within radix-tree (but not for hugetlb pages). * (TODO: remove once hugetlb pages will have ->index in PAGE_SIZE) */ static inline pgoff_t page_to_index(struct page *page) @@ -535,15 +535,16 @@ static inline pgoff_t page_to_index(struct page *page) return pgoff; } +extern pgoff_t hugetlb_basepage_index(struct page *page); + /* - * Get the offset in PAGE_SIZE. - * (TODO: hugepage should have ->index in PAGE_SIZE) + * Get the offset in PAGE_SIZE (even for hugetlb pages). 
+ * (TODO: hugetlb pages should have ->index in PAGE_SIZE) */ static inline pgoff_t page_to_pgoff(struct page *page) { - if (unlikely(PageHeadHuge(page))) - return page->index << compound_order(page); - + if (unlikely(PageHuge(page))) + return hugetlb_basepage_index(page); return page_to_index(page); } diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 4d668abb6391..bfaaf0b6fa76 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -135,6 +135,7 @@ extern void *__vmalloc_node_range(unsigned long size, unsigned long align, const void *caller); void *__vmalloc_node(unsigned long size, unsigned long align, gfp_t gfp_mask, int node, const void *caller); +void *vmalloc_no_huge(unsigned long size); extern void vfree(const void *addr); extern void vfree_atomic(const void *addr); diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h index bafbeb1a2624..650480f41f1d 100644 --- a/include/uapi/linux/userfaultfd.h +++ b/include/uapi/linux/userfaultfd.h @@ -80,8 +80,8 @@ struct uffdio_zeropage) #define UFFDIO_WRITEPROTECT _IOWR(UFFDIO, _UFFDIO_WRITEPROTECT, \ struct uffdio_writeprotect) -#define UFFDIO_CONTINUE _IOR(UFFDIO, _UFFDIO_CONTINUE, \ - struct uffdio_continue) +#define UFFDIO_CONTINUE _IOWR(UFFDIO, _UFFDIO_CONTINUE, \ + struct uffdio_continue) /* read() structure */ struct uffd_msg { diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 8ca7d505d61c..e50df8d8f87e 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -335,6 +335,14 @@ void __init swiotlb_exit(void) } /* + * Return the offset into an iotlb slot required to keep the device happy. + */ +static unsigned int swiotlb_align_offset(struct device *dev, u64 addr) +{ + return addr & dma_get_min_align_mask(dev) & (IO_TLB_SIZE - 1); +} + +/* * Bounce: copy the swiotlb buffer from or back to the original dma location */ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size, @@ -346,10 +354,17 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size size_t alloc_size = mem->slots[index].alloc_size; unsigned long pfn = PFN_DOWN(orig_addr); unsigned char *vaddr = phys_to_virt(tlb_addr); + unsigned int tlb_offset; if (orig_addr == INVALID_PHYS_ADDR) return; + tlb_offset = (tlb_addr & (IO_TLB_SIZE - 1)) - + swiotlb_align_offset(dev, orig_addr); + + orig_addr += tlb_offset; + alloc_size -= tlb_offset; + if (size > alloc_size) { dev_WARN_ONCE(dev, 1, "Buffer overflow detected. Allocation size: %zu. Mapping size: %zu.\n", @@ -391,14 +406,6 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size #define slot_addr(start, idx) ((start) + ((idx) << IO_TLB_SHIFT)) /* - * Return the offset into a iotlb slot required to keep the device happy. - */ -static unsigned int swiotlb_align_offset(struct device *dev, u64 addr) -{ - return addr & dma_get_min_align_mask(dev) & (IO_TLB_SIZE - 1); -} - -/* * Carefully handle integer overflow which can occur when boundary_mask == ~0UL.
*/ static inline unsigned long get_max_slots(unsigned long boundary_mask) diff --git a/kernel/futex.c b/kernel/futex.c index 4938a00bc785..408cad5e8968 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -35,7 +35,6 @@ #include <linux/jhash.h> #include <linux/pagemap.h> #include <linux/syscalls.h> -#include <linux/hugetlb.h> #include <linux/freezer.h> #include <linux/memblock.h> #include <linux/fault-inject.h> @@ -650,7 +649,7 @@ again: key->both.offset |= FUT_OFF_INODE; /* inode-based key */ key->shared.i_seq = get_inode_sequence_number(inode); - key->shared.pgoff = basepage_index(tail); + key->shared.pgoff = page_to_pgoff(tail); rcu_read_unlock(); } diff --git a/kernel/kthread.c b/kernel/kthread.c index fe3f2a40d61e..0fccf7d0c6a1 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -1093,8 +1093,38 @@ void kthread_flush_work(struct kthread_work *work) EXPORT_SYMBOL_GPL(kthread_flush_work); /* - * This function removes the work from the worker queue. Also it makes sure - * that it won't get queued later via the delayed work's timer. + * Make sure that the timer is neither set nor running and could + * not manipulate the work list_head any longer. + * + * The function is called under worker->lock. The lock is temporarily + * released but the timer can't be set again in the meantime. + */ +static void kthread_cancel_delayed_work_timer(struct kthread_work *work, + unsigned long *flags) +{ + struct kthread_delayed_work *dwork = + container_of(work, struct kthread_delayed_work, work); + struct kthread_worker *worker = work->worker; + + /* + * del_timer_sync() must be called to make sure that the timer + * callback is not running. The lock must be temporarily released + * to avoid a deadlock with the callback. In the meantime, + * any queuing is blocked by setting the canceling counter. + */ + work->canceling++; + raw_spin_unlock_irqrestore(&worker->lock, *flags); + del_timer_sync(&dwork->timer); + raw_spin_lock_irqsave(&worker->lock, *flags); + work->canceling--; +} + +/* + * This function removes the work from the worker queue. + * + * It is called under worker->lock. The caller must make sure that + * the timer used by delayed work is not running, e.g. by calling + * kthread_cancel_delayed_work_timer(). * * The work might still be in use when this function finishes. See the * current_work proceed by the worker. @@ -1102,28 +1132,8 @@ EXPORT_SYMBOL_GPL(kthread_flush_work); * Return: %true if @work was pending and successfully canceled, * %false if @work was not pending */ -static bool __kthread_cancel_work(struct kthread_work *work, bool is_dwork, - unsigned long *flags) +static bool __kthread_cancel_work(struct kthread_work *work) { - /* Try to cancel the timer if exists. */ - if (is_dwork) { - struct kthread_delayed_work *dwork = - container_of(work, struct kthread_delayed_work, work); - struct kthread_worker *worker = work->worker; - - /* - * del_timer_sync() must be called to make sure that the timer - * callback is not running. The lock must be temporary released - * to avoid a deadlock with the callback. In the meantime, - * any queuing is blocked by setting the canceling counter. - */ - work->canceling++; - raw_spin_unlock_irqrestore(&worker->lock, *flags); - del_timer_sync(&dwork->timer); - raw_spin_lock_irqsave(&worker->lock, *flags); - work->canceling--; - } - /* * Try to remove the work from a worker list. It might either * be from worker->work_list or from worker->delayed_work_list.
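Returning to the kernel/dma/swiotlb.c hunks a little further up: the new tlb_offset makes the bounce copy start at the same distance into the original buffer as tlb_addr sits past the start of its slot, instead of always copying from offset zero. A worked example of that arithmetic as a standalone sketch; IO_TLB_SIZE (1 << IO_TLB_SHIFT) is 2 KiB in this tree, and the mask and addresses are invented:

    #include <stdio.h>

    #define IO_TLB_SIZE (1u << 11)  /* 2 KiB slots, as in this tree */

    /*
     * Mirrors swiotlb_align_offset(): the low address bits the device
     * requires to be preserved across the bounce.
     */
    static unsigned int align_offset(unsigned long addr, unsigned long min_align_mask)
    {
            return addr & min_align_mask & (IO_TLB_SIZE - 1);
    }

    int main(void)
    {
            unsigned long min_align_mask = 0xfff;  /* keep low 12 bits */
            unsigned long orig_addr = 0x80001234;  /* original buffer */
            unsigned long tlb_addr  = 0x90000634;  /* address inside the slot */

            unsigned int tlb_offset = (tlb_addr & (IO_TLB_SIZE - 1)) -
                                      align_offset(orig_addr, min_align_mask);

            /* swiotlb_bounce() now advances orig_addr by this amount: 0x400 */
            printf("copy starts 0x%x bytes into the buffer\n", tlb_offset);
            return 0;
    }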
@@ -1176,11 +1186,23 @@ bool kthread_mod_delayed_work(struct kthread_worker *worker, /* Work must not be used with >1 worker, see kthread_queue_work() */ WARN_ON_ONCE(work->worker != worker); - /* Do not fight with another command that is canceling this work. */ + /* + * Temporarily cancel the work but do not fight with another command + * that is canceling the work as well. + * + * It is a bit tricky because of possible races with another + * mod_delayed_work() and cancel_delayed_work() callers. + * + * The timer must be canceled first because worker->lock is released + * when doing so. But the work can be removed from the queue (list) + * only when it can be queued again so that the return value can + * be used for reference counting. + */ + kthread_cancel_delayed_work_timer(work, &flags); if (work->canceling) goto out; + ret = __kthread_cancel_work(work); - ret = __kthread_cancel_work(work, true, &flags); fast_queue: __kthread_queue_delayed_work(worker, dwork, delay); out: @@ -1202,7 +1224,10 @@ static bool __kthread_cancel_work_sync(struct kthread_work *work, bool is_dwork) /* Work must not be used with >1 worker, see kthread_queue_work(). */ WARN_ON_ONCE(work->worker != worker); - ret = __kthread_cancel_work(work, is_dwork, &flags); + if (is_dwork) + kthread_cancel_delayed_work_timer(work, &flags); + + ret = __kthread_cancel_work(work); if (worker->current_work != work) goto out_fast; diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 7641bd407239..e32313072506 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -843,7 +843,7 @@ static int count_matching_names(struct lock_class *new_class) } /* used from NMI context -- must be lockless */ -static __always_inline struct lock_class * +static noinstr struct lock_class * look_up_lock_class(const struct lockdep_map *lock, unsigned int subclass) { struct lockdep_subclass_key *key; @@ -851,12 +851,14 @@ look_up_lock_class(const struct lockdep_map *lock, unsigned int subclass) struct lock_class *class; if (unlikely(subclass >= MAX_LOCKDEP_SUBCLASSES)) { + instrumentation_begin(); debug_locks_off(); printk(KERN_ERR "BUG: looking up invalid subclass: %u\n", subclass); printk(KERN_ERR "turning off the locking correctness validator.\n"); dump_stack(); + instrumentation_end(); return NULL; } diff --git a/kernel/module.c b/kernel/module.c index 7e78dfabca97..927d46cb8eb9 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -266,9 +266,18 @@ static void module_assert_mutex_or_preempt(void) #endif } +#ifdef CONFIG_MODULE_SIG static bool sig_enforce = IS_ENABLED(CONFIG_MODULE_SIG_FORCE); module_param(sig_enforce, bool_enable_only, 0644); +void set_module_sig_enforced(void) +{ + sig_enforce = true; +} +#else +#define sig_enforce false +#endif + /* * Export sig_enforce kernel cmdline parameter to allow other subsystems rely * on that instead of directly to CONFIG_MODULE_SIG_FORCE config. @@ -279,11 +288,6 @@ bool is_module_sig_enforced(void) } EXPORT_SYMBOL(is_module_sig_enforced); -void set_module_sig_enforced(void) -{ - sig_enforce = true; -} - /* Block module loading/unloading?
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index bfaa6e1f6067..23663318fb81 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3298,6 +3298,31 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq, int flags)
 
 #ifdef CONFIG_SMP
 #ifdef CONFIG_FAIR_GROUP_SCHED
+/*
+ * Because list_add_leaf_cfs_rq always places a child cfs_rq on the list
+ * immediately before a parent cfs_rq, and cfs_rqs are removed from the list
+ * bottom-up, we only have to test whether the cfs_rq before us on the list
+ * is our child.
+ * If cfs_rq is not on the list, test whether a child needs its to be added to
+ * connect a branch to the tree * (see list_add_leaf_cfs_rq() for details).
+ */
+static inline bool child_cfs_rq_on_list(struct cfs_rq *cfs_rq)
+{
+	struct cfs_rq *prev_cfs_rq;
+	struct list_head *prev;
+
+	if (cfs_rq->on_list) {
+		prev = cfs_rq->leaf_cfs_rq_list.prev;
+	} else {
+		struct rq *rq = rq_of(cfs_rq);
+
+		prev = rq->tmp_alone_branch;
+	}
+
+	prev_cfs_rq = container_of(prev, struct cfs_rq, leaf_cfs_rq_list);
+
+	return (prev_cfs_rq->tg->parent == cfs_rq->tg);
+}
 
 static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
 {
@@ -3313,6 +3338,9 @@ static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
 	if (cfs_rq->avg.runnable_sum)
 		return false;
 
+	if (child_cfs_rq_on_list(cfs_rq))
+		return false;
+
 	return true;
 }
diff --git a/kernel/signal.c b/kernel/signal.c
index f7c6ffcbd044..f1ecd8f0c11d 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -435,6 +435,12 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t gfp_flags,
 	 * Preallocation does not hold sighand::siglock so it can't
 	 * use the cache. The lockless caching requires that only
 	 * one consumer and only one producer run at a time.
+	 *
+	 * For the regular allocation case it is sufficient to
+	 * check @q for NULL because this code can only be called
+	 * if the target task @t has not been reaped yet; which
+	 * means this code can never observe the error pointer which is
+	 * written to @t->sigqueue_cache in exit_task_sigqueue_cache().
 	 */
 	q = READ_ONCE(t->sigqueue_cache);
 	if (!q || sigqueue_flags)
@@ -463,13 +469,18 @@ void exit_task_sigqueue_cache(struct task_struct *tsk)
 	struct sigqueue *q = tsk->sigqueue_cache;
 
 	if (q) {
-		tsk->sigqueue_cache = NULL;
 		/*
 		 * Hand it back to the cache as the task might
 		 * be self reaping which would leak the object.
 		 */
 		kmem_cache_free(sigqueue_cachep, q);
 	}
+
+	/*
+	 * Set an error pointer to ensure that @tsk will not cache a
+	 * sigqueue when it is reaping it's child tasks
+	 */
+	tsk->sigqueue_cache = ERR_PTR(-1);
 }
 
 static void sigqueue_cache_or_free(struct sigqueue *q)
@@ -481,6 +492,10 @@ static void sigqueue_cache_or_free(struct sigqueue *q)
 	 * is intentional when run without holding current->sighand->siglock,
 	 * which is fine as current obviously cannot run __sigqueue_free()
 	 * concurrently.
+	 *
+	 * The NULL check is safe even if current has been reaped already,
+	 * in which case exit_task_sigqueue_cache() wrote an error pointer
+	 * into current->sigqueue_cache.
 	 */
 	if (!READ_ONCE(current->sigqueue_cache))
 		WRITE_ONCE(current->sigqueue_cache, q);
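==== [Ed. note -- not part of the diff] ====
exit_task_sigqueue_cache() now poisons the cache slot with ERR_PTR(-1), so a task that is reaping its children can never pass the NULL fast-path check in sigqueue_cache_or_free() again. The trick is that small negative errno values cast to pointers land in the top, never-mapped page, so they cannot collide with a real allocation and can double as a "slot disabled" sentinel. A userspace re-implementation of the idiom for illustration only (the kernel's real helpers live in <linux/err.h>):

#include <stdio.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

int main(void)
{
	void *cache = NULL;

	cache = ERR_PTR(-1);   /* poison, as exit_task_sigqueue_cache() does */
	/* The NULL fast path now fails both ways: not NULL, and not usable. */
	printf("cache=%p is_err=%ld\n", cache, IS_ERR(cache));
	return 0;
}
==== [end note] ====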
diff --git a/lib/debug_locks.c b/lib/debug_locks.c
index 06d3135bd184..a75ee30b77cb 100644
--- a/lib/debug_locks.c
+++ b/lib/debug_locks.c
@@ -36,7 +36,7 @@ EXPORT_SYMBOL_GPL(debug_locks_silent);
 /*
  * Generic 'turn off all lock debugging' function:
  */
-noinstr int debug_locks_off(void)
+int debug_locks_off(void)
 {
 	if (debug_locks && __debug_locks_off()) {
 		if (!debug_locks_silent) {
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index e0a5f9cbbece..5ba5a0da6d57 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1588,15 +1588,12 @@ struct address_space *hugetlb_page_mapping_lock_write(struct page *hpage)
 	return NULL;
 }
 
-pgoff_t __basepage_index(struct page *page)
+pgoff_t hugetlb_basepage_index(struct page *page)
 {
 	struct page *page_head = compound_head(page);
 	pgoff_t index = page_index(page_head);
 	unsigned long compound_idx;
 
-	if (!PageHuge(page_head))
-		return page_index(page);
-
 	if (compound_order(page_head) >= MAX_ORDER)
 		compound_idx = page_to_pfn(page) - page_to_pfn(page_head);
 	else
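==== [Ed. note -- not part of the diff] ====
hugetlb_basepage_index() computes a tail page's file index as the head page's index plus the tail's position inside the compound page, using a pfn difference for gigantic pages whose struct pages may not be contiguous in the page map. The arithmetic, shown standalone with a fake page array (illustrative types, not kernel code):

#include <stdio.h>

struct fake_page { unsigned long pfn; unsigned long index; };

static unsigned long basepage_index(struct fake_page *tail,
				    struct fake_page *head)
{
	/* position of the tail inside the compound page, via pfn math */
	unsigned long compound_idx = tail->pfn - head->pfn;

	return head->index + compound_idx;
}

int main(void)
{
	struct fake_page map[8];

	for (int i = 0; i < 8; i++)
		map[i] = (struct fake_page){ .pfn = 100 + i, .index = 0 };
	map[0].index = 512;	/* head of an order-3 compound page */

	/* tail page 5 maps file offset 512 + 5 */
	printf("%lu\n", basepage_index(&map[5], &map[0]));
	return 0;
}
==== [end note] ====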
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 0143d32bc666..6f5f78885ab4 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -658,6 +658,7 @@ static int truncate_error_page(struct page *p, unsigned long pfn,
  */
 static int me_kernel(struct page *p, unsigned long pfn)
 {
+	unlock_page(p);
 	return MF_IGNORED;
 }
 
@@ -667,6 +668,7 @@ static int me_kernel(struct page *p, unsigned long pfn)
 static int me_unknown(struct page *p, unsigned long pfn)
 {
 	pr_err("Memory failure: %#lx: Unknown page state\n", pfn);
+	unlock_page(p);
 	return MF_FAILED;
 }
 
@@ -675,6 +677,7 @@ static int me_unknown(struct page *p, unsigned long pfn)
 static int me_pagecache_clean(struct page *p, unsigned long pfn)
 {
+	int ret;
 	struct address_space *mapping;
 
 	delete_from_lru_cache(p);
@@ -683,8 +686,10 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn)
 	 * For anonymous pages we're done the only reference left
 	 * should be the one m_f() holds.
 	 */
-	if (PageAnon(p))
-		return MF_RECOVERED;
+	if (PageAnon(p)) {
+		ret = MF_RECOVERED;
+		goto out;
+	}
 
 	/*
 	 * Now truncate the page in the page cache. This is really
@@ -698,7 +703,8 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn)
 		/*
 		 * Page has been teared down in the meanwhile
 		 */
-		return MF_FAILED;
+		ret = MF_FAILED;
+		goto out;
 	}
 
 	/*
@@ -706,7 +712,10 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn)
 	 *
 	 * Open: to take i_mutex or not for this? Right now we don't.
 	 */
-	return truncate_error_page(p, pfn, mapping);
+	ret = truncate_error_page(p, pfn, mapping);
+out:
+	unlock_page(p);
+	return ret;
 }
 
 /*
@@ -782,24 +791,26 @@ static int me_pagecache_dirty(struct page *p, unsigned long pfn)
  */
 static int me_swapcache_dirty(struct page *p, unsigned long pfn)
 {
+	int ret;
+
 	ClearPageDirty(p);
 	/* Trigger EIO in shmem: */
 	ClearPageUptodate(p);
 
-	if (!delete_from_lru_cache(p))
-		return MF_DELAYED;
-	else
-		return MF_FAILED;
+	ret = delete_from_lru_cache(p) ? MF_FAILED : MF_DELAYED;
+	unlock_page(p);
+	return ret;
 }
 
 static int me_swapcache_clean(struct page *p, unsigned long pfn)
 {
+	int ret;
+
 	delete_from_swap_cache(p);
 
-	if (!delete_from_lru_cache(p))
-		return MF_RECOVERED;
-	else
-		return MF_FAILED;
+	ret = delete_from_lru_cache(p) ? MF_FAILED : MF_RECOVERED;
+	unlock_page(p);
+	return ret;
 }
 
 /*
@@ -820,6 +831,7 @@ static int me_huge_page(struct page *p, unsigned long pfn)
 	mapping = page_mapping(hpage);
 	if (mapping) {
 		res = truncate_error_page(hpage, pfn, mapping);
+		unlock_page(hpage);
 	} else {
 		res = MF_FAILED;
 		unlock_page(hpage);
@@ -834,7 +846,6 @@ static int me_huge_page(struct page *p, unsigned long pfn)
 			page_ref_inc(p);
 			res = MF_RECOVERED;
 		}
-		lock_page(hpage);
 	}
 
 	return res;
@@ -866,6 +877,8 @@ static struct page_state {
 	unsigned long mask;
 	unsigned long res;
 	enum mf_action_page_type type;
+
+	/* Callback ->action() has to unlock the relevant page inside it. */
 	int (*action)(struct page *p, unsigned long pfn);
 } error_states[] = {
 	{ reserved,	reserved,	MF_MSG_KERNEL,	me_kernel },
@@ -929,6 +942,7 @@ static int page_action(struct page_state *ps, struct page *p,
 	int result;
 	int count;
 
+	/* page p should be unlocked after returning from ps->action(). */
 	result = ps->action(p, pfn);
 
 	count = page_count(p) - 1;
@@ -1253,7 +1267,7 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
 	if (TestSetPageHWPoison(head)) {
 		pr_err("Memory failure: %#lx: already hardware poisoned\n",
 		       pfn);
-		return 0;
+		return -EHWPOISON;
 	}
 
 	num_poisoned_pages_inc();
@@ -1313,7 +1327,7 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
 		goto out;
 	}
 
-	res = identify_page_state(pfn, p, page_flags);
+	return identify_page_state(pfn, p, page_flags);
 out:
 	unlock_page(head);
 	return res;
@@ -1429,9 +1443,10 @@ int memory_failure(unsigned long pfn, int flags)
 	struct page *hpage;
 	struct page *orig_head;
 	struct dev_pagemap *pgmap;
-	int res;
+	int res = 0;
 	unsigned long page_flags;
 	bool retry = true;
+	static DEFINE_MUTEX(mf_mutex);
 
 	if (!sysctl_memory_failure_recovery)
 		panic("Memory failure on page %lx", pfn);
@@ -1449,13 +1464,19 @@ int memory_failure(unsigned long pfn, int flags)
 		return -ENXIO;
 	}
 
+	mutex_lock(&mf_mutex);
+
 try_again:
-	if (PageHuge(p))
-		return memory_failure_hugetlb(pfn, flags);
+	if (PageHuge(p)) {
+		res = memory_failure_hugetlb(pfn, flags);
+		goto unlock_mutex;
+	}
+
 	if (TestSetPageHWPoison(p)) {
 		pr_err("Memory failure: %#lx: already hardware poisoned\n",
 		       pfn);
-		return 0;
+		res = -EHWPOISON;
+		goto unlock_mutex;
 	}
 
 	orig_head = hpage = compound_head(p);
@@ -1488,17 +1509,19 @@ try_again:
 			res = MF_FAILED;
 		}
 		action_result(pfn, MF_MSG_BUDDY, res);
-		return res == MF_RECOVERED ? 0 : -EBUSY;
+		res = res == MF_RECOVERED ? 0 : -EBUSY;
 	} else {
 		action_result(pfn, MF_MSG_KERNEL_HIGH_ORDER, MF_IGNORED);
-		return -EBUSY;
+		res = -EBUSY;
 	}
+	goto unlock_mutex;
 	}
 
 	if (PageTransHuge(hpage)) {
 		if (try_to_split_thp_page(p, "Memory Failure") < 0) {
 			action_result(pfn, MF_MSG_UNSPLIT_THP, MF_IGNORED);
-			return -EBUSY;
+			res = -EBUSY;
+			goto unlock_mutex;
 		}
 		VM_BUG_ON_PAGE(!page_count(p), p);
 	}
@@ -1522,7 +1545,7 @@ try_again:
 	if (PageCompound(p) && compound_head(p) != orig_head) {
 		action_result(pfn, MF_MSG_DIFFERENT_COMPOUND, MF_IGNORED);
 		res = -EBUSY;
-		goto out;
+		goto unlock_page;
 	}
 
 	/*
@@ -1542,14 +1565,14 @@ try_again:
 		num_poisoned_pages_dec();
 		unlock_page(p);
 		put_page(p);
-		return 0;
+		goto unlock_mutex;
 	}
 	if (hwpoison_filter(p)) {
 		if (TestClearPageHWPoison(p))
 			num_poisoned_pages_dec();
 		unlock_page(p);
 		put_page(p);
-		return 0;
+		goto unlock_mutex;
 	}
 
 	/*
@@ -1573,7 +1596,7 @@ try_again:
 	if (!hwpoison_user_mappings(p, pfn, flags, &p)) {
 		action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED);
 		res = -EBUSY;
-		goto out;
+		goto unlock_page;
 	}
 
 	/*
@@ -1582,13 +1605,17 @@ try_again:
 	if (PageLRU(p) && !PageSwapCache(p) && p->mapping == NULL) {
 		action_result(pfn, MF_MSG_TRUNCATED_LRU, MF_IGNORED);
 		res = -EBUSY;
-		goto out;
+		goto unlock_page;
 	}
 
 identify_page_state:
 	res = identify_page_state(pfn, p, page_flags);
-out:
+	mutex_unlock(&mf_mutex);
+	return res;
+unlock_page:
 	unlock_page(p);
+unlock_mutex:
+	mutex_unlock(&mf_mutex);
 	return res;
 }
 EXPORT_SYMBOL_GPL(memory_failure);
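==== [Ed. note -- not part of the diff] ====
memory_failure() now serializes every caller on a function-local static mutex, so two concurrent reports for the same page observe a consistent hwpoison state, and each early return funnels through goto labels that drop the page lock and the mutex in the right order. The function-local static lock is a useful pattern for a rarely-taken slow path that needs no lock visible in any header; a userspace sketch with illustrative names:

#include <pthread.h>

int handle_rare_event(int arg)
{
	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
	int res = 0;

	pthread_mutex_lock(&lock);
	if (arg < 0) {
		res = -1;
		goto unlock;	/* every early exit funnels through one path */
	}
	/* ... the actual slow-path work would go here ... */
unlock:
	pthread_mutex_unlock(&lock);
	return res;
}
==== [end note] ====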
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d1f5de1c1283..ef2265f86b91 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5053,9 +5053,13 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
 	 * Skip populated array elements to determine if any pages need
 	 * to be allocated before disabling IRQs.
 	 */
-	while (page_array && page_array[nr_populated] && nr_populated < nr_pages)
+	while (page_array && nr_populated < nr_pages && page_array[nr_populated])
 		nr_populated++;
 
+	/* Already populated array? */
+	if (unlikely(page_array && nr_pages - nr_populated == 0))
+		return 0;
+
 	/* Use the single page allocator for one page. */
 	if (nr_pages - nr_populated == 1)
 		goto failed;
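==== [Ed. note -- not part of the diff] ====
The one-line __alloc_pages_bulk() fix above reorders the loop condition so the bounds test runs before the array element is read: && evaluates left to right and short-circuits, so with the old order a fully populated array was indexed at nr_pages, one past the end. A runnable demonstration of why the operand order matters:

#include <stdio.h>
#include <stddef.h>

int main(void)
{
	void *arr[4] = { arr, arr, arr, arr };	/* fully populated */
	size_t n = 4, i = 0;

	/* BAD (old order):  arr[i] && i < n  reads arr[4], one past the
	 * end, before the bounds check can stop the loop. */

	/* GOOD (new order): the i < n test short-circuits the access. */
	while (arr && i < n && arr[i])
		i++;

	printf("populated entries: %zu\n", i);	/* 4, no out-of-bounds read */
	return 0;
}
==== [end note] ====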
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index e37bd43904af..a4435311754b 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -116,6 +116,13 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw)
 	return pfn_is_match(pvmw->page, pfn);
 }
 
+static void step_forward(struct page_vma_mapped_walk *pvmw, unsigned long size)
+{
+	pvmw->address = (pvmw->address + size) & ~(size - 1);
+	if (!pvmw->address)
+		pvmw->address = ULONG_MAX;
+}
+
 /**
  * page_vma_mapped_walk - check if @pvmw->page is mapped in @pvmw->vma at
  * @pvmw->address
@@ -144,6 +151,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
 {
 	struct mm_struct *mm = pvmw->vma->vm_mm;
 	struct page *page = pvmw->page;
+	unsigned long end;
 	pgd_t *pgd;
 	p4d_t *p4d;
 	pud_t *pud;
@@ -153,10 +161,11 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
 	if (pvmw->pmd && !pvmw->pte)
 		return not_found(pvmw);
 
-	if (pvmw->pte)
-		goto next_pte;
+	if (unlikely(PageHuge(page))) {
+		/* The only possible mapping was handled on last iteration */
+		if (pvmw->pte)
+			return not_found(pvmw);
 
-	if (unlikely(PageHuge(pvmw->page))) {
 		/* when pud is not present, pte will be NULL */
 		pvmw->pte = huge_pte_offset(mm, pvmw->address, page_size(page));
 		if (!pvmw->pte)
@@ -168,89 +177,108 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
 			return not_found(pvmw);
 		return true;
 	}
-restart:
-	pgd = pgd_offset(mm, pvmw->address);
-	if (!pgd_present(*pgd))
-		return false;
-	p4d = p4d_offset(pgd, pvmw->address);
-	if (!p4d_present(*p4d))
-		return false;
-	pud = pud_offset(p4d, pvmw->address);
-	if (!pud_present(*pud))
-		return false;
-	pvmw->pmd = pmd_offset(pud, pvmw->address);
+
 	/*
-	 * Make sure the pmd value isn't cached in a register by the
-	 * compiler and used as a stale value after we've observed a
-	 * subsequent update.
+	 * Seek to next pte only makes sense for THP.
+	 * But more important than that optimization, is to filter out
+	 * any PageKsm page: whose page->index misleads vma_address()
+	 * and vma_address_end() to disaster.
 	 */
-	pmde = READ_ONCE(*pvmw->pmd);
-	if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) {
-		pvmw->ptl = pmd_lock(mm, pvmw->pmd);
-		if (likely(pmd_trans_huge(*pvmw->pmd))) {
-			if (pvmw->flags & PVMW_MIGRATION)
-				return not_found(pvmw);
-			if (pmd_page(*pvmw->pmd) != page)
-				return not_found(pvmw);
-			return true;
-		} else if (!pmd_present(*pvmw->pmd)) {
-			if (thp_migration_supported()) {
-				if (!(pvmw->flags & PVMW_MIGRATION))
+	end = PageTransCompound(page) ?
+		vma_address_end(page, pvmw->vma) :
+		pvmw->address + PAGE_SIZE;
+	if (pvmw->pte)
+		goto next_pte;
+restart:
+	do {
+		pgd = pgd_offset(mm, pvmw->address);
+		if (!pgd_present(*pgd)) {
+			step_forward(pvmw, PGDIR_SIZE);
+			continue;
+		}
+		p4d = p4d_offset(pgd, pvmw->address);
+		if (!p4d_present(*p4d)) {
+			step_forward(pvmw, P4D_SIZE);
+			continue;
+		}
+		pud = pud_offset(p4d, pvmw->address);
+		if (!pud_present(*pud)) {
+			step_forward(pvmw, PUD_SIZE);
+			continue;
+		}
+
+		pvmw->pmd = pmd_offset(pud, pvmw->address);
+		/*
+		 * Make sure the pmd value isn't cached in a register by the
+		 * compiler and used as a stale value after we've observed a
+		 * subsequent update.
+		 */
+		pmde = READ_ONCE(*pvmw->pmd);
+
+		if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) {
+			pvmw->ptl = pmd_lock(mm, pvmw->pmd);
+			pmde = *pvmw->pmd;
+			if (likely(pmd_trans_huge(pmde))) {
+				if (pvmw->flags & PVMW_MIGRATION)
 					return not_found(pvmw);
-				if (is_migration_entry(pmd_to_swp_entry(*pvmw->pmd))) {
-					swp_entry_t entry = pmd_to_swp_entry(*pvmw->pmd);
+				if (pmd_page(pmde) != page)
+					return not_found(pvmw);
+				return true;
+			}
+			if (!pmd_present(pmde)) {
+				swp_entry_t entry;
 
-					if (migration_entry_to_page(entry) != page)
-						return not_found(pvmw);
-					return true;
-				}
+				if (!thp_migration_supported() ||
+				    !(pvmw->flags & PVMW_MIGRATION))
+					return not_found(pvmw);
+				entry = pmd_to_swp_entry(pmde);
+				if (!is_migration_entry(entry) ||
+				    migration_entry_to_page(entry) != page)
+					return not_found(pvmw);
+				return true;
 			}
-			return not_found(pvmw);
-		} else {
 			/* THP pmd was split under us: handle on pte level */
 			spin_unlock(pvmw->ptl);
 			pvmw->ptl = NULL;
-		}
-	} else if (!pmd_present(pmde)) {
-		/*
-		 * If PVMW_SYNC, take and drop THP pmd lock so that we
-		 * cannot return prematurely, while zap_huge_pmd() has
-		 * cleared *pmd but not decremented compound_mapcount().
-		 */
-		if ((pvmw->flags & PVMW_SYNC) &&
-		    PageTransCompound(pvmw->page)) {
-			spinlock_t *ptl = pmd_lock(mm, pvmw->pmd);
+		} else if (!pmd_present(pmde)) {
+			/*
+			 * If PVMW_SYNC, take and drop THP pmd lock so that we
+			 * cannot return prematurely, while zap_huge_pmd() has
+			 * cleared *pmd but not decremented compound_mapcount().
+			 */
+			if ((pvmw->flags & PVMW_SYNC) &&
+			    PageTransCompound(page)) {
+				spinlock_t *ptl = pmd_lock(mm, pvmw->pmd);
 
-			spin_unlock(ptl);
+				spin_unlock(ptl);
+			}
+			step_forward(pvmw, PMD_SIZE);
+			continue;
 		}
-		return false;
-	}
-	if (!map_pte(pvmw))
-		goto next_pte;
-	while (1) {
-		unsigned long end;
-
+		if (!map_pte(pvmw))
+			goto next_pte;
+this_pte:
 		if (check_pte(pvmw))
 			return true;
 next_pte:
-		/* Seek to next pte only makes sense for THP */
-		if (!PageTransHuge(pvmw->page) || PageHuge(pvmw->page))
-			return not_found(pvmw);
-		end = vma_address_end(pvmw->page, pvmw->vma);
 		do {
 			pvmw->address += PAGE_SIZE;
 			if (pvmw->address >= end)
 				return not_found(pvmw);
 			/* Did we cross page table boundary? */
-			if (pvmw->address % PMD_SIZE == 0) {
-				pte_unmap(pvmw->pte);
+			if ((pvmw->address & (PMD_SIZE - PAGE_SIZE)) == 0) {
 				if (pvmw->ptl) {
 					spin_unlock(pvmw->ptl);
 					pvmw->ptl = NULL;
 				}
+				pte_unmap(pvmw->pte);
+				pvmw->pte = NULL;
 				goto restart;
-			} else {
-				pvmw->pte++;
+			}
+			pvmw->pte++;
+			if ((pvmw->flags & PVMW_SYNC) && !pvmw->ptl) {
+				pvmw->ptl = pte_lockptr(mm, pvmw->pmd);
+				spin_lock(pvmw->ptl);
 			}
 		} while (pte_none(*pvmw->pte));
 
@@ -258,7 +286,10 @@ next_pte:
 			pvmw->ptl = pte_lockptr(mm, pvmw->pmd);
 			spin_lock(pvmw->ptl);
 		}
-	}
+		goto this_pte;
+	} while (pvmw->address < end);
+
+	return false;
 }
 
 /**
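==== [Ed. note -- not part of the diff] ====
step_forward() advances the walk to the next boundary of whichever page-table level was found missing. The expression (addr + size) & ~(size - 1) rounds up to the next multiple of any power-of-two size; it yields 0 only when the addition wraps past the top of the address space, which the ULONG_MAX assignment turns into a value that terminates the "address < end" loop. The arithmetic in isolation (runnable; the 2M size stands in for a typical PMD_SIZE):

#include <stdio.h>
#include <limits.h>

static unsigned long step_forward(unsigned long addr, unsigned long size)
{
	addr = (addr + size) & ~(size - 1);	/* next size-aligned boundary */
	if (!addr)				/* wrapped past the top... */
		addr = ULONG_MAX;		/* ...so force the loop to stop */
	return addr;
}

int main(void)
{
	unsigned long pmd_size = 1UL << 21;	/* 2M */

	printf("%#lx\n", step_forward(0x200abc, pmd_size));	  /* 0x400000 */
	printf("%#lx\n", step_forward(ULONG_MAX - 5, pmd_size)); /* ULONG_MAX */
	return 0;
}
==== [end note] ====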
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index a13ac524f6ff..d0a7d89be091 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2344,15 +2344,16 @@ static void clear_vm_uninitialized_flag(struct vm_struct *vm)
 }
 
 static struct vm_struct *__get_vm_area_node(unsigned long size,
-		unsigned long align, unsigned long flags, unsigned long start,
-		unsigned long end, int node, gfp_t gfp_mask, const void *caller)
+		unsigned long align, unsigned long shift, unsigned long flags,
+		unsigned long start, unsigned long end, int node,
+		gfp_t gfp_mask, const void *caller)
 {
 	struct vmap_area *va;
 	struct vm_struct *area;
 	unsigned long requested_size = size;
 
 	BUG_ON(in_interrupt());
-	size = PAGE_ALIGN(size);
+	size = ALIGN(size, 1ul << shift);
 	if (unlikely(!size))
 		return NULL;
 
@@ -2384,8 +2385,8 @@ struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags,
 				       unsigned long start, unsigned long end,
 				       const void *caller)
 {
-	return __get_vm_area_node(size, 1, flags, start, end, NUMA_NO_NODE,
-				  GFP_KERNEL, caller);
+	return __get_vm_area_node(size, 1, PAGE_SHIFT, flags, start, end,
+				  NUMA_NO_NODE, GFP_KERNEL, caller);
 }
 
 /**
@@ -2401,7 +2402,8 @@ struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags,
  */
 struct vm_struct *get_vm_area(unsigned long size, unsigned long flags)
 {
-	return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
+	return __get_vm_area_node(size, 1, PAGE_SHIFT, flags,
+				  VMALLOC_START, VMALLOC_END,
 				  NUMA_NO_NODE, GFP_KERNEL,
 				  __builtin_return_address(0));
 }
@@ -2409,7 +2411,8 @@ struct vm_struct *get_vm_area(unsigned long size, unsigned long flags)
 struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags,
 				     const void *caller)
 {
-	return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
+	return __get_vm_area_node(size, 1, PAGE_SHIFT, flags,
+				  VMALLOC_START, VMALLOC_END,
 				  NUMA_NO_NODE, GFP_KERNEL, caller);
 }
 
@@ -2902,9 +2905,9 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
 	}
 
 again:
-	size = PAGE_ALIGN(size);
-	area = __get_vm_area_node(size, align, VM_ALLOC | VM_UNINITIALIZED |
-				vm_flags, start, end, node, gfp_mask, caller);
+	area = __get_vm_area_node(real_size, align, shift, VM_ALLOC |
+				  VM_UNINITIALIZED | vm_flags, start, end, node,
+				  gfp_mask, caller);
 	if (!area) {
 		warn_alloc(gfp_mask, NULL,
 			   "vmalloc size %lu allocation failure: "
@@ -2923,6 +2926,7 @@ again:
 	 */
 	clear_vm_uninitialized_flag(area);
 
+	size = PAGE_ALIGN(size);
 	kmemleak_vmalloc(area, size, gfp_mask);
 
 	return addr;
@@ -2999,6 +3003,23 @@ void *vmalloc(unsigned long size)
 EXPORT_SYMBOL(vmalloc);
 
 /**
+ * vmalloc_no_huge - allocate virtually contiguous memory using small pages
+ * @size:    allocation size
+ *
+ * Allocate enough non-huge pages to cover @size from the page level
+ * allocator and map them into contiguous kernel virtual space.
+ *
+ * Return: pointer to the allocated memory or %NULL on error
+ */
+void *vmalloc_no_huge(unsigned long size)
+{
+	return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
+				    GFP_KERNEL, PAGE_KERNEL, VM_NO_HUGE_VMAP,
+				    NUMA_NO_NODE, __builtin_return_address(0));
+}
+EXPORT_SYMBOL(vmalloc_no_huge);
+
+/**
  * vzalloc - allocate virtually contiguous memory with zero fill
  * @size:    allocation size
  *
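==== [Ed. note -- not part of the diff] ====
vmalloc_no_huge() gives callers an allocation that is guaranteed to be mapped with base pages (VM_NO_HUGE_VMAP), for interfaces that cannot tolerate a huge-page mapping behind the buffer. A hedged usage sketch in a hypothetical module context (fw_buf and the surrounding functions are invented for illustration, not taken from this diff):

#include <linux/vmalloc.h>
#include <linux/errno.h>

static void *fw_buf;

static int fw_buf_alloc(unsigned long len)
{
	fw_buf = vmalloc_no_huge(len);	/* never backed by huge mappings */
	if (!fw_buf)
		return -ENOMEM;
	return 0;
}

static void fw_buf_free(void)
{
	vfree(fw_buf);			/* freed like any other vmalloc area */
	fw_buf = NULL;
}
==== [end note] ====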
diff --git a/net/ceph/auth.c b/net/ceph/auth.c
index de407e8feb97..d2b268a1838e 100644
--- a/net/ceph/auth.c
+++ b/net/ceph/auth.c
@@ -36,7 +36,7 @@ static int init_protocol(struct ceph_auth_client *ac, int proto)
 	}
 }
 
-static void set_global_id(struct ceph_auth_client *ac, u64 global_id)
+void ceph_auth_set_global_id(struct ceph_auth_client *ac, u64 global_id)
 {
 	dout("%s global_id %llu\n", __func__, global_id);
 
@@ -260,19 +260,22 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac,
 		ac->negotiating = false;
 	}
 
-	ret = ac->ops->handle_reply(ac, result, payload, payload_end,
+	if (result) {
+		pr_err("auth protocol '%s' mauth authentication failed: %d\n",
+		       ceph_auth_proto_name(ac->protocol), result);
+		ret = result;
+		goto out;
+	}
+
+	ret = ac->ops->handle_reply(ac, global_id, payload, payload_end,
 				    NULL, NULL, NULL, NULL);
 	if (ret == -EAGAIN) {
 		ret = build_request(ac, true, reply_buf, reply_len);
 		goto out;
 	} else if (ret) {
-		pr_err("auth protocol '%s' mauth authentication failed: %d\n",
-		       ceph_auth_proto_name(ac->protocol), result);
 		goto out;
 	}
 
-	set_global_id(ac, global_id);
-
 out:
 	mutex_unlock(&ac->mutex);
 	return ret;
@@ -498,11 +501,10 @@ int ceph_auth_handle_reply_done(struct ceph_auth_client *ac,
 	int ret;
 
 	mutex_lock(&ac->mutex);
-	ret = ac->ops->handle_reply(ac, 0, reply, reply + reply_len,
+	ret = ac->ops->handle_reply(ac, global_id, reply, reply + reply_len,
 				    session_key, session_key_len,
 				    con_secret, con_secret_len);
-	if (!ret)
-		set_global_id(ac, global_id);
+	WARN_ON(ret == -EAGAIN || ret > 0);
 	mutex_unlock(&ac->mutex);
 	return ret;
 }
diff --git a/net/ceph/auth_none.c b/net/ceph/auth_none.c
index 70e86e462250..097e9f8d87a7 100644
--- a/net/ceph/auth_none.c
+++ b/net/ceph/auth_none.c
@@ -69,7 +69,7 @@ static int build_request(struct ceph_auth_client *ac, void *buf, void *end)
  * the generic auth code decode the global_id, and we carry no actual
  * authenticate state, so nothing happens here.
  */
-static int handle_reply(struct ceph_auth_client *ac, int result,
+static int handle_reply(struct ceph_auth_client *ac, u64 global_id,
 			void *buf, void *end, u8 *session_key,
 			int *session_key_len, u8 *con_secret,
 			int *con_secret_len)
@@ -77,7 +77,8 @@ static int handle_reply(struct ceph_auth_client *ac, int result,
 	struct ceph_auth_none_info *xi = ac->private;
 
 	xi->starting = false;
-	return result;
+	ceph_auth_set_global_id(ac, global_id);
+	return 0;
 }
 
 static void ceph_auth_none_destroy_authorizer(struct ceph_authorizer *a)
diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index 79641c4afee9..b71b1635916e 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -597,7 +597,7 @@ bad:
 	return -EINVAL;
 }
 
-static int handle_auth_session_key(struct ceph_auth_client *ac,
+static int handle_auth_session_key(struct ceph_auth_client *ac, u64 global_id,
 				   void **p, void *end,
 				   u8 *session_key, int *session_key_len,
 				   u8 *con_secret, int *con_secret_len)
@@ -613,6 +613,7 @@ static int handle_auth_session_key(struct ceph_auth_client *ac,
 	if (ret)
 		return ret;
 
+	ceph_auth_set_global_id(ac, global_id);
 	if (*p == end) {
 		/* pre-nautilus (or didn't request service tickets!) */
 		WARN_ON(session_key || con_secret);
@@ -661,7 +662,7 @@ e_inval:
 	return -EINVAL;
 }
 
-static int ceph_x_handle_reply(struct ceph_auth_client *ac, int result,
+static int ceph_x_handle_reply(struct ceph_auth_client *ac, u64 global_id,
 			       void *buf, void *end,
 			       u8 *session_key, int *session_key_len,
 			       u8 *con_secret, int *con_secret_len)
@@ -669,13 +670,11 @@ static int ceph_x_handle_reply(struct ceph_auth_client *ac, int result,
 	struct ceph_x_info *xi = ac->private;
 	struct ceph_x_ticket_handler *th;
 	int len = end - buf;
+	int result;
 	void *p;
 	int op;
 	int ret;
 
-	if (result)
-		return result;  /* XXX hmm? */
-
 	if (xi->starting) {
 		/* it's a hello */
 		struct ceph_x_server_challenge *sc = buf;
@@ -697,9 +696,9 @@ static int ceph_x_handle_reply(struct ceph_auth_client *ac, int result,
 	switch (op) {
 	case CEPHX_GET_AUTH_SESSION_KEY:
 		/* AUTH ticket + [connection secret] + service tickets */
-		ret = handle_auth_session_key(ac, &p, end, session_key,
-					      session_key_len, con_secret,
-					      con_secret_len);
+		ret = handle_auth_session_key(ac, global_id, &p, end,
					      session_key, session_key_len,
+					      con_secret, con_secret_len);
 		break;
 
 	case CEPHX_GET_PRINCIPAL_SESSION_KEY:
diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c
index 438fa18bcb55..9408ee63cb26 100644
--- a/sound/soc/codecs/rt5645.c
+++ b/sound/soc/codecs/rt5645.c
@@ -3388,44 +3388,30 @@ static int rt5645_probe(struct snd_soc_component *component)
 {
 	struct snd_soc_dapm_context *dapm = snd_soc_component_get_dapm(component);
 	struct rt5645_priv *rt5645 = snd_soc_component_get_drvdata(component);
-	int ret = 0;
 
 	rt5645->component = component;
 
 	switch (rt5645->codec_type) {
 	case CODEC_TYPE_RT5645:
-		ret = snd_soc_dapm_new_controls(dapm,
+		snd_soc_dapm_new_controls(dapm,
 			rt5645_specific_dapm_widgets,
 			ARRAY_SIZE(rt5645_specific_dapm_widgets));
-		if (ret < 0)
-			goto exit;
-
-		ret = snd_soc_dapm_add_routes(dapm,
+		snd_soc_dapm_add_routes(dapm,
 			rt5645_specific_dapm_routes,
 			ARRAY_SIZE(rt5645_specific_dapm_routes));
-		if (ret < 0)
-			goto exit;
-
 		if (rt5645->v_id < 3) {
-			ret = snd_soc_dapm_add_routes(dapm,
+			snd_soc_dapm_add_routes(dapm,
 				rt5645_old_dapm_routes,
 				ARRAY_SIZE(rt5645_old_dapm_routes));
-			if (ret < 0)
-				goto exit;
 		}
 		break;
 	case CODEC_TYPE_RT5650:
-		ret = snd_soc_dapm_new_controls(dapm,
+		snd_soc_dapm_new_controls(dapm,
 			rt5650_specific_dapm_widgets,
 			ARRAY_SIZE(rt5650_specific_dapm_widgets));
-		if (ret < 0)
-			goto exit;
-
-		ret = snd_soc_dapm_add_routes(dapm,
+		snd_soc_dapm_add_routes(dapm,
 			rt5650_specific_dapm_routes,
 			ARRAY_SIZE(rt5650_specific_dapm_routes));
-		if (ret < 0)
-			goto exit;
 		break;
 	}
@@ -3433,17 +3419,9 @@ static int rt5645_probe(struct snd_soc_component *component)
 
 	/* for JD function */
 	if (rt5645->pdata.jd_mode) {
-		ret = snd_soc_dapm_force_enable_pin(dapm, "JD Power");
-		if (ret < 0)
-			goto exit;
-
-		ret = snd_soc_dapm_force_enable_pin(dapm, "LDO2");
-		if (ret < 0)
-			goto exit;
-
-		ret = snd_soc_dapm_sync(dapm);
-		if (ret < 0)
-			goto exit;
+		snd_soc_dapm_force_enable_pin(dapm, "JD Power");
+		snd_soc_dapm_force_enable_pin(dapm, "LDO2");
+		snd_soc_dapm_sync(dapm);
 	}
 
 	if (rt5645->pdata.long_name)
@@ -3454,14 +3432,9 @@ static int rt5645_probe(struct snd_soc_component *component)
 					  GFP_KERNEL);
 
 	if (!rt5645->eq_param)
-		ret = -ENOMEM;
-exit:
-	/*
-	 * If there was an error above, everything will be cleaned up by the
-	 * caller if we return an error here. This will be done with a later
-	 * call to rt5645_remove().
-	 */
-	return ret;
+		return -ENOMEM;
+
+	return 0;
 }
 
 static void rt5645_remove(struct snd_soc_component *component)
diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c
index 978f5b5f4dc0..d8812f27648c 100644
--- a/tools/testing/selftests/kvm/set_memory_region_test.c
+++ b/tools/testing/selftests/kvm/set_memory_region_test.c
@@ -376,7 +376,7 @@ static void test_add_max_memory_regions(void)
 	pr_info("Adding slots 0..%i, each memory region with %dK size\n",
 		(max_mem_slots - 1), MEM_REGION_SIZE >> 10);
 
-	mem = mmap(NULL, MEM_REGION_SIZE * max_mem_slots + alignment,
+	mem = mmap(NULL, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment,
 		   PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 	TEST_ASSERT(mem != MAP_FAILED, "Failed to mmap() host");
 	mem_aligned = (void *)(((size_t) mem + alignment - 1) & ~(alignment - 1));
@@ -401,7 +401,7 @@ static void test_add_max_memory_regions(void)
 	TEST_ASSERT(ret == -1 && errno == EINVAL,
 		    "Adding one more memory slot should fail with EINVAL");
 
-	munmap(mem, MEM_REGION_SIZE * max_mem_slots + alignment);
+	munmap(mem, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment);
 	munmap(mem_extra, MEM_REGION_SIZE);
 	kvm_vm_free(vm);
 }
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 6a6bc7af0e28..46fb042837d2 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2055,6 +2055,13 @@ static bool vma_is_valid(struct vm_area_struct *vma, bool write_fault)
 	return true;
 }
 
+static int kvm_try_get_pfn(kvm_pfn_t pfn)
+{
+	if (kvm_is_reserved_pfn(pfn))
+		return 1;
+	return get_page_unless_zero(pfn_to_page(pfn));
+}
+
 static int hva_to_pfn_remapped(struct vm_area_struct *vma,
 			       unsigned long addr, bool *async,
 			       bool write_fault, bool *writable,
@@ -2104,13 +2111,21 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
 	 * Whoever called remap_pfn_range is also going to call e.g.
 	 * unmap_mapping_range before the underlying pages are freed,
 	 * causing a call to our MMU notifier.
+	 *
+	 * Certain IO or PFNMAP mappings can be backed with valid
+	 * struct pages, but be allocated without refcounting e.g.,
+	 * tail pages of non-compound higher order allocations, which
+	 * would then underflow the refcount when the caller does the
+	 * required put_page. Don't allow those pages here.
 	 */
-	kvm_get_pfn(pfn);
+	if (!kvm_try_get_pfn(pfn))
+		r = -EFAULT;
 
 out:
 	pte_unmap_unlock(ptep, ptl);
 	*p_pfn = pfn;
-	return 0;
+
+	return r;
 }
 
 /*
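==== [Ed. note -- not part of the diff] ====
kvm_try_get_pfn() replaces the unconditional reference grab with get_page_unless_zero(), which succeeds only if the page already holds a non-zero refcount; refcount-less pages (e.g. tail pages of non-compound higher-order allocations) are refused with -EFAULT instead of having the later put_page() underflow their count. The compare-and-swap shape of such a "try-get", sketched with C11 atomics (illustrative, not the kernel implementation):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct object { atomic_int refcount; };

static bool try_get(struct object *obj)
{
	int old = atomic_load(&obj->refcount);

	/* Never resurrect an object whose count already hit zero. */
	while (old != 0) {
		if (atomic_compare_exchange_weak(&obj->refcount, &old, old + 1))
			return true;	/* reference taken */
	}
	return false;			/* caller must not use obj */
}

int main(void)
{
	struct object live = { 1 }, dead = { 0 };

	printf("live: %d, dead: %d\n", try_get(&live), try_get(&dead)); /* 1, 0 */
	return 0;
}
==== [end note] ====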