114 files changed, 1198 insertions, 618 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 035ffc1e16a3..9cbcf167bdd0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14883,9 +14883,9 @@ F: include/linux/arm_sdei.h F: include/uapi/linux/arm_sdei.h SOFTWARE RAID (Multiple Disks) SUPPORT -M: Shaohua Li <shli@kernel.org> +M: Song Liu <song@kernel.org> L: linux-raid@vger.kernel.org -T: git git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/song/md.git S: Supported F: drivers/md/Makefile F: drivers/md/Kconfig diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 33b00579beff..24360211534a 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -7,6 +7,8 @@ config ARM select ARCH_HAS_BINFMT_FLAT select ARCH_HAS_DEBUG_VIRTUAL if MMU select ARCH_HAS_DEVMEM_IS_ALLOWED + select ARCH_HAS_DMA_COHERENT_TO_PFN if SWIOTLB + select ARCH_HAS_DMA_MMAP_PGPROT if SWIOTLB select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_KEEPINITRD @@ -18,6 +20,8 @@ config ARM select ARCH_HAS_SET_MEMORY select ARCH_HAS_STRICT_KERNEL_RWX if MMU && !XIP_KERNEL select ARCH_HAS_STRICT_MODULE_RWX if MMU + select ARCH_HAS_SYNC_DMA_FOR_DEVICE if SWIOTLB + select ARCH_HAS_SYNC_DMA_FOR_CPU if SWIOTLB select ARCH_HAS_TEARDOWN_DMA_OPS if MMU select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAVE_CUSTOM_GPIO_H diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index c54cd7ed90ba..c1222c0e9fd3 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -664,10 +664,6 @@ config ARM_LPAE !CPU_32v4 && !CPU_32v3 select PHYS_ADDR_T_64BIT select SWIOTLB - select ARCH_HAS_DMA_COHERENT_TO_PFN - select ARCH_HAS_DMA_MMAP_PGPROT - select ARCH_HAS_SYNC_DMA_FOR_DEVICE - select ARCH_HAS_SYNC_DMA_FOR_CPU help Say Y if you have an ARMv7 processor supporting the LPAE page table format and you would like to access memory beyond the diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index a39b079e73f2..6d58c1739b42 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -2,6 +2,7 @@ #ifndef _PARISC_PGTABLE_H #define _PARISC_PGTABLE_H +#include <asm/page.h> #include <asm-generic/4level-fixup.h> #include <asm/fixmap.h> @@ -98,8 +99,6 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr) #endif /* !__ASSEMBLY__ */ -#include <asm/page.h> - #define pte_ERROR(e) \ printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) #define pmd_ERROR(e) \ diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 81b005e4c7d9..325959d19d9a 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1236,7 +1236,7 @@ void x86_pmu_enable_event(struct perf_event *event) * Add a single event to the PMU. * * The event is added to the group of enabled events - * but only if it can be scehduled with existing events. + * but only if it can be scheduled with existing events. 
*/ static int x86_pmu_add(struct perf_event *event, int flags) { diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index 567fb98c0892..9762dd6d99fa 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -363,7 +363,7 @@ struct gpio_desc *of_find_gpio(struct device *dev, const char *con_id, /* Special handling for SPI GPIOs if used */ if (IS_ERR(desc)) desc = of_find_spi_gpio(dev, con_id, &of_flags); - if (IS_ERR(desc)) { + if (IS_ERR(desc) && PTR_ERR(desc) != -EPROBE_DEFER) { /* This quirk looks up flags and all */ desc = of_find_spi_cs_gpio(dev, con_id, idx, flags); if (!IS_ERR(desc)) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index f497003f119c..cca749010cd0 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1091,9 +1091,11 @@ static long gpio_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if (test_bit(FLAG_ACTIVE_LOW, &desc->flags)) lineinfo.flags |= GPIOLINE_FLAG_ACTIVE_LOW; if (test_bit(FLAG_OPEN_DRAIN, &desc->flags)) - lineinfo.flags |= GPIOLINE_FLAG_OPEN_DRAIN; + lineinfo.flags |= (GPIOLINE_FLAG_OPEN_DRAIN | + GPIOLINE_FLAG_IS_OUT); if (test_bit(FLAG_OPEN_SOURCE, &desc->flags)) - lineinfo.flags |= GPIOLINE_FLAG_OPEN_SOURCE; + lineinfo.flags |= (GPIOLINE_FLAG_OPEN_SOURCE | + GPIOLINE_FLAG_IS_OUT); if (copy_to_user(ip, &lineinfo, sizeof(lineinfo))) return -EFAULT; @@ -1371,21 +1373,13 @@ int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, if (status) goto err_remove_from_list; - status = gpiochip_irqchip_init_valid_mask(chip); - if (status) - goto err_remove_from_list; - status = gpiochip_alloc_valid_mask(chip); if (status) - goto err_remove_irqchip_mask; - - status = gpiochip_add_irqchip(chip, lock_key, request_key); - if (status) - goto err_free_gpiochip_mask; + goto err_remove_from_list; status = of_gpiochip_add(chip); if (status) - goto err_remove_chip; + goto err_free_gpiochip_mask; status = gpiochip_init_valid_mask(chip); if (status) @@ -1411,6 +1405,14 @@ int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, machine_gpiochip_add(chip); + status = gpiochip_irqchip_init_valid_mask(chip); + if (status) + goto err_remove_acpi_chip; + + status = gpiochip_add_irqchip(chip, lock_key, request_key); + if (status) + goto err_remove_irqchip_mask; + /* * By first adding the chardev, and then adding the device, * we get a device node entry in sysfs under @@ -1422,21 +1424,21 @@ int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, if (gpiolib_initialized) { status = gpiochip_setup_dev(gdev); if (status) - goto err_remove_acpi_chip; + goto err_remove_irqchip; } return 0; +err_remove_irqchip: + gpiochip_irqchip_remove(chip); +err_remove_irqchip_mask: + gpiochip_irqchip_free_valid_mask(chip); err_remove_acpi_chip: acpi_gpiochip_remove(chip); err_remove_of_chip: gpiochip_free_hogs(chip); of_gpiochip_remove(chip); -err_remove_chip: - gpiochip_irqchip_remove(chip); err_free_gpiochip_mask: gpiochip_free_valid_mask(chip); -err_remove_irqchip_mask: - gpiochip_irqchip_free_valid_mask(chip); err_remove_from_list: spin_lock_irqsave(&gpio_lock, flags); list_del(&gdev->list); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 4e4094f842e7..8b26c970a3cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1143,6 +1143,9 @@ static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p, num_deps = chunk->length_dw * 4 / sizeof(struct drm_amdgpu_cs_chunk_sem); + if 
(p->post_deps) + return -EINVAL; + p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps), GFP_KERNEL); p->num_post_deps = 0; @@ -1166,8 +1169,7 @@ static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p, static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p, - struct amdgpu_cs_chunk - *chunk) + struct amdgpu_cs_chunk *chunk) { struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps; unsigned num_deps; @@ -1177,6 +1179,9 @@ static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p num_deps = chunk->length_dw * 4 / sizeof(struct drm_amdgpu_cs_chunk_syncobj); + if (p->post_deps) + return -EINVAL; + p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps), GFP_KERNEL); p->num_post_deps = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 04b8ac4432c7..4ea67f94cae2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -604,6 +604,10 @@ static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev) (adev->gfx.rlc_feature_version < 1) || !adev->gfx.rlc.is_rlc_v2_1) adev->pm.pp_feature &= ~PP_GFXOFF_MASK; + if (adev->pm.pp_feature & PP_GFXOFF_MASK) + adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | + AMD_PG_SUPPORT_CP | + AMD_PG_SUPPORT_RLC_SMU_HS; break; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 662612f89c70..9922bce3fd89 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -552,7 +552,6 @@ static int nv_common_early_init(void *handle) AMD_CG_SUPPORT_BIF_LS; adev->pg_flags = AMD_PG_SUPPORT_VCN | AMD_PG_SUPPORT_VCN_DPG | - AMD_PG_SUPPORT_MMHUB | AMD_PG_SUPPORT_ATHUB; adev->external_rev_id = adev->rev_id + 0x1; break; diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 23265414d448..04fbf05d7176 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -992,11 +992,6 @@ static int soc15_common_early_init(void *handle) adev->pg_flags = AMD_PG_SUPPORT_SDMA | AMD_PG_SUPPORT_VCN; } - - if (adev->pm.pp_feature & PP_GFXOFF_MASK) - adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | - AMD_PG_SUPPORT_CP | - AMD_PG_SUPPORT_RLC_SMU_HS; break; default: /* FIXME: not supported yet */ diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 4a29f72334d0..45be7a2132bb 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -3131,13 +3131,25 @@ static enum dc_color_depth convert_color_depth_from_display_info(const struct drm_connector *connector, const struct drm_connector_state *state) { - uint32_t bpc = connector->display_info.bpc; + uint8_t bpc = (uint8_t)connector->display_info.bpc; + + /* Assume 8 bpc by default if no bpc is specified. */ + bpc = bpc ? bpc : 8; if (!state) state = connector->state; if (state) { - bpc = state->max_bpc; + /* + * Cap display bpc based on the user requested value. + * + * The value for state->max_bpc may not correctly updated + * depending on when the connector gets added to the state + * or if this was called outside of atomic check, so it + * can't be used directly. + */ + bpc = min(bpc, state->max_requested_bpc); + /* Round down to the nearest even number. 
*/ bpc = bpc - (bpc & 1); } diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h index a0f52c86d8c7..a78b2e295895 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h @@ -907,8 +907,6 @@ struct smu_funcs ((smu)->funcs->register_irq_handler ? (smu)->funcs->register_irq_handler(smu) : 0) #define smu_set_azalia_d3_pme(smu) \ ((smu)->funcs->set_azalia_d3_pme ? (smu)->funcs->set_azalia_d3_pme((smu)) : 0) -#define smu_get_uclk_dpm_states(smu, clocks_in_khz, num_states) \ - ((smu)->ppt_funcs->get_uclk_dpm_states ? (smu)->ppt_funcs->get_uclk_dpm_states((smu), (clocks_in_khz), (num_states)) : 0) #define smu_get_max_sustainable_clocks_by_dc(smu, max_clocks) \ ((smu)->funcs->get_max_sustainable_clocks_by_dc ? (smu)->funcs->get_max_sustainable_clocks_by_dc((smu), (max_clocks)) : 0) #define smu_get_uclk_dpm_states(smu, clocks_in_khz, num_states) \ diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c index 5fde5cf65b42..53097961bf2b 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c @@ -326,7 +326,8 @@ static int smu_v11_0_setup_pptable(struct smu_context *smu) struct amdgpu_device *adev = smu->adev; const struct smc_firmware_header_v1_0 *hdr; int ret, index; - uint32_t size; + uint32_t size = 0; + uint16_t atom_table_size; uint8_t frev, crev; void *table; uint16_t version_major, version_minor; @@ -354,10 +355,11 @@ static int smu_v11_0_setup_pptable(struct smu_context *smu) index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, powerplayinfo); - ret = smu_get_atom_data_table(smu, index, (uint16_t *)&size, &frev, &crev, + ret = smu_get_atom_data_table(smu, index, &atom_table_size, &frev, &crev, (uint8_t **)&table); if (ret) return ret; + size = atom_table_size; } if (!smu->smu_table.power_play_table) diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_dev.c b/drivers/gpu/drm/arm/display/komeda/komeda_dev.c index 5a118984de33..a0eabc134dd6 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_dev.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_dev.c @@ -8,6 +8,7 @@ #include <linux/iommu.h> #include <linux/of_device.h> #include <linux/of_graph.h> +#include <linux/of_reserved_mem.h> #include <linux/platform_device.h> #include <linux/dma-mapping.h> #ifdef CONFIG_DEBUG_FS @@ -143,6 +144,12 @@ static int komeda_parse_dt(struct device *dev, struct komeda_dev *mdev) return mdev->irq; } + /* Get the optional framebuffer memory resource */ + ret = of_reserved_mem_device_init(dev); + if (ret && ret != -ENODEV) + return ret; + ret = 0; + for_each_available_child_of_node(np, child) { if (of_node_cmp(child->name, "pipeline") == 0) { ret = komeda_parse_pipe_dt(mdev, child); @@ -289,6 +296,8 @@ void komeda_dev_destroy(struct komeda_dev *mdev) mdev->n_pipelines = 0; + of_reserved_mem_device_release(dev); + if (funcs && funcs->cleanup) funcs->cleanup(mdev); diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_format_caps.c b/drivers/gpu/drm/arm/display/komeda/komeda_format_caps.c index cd4d9f53ddef..c9a1edb9a000 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_format_caps.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_format_caps.c @@ -35,6 +35,25 @@ komeda_get_format_caps(struct komeda_format_caps_table *table, return NULL; } +u32 komeda_get_afbc_format_bpp(const struct drm_format_info *info, u64 modifier) +{ + u32 bpp; + + switch (info->format) { + case 
DRM_FORMAT_YUV420_8BIT: + bpp = 12; + break; + case DRM_FORMAT_YUV420_10BIT: + bpp = 15; + break; + default: + bpp = info->cpp[0] * 8; + break; + } + + return bpp; +} + /* Two assumptions * 1. RGB always has YTR * 2. Tiled RGB always has SC diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_format_caps.h b/drivers/gpu/drm/arm/display/komeda/komeda_format_caps.h index 3631910d33b5..32273cf18f7c 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_format_caps.h +++ b/drivers/gpu/drm/arm/display/komeda/komeda_format_caps.h @@ -97,6 +97,9 @@ const struct komeda_format_caps * komeda_get_format_caps(struct komeda_format_caps_table *table, u32 fourcc, u64 modifier); +u32 komeda_get_afbc_format_bpp(const struct drm_format_info *info, + u64 modifier); + u32 *komeda_get_layer_fourcc_list(struct komeda_format_caps_table *table, u32 layer_type, u32 *n_fmts); diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.c b/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.c index 3b0a70ed6aa0..1b01a625f40e 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.c @@ -43,7 +43,7 @@ komeda_fb_afbc_size_check(struct komeda_fb *kfb, struct drm_file *file, struct drm_framebuffer *fb = &kfb->base; const struct drm_format_info *info = fb->format; struct drm_gem_object *obj; - u32 alignment_w = 0, alignment_h = 0, alignment_header, n_blocks; + u32 alignment_w = 0, alignment_h = 0, alignment_header, n_blocks, bpp; u64 min_size; obj = drm_gem_object_lookup(file, mode_cmd->handles[0]); @@ -88,8 +88,9 @@ komeda_fb_afbc_size_check(struct komeda_fb *kfb, struct drm_file *file, kfb->offset_payload = ALIGN(n_blocks * AFBC_HEADER_SIZE, alignment_header); + bpp = komeda_get_afbc_format_bpp(info, fb->modifier); kfb->afbc_size = kfb->offset_payload + n_blocks * - ALIGN(info->cpp[0] * AFBC_SUPERBLK_PIXELS, + ALIGN(bpp * AFBC_SUPERBLK_PIXELS / 8, AFBC_SUPERBLK_ALIGNMENT); min_size = kfb->afbc_size + fb->offsets[0]; if (min_size > obj->size) { diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_kms.c b/drivers/gpu/drm/arm/display/komeda/komeda_kms.c index 419a8b0e5de8..d50e75f0b2bd 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_kms.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_kms.c @@ -15,6 +15,7 @@ #include <drm/drm_gem_framebuffer_helper.h> #include <drm/drm_irq.h> #include <drm/drm_vblank.h> +#include <drm/drm_probe_helper.h> #include "komeda_dev.h" #include "komeda_framebuffer.h" @@ -315,6 +316,8 @@ struct komeda_kms_dev *komeda_kms_attach(struct komeda_dev *mdev) drm->irq_enabled = true; + drm_kms_helper_poll_init(drm); + err = drm_dev_register(drm, 0); if (err) goto cleanup_mode_config; @@ -322,6 +325,7 @@ struct komeda_kms_dev *komeda_kms_attach(struct komeda_dev *mdev) return kms; cleanup_mode_config: + drm_kms_helper_poll_fini(drm); drm->irq_enabled = false; drm_mode_config_cleanup(drm); komeda_kms_cleanup_private_objs(kms); @@ -338,6 +342,7 @@ void komeda_kms_detach(struct komeda_kms_dev *kms) drm->irq_enabled = false; mdev->funcs->disable_irq(mdev); drm_dev_unregister(drm); + drm_kms_helper_poll_fini(drm); component_unbind_all(mdev->dev, drm); komeda_kms_cleanup_private_objs(kms); drm_mode_config_cleanup(drm); diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 7925a176f900..1cb1fa74cfbc 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -1465,8 +1465,8 @@ static void ddi_dotclock_get(struct intel_crtc_state 
*pipe_config) else if (intel_crtc_has_dp_encoder(pipe_config)) dotclock = intel_dotclock_calculate(pipe_config->port_clock, &pipe_config->dp_m_n); - else if (pipe_config->has_hdmi_sink && pipe_config->pipe_bpp == 36) - dotclock = pipe_config->port_clock * 2 / 3; + else if (pipe_config->has_hdmi_sink && pipe_config->pipe_bpp > 24) + dotclock = pipe_config->port_clock * 24 / pipe_config->pipe_bpp; else dotclock = pipe_config->port_clock; diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 1d58f7ec5d84..f11979879e7b 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -829,7 +829,7 @@ struct intel_crtc_state { /* * Frequence the dpll for the port should run at. Differs from the - * adjusted dotclock e.g. for DP or 12bpc hdmi mode. This is also + * adjusted dotclock e.g. for DP or 10/12bpc hdmi mode. This is also * already multiplied by pixel_multiplier. */ int port_clock; diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index 95fdbd0fbcac..945bc20f1d33 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -17,6 +17,7 @@ #include <linux/of_address.h> #include <linux/of_platform.h> #include <linux/pm_runtime.h> +#include <linux/dma-mapping.h> #include "mtk_drm_crtc.h" #include "mtk_drm_ddp.h" @@ -213,6 +214,7 @@ static int mtk_drm_kms_init(struct drm_device *drm) struct mtk_drm_private *private = drm->dev_private; struct platform_device *pdev; struct device_node *np; + struct device *dma_dev; int ret; if (!iommu_present(&platform_bus_type)) @@ -275,7 +277,29 @@ static int mtk_drm_kms_init(struct drm_device *drm) goto err_component_unbind; } - private->dma_dev = &pdev->dev; + dma_dev = &pdev->dev; + private->dma_dev = dma_dev; + + /* + * Configure the DMA segment size to make sure we get contiguous IOVA + * when importing PRIME buffers. + */ + if (!dma_dev->dma_parms) { + private->dma_parms_allocated = true; + dma_dev->dma_parms = + devm_kzalloc(drm->dev, sizeof(*dma_dev->dma_parms), + GFP_KERNEL); + } + if (!dma_dev->dma_parms) { + ret = -ENOMEM; + goto err_component_unbind; + } + + ret = dma_set_max_seg_size(dma_dev, (unsigned int)DMA_BIT_MASK(32)); + if (ret) { + dev_err(dma_dev, "Failed to set DMA segment size\n"); + goto err_unset_dma_parms; + } /* * We don't use the drm_irq_install() helpers provided by the DRM @@ -285,13 +309,16 @@ static int mtk_drm_kms_init(struct drm_device *drm) drm->irq_enabled = true; ret = drm_vblank_init(drm, MAX_CRTC); if (ret < 0) - goto err_component_unbind; + goto err_unset_dma_parms; drm_kms_helper_poll_init(drm); drm_mode_config_reset(drm); return 0; +err_unset_dma_parms: + if (private->dma_parms_allocated) + dma_dev->dma_parms = NULL; err_component_unbind: component_unbind_all(drm->dev, drm); err_config_cleanup: @@ -302,9 +329,14 @@ err_config_cleanup: static void mtk_drm_kms_deinit(struct drm_device *drm) { + struct mtk_drm_private *private = drm->dev_private; + drm_kms_helper_poll_fini(drm); drm_atomic_helper_shutdown(drm); + if (private->dma_parms_allocated) + private->dma_dev->dma_parms = NULL; + component_unbind_all(drm->dev, drm); drm_mode_config_cleanup(drm); } @@ -320,6 +352,18 @@ static const struct file_operations mtk_drm_fops = { .compat_ioctl = drm_compat_ioctl, }; +/* + * We need to override this because the device used to import the memory is + * not dev->dev, as drm_gem_prime_import() expects. 
+ */ +struct drm_gem_object *mtk_drm_gem_prime_import(struct drm_device *dev, + struct dma_buf *dma_buf) +{ + struct mtk_drm_private *private = dev->dev_private; + + return drm_gem_prime_import_dev(dev, dma_buf, private->dma_dev); +} + static struct drm_driver mtk_drm_driver = { .driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_PRIME | DRIVER_ATOMIC, @@ -331,7 +375,7 @@ static struct drm_driver mtk_drm_driver = { .prime_handle_to_fd = drm_gem_prime_handle_to_fd, .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_prime_export = drm_gem_prime_export, - .gem_prime_import = drm_gem_prime_import, + .gem_prime_import = mtk_drm_gem_prime_import, .gem_prime_get_sg_table = mtk_gem_prime_get_sg_table, .gem_prime_import_sg_table = mtk_gem_prime_import_sg_table, .gem_prime_mmap = mtk_drm_gem_mmap_buf, @@ -524,12 +568,15 @@ static int mtk_drm_probe(struct platform_device *pdev) comp = devm_kzalloc(dev, sizeof(*comp), GFP_KERNEL); if (!comp) { ret = -ENOMEM; + of_node_put(node); goto err_node; } ret = mtk_ddp_comp_init(dev, node, comp, comp_id, NULL); - if (ret) + if (ret) { + of_node_put(node); goto err_node; + } private->ddp_comp[comp_id] = comp; } diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.h b/drivers/gpu/drm/mediatek/mtk_drm_drv.h index 598ff3e70446..e03fea12ff59 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.h +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.h @@ -51,6 +51,8 @@ struct mtk_drm_private { } commit; struct drm_atomic_state *suspend_state; + + bool dma_parms_allocated; }; extern struct platform_driver mtk_ddp_driver; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.c b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.c index b4e7404fe660..a11637b0f6cc 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.c @@ -40,8 +40,7 @@ nvkm_i2c_aux_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) u8 *ptr = msg->buf; while (remaining) { - u8 cnt = (remaining > 16) ? 
16 : remaining; - u8 cmd; + u8 cnt, retries, cmd; if (msg->flags & I2C_M_RD) cmd = 1; @@ -51,10 +50,19 @@ nvkm_i2c_aux_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) if (mcnt || remaining > 16) cmd |= 4; /* MOT */ - ret = aux->func->xfer(aux, true, cmd, msg->addr, ptr, &cnt); - if (ret < 0) { - nvkm_i2c_aux_release(aux); - return ret; + for (retries = 0, cnt = 0; + retries < 32 && !cnt; + retries++) { + cnt = min_t(u8, remaining, 16); + ret = aux->func->xfer(aux, true, cmd, + msg->addr, ptr, &cnt); + if (ret < 0) + goto out; + } + if (!cnt) { + AUX_TRACE(aux, "no data after 32 retries"); + ret = -EIO; + goto out; } ptr += cnt; @@ -64,8 +72,10 @@ nvkm_i2c_aux_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) msg++; } + ret = num; +out: nvkm_i2c_aux_release(aux); - return num; + return ret; } static u32 diff --git a/drivers/gpu/drm/omapdrm/omap_drv.c b/drivers/gpu/drm/omapdrm/omap_drv.c index 288c59dae56a..1bad0a2cc5c6 100644 --- a/drivers/gpu/drm/omapdrm/omap_drv.c +++ b/drivers/gpu/drm/omapdrm/omap_drv.c @@ -669,7 +669,7 @@ static int pdev_probe(struct platform_device *pdev) if (omapdss_is_initialized() == false) return -EPROBE_DEFER; - ret = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32)); + ret = dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); if (ret) { dev_err(&pdev->dev, "Failed to set the DMA mask\n"); return ret; diff --git a/drivers/gpu/drm/rcar-du/rcar_lvds.c b/drivers/gpu/drm/rcar-du/rcar_lvds.c index 1c62578590f4..082d02c84024 100644 --- a/drivers/gpu/drm/rcar-du/rcar_lvds.c +++ b/drivers/gpu/drm/rcar-du/rcar_lvds.c @@ -673,10 +673,8 @@ static int rcar_lvds_parse_dt_companion(struct rcar_lvds *lvds) /* Locate the companion LVDS encoder for dual-link operation, if any. */ companion = of_parse_phandle(dev->of_node, "renesas,companion", 0); - if (!companion) { - dev_err(dev, "Companion LVDS encoder not found\n"); - return -ENXIO; - } + if (!companion) + return 0; /* * Sanity check: the companion encoder must have the same compatible diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 5f9505a087f6..23f358cb7f49 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -26,7 +26,7 @@ static unsigned long virt_to_hvpfn(void *addr) { - unsigned long paddr; + phys_addr_t paddr; if (is_vmalloc_addr(addr)) paddr = page_to_phys(vmalloc_to_page(addr)) + diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 362e70e9d145..fb16a622e8ab 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -146,8 +146,6 @@ struct hv_context { */ u64 guestid; - void *tsc_page; - struct hv_per_cpu_context __percpu *cpu_context; /* diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 19f1730a4f24..a68d0ccf67a4 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -4724,10 +4724,14 @@ static int __init cma_init(void) if (ret) goto err; - cma_configfs_init(); + ret = cma_configfs_init(); + if (ret) + goto err_ib; return 0; +err_ib: + ib_unregister_client(&cma_client); err: unregister_netdevice_notifier(&cma_nb); ib_sa_unregister_client(&sa_client); diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c index b79890739a2c..af8c85d18e62 100644 --- a/drivers/infiniband/core/counters.c +++ b/drivers/infiniband/core/counters.c @@ -149,13 +149,11 @@ static bool auto_mode_match(struct ib_qp *qp, struct rdma_counter *counter, struct auto_mode_param *param = &counter->mode.param; bool match = true; - if 
(rdma_is_kernel_res(&counter->res) != rdma_is_kernel_res(&qp->res)) + if (!rdma_is_visible_in_pid_ns(&qp->res)) return false; - /* Ensure that counter belong to right PID */ - if (!rdma_is_kernel_res(&counter->res) && - !rdma_is_kernel_res(&qp->res) && - (task_pid_vnr(counter->res.task) != current->pid)) + /* Ensure that counter belongs to the right PID */ + if (task_pid_nr(counter->res.task) != task_pid_nr(qp->res.task)) return false; if (auto_mask & RDMA_COUNTER_MASK_QP_TYPE) @@ -424,7 +422,7 @@ static struct ib_qp *rdma_counter_get_qp(struct ib_device *dev, u32 qp_num) return qp; err: - rdma_restrack_put(&qp->res); + rdma_restrack_put(res); return NULL; } diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 87d40d1ecdde..020c26976558 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -382,8 +382,7 @@ static int fill_res_info(struct sk_buff *msg, struct ib_device *device) for (i = 0; i < RDMA_RESTRACK_MAX; i++) { if (!names[i]) continue; - curr = rdma_restrack_count(device, i, - task_active_pid_ns(current)); + curr = rdma_restrack_count(device, i); ret = fill_res_info_entry(msg, names[i], curr); if (ret) goto err; diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c index bddff426ee0f..a07665f7ef8c 100644 --- a/drivers/infiniband/core/restrack.c +++ b/drivers/infiniband/core/restrack.c @@ -107,10 +107,8 @@ void rdma_restrack_clean(struct ib_device *dev) * rdma_restrack_count() - the current usage of specific object * @dev: IB device * @type: actual type of object to operate - * @ns: PID namespace */ -int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type, - struct pid_namespace *ns) +int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type) { struct rdma_restrack_root *rt = &dev->res[type]; struct rdma_restrack_entry *e; @@ -119,10 +117,9 @@ int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type, xa_lock(&rt->xa); xas_for_each(&xas, e, U32_MAX) { - if (ns == &init_pid_ns || - (!rdma_is_kernel_res(e) && - ns == task_active_pid_ns(e->task))) - cnt++; + if (!rdma_is_visible_in_pid_ns(e)) + continue; + cnt++; } xa_unlock(&rt->xa); return cnt; @@ -360,5 +357,7 @@ bool rdma_is_visible_in_pid_ns(struct rdma_restrack_entry *res) */ if (rdma_is_kernel_res(res)) return task_active_pid_ns(current) == &init_pid_ns; - return task_active_pid_ns(current) == task_active_pid_ns(res->task); + + /* PID 0 means that resource is not found in current namespace */ + return task_pid_vnr(res->task); } diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 08da840ed7ee..56553668256f 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -379,14 +379,9 @@ EXPORT_SYMBOL(ib_umem_release); int ib_umem_page_count(struct ib_umem *umem) { - int i; - int n; + int i, n = 0; struct scatterlist *sg; - if (umem->is_odp) - return ib_umem_num_pages(umem); - - n = 0; for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) n += sg_dma_len(sg) >> PAGE_SHIFT; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index 48b04d2f175f..60c8f76aab33 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -136,6 +136,13 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req, spin_unlock_irqrestore(&cmdq->lock, flags); return -EBUSY; } + + size = req->cmd_size; + /* change the cmd_size to the number of 
16byte cmdq unit. + * req->cmd_size is modified here + */ + bnxt_qplib_set_cmd_slots(req); + memset(resp, 0, sizeof(*resp)); crsqe->resp = (struct creq_qp_event *)resp; crsqe->resp->cookie = req->cookie; @@ -150,7 +157,6 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req, cmdq_ptr = (struct bnxt_qplib_cmdqe **)cmdq->pbl_ptr; preq = (u8 *)req; - size = req->cmd_size * BNXT_QPLIB_CMDQE_UNITS; do { /* Locate the next cmdq slot */ sw_prod = HWQ_CMP(cmdq->prod, cmdq); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h index 2138533bb642..dfeadc192e17 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h @@ -55,9 +55,7 @@ do { \ memset(&(req), 0, sizeof((req))); \ (req).opcode = CMDQ_BASE_OPCODE_##CMD; \ - (req).cmd_size = (sizeof((req)) + \ - BNXT_QPLIB_CMDQE_UNITS - 1) / \ - BNXT_QPLIB_CMDQE_UNITS; \ + (req).cmd_size = sizeof((req)); \ (req).flags = cpu_to_le16(cmd_flags); \ } while (0) @@ -95,6 +93,13 @@ static inline u32 bnxt_qplib_cmdqe_cnt_per_pg(u32 depth) BNXT_QPLIB_CMDQE_UNITS); } +/* Set the cmd_size to a factor of CMDQE unit */ +static inline void bnxt_qplib_set_cmd_slots(struct cmdq_base *req) +{ + req->cmd_size = (req->cmd_size + BNXT_QPLIB_CMDQE_UNITS - 1) / + BNXT_QPLIB_CMDQE_UNITS; +} + #define MAX_CMDQ_IDX(depth) ((depth) - 1) static inline u32 bnxt_qplib_max_cmdq_idx_per_pg(u32 depth) diff --git a/drivers/infiniband/hw/hfi1/fault.c b/drivers/infiniband/hw/hfi1/fault.c index 93613e5def9b..986c12153e62 100644 --- a/drivers/infiniband/hw/hfi1/fault.c +++ b/drivers/infiniband/hw/hfi1/fault.c @@ -141,12 +141,14 @@ static ssize_t fault_opcodes_write(struct file *file, const char __user *buf, if (!data) return -ENOMEM; copy = min(len, datalen - 1); - if (copy_from_user(data, buf, copy)) - return -EFAULT; + if (copy_from_user(data, buf, copy)) { + ret = -EFAULT; + goto free_data; + } ret = debugfs_file_get(file->f_path.dentry); if (unlikely(ret)) - return ret; + goto free_data; ptr = data; token = ptr; for (ptr = data; *ptr; ptr = end + 1, token = ptr) { @@ -195,6 +197,7 @@ static ssize_t fault_opcodes_write(struct file *file, const char __user *buf, ret = len; debugfs_file_put(file->f_path.dentry); +free_data: kfree(data); return ret; } @@ -214,7 +217,7 @@ static ssize_t fault_opcodes_read(struct file *file, char __user *buf, return -ENOMEM; ret = debugfs_file_get(file->f_path.dentry); if (unlikely(ret)) - return ret; + goto free_data; bit = find_first_bit(fault->opcodes, bitsize); while (bit < bitsize) { zero = find_next_zero_bit(fault->opcodes, bitsize, bit); @@ -232,6 +235,7 @@ static ssize_t fault_opcodes_read(struct file *file, char __user *buf, data[size - 1] = '\n'; data[size] = '\0'; ret = simple_read_from_buffer(buf, len, pos, data, size); +free_data: kfree(data); return ret; } diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index 996fc298207e..6141f4edc6bf 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -2574,18 +2574,9 @@ void hfi1_kern_read_tid_flow_free(struct rvt_qp *qp) hfi1_kern_clear_hw_flow(priv->rcd, qp); } -static bool tid_rdma_tid_err(struct hfi1_ctxtdata *rcd, - struct hfi1_packet *packet, u8 rcv_type, - u8 opcode) +static bool tid_rdma_tid_err(struct hfi1_packet *packet, u8 rcv_type) { struct rvt_qp *qp = packet->qp; - struct hfi1_qp_priv *qpriv = qp->priv; - u32 ipsn; - struct ib_other_headers *ohdr = packet->ohdr; - struct rvt_ack_entry *e; 
- struct tid_rdma_request *req; - struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device); - u32 i; if (rcv_type >= RHF_RCV_TYPE_IB) goto done; @@ -2602,41 +2593,9 @@ static bool tid_rdma_tid_err(struct hfi1_ctxtdata *rcd, if (rcv_type == RHF_RCV_TYPE_EAGER) { hfi1_restart_rc(qp, qp->s_last_psn + 1, 1); hfi1_schedule_send(qp); - goto done_unlock; - } - - /* - * For TID READ response, error out QP after freeing the tid - * resources. - */ - if (opcode == TID_OP(READ_RESP)) { - ipsn = mask_psn(be32_to_cpu(ohdr->u.tid_rdma.r_rsp.verbs_psn)); - if (cmp_psn(ipsn, qp->s_last_psn) > 0 && - cmp_psn(ipsn, qp->s_psn) < 0) { - hfi1_kern_read_tid_flow_free(qp); - spin_unlock(&qp->s_lock); - rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR); - goto done; - } - goto done_unlock; - } - - /* - * Error out the qp for TID RDMA WRITE - */ - hfi1_kern_clear_hw_flow(qpriv->rcd, qp); - for (i = 0; i < rvt_max_atomic(rdi); i++) { - e = &qp->s_ack_queue[i]; - if (e->opcode == TID_OP(WRITE_REQ)) { - req = ack_to_tid_req(e); - hfi1_kern_exp_rcv_clear_all(req); - } } - spin_unlock(&qp->s_lock); - rvt_rc_error(qp, IB_WC_LOC_LEN_ERR); - goto done; -done_unlock: + /* Since no payload is delivered, just drop the packet */ spin_unlock(&qp->s_lock); done: return true; @@ -2687,12 +2646,12 @@ static bool handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd, u32 fpsn; lockdep_assert_held(&qp->r_lock); + spin_lock(&qp->s_lock); /* If the psn is out of valid range, drop the packet */ if (cmp_psn(ibpsn, qp->s_last_psn) < 0 || cmp_psn(ibpsn, qp->s_psn) > 0) - return ret; + goto s_unlock; - spin_lock(&qp->s_lock); /* * Note that NAKs implicitly ACK outstanding SEND and RDMA write * requests and implicitly NAK RDMA read and atomic requests issued @@ -2740,9 +2699,12 @@ static bool handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd, wqe = do_rc_completion(qp, wqe, ibp); if (qp->s_acked == qp->s_tail) - break; + goto s_unlock; } + if (qp->s_acked == qp->s_tail) + goto s_unlock; + /* Handle the eflags for the request */ if (wqe->wr.opcode != IB_WR_TID_RDMA_READ) goto s_unlock; @@ -2922,7 +2884,7 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd, if (lnh == HFI1_LRH_GRH) goto r_unlock; - if (tid_rdma_tid_err(rcd, packet, rcv_type, opcode)) + if (tid_rdma_tid_err(packet, rcv_type)) goto r_unlock; } @@ -2942,8 +2904,15 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd, */ spin_lock(&qp->s_lock); qpriv = qp->priv; + if (qpriv->r_tid_tail == HFI1_QP_WQE_INVALID || + qpriv->r_tid_tail == qpriv->r_tid_head) + goto unlock; e = &qp->s_ack_queue[qpriv->r_tid_tail]; + if (e->opcode != TID_OP(WRITE_REQ)) + goto unlock; req = ack_to_tid_req(e); + if (req->comp_seg == req->cur_seg) + goto unlock; flow = &req->flows[req->clear_tail]; trace_hfi1_eflags_err_write(qp, rcv_type, rte, psn); trace_hfi1_rsp_handle_kdeth_eflags(qp, psn); @@ -4509,7 +4478,7 @@ void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet) struct rvt_swqe *wqe; struct tid_rdma_request *req; struct tid_rdma_flow *flow; - u32 aeth, psn, req_psn, ack_psn, resync_psn, ack_kpsn; + u32 aeth, psn, req_psn, ack_psn, flpsn, resync_psn, ack_kpsn; unsigned long flags; u16 fidx; @@ -4538,6 +4507,9 @@ void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet) ack_kpsn--; } + if (unlikely(qp->s_acked == qp->s_tail)) + goto ack_op_err; + wqe = rvt_get_swqe_ptr(qp, qp->s_acked); if (wqe->wr.opcode != IB_WR_TID_RDMA_WRITE) @@ -4550,7 +4522,8 @@ void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet) trace_hfi1_tid_flow_rcv_tid_ack(qp, req->acked_tail, flow); /* Drop stale ACK/NAK */ - if 
(cmp_psn(psn, full_flow_psn(flow, flow->flow_state.spsn)) < 0) + if (cmp_psn(psn, full_flow_psn(flow, flow->flow_state.spsn)) < 0 || + cmp_psn(req_psn, flow->flow_state.resp_ib_psn) < 0) goto ack_op_err; while (cmp_psn(ack_kpsn, @@ -4712,7 +4685,12 @@ done: switch ((aeth >> IB_AETH_CREDIT_SHIFT) & IB_AETH_CREDIT_MASK) { case 0: /* PSN sequence error */ + if (!req->flows) + break; flow = &req->flows[req->acked_tail]; + flpsn = full_flow_psn(flow, flow->flow_state.lpsn); + if (cmp_psn(psn, flpsn) > 0) + break; trace_hfi1_tid_flow_rcv_tid_ack(qp, req->acked_tail, flow); req->r_ack_psn = mask_psn(be32_to_cpu(ohdr->bth[2])); diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 68c951491a08..57079110af9b 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -1677,8 +1677,6 @@ tx_err: tx_buf_size, DMA_TO_DEVICE); kfree(tun_qp->tx_ring[i].buf.addr); } - kfree(tun_qp->tx_ring); - tun_qp->tx_ring = NULL; i = MLX4_NUM_TUNNEL_BUFS; err: while (i > 0) { @@ -1687,6 +1685,8 @@ err: rx_buf_size, DMA_FROM_DEVICE); kfree(tun_qp->ring[i].addr); } + kfree(tun_qp->tx_ring); + tun_qp->tx_ring = NULL; kfree(tun_qp->ring); tun_qp->ring = NULL; return -ENOMEM; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index e12a4404096b..0569bcab02d4 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1023,7 +1023,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->timestamp_mask = 0x7FFFFFFFFFFFFFFFULL; if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { - if (MLX5_CAP_GEN(mdev, pg)) + if (dev->odp_caps.general_caps & IB_ODP_SUPPORT) props->device_cap_flags |= IB_DEVICE_ON_DEMAND_PAGING; props->odp_caps = dev->odp_caps; } @@ -6139,6 +6139,8 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) dev->port[i].roce.last_port_state = IB_PORT_DOWN; } + mlx5_ib_internal_fill_odp_caps(dev); + err = mlx5_ib_init_multiport_master(dev); if (err) return err; @@ -6563,8 +6565,6 @@ static void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev) static int mlx5_ib_stage_odp_init(struct mlx5_ib_dev *dev) { - mlx5_ib_internal_fill_odp_caps(dev); - return mlx5_ib_odp_init_one(dev); } diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index fe1a76d8531c..a40e0abf2338 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -57,9 +57,10 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int entry; if (umem->is_odp) { - unsigned int page_shift = to_ib_umem_odp(umem)->page_shift; + struct ib_umem_odp *odp = to_ib_umem_odp(umem); + unsigned int page_shift = odp->page_shift; - *ncont = ib_umem_page_count(umem); + *ncont = ib_umem_odp_num_pages(odp); *count = *ncont << (page_shift - PAGE_SHIFT); *shift = page_shift; if (order) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index f6a53455bf8b..9ae587b74b12 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1475,4 +1475,18 @@ int bfregn_to_uar_index(struct mlx5_ib_dev *dev, bool dyn_bfreg); int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter); + +static inline bool mlx5_ib_can_use_umr(struct mlx5_ib_dev *dev, + bool do_modify_atomic) +{ + if (MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) + return false; + + if (do_modify_atomic && + MLX5_CAP_GEN(dev->mdev, atomic) && + MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled)) + return false; + + 
return true; +} #endif /* MLX5_IB_H */ diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index b74fad08412f..3401f5f6792e 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1293,9 +1293,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (err < 0) return ERR_PTR(err); - use_umr = !MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled) && - (!MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled) || - !MLX5_CAP_GEN(dev->mdev, atomic)); + use_umr = mlx5_ib_can_use_umr(dev, true); if (order <= mr_cache_max_order(dev) && use_umr) { mr = alloc_mr_from_cache(pd, umem, virt_addr, length, ncont, @@ -1448,7 +1446,8 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, goto err; } - if (flags & IB_MR_REREG_TRANS && !use_umr_mtt_update(mr, addr, len)) { + if (!mlx5_ib_can_use_umr(dev, true) || + (flags & IB_MR_REREG_TRANS && !use_umr_mtt_update(mr, addr, len))) { /* * UMR can't be used - MKey needs to be replaced. */ diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 1d257d1b3b0d..0a59912a4cef 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -301,7 +301,8 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) memset(caps, 0, sizeof(*caps)); - if (!MLX5_CAP_GEN(dev->mdev, pg)) + if (!MLX5_CAP_GEN(dev->mdev, pg) || + !mlx5_ib_can_use_umr(dev, true)) return; caps->general_caps = IB_ODP_SUPPORT; @@ -355,7 +356,8 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) if (MLX5_CAP_GEN(dev->mdev, fixed_buffer_size) && MLX5_CAP_GEN(dev->mdev, null_mkey) && - MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) + MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset) && + !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled)) caps->general_caps |= IB_ODP_SUPPORT_IMPLICIT; return; @@ -1622,8 +1624,10 @@ int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev) { int ret = 0; - if (dev->odp_caps.general_caps & IB_ODP_SUPPORT) - ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_odp_ops); + if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT)) + return ret; + + ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_odp_ops); if (dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT) { ret = mlx5_cmd_null_mkey(dev->mdev, &dev->null_mkey); @@ -1633,9 +1637,6 @@ int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev) } } - if (!MLX5_CAP_GEN(dev->mdev, pg)) - return ret; - ret = mlx5_ib_create_pf_eq(dev, &dev->odp_pf_eq); return ret; @@ -1643,7 +1644,7 @@ int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev) void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *dev) { - if (!MLX5_CAP_GEN(dev->mdev, pg)) + if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT)) return; mlx5_ib_destroy_pf_eq(dev, &dev->odp_pf_eq); diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 379328b2598f..72869ff4a334 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4162,7 +4162,7 @@ static u64 get_xlt_octo(u64 bytes) MLX5_IB_UMR_OCTOWORD; } -static __be64 frwr_mkey_mask(void) +static __be64 frwr_mkey_mask(bool atomic) { u64 result; @@ -4175,10 +4175,12 @@ static __be64 frwr_mkey_mask(void) MLX5_MKEY_MASK_LW | MLX5_MKEY_MASK_RR | MLX5_MKEY_MASK_RW | - MLX5_MKEY_MASK_A | MLX5_MKEY_MASK_SMALL_FENCE | MLX5_MKEY_MASK_FREE; + if (atomic) + result |= MLX5_MKEY_MASK_A; + return cpu_to_be64(result); } @@ -4204,7 +4206,7 @@ static __be64 sig_mkey_mask(void) } static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr, 
- struct mlx5_ib_mr *mr, u8 flags) + struct mlx5_ib_mr *mr, u8 flags, bool atomic) { int size = (mr->ndescs + mr->meta_ndescs) * mr->desc_size; @@ -4212,7 +4214,7 @@ static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr, umr->flags = flags; umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size)); - umr->mkey_mask = frwr_mkey_mask(); + umr->mkey_mask = frwr_mkey_mask(atomic); } static void set_linv_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr) @@ -4811,10 +4813,22 @@ static int set_reg_wr(struct mlx5_ib_qp *qp, { struct mlx5_ib_mr *mr = to_mmr(wr->mr); struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd); + struct mlx5_ib_dev *dev = to_mdev(pd->ibpd.device); int mr_list_size = (mr->ndescs + mr->meta_ndescs) * mr->desc_size; bool umr_inline = mr_list_size <= MLX5_IB_SQ_UMR_INLINE_THRESHOLD; + bool atomic = wr->access & IB_ACCESS_REMOTE_ATOMIC; u8 flags = 0; + if (!mlx5_ib_can_use_umr(dev, atomic)) { + mlx5_ib_warn(to_mdev(qp->ibqp.device), + "Fast update of %s for MR is disabled\n", + (MLX5_CAP_GEN(dev->mdev, + umr_modify_entity_size_disabled)) ? + "entity size" : + "atomic access"); + return -EINVAL; + } + if (unlikely(wr->wr.send_flags & IB_SEND_INLINE)) { mlx5_ib_warn(to_mdev(qp->ibqp.device), "Invalid IB_SEND_INLINE send flag\n"); @@ -4826,7 +4840,7 @@ static int set_reg_wr(struct mlx5_ib_qp *qp, if (umr_inline) flags |= MLX5_UMR_INLINE; - set_reg_umr_seg(*seg, mr, flags); + set_reg_umr_seg(*seg, mr, flags, atomic); *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; handle_post_send_edge(&qp->sq, seg, *size, cur_edge); diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h index 77b1aabf6ff3..dba4535494ab 100644 --- a/drivers/infiniband/sw/siw/siw.h +++ b/drivers/infiniband/sw/siw/siw.h @@ -138,9 +138,9 @@ struct siw_umem { }; struct siw_pble { - u64 addr; /* Address of assigned user buffer */ - u64 size; /* Size of this entry */ - u64 pbl_off; /* Total offset from start of PBL */ + dma_addr_t addr; /* Address of assigned buffer */ + unsigned int size; /* Size of this entry */ + unsigned long pbl_off; /* Total offset from start of PBL */ }; struct siw_pbl { @@ -734,7 +734,7 @@ static inline void siw_crc_skb(struct siw_rx_stream *srx, unsigned int len) "MEM[0x%08x] %s: " fmt, mem->stag, __func__, ##__VA_ARGS__) #define siw_dbg_cep(cep, fmt, ...) \ - ibdev_dbg(&cep->sdev->base_dev, "CEP[0x%p] %s: " fmt, \ + ibdev_dbg(&cep->sdev->base_dev, "CEP[0x%pK] %s: " fmt, \ cep, __func__, ##__VA_ARGS__) void siw_cq_flush(struct siw_cq *cq); diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c index 9ce8a1b925d2..1db5ad3d9580 100644 --- a/drivers/infiniband/sw/siw/siw_cm.c +++ b/drivers/infiniband/sw/siw/siw_cm.c @@ -355,8 +355,8 @@ static int siw_cm_upcall(struct siw_cep *cep, enum iw_cm_event_type reason, getname_local(cep->sock, &event.local_addr); getname_peer(cep->sock, &event.remote_addr); } - siw_dbg_cep(cep, "[QP %u]: id 0x%p, reason=%d, status=%d\n", - cep->qp ? qp_id(cep->qp) : -1, id, reason, status); + siw_dbg_cep(cep, "[QP %u]: reason=%d, status=%d\n", + cep->qp ? 
qp_id(cep->qp) : UINT_MAX, reason, status); return id->event_handler(id, &event); } @@ -947,8 +947,6 @@ static void siw_accept_newconn(struct siw_cep *cep) siw_cep_get(new_cep); new_s->sk->sk_user_data = new_cep; - siw_dbg_cep(cep, "listen socket 0x%p, new 0x%p\n", s, new_s); - if (siw_tcp_nagle == false) { int val = 1; @@ -1011,7 +1009,8 @@ static void siw_cm_work_handler(struct work_struct *w) cep = work->cep; siw_dbg_cep(cep, "[QP %u]: work type: %d, state %d\n", - cep->qp ? qp_id(cep->qp) : -1, work->type, cep->state); + cep->qp ? qp_id(cep->qp) : UINT_MAX, + work->type, cep->state); siw_cep_set_inuse(cep); @@ -1145,9 +1144,9 @@ static void siw_cm_work_handler(struct work_struct *w) } if (release_cep) { siw_dbg_cep(cep, - "release: timer=%s, QP[%u], id 0x%p\n", + "release: timer=%s, QP[%u]\n", cep->mpa_timer ? "y" : "n", - cep->qp ? qp_id(cep->qp) : -1, cep->cm_id); + cep->qp ? qp_id(cep->qp) : UINT_MAX); siw_cancel_mpatimer(cep); @@ -1211,8 +1210,8 @@ int siw_cm_queue_work(struct siw_cep *cep, enum siw_work_type type) else delay = MPAREP_TIMEOUT; } - siw_dbg_cep(cep, "[QP %u]: work type: %d, work 0x%p, timeout %lu\n", - cep->qp ? qp_id(cep->qp) : -1, type, work, delay); + siw_dbg_cep(cep, "[QP %u]: work type: %d, timeout %lu\n", + cep->qp ? qp_id(cep->qp) : -1, type, delay); queue_delayed_work(siw_cm_wq, &work->work, delay); @@ -1376,16 +1375,16 @@ int siw_connect(struct iw_cm_id *id, struct iw_cm_conn_param *params) } if (v4) siw_dbg_qp(qp, - "id 0x%p, pd_len %d, laddr %pI4 %d, raddr %pI4 %d\n", - id, pd_len, + "pd_len %d, laddr %pI4 %d, raddr %pI4 %d\n", + pd_len, &((struct sockaddr_in *)(laddr))->sin_addr, ntohs(((struct sockaddr_in *)(laddr))->sin_port), &((struct sockaddr_in *)(raddr))->sin_addr, ntohs(((struct sockaddr_in *)(raddr))->sin_port)); else siw_dbg_qp(qp, - "id 0x%p, pd_len %d, laddr %pI6 %d, raddr %pI6 %d\n", - id, pd_len, + "pd_len %d, laddr %pI6 %d, raddr %pI6 %d\n", + pd_len, &((struct sockaddr_in6 *)(laddr))->sin6_addr, ntohs(((struct sockaddr_in6 *)(laddr))->sin6_port), &((struct sockaddr_in6 *)(raddr))->sin6_addr, @@ -1508,14 +1507,13 @@ int siw_connect(struct iw_cm_id *id, struct iw_cm_conn_param *params) if (rv >= 0) { rv = siw_cm_queue_work(cep, SIW_CM_WORK_MPATIMEOUT); if (!rv) { - siw_dbg_cep(cep, "id 0x%p, [QP %u]: exit\n", id, - qp_id(qp)); + siw_dbg_cep(cep, "[QP %u]: exit\n", qp_id(qp)); siw_cep_set_free(cep); return 0; } } error: - siw_dbg_qp(qp, "failed: %d\n", rv); + siw_dbg(id->device, "failed: %d\n", rv); if (cep) { siw_socket_disassoc(s); @@ -1540,7 +1538,8 @@ error: } else if (s) { sock_release(s); } - siw_qp_put(qp); + if (qp) + siw_qp_put(qp); return rv; } @@ -1580,7 +1579,7 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) siw_cancel_mpatimer(cep); if (cep->state != SIW_EPSTATE_RECVD_MPAREQ) { - siw_dbg_cep(cep, "id 0x%p: out of state\n", id); + siw_dbg_cep(cep, "out of state\n"); siw_cep_set_free(cep); siw_cep_put(cep); @@ -1601,7 +1600,7 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) up_write(&qp->state_lock); goto error; } - siw_dbg_cep(cep, "id 0x%p\n", id); + siw_dbg_cep(cep, "[QP %d]\n", params->qpn); if (try_gso && cep->mpa.hdr.params.bits & MPA_RR_FLAG_GSO_EXP) { siw_dbg_cep(cep, "peer allows GSO on TX\n"); @@ -1611,8 +1610,8 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) params->ird > sdev->attrs.max_ird) { siw_dbg_cep( cep, - "id 0x%p, [QP %u]: ord %d (max %d), ird %d (max %d)\n", - id, qp_id(qp), params->ord, sdev->attrs.max_ord, + "[QP %u]: ord %d (max %d), ird 
%d (max %d)\n", + qp_id(qp), params->ord, sdev->attrs.max_ord, params->ird, sdev->attrs.max_ird); rv = -EINVAL; up_write(&qp->state_lock); @@ -1624,8 +1623,8 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) if (params->private_data_len > max_priv_data) { siw_dbg_cep( cep, - "id 0x%p, [QP %u]: private data length: %d (max %d)\n", - id, qp_id(qp), params->private_data_len, max_priv_data); + "[QP %u]: private data length: %d (max %d)\n", + qp_id(qp), params->private_data_len, max_priv_data); rv = -EINVAL; up_write(&qp->state_lock); goto error; @@ -1679,7 +1678,7 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) qp_attrs.flags = SIW_MPA_CRC; qp_attrs.state = SIW_QP_STATE_RTS; - siw_dbg_cep(cep, "id 0x%p, [QP%u]: moving to rts\n", id, qp_id(qp)); + siw_dbg_cep(cep, "[QP%u]: moving to rts\n", qp_id(qp)); /* Associate QP with CEP */ siw_cep_get(cep); @@ -1700,8 +1699,8 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) if (rv) goto error; - siw_dbg_cep(cep, "id 0x%p, [QP %u]: send mpa reply, %d byte pdata\n", - id, qp_id(qp), params->private_data_len); + siw_dbg_cep(cep, "[QP %u]: send mpa reply, %d byte pdata\n", + qp_id(qp), params->private_data_len); rv = siw_send_mpareqrep(cep, params->private_data, params->private_data_len); @@ -1759,14 +1758,14 @@ int siw_reject(struct iw_cm_id *id, const void *pdata, u8 pd_len) siw_cancel_mpatimer(cep); if (cep->state != SIW_EPSTATE_RECVD_MPAREQ) { - siw_dbg_cep(cep, "id 0x%p: out of state\n", id); + siw_dbg_cep(cep, "out of state\n"); siw_cep_set_free(cep); siw_cep_put(cep); /* put last reference */ return -ECONNRESET; } - siw_dbg_cep(cep, "id 0x%p, cep->state %d, pd_len %d\n", id, cep->state, + siw_dbg_cep(cep, "cep->state %d, pd_len %d\n", cep->state, pd_len); if (__mpa_rr_revision(cep->mpa.hdr.params.bits) >= MPA_REVISION_1) { @@ -1804,14 +1803,14 @@ static int siw_listen_address(struct iw_cm_id *id, int backlog, rv = kernel_setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (char *)&s_val, sizeof(s_val)); if (rv) { - siw_dbg(id->device, "id 0x%p: setsockopt error: %d\n", id, rv); + siw_dbg(id->device, "setsockopt error: %d\n", rv); goto error; } rv = s->ops->bind(s, laddr, addr_family == AF_INET ? 
sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6)); if (rv) { - siw_dbg(id->device, "id 0x%p: socket bind error: %d\n", id, rv); + siw_dbg(id->device, "socket bind error: %d\n", rv); goto error; } cep = siw_cep_alloc(sdev); @@ -1824,13 +1823,13 @@ static int siw_listen_address(struct iw_cm_id *id, int backlog, rv = siw_cm_alloc_work(cep, backlog); if (rv) { siw_dbg(id->device, - "id 0x%p: alloc_work error %d, backlog %d\n", id, + "alloc_work error %d, backlog %d\n", rv, backlog); goto error; } rv = s->ops->listen(s, backlog); if (rv) { - siw_dbg(id->device, "id 0x%p: listen error %d\n", id, rv); + siw_dbg(id->device, "listen error %d\n", rv); goto error; } cep->cm_id = id; @@ -1914,8 +1913,7 @@ static void siw_drop_listeners(struct iw_cm_id *id) list_del(p); - siw_dbg_cep(cep, "id 0x%p: drop cep, state %d\n", id, - cep->state); + siw_dbg_cep(cep, "drop cep, state %d\n", cep->state); siw_cep_set_inuse(cep); @@ -1952,7 +1950,7 @@ int siw_create_listen(struct iw_cm_id *id, int backlog) struct net_device *dev = to_siw_dev(id->device)->netdev; int rv = 0, listeners = 0; - siw_dbg(id->device, "id 0x%p: backlog %d\n", id, backlog); + siw_dbg(id->device, "backlog %d\n", backlog); /* * For each attached address of the interface, create a @@ -1968,8 +1966,8 @@ int siw_create_listen(struct iw_cm_id *id, int backlog) s_raddr = (struct sockaddr_in *)&id->remote_addr; siw_dbg(id->device, - "id 0x%p: laddr %pI4:%d, raddr %pI4:%d\n", - id, &s_laddr.sin_addr, ntohs(s_laddr.sin_port), + "laddr %pI4:%d, raddr %pI4:%d\n", + &s_laddr.sin_addr, ntohs(s_laddr.sin_port), &s_raddr->sin_addr, ntohs(s_raddr->sin_port)); rtnl_lock(); @@ -1994,8 +1992,8 @@ int siw_create_listen(struct iw_cm_id *id, int backlog) *s_raddr = &to_sockaddr_in6(id->remote_addr); siw_dbg(id->device, - "id 0x%p: laddr %pI6:%d, raddr %pI6:%d\n", - id, &s_laddr->sin6_addr, ntohs(s_laddr->sin6_port), + "laddr %pI6:%d, raddr %pI6:%d\n", + &s_laddr->sin6_addr, ntohs(s_laddr->sin6_port), &s_raddr->sin6_addr, ntohs(s_raddr->sin6_port)); read_lock_bh(&in6_dev->lock); @@ -2028,17 +2026,15 @@ int siw_create_listen(struct iw_cm_id *id, int backlog) else if (!rv) rv = -EINVAL; - siw_dbg(id->device, "id 0x%p: %s\n", id, rv ? "FAIL" : "OK"); + siw_dbg(id->device, "%s\n", rv ? 
"FAIL" : "OK"); return rv; } int siw_destroy_listen(struct iw_cm_id *id) { - siw_dbg(id->device, "id 0x%p\n", id); - if (!id->provider_data) { - siw_dbg(id->device, "id 0x%p: no cep(s)\n", id); + siw_dbg(id->device, "no cep(s)\n"); return 0; } siw_drop_listeners(id); diff --git a/drivers/infiniband/sw/siw/siw_cq.c b/drivers/infiniband/sw/siw/siw_cq.c index e381ae9b7d62..d8db3bee9da7 100644 --- a/drivers/infiniband/sw/siw/siw_cq.c +++ b/drivers/infiniband/sw/siw/siw_cq.c @@ -71,9 +71,10 @@ int siw_reap_cqe(struct siw_cq *cq, struct ib_wc *wc) wc->wc_flags = IB_WC_WITH_INVALIDATE; } wc->qp = cqe->base_qp; - siw_dbg_cq(cq, "idx %u, type %d, flags %2x, id 0x%p\n", + siw_dbg_cq(cq, + "idx %u, type %d, flags %2x, id 0x%pK\n", cq->cq_get % cq->num_cqe, cqe->opcode, - cqe->flags, (void *)cqe->id); + cqe->flags, (void *)(uintptr_t)cqe->id); } WRITE_ONCE(cqe->flags, 0); cq->cq_get++; diff --git a/drivers/infiniband/sw/siw/siw_mem.c b/drivers/infiniband/sw/siw/siw_mem.c index 67171c82b0c4..87a56039f0ef 100644 --- a/drivers/infiniband/sw/siw/siw_mem.c +++ b/drivers/infiniband/sw/siw/siw_mem.c @@ -197,12 +197,12 @@ int siw_check_mem(struct ib_pd *pd, struct siw_mem *mem, u64 addr, */ if (addr < mem->va || addr + len > mem->va + mem->len) { siw_dbg_pd(pd, "MEM interval len %d\n", len); - siw_dbg_pd(pd, "[0x%016llx, 0x%016llx] out of bounds\n", - (unsigned long long)addr, - (unsigned long long)(addr + len)); - siw_dbg_pd(pd, "[0x%016llx, 0x%016llx] STag=0x%08x\n", - (unsigned long long)mem->va, - (unsigned long long)(mem->va + mem->len), + siw_dbg_pd(pd, "[0x%pK, 0x%pK] out of bounds\n", + (void *)(uintptr_t)addr, + (void *)(uintptr_t)(addr + len)); + siw_dbg_pd(pd, "[0x%pK, 0x%pK] STag=0x%08x\n", + (void *)(uintptr_t)mem->va, + (void *)(uintptr_t)(mem->va + mem->len), mem->stag); return -E_BASE_BOUNDS; @@ -330,7 +330,7 @@ out: * Optionally, provides remaining len within current element, and * current PBL index for later resume at same element. */ -u64 siw_pbl_get_buffer(struct siw_pbl *pbl, u64 off, int *len, int *idx) +dma_addr_t siw_pbl_get_buffer(struct siw_pbl *pbl, u64 off, int *len, int *idx) { int i = idx ? 
*idx : 0; diff --git a/drivers/infiniband/sw/siw/siw_mem.h b/drivers/infiniband/sw/siw/siw_mem.h index f43daf280891..db138c8423da 100644 --- a/drivers/infiniband/sw/siw/siw_mem.h +++ b/drivers/infiniband/sw/siw/siw_mem.h @@ -9,7 +9,7 @@ struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable); void siw_umem_release(struct siw_umem *umem, bool dirty); struct siw_pbl *siw_pbl_alloc(u32 num_buf); -u64 siw_pbl_get_buffer(struct siw_pbl *pbl, u64 off, int *len, int *idx); +dma_addr_t siw_pbl_get_buffer(struct siw_pbl *pbl, u64 off, int *len, int *idx); struct siw_mem *siw_mem_id2obj(struct siw_device *sdev, int stag_index); int siw_mem_add(struct siw_device *sdev, struct siw_mem *m); int siw_invalidate_stag(struct ib_pd *pd, u32 stag); diff --git a/drivers/infiniband/sw/siw/siw_qp.c b/drivers/infiniband/sw/siw/siw_qp.c index 0990307c5d2c..430314c8abd9 100644 --- a/drivers/infiniband/sw/siw/siw_qp.c +++ b/drivers/infiniband/sw/siw/siw_qp.c @@ -949,7 +949,7 @@ skip_irq: rv = -EINVAL; goto out; } - wqe->sqe.sge[0].laddr = (u64)&wqe->sqe.sge[1]; + wqe->sqe.sge[0].laddr = (uintptr_t)&wqe->sqe.sge[1]; wqe->sqe.sge[0].lkey = 0; wqe->sqe.num_sge = 1; } diff --git a/drivers/infiniband/sw/siw/siw_qp_rx.c b/drivers/infiniband/sw/siw/siw_qp_rx.c index f87657a11657..c0a887240325 100644 --- a/drivers/infiniband/sw/siw/siw_qp_rx.c +++ b/drivers/infiniband/sw/siw/siw_qp_rx.c @@ -38,9 +38,10 @@ static int siw_rx_umem(struct siw_rx_stream *srx, struct siw_umem *umem, p = siw_get_upage(umem, dest_addr); if (unlikely(!p)) { - pr_warn("siw: %s: [QP %u]: bogus addr: %p, %p\n", + pr_warn("siw: %s: [QP %u]: bogus addr: %pK, %pK\n", __func__, qp_id(rx_qp(srx)), - (void *)dest_addr, (void *)umem->fp_addr); + (void *)(uintptr_t)dest_addr, + (void *)(uintptr_t)umem->fp_addr); /* siw internal error */ srx->skb_copied += copied; srx->skb_new -= copied; @@ -50,7 +51,7 @@ static int siw_rx_umem(struct siw_rx_stream *srx, struct siw_umem *umem, pg_off = dest_addr & ~PAGE_MASK; bytes = min(len, (int)PAGE_SIZE - pg_off); - siw_dbg_qp(rx_qp(srx), "page %p, bytes=%u\n", p, bytes); + siw_dbg_qp(rx_qp(srx), "page %pK, bytes=%u\n", p, bytes); dest = kmap_atomic(p); rv = skb_copy_bits(srx->skb, srx->skb_offset, dest + pg_off, @@ -104,11 +105,11 @@ static int siw_rx_kva(struct siw_rx_stream *srx, void *kva, int len) { int rv; - siw_dbg_qp(rx_qp(srx), "kva: 0x%p, len: %u\n", kva, len); + siw_dbg_qp(rx_qp(srx), "kva: 0x%pK, len: %u\n", kva, len); rv = skb_copy_bits(srx->skb, srx->skb_offset, kva, len); if (unlikely(rv)) { - pr_warn("siw: [QP %u]: %s, len %d, kva 0x%p, rv %d\n", + pr_warn("siw: [QP %u]: %s, len %d, kva 0x%pK, rv %d\n", qp_id(rx_qp(srx)), __func__, len, kva, rv); return rv; @@ -132,7 +133,7 @@ static int siw_rx_pbl(struct siw_rx_stream *srx, int *pbl_idx, while (len) { int bytes; - u64 buf_addr = + dma_addr_t buf_addr = siw_pbl_get_buffer(pbl, offset, &bytes, pbl_idx); if (!buf_addr) break; @@ -485,8 +486,8 @@ int siw_proc_send(struct siw_qp *qp) mem_p = *mem; if (mem_p->mem_obj == NULL) rv = siw_rx_kva(srx, - (void *)(sge->laddr + frx->sge_off), - sge_bytes); + (void *)(uintptr_t)(sge->laddr + frx->sge_off), + sge_bytes); else if (!mem_p->is_pbl) rv = siw_rx_umem(srx, mem_p->umem, sge->laddr + frx->sge_off, sge_bytes); @@ -598,8 +599,8 @@ int siw_proc_write(struct siw_qp *qp) if (mem->mem_obj == NULL) rv = siw_rx_kva(srx, - (void *)(srx->ddp_to + srx->fpdu_part_rcvd), - bytes); + (void *)(uintptr_t)(srx->ddp_to + srx->fpdu_part_rcvd), + bytes); else if (!mem->is_pbl) rv = siw_rx_umem(srx, mem->umem, srx->ddp_to + 
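
The siw hunks above replace plain %p / 0x%016llx dumps of user-supplied addresses with %pK and explicit (uintptr_t) casts. Below is a minimal sketch of that printing pattern on its own, using a generic pr_debug() call rather than the driver's siw_dbg helpers; the function name is hypothetical.

#include <linux/types.h>
#include <linux/printk.h>

/* 'laddr' travels through the work request as a u64, not as a pointer. */
static void dbg_print_laddr(u64 laddr)
{
	/*
	 * %pK respects the kptr_restrict sysctl, so unprivileged readers do
	 * not see the raw kernel address; the uintptr_t cast avoids a
	 * 64-bit-to-pointer truncation warning on 32-bit builds.
	 */
	pr_debug("laddr 0x%pK\n", (void *)(uintptr_t)laddr);
}
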
srx->fpdu_part_rcvd, bytes); @@ -841,8 +842,9 @@ int siw_proc_rresp(struct siw_qp *qp) bytes = min(srx->fpdu_part_rem, srx->skb_new); if (mem_p->mem_obj == NULL) - rv = siw_rx_kva(srx, (void *)(sge->laddr + wqe->processed), - bytes); + rv = siw_rx_kva(srx, + (void *)(uintptr_t)(sge->laddr + wqe->processed), + bytes); else if (!mem_p->is_pbl) rv = siw_rx_umem(srx, mem_p->umem, sge->laddr + wqe->processed, bytes); diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c index 43020d2040fc..438a2917a47c 100644 --- a/drivers/infiniband/sw/siw/siw_qp_tx.c +++ b/drivers/infiniband/sw/siw/siw_qp_tx.c @@ -26,7 +26,7 @@ static struct page *siw_get_pblpage(struct siw_mem *mem, u64 addr, int *idx) { struct siw_pbl *pbl = mem->pbl; u64 offset = addr - mem->va; - u64 paddr = siw_pbl_get_buffer(pbl, offset, NULL, idx); + dma_addr_t paddr = siw_pbl_get_buffer(pbl, offset, NULL, idx); if (paddr) return virt_to_page(paddr); @@ -37,7 +37,7 @@ static struct page *siw_get_pblpage(struct siw_mem *mem, u64 addr, int *idx) /* * Copy short payload at provided destination payload address */ -static int siw_try_1seg(struct siw_iwarp_tx *c_tx, u64 paddr) +static int siw_try_1seg(struct siw_iwarp_tx *c_tx, void *paddr) { struct siw_wqe *wqe = &c_tx->wqe_active; struct siw_sge *sge = &wqe->sqe.sge[0]; @@ -50,16 +50,16 @@ static int siw_try_1seg(struct siw_iwarp_tx *c_tx, u64 paddr) return 0; if (tx_flags(wqe) & SIW_WQE_INLINE) { - memcpy((void *)paddr, &wqe->sqe.sge[1], bytes); + memcpy(paddr, &wqe->sqe.sge[1], bytes); } else { struct siw_mem *mem = wqe->mem[0]; if (!mem->mem_obj) { /* Kernel client using kva */ - memcpy((void *)paddr, (void *)sge->laddr, bytes); + memcpy(paddr, + (const void *)(uintptr_t)sge->laddr, bytes); } else if (c_tx->in_syscall) { - if (copy_from_user((void *)paddr, - (const void __user *)sge->laddr, + if (copy_from_user(paddr, u64_to_user_ptr(sge->laddr), bytes)) return -EFAULT; } else { @@ -79,12 +79,12 @@ static int siw_try_1seg(struct siw_iwarp_tx *c_tx, u64 paddr) buffer = kmap_atomic(p); if (likely(PAGE_SIZE - off >= bytes)) { - memcpy((void *)paddr, buffer + off, bytes); + memcpy(paddr, buffer + off, bytes); kunmap_atomic(buffer); } else { unsigned long part = bytes - (PAGE_SIZE - off); - memcpy((void *)paddr, buffer + off, part); + memcpy(paddr, buffer + off, part); kunmap_atomic(buffer); if (!mem->is_pbl) @@ -98,7 +98,7 @@ static int siw_try_1seg(struct siw_iwarp_tx *c_tx, u64 paddr) return -EFAULT; buffer = kmap_atomic(p); - memcpy((void *)(paddr + part), buffer, + memcpy(paddr + part, buffer, bytes - part); kunmap_atomic(buffer); } @@ -166,7 +166,7 @@ static int siw_qp_prepare_tx(struct siw_iwarp_tx *c_tx) c_tx->ctrl_len = sizeof(struct iwarp_send); crc = (char *)&c_tx->pkt.send_pkt.crc; - data = siw_try_1seg(c_tx, (u64)crc); + data = siw_try_1seg(c_tx, crc); break; case SIW_OP_SEND_REMOTE_INV: @@ -189,7 +189,7 @@ static int siw_qp_prepare_tx(struct siw_iwarp_tx *c_tx) c_tx->ctrl_len = sizeof(struct iwarp_send_inv); crc = (char *)&c_tx->pkt.send_pkt.crc; - data = siw_try_1seg(c_tx, (u64)crc); + data = siw_try_1seg(c_tx, crc); break; case SIW_OP_WRITE: @@ -201,7 +201,7 @@ static int siw_qp_prepare_tx(struct siw_iwarp_tx *c_tx) c_tx->ctrl_len = sizeof(struct iwarp_rdma_write); crc = (char *)&c_tx->pkt.write_pkt.crc; - data = siw_try_1seg(c_tx, (u64)crc); + data = siw_try_1seg(c_tx, crc); break; case SIW_OP_READ_RESPONSE: @@ -216,7 +216,7 @@ static int siw_qp_prepare_tx(struct siw_iwarp_tx *c_tx) c_tx->ctrl_len = sizeof(struct iwarp_rdma_rresp); crc = 
(char *)&c_tx->pkt.write_pkt.crc; - data = siw_try_1seg(c_tx, (u64)crc); + data = siw_try_1seg(c_tx, crc); break; default: @@ -398,15 +398,13 @@ static int siw_0copy_tx(struct socket *s, struct page **page, #define MAX_TRAILER (MPA_CRC_SIZE + 4) -static void siw_unmap_pages(struct page **pages, int hdr_len, int num_maps) +static void siw_unmap_pages(struct page **pp, unsigned long kmap_mask) { - if (hdr_len) { - ++pages; - --num_maps; - } - while (num_maps-- > 0) { - kunmap(*pages); - pages++; + while (kmap_mask) { + if (kmap_mask & BIT(0)) + kunmap(*pp); + pp++; + kmap_mask >>= 1; } } @@ -437,6 +435,7 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s) unsigned int data_len = c_tx->bytes_unsent, hdr_len = 0, trl_len = 0, sge_off = c_tx->sge_off, sge_idx = c_tx->sge_idx, pbl_idx = c_tx->pbl_idx; + unsigned long kmap_mask = 0L; if (c_tx->state == SIW_SEND_HDR) { if (c_tx->use_sendpage) { @@ -463,8 +462,7 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s) if (!(tx_flags(wqe) & SIW_WQE_INLINE)) { mem = wqe->mem[sge_idx]; - if (!mem->mem_obj) - is_kva = 1; + is_kva = mem->mem_obj == NULL ? 1 : 0; } else { is_kva = 1; } @@ -473,7 +471,8 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s) * tx from kernel virtual address: either inline data * or memory region with assigned kernel buffer */ - iov[seg].iov_base = (void *)(sge->laddr + sge_off); + iov[seg].iov_base = + (void *)(uintptr_t)(sge->laddr + sge_off); iov[seg].iov_len = sge_len; if (do_crc) @@ -500,12 +499,7 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s) p = siw_get_upage(mem->umem, sge->laddr + sge_off); if (unlikely(!p)) { - if (hdr_len) - seg--; - if (!c_tx->use_sendpage && seg) { - siw_unmap_pages(page_array, - hdr_len, seg); - } + siw_unmap_pages(page_array, kmap_mask); wqe->processed -= c_tx->bytes_unsent; rv = -EFAULT; goto done_crc; @@ -515,6 +509,10 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s) if (!c_tx->use_sendpage) { iov[seg].iov_base = kmap(p) + fp_off; iov[seg].iov_len = plen; + + /* Remember for later kunmap() */ + kmap_mask |= BIT(seg); + if (do_crc) crypto_shash_update( c_tx->mpa_crc_hd, @@ -526,13 +524,13 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s) page_address(p) + fp_off, plen); } else { - u64 pa = ((sge->laddr + sge_off) & PAGE_MASK); + u64 va = sge->laddr + sge_off; - page_array[seg] = virt_to_page(pa); + page_array[seg] = virt_to_page(va & PAGE_MASK); if (do_crc) crypto_shash_update( c_tx->mpa_crc_hd, - (void *)(sge->laddr + sge_off), + (void *)(uintptr_t)va, plen); } @@ -543,10 +541,7 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s) if (++seg > (int)MAX_ARRAY) { siw_dbg_qp(tx_qp(c_tx), "to many fragments\n"); - if (!is_kva && !c_tx->use_sendpage) { - siw_unmap_pages(page_array, hdr_len, - seg - 1); - } + siw_unmap_pages(page_array, kmap_mask); wqe->processed -= c_tx->bytes_unsent; rv = -EMSGSIZE; goto done_crc; @@ -597,8 +592,7 @@ sge_done: } else { rv = kernel_sendmsg(s, &msg, iov, seg + 1, hdr_len + data_len + trl_len); - if (!is_kva) - siw_unmap_pages(page_array, hdr_len, seg); + siw_unmap_pages(page_array, kmap_mask); } if (rv < (int)hdr_len) { /* Not even complete hdr pushed or negative rv */ @@ -829,7 +823,8 @@ static int siw_qp_sq_proc_tx(struct siw_qp *qp, struct siw_wqe *wqe) rv = -EINVAL; goto tx_error; } - wqe->sqe.sge[0].laddr = (u64)&wqe->sqe.sge[1]; + wqe->sqe.sge[0].laddr = + (u64)(uintptr_t)&wqe->sqe.sge[1]; } } wqe->wr_status = SIW_WR_INPROGRESS; @@ 
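
The reworked siw_unmap_pages() above drives kunmap() from a bitmask of the slots that were actually kmap()'ed, instead of inferring them from hdr_len and the segment count. A self-contained sketch of that bookkeeping follows; the helper names are hypothetical and the slot count is assumed to stay within BITS_PER_LONG.

#include <linux/bits.h>
#include <linux/highmem.h>

/* Map up to nr pages, recording every mapped slot in *mask. */
static void map_slots(struct page **pages, void **vaddr, int nr,
		      unsigned long *mask)
{
	int i;

	for (i = 0; i < nr; i++) {
		if (!pages[i])
			continue;
		vaddr[i] = kmap(pages[i]);
		*mask |= BIT(i);	/* remember for the unmap path */
	}
}

/* Undo only the slots that were really mapped. */
static void unmap_slots(struct page **pages, unsigned long mask)
{
	while (mask) {
		if (mask & BIT(0))
			kunmap(*pages);
		pages++;
		mask >>= 1;
	}
}
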
-924,7 +919,7 @@ tx_error: static int siw_fastreg_mr(struct ib_pd *pd, struct siw_sqe *sqe) { - struct ib_mr *base_mr = (struct ib_mr *)sqe->base_mr; + struct ib_mr *base_mr = (struct ib_mr *)(uintptr_t)sqe->base_mr; struct siw_device *sdev = to_siw_dev(pd->device); struct siw_mem *mem = siw_mem_id2obj(sdev, sqe->rkey >> 8); int rv = 0; @@ -954,8 +949,7 @@ static int siw_fastreg_mr(struct ib_pd *pd, struct siw_sqe *sqe) mem->stag = sqe->rkey; mem->perms = sqe->access; - siw_dbg_mem(mem, "STag now valid, MR va: 0x%016llx -> 0x%016llx\n", - mem->va, base_mr->iova); + siw_dbg_mem(mem, "STag 0x%08x now valid\n", sqe->rkey); mem->va = base_mr->iova; mem->stag_valid = 1; out: diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index e7f3a2379d9d..da52c90e06d4 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -424,8 +424,7 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, */ qp->srq = to_siw_srq(attrs->srq); qp->attrs.rq_size = 0; - siw_dbg(base_dev, "QP [%u]: [SRQ 0x%p] attached\n", - qp->qp_num, qp->srq); + siw_dbg(base_dev, "QP [%u]: SRQ attached\n", qp->qp_num); } else if (num_rqe) { if (qp->kernel_verbs) qp->recvq = vzalloc(num_rqe * sizeof(struct siw_rqe)); @@ -610,7 +609,7 @@ int siw_destroy_qp(struct ib_qp *base_qp, struct ib_udata *udata) base_ucontext); struct siw_qp_attrs qp_attrs; - siw_dbg_qp(qp, "state %d, cep 0x%p\n", qp->attrs.state, qp->cep); + siw_dbg_qp(qp, "state %d\n", qp->attrs.state); /* * Mark QP as in process of destruction to prevent from @@ -662,7 +661,7 @@ static int siw_copy_inline_sgl(const struct ib_send_wr *core_wr, void *kbuf = &sqe->sge[1]; int num_sge = core_wr->num_sge, bytes = 0; - sqe->sge[0].laddr = (u64)kbuf; + sqe->sge[0].laddr = (uintptr_t)kbuf; sqe->sge[0].lkey = 0; while (num_sge--) { @@ -825,7 +824,7 @@ int siw_post_send(struct ib_qp *base_qp, const struct ib_send_wr *wr, break; case IB_WR_REG_MR: - sqe->base_mr = (uint64_t)reg_wr(wr)->mr; + sqe->base_mr = (uintptr_t)reg_wr(wr)->mr; sqe->rkey = reg_wr(wr)->key; sqe->access = reg_wr(wr)->access & IWARP_ACCESS_MASK; sqe->opcode = SIW_OP_REG_MR; @@ -842,8 +841,9 @@ int siw_post_send(struct ib_qp *base_qp, const struct ib_send_wr *wr, rv = -EINVAL; break; } - siw_dbg_qp(qp, "opcode %d, flags 0x%x, wr_id 0x%p\n", - sqe->opcode, sqe->flags, (void *)sqe->id); + siw_dbg_qp(qp, "opcode %d, flags 0x%x, wr_id 0x%pK\n", + sqe->opcode, sqe->flags, + (void *)(uintptr_t)sqe->id); if (unlikely(rv < 0)) break; @@ -1205,8 +1205,8 @@ struct ib_mr *siw_reg_user_mr(struct ib_pd *pd, u64 start, u64 len, unsigned long mem_limit = rlimit(RLIMIT_MEMLOCK); int rv; - siw_dbg_pd(pd, "start: 0x%016llx, va: 0x%016llx, len: %llu\n", - (unsigned long long)start, (unsigned long long)rnic_va, + siw_dbg_pd(pd, "start: 0x%pK, va: 0x%pK, len: %llu\n", + (void *)(uintptr_t)start, (void *)(uintptr_t)rnic_va, (unsigned long long)len); if (atomic_inc_return(&sdev->num_mr) > SIW_MAX_MR) { @@ -1363,7 +1363,7 @@ int siw_map_mr_sg(struct ib_mr *base_mr, struct scatterlist *sl, int num_sle, struct siw_mem *mem = mr->mem; struct siw_pbl *pbl = mem->pbl; struct siw_pble *pble; - u64 pbl_size; + unsigned long pbl_size; int i, rv; if (!pbl) { @@ -1402,16 +1402,18 @@ int siw_map_mr_sg(struct ib_mr *base_mr, struct scatterlist *sl, int num_sle, pbl_size += sg_dma_len(slp); } siw_dbg_mem(mem, - "sge[%d], size %llu, addr 0x%016llx, total %llu\n", - i, pble->size, pble->addr, pbl_size); + "sge[%d], size %u, addr 0x%p, total %lu\n", + i, pble->size, (void 
*)(uintptr_t)pble->addr, + pbl_size); } rv = ib_sg_to_pages(base_mr, sl, num_sle, sg_off, siw_set_pbl_page); if (rv > 0) { mem->len = base_mr->length; mem->va = base_mr->iova; siw_dbg_mem(mem, - "%llu bytes, start 0x%016llx, %u SLE to %u entries\n", - mem->len, mem->va, num_sle, pbl->num_buf); + "%llu bytes, start 0x%pK, %u SLE to %u entries\n", + mem->len, (void *)(uintptr_t)mem->va, num_sle, + pbl->num_buf); } return rv; } @@ -1529,7 +1531,7 @@ int siw_create_srq(struct ib_srq *base_srq, } spin_lock_init(&srq->lock); - siw_dbg_pd(base_srq->pd, "[SRQ 0x%p]: success\n", srq); + siw_dbg_pd(base_srq->pd, "[SRQ]: success\n"); return 0; @@ -1650,8 +1652,7 @@ int siw_post_srq_recv(struct ib_srq *base_srq, const struct ib_recv_wr *wr, if (unlikely(!srq->kernel_verbs)) { siw_dbg_pd(base_srq->pd, - "[SRQ 0x%p]: no kernel post_recv for mapped srq\n", - srq); + "[SRQ]: no kernel post_recv for mapped srq\n"); rv = -EINVAL; goto out; } @@ -1673,8 +1674,7 @@ int siw_post_srq_recv(struct ib_srq *base_srq, const struct ib_recv_wr *wr, } if (unlikely(wr->num_sge > srq->max_sge)) { siw_dbg_pd(base_srq->pd, - "[SRQ 0x%p]: too many sge's: %d\n", srq, - wr->num_sge); + "[SRQ]: too many sge's: %d\n", wr->num_sge); rv = -EINVAL; break; } @@ -1693,7 +1693,7 @@ int siw_post_srq_recv(struct ib_srq *base_srq, const struct ib_recv_wr *wr, spin_unlock_irqrestore(&srq->lock, flags); out: if (unlikely(rv < 0)) { - siw_dbg_pd(base_srq->pd, "[SRQ 0x%p]: error %d\n", srq, rv); + siw_dbg_pd(base_srq->pd, "[SRQ]: error %d\n", rv); *bad_wr = wr; } return rv; diff --git a/drivers/input/serio/hyperv-keyboard.c b/drivers/input/serio/hyperv-keyboard.c index 88ae7c2ac3c8..e486a8a74c40 100644 --- a/drivers/input/serio/hyperv-keyboard.c +++ b/drivers/input/serio/hyperv-keyboard.c @@ -237,40 +237,17 @@ static void hv_kbd_handle_received_packet(struct hv_device *hv_dev, static void hv_kbd_on_channel_callback(void *context) { + struct vmpacket_descriptor *desc; struct hv_device *hv_dev = context; - void *buffer; - int bufferlen = 0x100; /* Start with sensible size */ u32 bytes_recvd; u64 req_id; - int error; - buffer = kmalloc(bufferlen, GFP_ATOMIC); - if (!buffer) - return; - - while (1) { - error = vmbus_recvpacket_raw(hv_dev->channel, buffer, bufferlen, - &bytes_recvd, &req_id); - switch (error) { - case 0: - if (bytes_recvd == 0) { - kfree(buffer); - return; - } - - hv_kbd_handle_received_packet(hv_dev, buffer, - bytes_recvd, req_id); - break; + foreach_vmbus_pkt(desc, hv_dev->channel) { + bytes_recvd = desc->len8 * 8; + req_id = desc->trans_id; - case -ENOBUFS: - kfree(buffer); - /* Handle large packet */ - bufferlen = bytes_recvd; - buffer = kmalloc(bytes_recvd, GFP_ATOMIC); - if (!buffer) - return; - break; - } + hv_kbd_handle_received_packet(hv_dev, desc, bytes_recvd, + req_id); } } diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index d991d40f797f..f68a62c3c32b 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -965,11 +965,14 @@ static void *iommu_dma_alloc_pages(struct device *dev, size_t size, { bool coherent = dev_is_dma_coherent(dev); size_t alloc_size = PAGE_ALIGN(size); + int node = dev_to_node(dev); struct page *page = NULL; void *cpu_addr; page = dma_alloc_contiguous(dev, alloc_size, gfp); if (!page) + page = alloc_pages_node(node, gfp, get_order(alloc_size)); + if (!page) return NULL; if (IS_ENABLED(CONFIG_DMA_REMAP) && (!coherent || PageHighMem(page))) { diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index b6b5acc92ca2..2a48ea3f1b30 100644 --- 
a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -1599,7 +1599,9 @@ dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) unsigned long freed; c = container_of(shrink, struct dm_bufio_client, shrinker); - if (!dm_bufio_trylock(c)) + if (sc->gfp_mask & __GFP_FS) + dm_bufio_lock(c); + else if (!dm_bufio_trylock(c)) return SHRINK_STOP; freed = __scan(c, sc->nr_to_scan, sc->gfp_mask); diff --git a/drivers/md/dm-dust.c b/drivers/md/dm-dust.c index 845f376a72d9..8288887b7f94 100644 --- a/drivers/md/dm-dust.c +++ b/drivers/md/dm-dust.c @@ -25,6 +25,7 @@ struct dust_device { unsigned long long badblock_count; spinlock_t dust_lock; unsigned int blksz; + int sect_per_block_shift; unsigned int sect_per_block; sector_t start; bool fail_read_on_bb:1; @@ -79,7 +80,7 @@ static int dust_remove_block(struct dust_device *dd, unsigned long long block) unsigned long flags; spin_lock_irqsave(&dd->dust_lock, flags); - bblock = dust_rb_search(&dd->badblocklist, block * dd->sect_per_block); + bblock = dust_rb_search(&dd->badblocklist, block); if (bblock == NULL) { if (!dd->quiet_mode) { @@ -113,7 +114,7 @@ static int dust_add_block(struct dust_device *dd, unsigned long long block) } spin_lock_irqsave(&dd->dust_lock, flags); - bblock->bb = block * dd->sect_per_block; + bblock->bb = block; if (!dust_rb_insert(&dd->badblocklist, bblock)) { if (!dd->quiet_mode) { DMERR("%s: block %llu already in badblocklist", @@ -138,7 +139,7 @@ static int dust_query_block(struct dust_device *dd, unsigned long long block) unsigned long flags; spin_lock_irqsave(&dd->dust_lock, flags); - bblock = dust_rb_search(&dd->badblocklist, block * dd->sect_per_block); + bblock = dust_rb_search(&dd->badblocklist, block); if (bblock != NULL) DMINFO("%s: block %llu found in badblocklist", __func__, block); else @@ -165,6 +166,7 @@ static int dust_map_read(struct dust_device *dd, sector_t thisblock, int ret = DM_MAPIO_REMAPPED; if (fail_read_on_bb) { + thisblock >>= dd->sect_per_block_shift; spin_lock_irqsave(&dd->dust_lock, flags); ret = __dust_map_read(dd, thisblock); spin_unlock_irqrestore(&dd->dust_lock, flags); @@ -195,6 +197,7 @@ static int dust_map_write(struct dust_device *dd, sector_t thisblock, unsigned long flags; if (fail_read_on_bb) { + thisblock >>= dd->sect_per_block_shift; spin_lock_irqsave(&dd->dust_lock, flags); __dust_map_write(dd, thisblock); spin_unlock_irqrestore(&dd->dust_lock, flags); @@ -331,6 +334,8 @@ static int dust_ctr(struct dm_target *ti, unsigned int argc, char **argv) dd->blksz = blksz; dd->start = tmp; + dd->sect_per_block_shift = __ffs(sect_per_block); + /* * Whether to fail a read on a "bad" block. * Defaults to false; enabled later by message. diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index b1b0de402dfc..9118ab85cb3a 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -1943,7 +1943,22 @@ offload_to_thread: queue_work(ic->wait_wq, &dio->work); return; } + if (journal_read_pos != NOT_FOUND) + dio->range.n_sectors = ic->sectors_per_block; wait_and_add_new_range(ic, &dio->range); + /* + * wait_and_add_new_range drops the spinlock, so the journal + * may have been changed arbitrarily. We need to recheck. + * To simplify the code, we restrict I/O size to just one block. 
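
The dm_bufio_shrink_scan() hunk earlier in this group chooses between a blocking lock and a trylock based on whether the reclaim context allows __GFP_FS. A sketch of the same decision in a generic shrinker is shown below; the cache and lock names are made up.

#include <linux/kernel.h>
#include <linux/shrinker.h>
#include <linux/mutex.h>
#include <linux/gfp.h>

struct my_cache {
	struct shrinker shrinker;
	struct mutex lock;
	/* ... cached objects ... */
};

static unsigned long my_shrink_scan(struct shrinker *shrink,
				    struct shrink_control *sc)
{
	struct my_cache *c = container_of(shrink, struct my_cache, shrinker);
	unsigned long freed;

	/*
	 * Only block on the lock when the caller tolerates FS recursion;
	 * otherwise fall back to a trylock and give up if it is contended.
	 */
	if (sc->gfp_mask & __GFP_FS)
		mutex_lock(&c->lock);
	else if (!mutex_trylock(&c->lock))
		return SHRINK_STOP;

	freed = 0;	/* scan and free up to sc->nr_to_scan objects here */
	mutex_unlock(&c->lock);
	return freed;
}
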
+ */ + if (journal_read_pos != NOT_FOUND) { + sector_t next_sector; + unsigned new_pos = find_journal_node(ic, dio->range.logical_sector, &next_sector); + if (unlikely(new_pos != journal_read_pos)) { + remove_range_unlocked(ic, &dio->range); + goto retry; + } + } } spin_unlock_irq(&ic->endio_wait.lock); diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index df2011de7be2..1bbe4a34ef4c 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -566,8 +566,10 @@ static int run_io_job(struct kcopyd_job *job) * no point in continuing. */ if (test_bit(DM_KCOPYD_WRITE_SEQ, &job->flags) && - job->master_job->write_err) + job->master_job->write_err) { + job->write_err = job->master_job->write_err; return -EIO; + } io_job_start(job->kc->throttle); @@ -619,6 +621,7 @@ static int process_jobs(struct list_head *jobs, struct dm_kcopyd_client *kc, else job->read_err = 1; push(&kc->complete_jobs, job); + wake(kc); break; } diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 8a60a4a070ac..1f933dd197cd 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -3194,7 +3194,7 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) */ r = rs_prepare_reshape(rs); if (r) - return r; + goto bad; /* Reshaping ain't recovery, so disable recovery */ rs_setup_recovery(rs, MaxSector); diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 7b6c3ee9e755..8820931ec7d2 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1342,7 +1342,7 @@ void dm_table_event(struct dm_table *t) } EXPORT_SYMBOL(dm_table_event); -sector_t dm_table_get_size(struct dm_table *t) +inline sector_t dm_table_get_size(struct dm_table *t) { return t->num_targets ? (t->highs[t->num_targets - 1] + 1) : 0; } @@ -1367,6 +1367,9 @@ struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector) unsigned int l, n = 0, k = 0; sector_t *node; + if (unlikely(sector >= dm_table_get_size(t))) + return &t->targets[t->num_targets]; + for (l = 0; l < t->depth; l++) { n = get_child(n, k); node = get_node(t, l, n); diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index 8545dcee9fd0..595a73110e17 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2017 Western Digital Corporation or its affiliates. * @@ -34,7 +35,7 @@ * (1) Super block (1 block) * (2) Chunk mapping table (nr_map_blocks) * (3) Bitmap blocks (nr_bitmap_blocks) - * All metadata blocks are stored in conventional zones, starting from the + * All metadata blocks are stored in conventional zones, starting from * the first conventional zone found on disk. */ struct dmz_super { @@ -233,7 +234,7 @@ void dmz_unlock_map(struct dmz_metadata *zmd) * Lock/unlock metadata access. This is a "read" lock on a semaphore * that prevents metadata flush from running while metadata are being * modified. The actual metadata write mutual exclusion is achieved with - * the map lock and zone styate management (active and reclaim state are + * the map lock and zone state management (active and reclaim state are * mutually exclusive). 
*/ void dmz_lock_metadata(struct dmz_metadata *zmd) @@ -402,15 +403,18 @@ static struct dmz_mblock *dmz_get_mblock_slow(struct dmz_metadata *zmd, sector_t block = zmd->sb[zmd->mblk_primary].block + mblk_no; struct bio *bio; + if (dmz_bdev_is_dying(zmd->dev)) + return ERR_PTR(-EIO); + /* Get a new block and a BIO to read it */ mblk = dmz_alloc_mblock(zmd, mblk_no); if (!mblk) - return NULL; + return ERR_PTR(-ENOMEM); bio = bio_alloc(GFP_NOIO, 1); if (!bio) { dmz_free_mblock(zmd, mblk); - return NULL; + return ERR_PTR(-ENOMEM); } spin_lock(&zmd->mblk_lock); @@ -541,8 +545,8 @@ static struct dmz_mblock *dmz_get_mblock(struct dmz_metadata *zmd, if (!mblk) { /* Cache miss: read the block from disk */ mblk = dmz_get_mblock_slow(zmd, mblk_no); - if (!mblk) - return ERR_PTR(-ENOMEM); + if (IS_ERR(mblk)) + return mblk; } /* Wait for on-going read I/O and check for error */ @@ -570,16 +574,19 @@ static void dmz_dirty_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk) /* * Issue a metadata block write BIO. */ -static void dmz_write_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk, - unsigned int set) +static int dmz_write_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk, + unsigned int set) { sector_t block = zmd->sb[set].block + mblk->no; struct bio *bio; + if (dmz_bdev_is_dying(zmd->dev)) + return -EIO; + bio = bio_alloc(GFP_NOIO, 1); if (!bio) { set_bit(DMZ_META_ERROR, &mblk->state); - return; + return -ENOMEM; } set_bit(DMZ_META_WRITING, &mblk->state); @@ -591,6 +598,8 @@ static void dmz_write_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk, bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_META | REQ_PRIO); bio_add_page(bio, mblk->page, DMZ_BLOCK_SIZE, 0); submit_bio(bio); + + return 0; } /* @@ -602,6 +611,9 @@ static int dmz_rdwr_block(struct dmz_metadata *zmd, int op, sector_t block, struct bio *bio; int ret; + if (dmz_bdev_is_dying(zmd->dev)) + return -EIO; + bio = bio_alloc(GFP_NOIO, 1); if (!bio) return -ENOMEM; @@ -659,22 +671,29 @@ static int dmz_write_dirty_mblocks(struct dmz_metadata *zmd, { struct dmz_mblock *mblk; struct blk_plug plug; - int ret = 0; + int ret = 0, nr_mblks_submitted = 0; /* Issue writes */ blk_start_plug(&plug); - list_for_each_entry(mblk, write_list, link) - dmz_write_mblock(zmd, mblk, set); + list_for_each_entry(mblk, write_list, link) { + ret = dmz_write_mblock(zmd, mblk, set); + if (ret) + break; + nr_mblks_submitted++; + } blk_finish_plug(&plug); /* Wait for completion */ list_for_each_entry(mblk, write_list, link) { + if (!nr_mblks_submitted) + break; wait_on_bit_io(&mblk->state, DMZ_META_WRITING, TASK_UNINTERRUPTIBLE); if (test_bit(DMZ_META_ERROR, &mblk->state)) { clear_bit(DMZ_META_ERROR, &mblk->state); ret = -EIO; } + nr_mblks_submitted--; } /* Flush drive cache (this will also sync data) */ @@ -736,6 +755,11 @@ int dmz_flush_metadata(struct dmz_metadata *zmd) */ dmz_lock_flush(zmd); + if (dmz_bdev_is_dying(zmd->dev)) { + ret = -EIO; + goto out; + } + /* Get dirty blocks */ spin_lock(&zmd->mblk_lock); list_splice_init(&zmd->mblk_dirty_list, &write_list); @@ -1542,7 +1566,7 @@ static struct dm_zone *dmz_get_rnd_zone_for_reclaim(struct dmz_metadata *zmd) struct dm_zone *zone; if (list_empty(&zmd->map_rnd_list)) - return NULL; + return ERR_PTR(-EBUSY); list_for_each_entry(zone, &zmd->map_rnd_list, link) { if (dmz_is_buf(zone)) @@ -1553,7 +1577,7 @@ static struct dm_zone *dmz_get_rnd_zone_for_reclaim(struct dmz_metadata *zmd) return dzone; } - return NULL; + return ERR_PTR(-EBUSY); } /* @@ -1564,7 +1588,7 @@ static struct dm_zone 
*dmz_get_seq_zone_for_reclaim(struct dmz_metadata *zmd) struct dm_zone *zone; if (list_empty(&zmd->map_seq_list)) - return NULL; + return ERR_PTR(-EBUSY); list_for_each_entry(zone, &zmd->map_seq_list, link) { if (!zone->bzone) @@ -1573,7 +1597,7 @@ static struct dm_zone *dmz_get_seq_zone_for_reclaim(struct dmz_metadata *zmd) return zone; } - return NULL; + return ERR_PTR(-EBUSY); } /* @@ -1628,9 +1652,13 @@ again: if (op != REQ_OP_WRITE) goto out; - /* Alloate a random zone */ + /* Allocate a random zone */ dzone = dmz_alloc_zone(zmd, DMZ_ALLOC_RND); if (!dzone) { + if (dmz_bdev_is_dying(zmd->dev)) { + dzone = ERR_PTR(-EIO); + goto out; + } dmz_wait_for_free_zones(zmd); goto again; } @@ -1725,9 +1753,13 @@ again: if (bzone) goto out; - /* Alloate a random zone */ + /* Allocate a random zone */ bzone = dmz_alloc_zone(zmd, DMZ_ALLOC_RND); if (!bzone) { + if (dmz_bdev_is_dying(zmd->dev)) { + bzone = ERR_PTR(-EIO); + goto out; + } dmz_wait_for_free_zones(zmd); goto again; } diff --git a/drivers/md/dm-zoned-reclaim.c b/drivers/md/dm-zoned-reclaim.c index edf4b95eb075..d240d7ca8a8a 100644 --- a/drivers/md/dm-zoned-reclaim.c +++ b/drivers/md/dm-zoned-reclaim.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2017 Western Digital Corporation or its affiliates. * @@ -37,7 +38,7 @@ enum { /* * Number of seconds of target BIO inactivity to consider the target idle. */ -#define DMZ_IDLE_PERIOD (10UL * HZ) +#define DMZ_IDLE_PERIOD (10UL * HZ) /* * Percentage of unmapped (free) random zones below which reclaim starts @@ -134,6 +135,9 @@ static int dmz_reclaim_copy(struct dmz_reclaim *zrc, set_bit(DM_KCOPYD_WRITE_SEQ, &flags); while (block < end_block) { + if (dev->flags & DMZ_BDEV_DYING) + return -EIO; + /* Get a valid region from the source zone */ ret = dmz_first_valid_block(zmd, src_zone, &block); if (ret <= 0) @@ -215,7 +219,7 @@ static int dmz_reclaim_buf(struct dmz_reclaim *zrc, struct dm_zone *dzone) dmz_unlock_flush(zmd); - return 0; + return ret; } /* @@ -259,7 +263,7 @@ static int dmz_reclaim_seq_data(struct dmz_reclaim *zrc, struct dm_zone *dzone) dmz_unlock_flush(zmd); - return 0; + return ret; } /* @@ -312,7 +316,7 @@ static int dmz_reclaim_rnd_data(struct dmz_reclaim *zrc, struct dm_zone *dzone) dmz_unlock_flush(zmd); - return 0; + return ret; } /* @@ -334,7 +338,7 @@ static void dmz_reclaim_empty(struct dmz_reclaim *zrc, struct dm_zone *dzone) /* * Find a candidate zone for reclaim and process it. 
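
Several dm-zoned helpers above stop returning a bare NULL and instead encode the reason in the pointer with ERR_PTR(), so callers can distinguish -EIO, -ENOMEM and -EBUSY. A minimal sketch of that convention with invented names:

#include <linux/err.h>
#include <linux/slab.h>

struct zone_ctx {
	int id;
};

static struct zone_ctx *get_zone_ctx(bool dev_dying)
{
	struct zone_ctx *z;

	if (dev_dying)
		return ERR_PTR(-EIO);	/* distinct from allocation failure */

	z = kzalloc(sizeof(*z), GFP_NOIO);
	if (!z)
		return ERR_PTR(-ENOMEM);

	return z;
}

static int use_zone_ctx(bool dev_dying)
{
	struct zone_ctx *z = get_zone_ctx(dev_dying);

	if (IS_ERR(z))
		return PTR_ERR(z);	/* propagate the precise error */

	/* ... use z ... */
	kfree(z);
	return 0;
}
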
*/ -static void dmz_reclaim(struct dmz_reclaim *zrc) +static int dmz_do_reclaim(struct dmz_reclaim *zrc) { struct dmz_metadata *zmd = zrc->metadata; struct dm_zone *dzone; @@ -344,8 +348,8 @@ static void dmz_reclaim(struct dmz_reclaim *zrc) /* Get a data zone */ dzone = dmz_get_zone_for_reclaim(zmd); - if (!dzone) - return; + if (IS_ERR(dzone)) + return PTR_ERR(dzone); start = jiffies; @@ -391,13 +395,20 @@ static void dmz_reclaim(struct dmz_reclaim *zrc) out: if (ret) { dmz_unlock_zone_reclaim(dzone); - return; + return ret; } - (void) dmz_flush_metadata(zrc->metadata); + ret = dmz_flush_metadata(zrc->metadata); + if (ret) { + dmz_dev_debug(zrc->dev, + "Metadata flush for zone %u failed, err %d\n", + dmz_id(zmd, rzone), ret); + return ret; + } dmz_dev_debug(zrc->dev, "Reclaimed zone %u in %u ms", dmz_id(zmd, rzone), jiffies_to_msecs(jiffies - start)); + return 0; } /* @@ -427,7 +438,7 @@ static bool dmz_should_reclaim(struct dmz_reclaim *zrc) return false; /* - * If the percentage of unmappped random zones is low, + * If the percentage of unmapped random zones is low, * reclaim even if the target is busy. */ return p_unmap_rnd <= DMZ_RECLAIM_LOW_UNMAP_RND; @@ -442,6 +453,10 @@ static void dmz_reclaim_work(struct work_struct *work) struct dmz_metadata *zmd = zrc->metadata; unsigned int nr_rnd, nr_unmap_rnd; unsigned int p_unmap_rnd; + int ret; + + if (dmz_bdev_is_dying(zrc->dev)) + return; if (!dmz_should_reclaim(zrc)) { mod_delayed_work(zrc->wq, &zrc->work, DMZ_IDLE_PERIOD); @@ -471,7 +486,17 @@ static void dmz_reclaim_work(struct work_struct *work) (dmz_target_idle(zrc) ? "Idle" : "Busy"), p_unmap_rnd, nr_unmap_rnd, nr_rnd); - dmz_reclaim(zrc); + ret = dmz_do_reclaim(zrc); + if (ret) { + dmz_dev_debug(zrc->dev, "Reclaim error %d\n", ret); + if (ret == -EIO) + /* + * LLD might be performing some error handling sequence + * at the underlying device. To not interfere, do not + * attempt to schedule the next reclaim run immediately. + */ + return; + } dmz_schedule_reclaim(zrc); } diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c index 51d029bbb740..31478fef6032 100644 --- a/drivers/md/dm-zoned-target.c +++ b/drivers/md/dm-zoned-target.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2017 Western Digital Corporation or its affiliates. * @@ -133,6 +134,8 @@ static int dmz_submit_bio(struct dmz_target *dmz, struct dm_zone *zone, refcount_inc(&bioctx->ref); generic_make_request(clone); + if (clone->bi_status == BLK_STS_IOERR) + return -EIO; if (bio_op(bio) == REQ_OP_WRITE && dmz_is_seq(zone)) zone->wp_block += nr_blocks; @@ -277,8 +280,8 @@ static int dmz_handle_buffered_write(struct dmz_target *dmz, /* Get the buffer zone. One will be allocated if needed */ bzone = dmz_get_chunk_buffer(zmd, zone); - if (!bzone) - return -ENOSPC; + if (IS_ERR(bzone)) + return PTR_ERR(bzone); if (dmz_is_readonly(bzone)) return -EROFS; @@ -389,6 +392,11 @@ static void dmz_handle_bio(struct dmz_target *dmz, struct dm_chunk_work *cw, dmz_lock_metadata(zmd); + if (dmz->dev->flags & DMZ_BDEV_DYING) { + ret = -EIO; + goto out; + } + /* * Get the data zone mapping the chunk. There may be no * mapping for read and discard. 
If a mapping is obtained, @@ -493,6 +501,8 @@ static void dmz_flush_work(struct work_struct *work) /* Flush dirty metadata blocks */ ret = dmz_flush_metadata(dmz->metadata); + if (ret) + dmz_dev_debug(dmz->dev, "Metadata flush failed, rc=%d\n", ret); /* Process queued flush requests */ while (1) { @@ -513,22 +523,24 @@ static void dmz_flush_work(struct work_struct *work) * Get a chunk work and start it to process a new BIO. * If the BIO chunk has no work yet, create one. */ -static void dmz_queue_chunk_work(struct dmz_target *dmz, struct bio *bio) +static int dmz_queue_chunk_work(struct dmz_target *dmz, struct bio *bio) { unsigned int chunk = dmz_bio_chunk(dmz->dev, bio); struct dm_chunk_work *cw; + int ret = 0; mutex_lock(&dmz->chunk_lock); /* Get the BIO chunk work. If one is not active yet, create one */ cw = radix_tree_lookup(&dmz->chunk_rxtree, chunk); if (!cw) { - int ret; /* Create a new chunk work */ cw = kmalloc(sizeof(struct dm_chunk_work), GFP_NOIO); - if (!cw) + if (unlikely(!cw)) { + ret = -ENOMEM; goto out; + } INIT_WORK(&cw->work, dmz_chunk_work); refcount_set(&cw->refcount, 0); @@ -539,7 +551,6 @@ static void dmz_queue_chunk_work(struct dmz_target *dmz, struct bio *bio) ret = radix_tree_insert(&dmz->chunk_rxtree, chunk, cw); if (unlikely(ret)) { kfree(cw); - cw = NULL; goto out; } } @@ -547,10 +558,38 @@ static void dmz_queue_chunk_work(struct dmz_target *dmz, struct bio *bio) bio_list_add(&cw->bio_list, bio); dmz_get_chunk_work(cw); + dmz_reclaim_bio_acc(dmz->reclaim); if (queue_work(dmz->chunk_wq, &cw->work)) dmz_get_chunk_work(cw); out: mutex_unlock(&dmz->chunk_lock); + return ret; +} + +/* + * Check the backing device availability. If it's on the way out, + * start failing I/O. Reclaim and metadata components also call this + * function to cleanly abort operation in the event of such failure. 
+ */ +bool dmz_bdev_is_dying(struct dmz_dev *dmz_dev) +{ + struct gendisk *disk; + + if (!(dmz_dev->flags & DMZ_BDEV_DYING)) { + disk = dmz_dev->bdev->bd_disk; + if (blk_queue_dying(bdev_get_queue(dmz_dev->bdev))) { + dmz_dev_warn(dmz_dev, "Backing device queue dying"); + dmz_dev->flags |= DMZ_BDEV_DYING; + } else if (disk->fops->check_events) { + if (disk->fops->check_events(disk, 0) & + DISK_EVENT_MEDIA_CHANGE) { + dmz_dev_warn(dmz_dev, "Backing device offline"); + dmz_dev->flags |= DMZ_BDEV_DYING; + } + } + } + + return dmz_dev->flags & DMZ_BDEV_DYING; } /* @@ -564,6 +603,10 @@ static int dmz_map(struct dm_target *ti, struct bio *bio) sector_t sector = bio->bi_iter.bi_sector; unsigned int nr_sectors = bio_sectors(bio); sector_t chunk_sector; + int ret; + + if (dmz_bdev_is_dying(dmz->dev)) + return DM_MAPIO_KILL; dmz_dev_debug(dev, "BIO op %d sector %llu + %u => chunk %llu, block %llu, %u blocks", bio_op(bio), (unsigned long long)sector, nr_sectors, @@ -601,8 +644,14 @@ static int dmz_map(struct dm_target *ti, struct bio *bio) dm_accept_partial_bio(bio, dev->zone_nr_sectors - chunk_sector); /* Now ready to handle this BIO */ - dmz_reclaim_bio_acc(dmz->reclaim); - dmz_queue_chunk_work(dmz, bio); + ret = dmz_queue_chunk_work(dmz, bio); + if (ret) { + dmz_dev_debug(dmz->dev, + "BIO op %d, can't process chunk %llu, err %i\n", + bio_op(bio), (u64)dmz_bio_chunk(dmz->dev, bio), + ret); + return DM_MAPIO_REQUEUE; + } return DM_MAPIO_SUBMITTED; } @@ -855,6 +904,9 @@ static int dmz_prepare_ioctl(struct dm_target *ti, struct block_device **bdev) { struct dmz_target *dmz = ti->private; + if (dmz_bdev_is_dying(dmz->dev)) + return -ENODEV; + *bdev = dmz->dev->bdev; return 0; diff --git a/drivers/md/dm-zoned.h b/drivers/md/dm-zoned.h index ed8de49c9a08..d8e70b0ade35 100644 --- a/drivers/md/dm-zoned.h +++ b/drivers/md/dm-zoned.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2017 Western Digital Corporation or its affiliates. * @@ -56,6 +57,8 @@ struct dmz_dev { unsigned int nr_zones; + unsigned int flags; + sector_t zone_nr_sectors; unsigned int zone_nr_sectors_shift; @@ -67,6 +70,9 @@ struct dmz_dev { (dev)->zone_nr_sectors_shift) #define dmz_chunk_block(dev, b) ((b) & ((dev)->zone_nr_blocks - 1)) +/* Device flags. */ +#define DMZ_BDEV_DYING (1 << 0) + /* * Zone descriptor. */ @@ -245,4 +251,9 @@ void dmz_resume_reclaim(struct dmz_reclaim *zrc); void dmz_reclaim_bio_acc(struct dmz_reclaim *zrc); void dmz_schedule_reclaim(struct dmz_reclaim *zrc); +/* + * Functions defined in dm-zoned-target.c + */ +bool dmz_bdev_is_dying(struct dmz_dev *dmz_dev); + #endif /* DM_ZONED_H */ diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c index 58b319757b1e..8aae0624a297 100644 --- a/drivers/md/persistent-data/dm-btree.c +++ b/drivers/md/persistent-data/dm-btree.c @@ -628,39 +628,40 @@ static int btree_split_beneath(struct shadow_spine *s, uint64_t key) new_parent = shadow_current(s); + pn = dm_block_data(new_parent); + size = le32_to_cpu(pn->header.flags) & INTERNAL_NODE ? 
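
The dmz_map() change above rejects new BIOs outright once dmz_bdev_is_dying() reports the backing queue as dying, instead of queueing work that can only fail later. The sketch below shows that early-out shape in a generic device-mapper map method; everything except the DM constants and block layer helpers is hypothetical.

#include <linux/device-mapper.h>
#include <linux/blkdev.h>

struct my_dev {
	struct block_device *bdev;
};

struct my_target {
	struct my_dev *dev;
};

static bool my_dev_is_dying(struct my_dev *dev)
{
	/* The same test the new dmz_bdev_is_dying() helper starts with. */
	return blk_queue_dying(bdev_get_queue(dev->bdev));
}

static int my_map(struct dm_target *ti, struct bio *bio)
{
	struct my_target *t = ti->private;

	/* Fail fast rather than piling work onto a dead device. */
	if (my_dev_is_dying(t->dev))
		return DM_MAPIO_KILL;

	/* ... remap or queue the BIO here ... */
	return DM_MAPIO_SUBMITTED;
}
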
+ sizeof(__le64) : s->info->value_type.size; + + /* create & init the left block */ r = new_block(s->info, &left); if (r < 0) return r; + ln = dm_block_data(left); + nr_left = le32_to_cpu(pn->header.nr_entries) / 2; + + ln->header.flags = pn->header.flags; + ln->header.nr_entries = cpu_to_le32(nr_left); + ln->header.max_entries = pn->header.max_entries; + ln->header.value_size = pn->header.value_size; + memcpy(ln->keys, pn->keys, nr_left * sizeof(pn->keys[0])); + memcpy(value_ptr(ln, 0), value_ptr(pn, 0), nr_left * size); + + /* create & init the right block */ r = new_block(s->info, &right); if (r < 0) { unlock_block(s->info, left); return r; } - pn = dm_block_data(new_parent); - ln = dm_block_data(left); rn = dm_block_data(right); - - nr_left = le32_to_cpu(pn->header.nr_entries) / 2; nr_right = le32_to_cpu(pn->header.nr_entries) - nr_left; - ln->header.flags = pn->header.flags; - ln->header.nr_entries = cpu_to_le32(nr_left); - ln->header.max_entries = pn->header.max_entries; - ln->header.value_size = pn->header.value_size; - rn->header.flags = pn->header.flags; rn->header.nr_entries = cpu_to_le32(nr_right); rn->header.max_entries = pn->header.max_entries; rn->header.value_size = pn->header.value_size; - - memcpy(ln->keys, pn->keys, nr_left * sizeof(pn->keys[0])); memcpy(rn->keys, pn->keys + nr_left, nr_right * sizeof(pn->keys[0])); - - size = le32_to_cpu(pn->header.flags) & INTERNAL_NODE ? - sizeof(__le64) : s->info->value_type.size; - memcpy(value_ptr(ln, 0), value_ptr(pn, 0), nr_left * size); memcpy(value_ptr(rn, 0), value_ptr(pn, nr_left), nr_right * size); diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c index aec449243966..25328582cc48 100644 --- a/drivers/md/persistent-data/dm-space-map-metadata.c +++ b/drivers/md/persistent-data/dm-space-map-metadata.c @@ -249,7 +249,7 @@ static int out(struct sm_metadata *smm) } if (smm->recursion_count == 1) - apply_bops(smm); + r = apply_bops(smm); smm->recursion_count--; diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index c258a1ce4b28..d3d6b7bd6903 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2257,6 +2257,16 @@ static const struct nvme_core_quirk_entry core_quirks[] = { .vid = 0x1179, .mn = "THNSF5256GPUK TOSHIBA", .quirks = NVME_QUIRK_NO_APST, + }, + { + /* + * This LiteON CL1-3D*-Q11 firmware version has a race + * condition associated with actions related to suspend to idle + * LiteON has resolved the problem in future firmware + */ + .vid = 0x14a4, + .fr = "22301111", + .quirks = NVME_QUIRK_SIMPLE_SUSPEND, } }; @@ -2597,6 +2607,9 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) goto out_free; } + if (!(ctrl->ops->flags & NVME_F_FABRICS)) + ctrl->cntlid = le16_to_cpu(id->cntlid); + if (!ctrl->identified) { int i; @@ -2697,7 +2710,6 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) goto out_free; } } else { - ctrl->cntlid = le16_to_cpu(id->cntlid); ctrl->hmpre = le32_to_cpu(id->hmpre); ctrl->hmmin = le32_to_cpu(id->hmmin); ctrl->hmminds = le32_to_cpu(id->hmminds); diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 888d4543894e..af831d3d15d0 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -428,6 +428,7 @@ static void nvme_mpath_set_live(struct nvme_ns *ns) srcu_read_unlock(&head->srcu, srcu_idx); } + synchronize_srcu(&ns->head->srcu); kblockd_schedule_work(&ns->head->requeue_work); } diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 
778b3a0b6adb..2d678fb968c7 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -92,6 +92,11 @@ enum nvme_quirks { * Broken Write Zeroes. */ NVME_QUIRK_DISABLE_WRITE_ZEROES = (1 << 9), + + /* + * Force simple suspend/resume path. + */ + NVME_QUIRK_SIMPLE_SUSPEND = (1 << 10), }; /* diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 6bd9b1033965..732d5b63ec05 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2876,7 +2876,8 @@ static int nvme_suspend(struct device *dev) * state (which may not be possible if the link is up). */ if (pm_suspend_via_firmware() || !ctrl->npss || - !pcie_aspm_enabled(pdev)) { + !pcie_aspm_enabled(pdev) || + (ndev->ctrl.quirks & NVME_QUIRK_SIMPLE_SUSPEND)) { nvme_dev_disable(ndev, true); return 0; } diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 2c3bb8a966e5..bade2e025ecf 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -824,6 +824,7 @@ struct lpfc_hba { uint32_t cfg_cq_poll_threshold; uint32_t cfg_cq_max_proc_limit; uint32_t cfg_fcp_cpu_map; + uint32_t cfg_fcp_mq_threshold; uint32_t cfg_hdw_queue; uint32_t cfg_irq_chann; uint32_t cfg_suppress_rsp; diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index ea62322ffe2b..8d8c495b5b60 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -5709,6 +5709,19 @@ LPFC_ATTR_RW(nvme_embed_cmd, 1, 0, 2, "Embed NVME Command in WQE"); /* + * lpfc_fcp_mq_threshold: Set the maximum number of Hardware Queues + * the driver will advertise it supports to the SCSI layer. + * + * 0 = Set nr_hw_queues by the number of CPUs or HW queues. + * 1,128 = Manually specify the maximum nr_hw_queue value to be set, + * + * Value range is [0,128]. Default value is 8. + */ +LPFC_ATTR_R(fcp_mq_threshold, LPFC_FCP_MQ_THRESHOLD_DEF, + LPFC_FCP_MQ_THRESHOLD_MIN, LPFC_FCP_MQ_THRESHOLD_MAX, + "Set the number of SCSI Queues advertised"); + +/* * lpfc_hdw_queue: Set the number of Hardware Queues the driver * will advertise it supports to the NVME and SCSI layers. This also * will map to the number of CQ/WQ pairs the driver will create. @@ -6030,6 +6043,7 @@ struct device_attribute *lpfc_hba_attrs[] = { &dev_attr_lpfc_cq_poll_threshold, &dev_attr_lpfc_cq_max_proc_limit, &dev_attr_lpfc_fcp_cpu_map, + &dev_attr_lpfc_fcp_mq_threshold, &dev_attr_lpfc_hdw_queue, &dev_attr_lpfc_irq_chann, &dev_attr_lpfc_suppress_rsp, @@ -7112,6 +7126,7 @@ lpfc_get_cfgparam(struct lpfc_hba *phba) /* Initialize first burst. Target vs Initiator are different. 
*/ lpfc_nvme_enable_fb_init(phba, lpfc_nvme_enable_fb); lpfc_nvmet_fb_size_init(phba, lpfc_nvmet_fb_size); + lpfc_fcp_mq_threshold_init(phba, lpfc_fcp_mq_threshold); lpfc_hdw_queue_init(phba, lpfc_hdw_queue); lpfc_irq_chann_init(phba, lpfc_irq_chann); lpfc_enable_bbcr_init(phba, lpfc_enable_bbcr); diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index a7549ae32542..1ac98becb5ba 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -4309,10 +4309,12 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev) shost->max_cmd_len = 16; if (phba->sli_rev == LPFC_SLI_REV4) { - if (phba->cfg_fcp_io_sched == LPFC_FCP_SCHED_BY_HDWQ) - shost->nr_hw_queues = phba->cfg_hdw_queue; - else - shost->nr_hw_queues = phba->sli4_hba.num_present_cpu; + if (!phba->cfg_fcp_mq_threshold || + phba->cfg_fcp_mq_threshold > phba->cfg_hdw_queue) + phba->cfg_fcp_mq_threshold = phba->cfg_hdw_queue; + + shost->nr_hw_queues = min_t(int, 2 * num_possible_nodes(), + phba->cfg_fcp_mq_threshold); shost->dma_boundary = phba->sli4_hba.pc_sli4_params.sge_supp_len-1; diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index 3aeca387b22a..329f7aa7e169 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h @@ -44,6 +44,11 @@ #define LPFC_HBA_HDWQ_MAX 128 #define LPFC_HBA_HDWQ_DEF 0 +/* FCP MQ queue count limiting */ +#define LPFC_FCP_MQ_THRESHOLD_MIN 0 +#define LPFC_FCP_MQ_THRESHOLD_MAX 128 +#define LPFC_FCP_MQ_THRESHOLD_DEF 8 + /* Common buffer size to accomidate SCSI and NVME IO buffers */ #define LPFC_COMMON_IO_BUF_SZ 768 diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index 8d560c562e9c..6b7b390b2e52 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -2956,6 +2956,8 @@ qla24xx_vport_delete(struct fc_vport *fc_vport) dma_free_coherent(&ha->pdev->dev, vha->gnl.size, vha->gnl.l, vha->gnl.ldma); + vha->gnl.l = NULL; + vfree(vha->scan.l); if (vha->qpair && vha->qpair->vp_idx == vha->vp_idx) { diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 2e58cff9d200..98e60a34afd9 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -3440,6 +3440,12 @@ skip_dpc: return 0; probe_failed: + if (base_vha->gnl.l) { + dma_free_coherent(&ha->pdev->dev, base_vha->gnl.size, + base_vha->gnl.l, base_vha->gnl.ldma); + base_vha->gnl.l = NULL; + } + if (base_vha->timer_active) qla2x00_stop_timer(base_vha); base_vha->flags.online = 0; @@ -3673,7 +3679,7 @@ qla2x00_remove_one(struct pci_dev *pdev) if (!atomic_read(&pdev->enable_cnt)) { dma_free_coherent(&ha->pdev->dev, base_vha->gnl.size, base_vha->gnl.l, base_vha->gnl.ldma); - + base_vha->gnl.l = NULL; scsi_host_put(base_vha->host); kfree(ha); pci_set_drvdata(pdev, NULL); @@ -3713,6 +3719,8 @@ qla2x00_remove_one(struct pci_dev *pdev) dma_free_coherent(&ha->pdev->dev, base_vha->gnl.size, base_vha->gnl.l, base_vha->gnl.ldma); + base_vha->gnl.l = NULL; + vfree(base_vha->scan.l); if (IS_QLAFX00(ha)) @@ -4816,6 +4824,7 @@ struct scsi_qla_host *qla2x00_create_host(struct scsi_host_template *sht, "Alloc failed for scan database.\n"); dma_free_coherent(&ha->pdev->dev, vha->gnl.size, vha->gnl.l, vha->gnl.ldma); + vha->gnl.l = NULL; scsi_remove_host(vha->host); return NULL; } diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index e274053109d0..029da74bb2f5 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -7062,6 +7062,9 @@ static inline 
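
The qla2xxx hunks above set vha->gnl.l to NULL right after dma_free_coherent() so that a later teardown path cannot free the same buffer twice. A short sketch of that idiom with hypothetical structure names:

#include <linux/dma-mapping.h>

struct my_host {
	void *gnl_buf;
	dma_addr_t gnl_dma;
	size_t gnl_size;
};

static void my_free_gnl(struct device *dev, struct my_host *h)
{
	if (!h->gnl_buf)
		return;		/* already released on another path */

	dma_free_coherent(dev, h->gnl_size, h->gnl_buf, h->gnl_dma);
	h->gnl_buf = NULL;	/* makes a second call here a no-op */
}
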
int ufshcd_config_vreg_lpm(struct ufs_hba *hba, static inline int ufshcd_config_vreg_hpm(struct ufs_hba *hba, struct ufs_vreg *vreg) { + if (!vreg) + return 0; + return ufshcd_config_vreg_load(hba->dev, vreg, vreg->max_uA); } diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 04eda111920e..661bb9358364 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -1132,14 +1132,16 @@ static void tcmu_handle_completion(struct tcmu_cmd *cmd, struct tcmu_cmd_entry * struct se_cmd *se_cmd = cmd->se_cmd; struct tcmu_dev *udev = cmd->tcmu_dev; bool read_len_valid = false; - uint32_t read_len = se_cmd->data_length; + uint32_t read_len; /* * cmd has been completed already from timeout, just reclaim * data area space and free cmd */ - if (test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags)) + if (test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags)) { + WARN_ON_ONCE(se_cmd); goto out; + } list_del_init(&cmd->queue_entry); @@ -1152,6 +1154,7 @@ static void tcmu_handle_completion(struct tcmu_cmd *cmd, struct tcmu_cmd_entry * goto done; } + read_len = se_cmd->data_length; if (se_cmd->data_direction == DMA_FROM_DEVICE && (entry->hdr.uflags & TCMU_UFLAG_READ_LEN) && entry->rsp.read_len) { read_len_valid = true; @@ -1307,6 +1310,7 @@ static int tcmu_check_expired_cmd(int id, void *p, void *data) */ scsi_status = SAM_STAT_CHECK_CONDITION; list_del_init(&cmd->queue_entry); + cmd->se_cmd = NULL; } else { list_del_init(&cmd->queue_entry); idr_remove(&udev->commands, id); @@ -2022,6 +2026,7 @@ static void tcmu_reset_ring(struct tcmu_dev *udev, u8 err_level) idr_remove(&udev->commands, i); if (!test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags)) { + WARN_ON(!cmd->se_cmd); list_del_init(&cmd->queue_entry); if (err_level == 1) { /* diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index e078cc55b989..b3c8b886bf64 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -913,8 +913,9 @@ get_more_pages: if (page_offset(page) >= ceph_wbc.i_size) { dout("%p page eof %llu\n", page, ceph_wbc.i_size); - if (ceph_wbc.size_stable || - page_offset(page) >= i_size_read(inode)) + if ((ceph_wbc.size_stable || + page_offset(page) >= i_size_read(inode)) && + clear_page_dirty_for_io(page)) mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE); unlock_page(page); diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index d98dcd976c80..ce0f5658720a 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1301,6 +1301,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, { struct ceph_inode_info *ci = cap->ci; struct inode *inode = &ci->vfs_inode; + struct ceph_buffer *old_blob = NULL; struct cap_msg_args arg; int held, revoking; int wake = 0; @@ -1365,7 +1366,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, ci->i_requested_max_size = arg.max_size; if (flushing & CEPH_CAP_XATTR_EXCL) { - __ceph_build_xattrs_blob(ci); + old_blob = __ceph_build_xattrs_blob(ci); arg.xattr_version = ci->i_xattrs.version; arg.xattr_buf = ci->i_xattrs.blob; } else { @@ -1409,6 +1410,8 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, spin_unlock(&ci->i_ceph_lock); + ceph_buffer_put(old_blob); + ret = send_cap_msg(&arg); if (ret < 0) { dout("error sending cap msg, must requeue %p\n", inode); diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 791f84a13bb8..18500edefc56 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -736,6 +736,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page, int issued, new_issued, info_caps; struct 
timespec64 mtime, atime, ctime; struct ceph_buffer *xattr_blob = NULL; + struct ceph_buffer *old_blob = NULL; struct ceph_string *pool_ns = NULL; struct ceph_cap *new_cap = NULL; int err = 0; @@ -881,7 +882,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page, if ((ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL)) && le64_to_cpu(info->xattr_version) > ci->i_xattrs.version) { if (ci->i_xattrs.blob) - ceph_buffer_put(ci->i_xattrs.blob); + old_blob = ci->i_xattrs.blob; ci->i_xattrs.blob = xattr_blob; if (xattr_blob) memcpy(ci->i_xattrs.blob->vec.iov_base, @@ -1022,8 +1023,8 @@ static int fill_inode(struct inode *inode, struct page *locked_page, out: if (new_cap) ceph_put_cap(mdsc, new_cap); - if (xattr_blob) - ceph_buffer_put(xattr_blob); + ceph_buffer_put(old_blob); + ceph_buffer_put(xattr_blob); ceph_put_string(pool_ns); return err; } diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index ac9b53b89365..5083e238ad15 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c @@ -111,8 +111,7 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode, req->r_wait_for_completion = ceph_lock_wait_for_completion; err = ceph_mdsc_do_request(mdsc, inode, req); - - if (operation == CEPH_MDS_OP_GETFILELOCK) { + if (!err && operation == CEPH_MDS_OP_GETFILELOCK) { fl->fl_pid = -le64_to_cpu(req->r_reply_info.filelock_reply->pid); if (CEPH_LOCK_SHARED == req->r_reply_info.filelock_reply->type) fl->fl_type = F_RDLCK; diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 4c6494eb02b5..ccfcc66aaf44 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -465,6 +465,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) struct inode *inode = &ci->vfs_inode; struct ceph_cap_snap *capsnap; struct ceph_snap_context *old_snapc, *new_snapc; + struct ceph_buffer *old_blob = NULL; int used, dirty; capsnap = kzalloc(sizeof(*capsnap), GFP_NOFS); @@ -541,7 +542,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) capsnap->gid = inode->i_gid; if (dirty & CEPH_CAP_XATTR_EXCL) { - __ceph_build_xattrs_blob(ci); + old_blob = __ceph_build_xattrs_blob(ci); capsnap->xattr_blob = ceph_buffer_get(ci->i_xattrs.blob); capsnap->xattr_version = ci->i_xattrs.version; @@ -584,6 +585,7 @@ update_snapc: } spin_unlock(&ci->i_ceph_lock); + ceph_buffer_put(old_blob); kfree(capsnap); ceph_put_snap_context(old_snapc); } diff --git a/fs/ceph/super.h b/fs/ceph/super.h index d2352fd95dbc..6b9f1ee7de85 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -926,7 +926,7 @@ extern int ceph_getattr(const struct path *path, struct kstat *stat, int __ceph_setxattr(struct inode *, const char *, const void *, size_t, int); ssize_t __ceph_getxattr(struct inode *, const char *, void *, size_t); extern ssize_t ceph_listxattr(struct dentry *, char *, size_t); -extern void __ceph_build_xattrs_blob(struct ceph_inode_info *ci); +extern struct ceph_buffer *__ceph_build_xattrs_blob(struct ceph_inode_info *ci); extern void __ceph_destroy_xattrs(struct ceph_inode_info *ci); extern const struct xattr_handler *ceph_xattr_handlers[]; diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 37b458a9af3a..939eab7aa219 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -754,12 +754,15 @@ static int __get_required_blob_size(struct ceph_inode_info *ci, int name_size, /* * If there are dirty xattrs, reencode xattrs into the prealloc_blob - * and swap into place. + * and swap into place. It returns the old i_xattrs.blob (or NULL) so + * that it can be freed by the caller as the i_ceph_lock is likely to be + * held. 
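
The ceph changes above make __ceph_build_xattrs_blob() hand the displaced i_xattrs.blob back to the caller, because releasing it (which can reach vfree() and sleep) is not safe under the i_ceph_lock spinlock. A generic sketch of swapping under the lock and freeing afterwards, with made-up names:

#include <linux/spinlock.h>
#include <linux/vmalloc.h>

struct blob_holder {
	spinlock_t lock;
	void *blob;
};

/* Install a new blob; free the old one only after dropping the lock. */
static void replace_blob(struct blob_holder *h, void *new_blob)
{
	void *old_blob;

	spin_lock(&h->lock);
	old_blob = h->blob;
	h->blob = new_blob;
	spin_unlock(&h->lock);

	vfree(old_blob);	/* may sleep, so it must happen unlocked */
}
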
*/ -void __ceph_build_xattrs_blob(struct ceph_inode_info *ci) +struct ceph_buffer *__ceph_build_xattrs_blob(struct ceph_inode_info *ci) { struct rb_node *p; struct ceph_inode_xattr *xattr = NULL; + struct ceph_buffer *old_blob = NULL; void *dest; dout("__build_xattrs_blob %p\n", &ci->vfs_inode); @@ -790,12 +793,14 @@ void __ceph_build_xattrs_blob(struct ceph_inode_info *ci) dest - ci->i_xattrs.prealloc_blob->vec.iov_base; if (ci->i_xattrs.blob) - ceph_buffer_put(ci->i_xattrs.blob); + old_blob = ci->i_xattrs.blob; ci->i_xattrs.blob = ci->i_xattrs.prealloc_blob; ci->i_xattrs.prealloc_blob = NULL; ci->i_xattrs.dirty = false; ci->i_xattrs.version++; } + + return old_blob; } static inline int __get_request_mask(struct inode *in) { @@ -1036,6 +1041,7 @@ int __ceph_setxattr(struct inode *inode, const char *name, struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; struct ceph_cap_flush *prealloc_cf = NULL; + struct ceph_buffer *old_blob = NULL; int issued; int err; int dirty = 0; @@ -1109,13 +1115,15 @@ retry: struct ceph_buffer *blob; spin_unlock(&ci->i_ceph_lock); - dout(" preaallocating new blob size=%d\n", required_blob_size); + ceph_buffer_put(old_blob); /* Shouldn't be required */ + dout(" pre-allocating new blob size=%d\n", required_blob_size); blob = ceph_buffer_new(required_blob_size, GFP_NOFS); if (!blob) goto do_sync_unlocked; spin_lock(&ci->i_ceph_lock); + /* prealloc_blob can't be released while holding i_ceph_lock */ if (ci->i_xattrs.prealloc_blob) - ceph_buffer_put(ci->i_xattrs.prealloc_blob); + old_blob = ci->i_xattrs.prealloc_blob; ci->i_xattrs.prealloc_blob = blob; goto retry; } @@ -1131,6 +1139,7 @@ retry: } spin_unlock(&ci->i_ceph_lock); + ceph_buffer_put(old_blob); if (lock_snap_rwsem) up_read(&mdsc->snap_rwsem); if (dirty) diff --git a/fs/io_uring.c b/fs/io_uring.c index 24bbe3cb7ad4..cfb48bd088e1 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -679,6 +679,13 @@ static void io_put_req(struct io_kiocb *req) io_free_req(req); } +static unsigned io_cqring_events(struct io_cq_ring *ring) +{ + /* See comment at the top of this file */ + smp_rmb(); + return READ_ONCE(ring->r.tail) - READ_ONCE(ring->r.head); +} + /* * Find and free completed poll iocbs */ @@ -771,7 +778,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events, static int io_iopoll_getevents(struct io_ring_ctx *ctx, unsigned int *nr_events, long min) { - while (!list_empty(&ctx->poll_list)) { + while (!list_empty(&ctx->poll_list) && !need_resched()) { int ret; ret = io_do_iopoll(ctx, nr_events, min); @@ -798,6 +805,12 @@ static void io_iopoll_reap_events(struct io_ring_ctx *ctx) unsigned int nr_events = 0; io_iopoll_getevents(ctx, &nr_events, 1); + + /* + * Ensure we allow local-to-the-cpu processing to take place, + * in this case we need to ensure that we reap all events. + */ + cond_resched(); } mutex_unlock(&ctx->uring_lock); } @@ -805,11 +818,42 @@ static void io_iopoll_reap_events(struct io_ring_ctx *ctx) static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events, long min) { - int ret = 0; + int iters, ret = 0; + /* + * We disallow the app entering submit/complete with polling, but we + * still need to lock the ring to prevent racing with polled issue + * that got punted to a workqueue. + */ + mutex_lock(&ctx->uring_lock); + + iters = 0; do { int tmin = 0; + /* + * Don't enter poll loop if we already have events pending. 
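
io_iopoll_check() above consults io_cqring_events() first so it never starts busy-polling when completions are already posted, and io_cqring_events() itself is just tail minus head read behind a barrier. A sketch of that occupancy test for a hypothetical single-producer completion ring:

#include <linux/types.h>
#include <linux/compiler.h>
#include <asm/barrier.h>

struct cq_ring {
	u32 head;
	u32 tail;
};

/* Number of completions currently sitting in the ring. */
static unsigned int cq_pending(struct cq_ring *ring)
{
	/* Pairs with the producer's write barrier after filling entries. */
	smp_rmb();
	return READ_ONCE(ring->tail) - READ_ONCE(ring->head);
}

/* In a poll loop: only keep spinning when nothing is ready yet. */
static bool keep_polling(struct cq_ring *ring)
{
	return cq_pending(ring) == 0;
}
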
+ * If we do, we can potentially be spinning for commands that + * already triggered a CQE (eg in error). + */ + if (io_cqring_events(ctx->cq_ring)) + break; + + /* + * If a submit got punted to a workqueue, we can have the + * application entering polling for a command before it gets + * issued. That app will hold the uring_lock for the duration + * of the poll right here, so we need to take a breather every + * now and then to ensure that the issue has a chance to add + * the poll to the issued list. Otherwise we can spin here + * forever, while the workqueue is stuck trying to acquire the + * very same mutex. + */ + if (!(++iters & 7)) { + mutex_unlock(&ctx->uring_lock); + mutex_lock(&ctx->uring_lock); + } + if (*nr_events < min) tmin = min - *nr_events; @@ -819,6 +863,7 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events, ret = 0; } while (min && !*nr_events && !need_resched()); + mutex_unlock(&ctx->uring_lock); return ret; } @@ -2280,15 +2325,7 @@ static int io_sq_thread(void *data) unsigned nr_events = 0; if (ctx->flags & IORING_SETUP_IOPOLL) { - /* - * We disallow the app entering submit/complete - * with polling, but we still need to lock the - * ring to prevent racing with polled issue - * that got punted to a workqueue. - */ - mutex_lock(&ctx->uring_lock); io_iopoll_check(ctx, &nr_events, 0); - mutex_unlock(&ctx->uring_lock); } else { /* * Normal IO, just pretend everything completed. @@ -2433,13 +2470,6 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit) return submit; } -static unsigned io_cqring_events(struct io_cq_ring *ring) -{ - /* See comment at the top of this file */ - smp_rmb(); - return READ_ONCE(ring->r.tail) - READ_ONCE(ring->r.head); -} - /* * Wait until events become available, if we don't already have some. The * application must reap them itself, as they reside on the shared cq ring. @@ -3190,9 +3220,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, min_complete = min(min_complete, ctx->cq_entries); if (ctx->flags & IORING_SETUP_IOPOLL) { - mutex_lock(&ctx->uring_lock); ret = io_iopoll_check(ctx, &nr_events, min_complete); - mutex_unlock(&ctx->uring_lock); } else { ret = io_cqring_wait(ctx, min_complete, sig, sigsz); } diff --git a/fs/read_write.c b/fs/read_write.c index 1f5088dec566..5bbf587f5bc1 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1811,10 +1811,7 @@ static int generic_remap_check_len(struct inode *inode_in, return (remap_flags & REMAP_FILE_DEDUP) ? -EBADE : -EINVAL; } -/* - * Read a page's worth of file data into the page cache. Return the page - * locked. - */ +/* Read a page's worth of file data into the page cache. */ static struct page *vfs_dedupe_get_page(struct inode *inode, loff_t offset) { struct page *page; @@ -1826,11 +1823,33 @@ static struct page *vfs_dedupe_get_page(struct inode *inode, loff_t offset) put_page(page); return ERR_PTR(-EIO); } - lock_page(page); return page; } /* + * Lock two pages, ensuring that we lock in offset order if the pages are from + * the same file. + */ +static void vfs_lock_two_pages(struct page *page1, struct page *page2) +{ + /* Always lock in order of increasing index. */ + if (page1->index > page2->index) + swap(page1, page2); + + lock_page(page1); + if (page1 != page2) + lock_page(page2); +} + +/* Unlock two pages, being careful not to unlock the same page twice. 
*/ +static void vfs_unlock_two_pages(struct page *page1, struct page *page2) +{ + unlock_page(page1); + if (page1 != page2) + unlock_page(page2); +} + +/* * Compare extents of two files to see if they are the same. * Caller must have locked both inodes to prevent write races. */ @@ -1867,10 +1886,24 @@ static int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff, dest_page = vfs_dedupe_get_page(dest, destoff); if (IS_ERR(dest_page)) { error = PTR_ERR(dest_page); - unlock_page(src_page); put_page(src_page); goto out_error; } + + vfs_lock_two_pages(src_page, dest_page); + + /* + * Now that we've locked both pages, make sure they're still + * mapped to the file data we're interested in. If not, + * someone is invalidating pages on us and we lose. + */ + if (!PageUptodate(src_page) || !PageUptodate(dest_page) || + src_page->mapping != src->i_mapping || + dest_page->mapping != dest->i_mapping) { + same = false; + goto unlock; + } + src_addr = kmap_atomic(src_page); dest_addr = kmap_atomic(dest_page); @@ -1882,8 +1915,8 @@ static int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff, kunmap_atomic(dest_addr); kunmap_atomic(src_addr); - unlock_page(dest_page); - unlock_page(src_page); +unlock: + vfs_unlock_two_pages(src_page, dest_page); put_page(dest_page); put_page(src_page); diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index ccbdbd62f0d8..fe6d804a38dc 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -880,6 +880,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file) /* len == 0 means wake all */ struct userfaultfd_wake_range range = { .len = 0, }; unsigned long new_flags; + bool still_valid; WRITE_ONCE(ctx->released, true); @@ -895,8 +896,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file) * taking the mmap_sem for writing. 
*/ down_write(&mm->mmap_sem); - if (!mmget_still_valid(mm)) - goto skip_mm; + still_valid = mmget_still_valid(mm); prev = NULL; for (vma = mm->mmap; vma; vma = vma->vm_next) { cond_resched(); @@ -907,19 +907,20 @@ static int userfaultfd_release(struct inode *inode, struct file *file) continue; } new_flags = vma->vm_flags & ~(VM_UFFD_MISSING | VM_UFFD_WP); - prev = vma_merge(mm, prev, vma->vm_start, vma->vm_end, - new_flags, vma->anon_vma, - vma->vm_file, vma->vm_pgoff, - vma_policy(vma), - NULL_VM_UFFD_CTX); - if (prev) - vma = prev; - else - prev = vma; + if (still_valid) { + prev = vma_merge(mm, prev, vma->vm_start, vma->vm_end, + new_flags, vma->anon_vma, + vma->vm_file, vma->vm_pgoff, + vma_policy(vma), + NULL_VM_UFFD_CTX); + if (prev) + vma = prev; + else + prev = vma; + } vma->vm_flags = new_flags; vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; } -skip_mm: up_write(&mm->mmap_sem); mmput(mm); wakeup: diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index 7fcf7569743f..7bd7534f5051 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c @@ -547,63 +547,12 @@ xfs_file_compat_ioctl( struct inode *inode = file_inode(filp); struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; - void __user *arg = (void __user *)p; + void __user *arg = compat_ptr(p); int error; trace_xfs_file_compat_ioctl(ip); switch (cmd) { - /* No size or alignment issues on any arch */ - case XFS_IOC_DIOINFO: - case XFS_IOC_FSGEOMETRY_V4: - case XFS_IOC_FSGEOMETRY: - case XFS_IOC_AG_GEOMETRY: - case XFS_IOC_FSGETXATTR: - case XFS_IOC_FSSETXATTR: - case XFS_IOC_FSGETXATTRA: - case XFS_IOC_FSSETDM: - case XFS_IOC_GETBMAP: - case XFS_IOC_GETBMAPA: - case XFS_IOC_GETBMAPX: - case XFS_IOC_FSCOUNTS: - case XFS_IOC_SET_RESBLKS: - case XFS_IOC_GET_RESBLKS: - case XFS_IOC_FSGROWFSLOG: - case XFS_IOC_GOINGDOWN: - case XFS_IOC_ERROR_INJECTION: - case XFS_IOC_ERROR_CLEARALL: - case FS_IOC_GETFSMAP: - case XFS_IOC_SCRUB_METADATA: - case XFS_IOC_BULKSTAT: - case XFS_IOC_INUMBERS: - return xfs_file_ioctl(filp, cmd, p); -#if !defined(BROKEN_X86_ALIGNMENT) || defined(CONFIG_X86_X32) - /* - * These are handled fine if no alignment issues. To support x32 - * which uses native 64-bit alignment we must emit these cases in - * addition to the ia-32 compat set below. - */ - case XFS_IOC_ALLOCSP: - case XFS_IOC_FREESP: - case XFS_IOC_RESVSP: - case XFS_IOC_UNRESVSP: - case XFS_IOC_ALLOCSP64: - case XFS_IOC_FREESP64: - case XFS_IOC_RESVSP64: - case XFS_IOC_UNRESVSP64: - case XFS_IOC_FSGEOMETRY_V1: - case XFS_IOC_FSGROWFSDATA: - case XFS_IOC_FSGROWFSRT: - case XFS_IOC_ZERO_RANGE: -#ifdef CONFIG_X86_X32 - /* - * x32 special: this gets a different cmd number from the ia-32 compat - * case below; the associated data will match native 64-bit alignment. 
- */ - case XFS_IOC_SWAPEXT: -#endif - return xfs_file_ioctl(filp, cmd, p); -#endif #if defined(BROKEN_X86_ALIGNMENT) case XFS_IOC_ALLOCSP_32: case XFS_IOC_FREESP_32: @@ -705,6 +654,7 @@ xfs_file_compat_ioctl( case XFS_IOC_FSSETDM_BY_HANDLE_32: return xfs_compat_fssetdm_by_handle(filp, arg); default: - return -ENOIOCTLCMD; + /* try the native version */ + return xfs_file_ioctl(filp, cmd, (unsigned long)arg); } } diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index ff3c1fae5357..fe285d123d69 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -793,6 +793,7 @@ xfs_setattr_nonsize( out_cancel: xfs_trans_cancel(tp); + xfs_iunlock(ip, XFS_ILOCK_EXCL); out_dqrele: xfs_qm_dqrele(udqp); xfs_qm_dqrele(gdqp); diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c index 0c954cad7449..a339bd5fa260 100644 --- a/fs/xfs/xfs_pnfs.c +++ b/fs/xfs/xfs_pnfs.c @@ -32,7 +32,7 @@ xfs_break_leased_layouts( struct xfs_inode *ip = XFS_I(inode); int error; - while ((error = break_layout(inode, false) == -EWOULDBLOCK)) { + while ((error = break_layout(inode, false)) == -EWOULDBLOCK) { xfs_iunlock(ip, *iolock); *did_unlock = true; error = break_layout(inode, true); diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index c4ec7afd1170..edbe37b7f636 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1190,11 +1190,11 @@ xfs_reflink_remap_blocks( } /* - * Grab the exclusive iolock for a data copy from src to dest, making - * sure to abide vfs locking order (lowest pointer value goes first) and - * breaking the pnfs layout leases on dest before proceeding. The loop - * is needed because we cannot call the blocking break_layout() with the - * src iolock held, and therefore have to back out both locks. + * Grab the exclusive iolock for a data copy from src to dest, making sure to + * abide vfs locking order (lowest pointer value goes first) and breaking the + * layout leases before proceeding. The loop is needed because we cannot call + * the blocking break_layout() with the iolocks held, and therefore have to + * back out both locks. */ static int xfs_iolock_two_inodes_and_break_layout( @@ -1203,33 +1203,44 @@ xfs_iolock_two_inodes_and_break_layout( { int error; -retry: - if (src < dest) { - inode_lock_shared(src); - inode_lock_nested(dest, I_MUTEX_NONDIR2); - } else { - /* src >= dest */ - inode_lock(dest); - } + if (src > dest) + swap(src, dest); - error = break_layout(dest, false); - if (error == -EWOULDBLOCK) { - inode_unlock(dest); - if (src < dest) - inode_unlock_shared(src); +retry: + /* Wait to break both inodes' layouts before we start locking. */ + error = break_layout(src, true); + if (error) + return error; + if (src != dest) { error = break_layout(dest, true); if (error) return error; - goto retry; } + + /* Lock one inode and make sure nobody got in and leased it. */ + inode_lock(src); + error = break_layout(src, false); if (error) { + inode_unlock(src); + if (error == -EWOULDBLOCK) + goto retry; + return error; + } + + if (src == dest) + return 0; + + /* Lock the other inode and make sure nobody got in and leased it. 
*/ + inode_lock_nested(dest, I_MUTEX_NONDIR2); + error = break_layout(dest, false); + if (error) { + inode_unlock(src); inode_unlock(dest); - if (src < dest) - inode_unlock_shared(src); + if (error == -EWOULDBLOCK) + goto retry; return error; } - if (src > dest) - inode_lock_shared_nested(src, I_MUTEX_NONDIR2); + return 0; } @@ -1247,10 +1258,10 @@ xfs_reflink_remap_unlock( xfs_iunlock(dest, XFS_MMAPLOCK_EXCL); if (!same_inode) - xfs_iunlock(src, XFS_MMAPLOCK_SHARED); + xfs_iunlock(src, XFS_MMAPLOCK_EXCL); inode_unlock(inode_out); if (!same_inode) - inode_unlock_shared(inode_in); + inode_unlock(inode_in); } /* @@ -1325,7 +1336,7 @@ xfs_reflink_remap_prep( if (same_inode) xfs_ilock(src, XFS_MMAPLOCK_EXCL); else - xfs_lock_two_inodes(src, XFS_MMAPLOCK_SHARED, dest, + xfs_lock_two_inodes(src, XFS_MMAPLOCK_EXCL, dest, XFS_MMAPLOCK_EXCL); /* Check file eligibility and prepare for block sharing. */ diff --git a/include/linux/ceph/buffer.h b/include/linux/ceph/buffer.h index 5e58bb29b1a3..11cdc7c60480 100644 --- a/include/linux/ceph/buffer.h +++ b/include/linux/ceph/buffer.h @@ -30,7 +30,8 @@ static inline struct ceph_buffer *ceph_buffer_get(struct ceph_buffer *b) static inline void ceph_buffer_put(struct ceph_buffer *b) { - kref_put(&b->kref, ceph_buffer_release); + if (b) + kref_put(&b->kref, ceph_buffer_release); } extern int ceph_decode_buffer(struct ceph_buffer **b, void **p, void *end); diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h index c05d4e661489..03f8e98e3bcc 100644 --- a/include/linux/dma-contiguous.h +++ b/include/linux/dma-contiguous.h @@ -160,10 +160,7 @@ bool dma_release_from_contiguous(struct device *dev, struct page *pages, static inline struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp) { - int node = dev ? 
dev_to_node(dev) : NUMA_NO_NODE; - size_t align = get_order(PAGE_ALIGN(size)); - - return alloc_pages_node(node, gfp, align); + return NULL; } static inline void dma_free_contiguous(struct device *dev, struct page *page, diff --git a/include/linux/gpio.h b/include/linux/gpio.h index 40915b461f18..f757a58191a6 100644 --- a/include/linux/gpio.h +++ b/include/linux/gpio.h @@ -241,30 +241,6 @@ static inline int irq_to_gpio(unsigned irq) return -EINVAL; } -static inline int -gpiochip_add_pin_range(struct gpio_chip *chip, const char *pinctl_name, - unsigned int gpio_offset, unsigned int pin_offset, - unsigned int npins) -{ - WARN_ON(1); - return -EINVAL; -} - -static inline int -gpiochip_add_pingroup_range(struct gpio_chip *chip, - struct pinctrl_dev *pctldev, - unsigned int gpio_offset, const char *pin_group) -{ - WARN_ON(1); - return -EINVAL; -} - -static inline void -gpiochip_remove_pin_ranges(struct gpio_chip *chip) -{ - WARN_ON(1); -} - static inline int devm_gpio_request(struct device *dev, unsigned gpio, const char *label) { diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h index b0fc6b26bdf5..83df1ec6664e 100644 --- a/include/rdma/restrack.h +++ b/include/rdma/restrack.h @@ -105,8 +105,7 @@ struct rdma_restrack_entry { }; int rdma_restrack_count(struct ib_device *dev, - enum rdma_restrack_type type, - struct pid_namespace *ns); + enum rdma_restrack_type type); void rdma_restrack_kadd(struct rdma_restrack_entry *res); void rdma_restrack_uadd(struct rdma_restrack_entry *res); diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c index 2bd410f934b3..69cfb4345388 100644 --- a/kernel/dma/contiguous.c +++ b/kernel/dma/contiguous.c @@ -230,9 +230,7 @@ bool dma_release_from_contiguous(struct device *dev, struct page *pages, */ struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp) { - int node = dev ? 
dev_to_node(dev) : NUMA_NO_NODE; - size_t count = PAGE_ALIGN(size) >> PAGE_SHIFT; - size_t align = get_order(PAGE_ALIGN(size)); + size_t count = size >> PAGE_SHIFT; struct page *page = NULL; struct cma *cma = NULL; @@ -243,14 +241,12 @@ struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp) /* CMA can be used only in the context which permits sleeping */ if (cma && gfpflags_allow_blocking(gfp)) { + size_t align = get_order(size); size_t cma_align = min_t(size_t, align, CONFIG_CMA_ALIGNMENT); page = cma_alloc(cma, count, cma_align, gfp & __GFP_NOWARN); } - /* Fallback allocation of normal pages */ - if (!page) - page = alloc_pages_node(node, gfp, align); return page; } diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 795c9b095d75..706113c6bebc 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -85,6 +85,8 @@ static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size) struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { + size_t alloc_size = PAGE_ALIGN(size); + int node = dev_to_node(dev); struct page *page = NULL; u64 phys_mask; @@ -95,8 +97,14 @@ struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, gfp &= ~__GFP_ZERO; gfp |= __dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask, &phys_mask); + page = dma_alloc_contiguous(dev, alloc_size, gfp); + if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { + dma_free_contiguous(dev, page, alloc_size); + page = NULL; + } again: - page = dma_alloc_contiguous(dev, size, gfp); + if (!page) + page = alloc_pages_node(node, gfp, get_order(alloc_size)); if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { dma_free_contiguous(dev, page, size); page = NULL; diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 9484e88dabc2..9be995fc3c5a 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -295,6 +295,18 @@ static void irq_sysfs_add(int irq, struct irq_desc *desc) } } +static void irq_sysfs_del(struct irq_desc *desc) +{ + /* + * If irq_sysfs_init() has not yet been invoked (early boot), then + * irq_kobj_base is NULL and the descriptor was never added. + * kobject_del() complains about a object with no parent, so make + * it conditional. + */ + if (irq_kobj_base) + kobject_del(&desc->kobj); +} + static int __init irq_sysfs_init(void) { struct irq_desc *desc; @@ -325,6 +337,7 @@ static struct kobj_type irq_kobj_type = { }; static void irq_sysfs_add(int irq, struct irq_desc *desc) {} +static void irq_sysfs_del(struct irq_desc *desc) {} #endif /* CONFIG_SYSFS */ @@ -438,7 +451,7 @@ static void free_desc(unsigned int irq) * The sysfs entry must be serialized against a concurrent * irq_sysfs_init() as well. */ - kobject_del(&desc->kobj); + irq_sysfs_del(desc); delete_irq_desc(irq); /* diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 9873fc627d61..d9770a5393c8 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -470,6 +470,7 @@ static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer); */ static void do_optimize_kprobes(void) { + lockdep_assert_held(&text_mutex); /* * The optimization/unoptimization refers online_cpus via * stop_machine() and cpu-hotplug modifies online_cpus. 
@@ -487,9 +488,7 @@ static void do_optimize_kprobes(void) list_empty(&optimizing_list)) return; - mutex_lock(&text_mutex); arch_optimize_kprobes(&optimizing_list); - mutex_unlock(&text_mutex); } /* @@ -500,6 +499,7 @@ static void do_unoptimize_kprobes(void) { struct optimized_kprobe *op, *tmp; + lockdep_assert_held(&text_mutex); /* See comment in do_optimize_kprobes() */ lockdep_assert_cpus_held(); @@ -507,7 +507,6 @@ static void do_unoptimize_kprobes(void) if (list_empty(&unoptimizing_list)) return; - mutex_lock(&text_mutex); arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list); /* Loop free_list for disarming */ list_for_each_entry_safe(op, tmp, &freeing_list, list) { @@ -524,7 +523,6 @@ static void do_unoptimize_kprobes(void) } else list_del_init(&op->list); } - mutex_unlock(&text_mutex); } /* Reclaim all kprobes on the free_list */ @@ -556,6 +554,7 @@ static void kprobe_optimizer(struct work_struct *work) { mutex_lock(&kprobe_mutex); cpus_read_lock(); + mutex_lock(&text_mutex); /* Lock modules while optimizing kprobes */ mutex_lock(&module_mutex); @@ -583,6 +582,7 @@ static void kprobe_optimizer(struct work_struct *work) do_free_cleaned_kprobes(); mutex_unlock(&module_mutex); + mutex_unlock(&text_mutex); cpus_read_unlock(); mutex_unlock(&kprobe_mutex); diff --git a/kernel/module.c b/kernel/module.c index 5933395af9a0..9ee93421269c 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -65,9 +65,9 @@ /* * Modules' sections will be aligned on page boundaries * to ensure complete separation of code and data, but - * only when CONFIG_STRICT_MODULE_RWX=y + * only when CONFIG_ARCH_HAS_STRICT_MODULE_RWX=y */ -#ifdef CONFIG_STRICT_MODULE_RWX +#ifdef CONFIG_ARCH_HAS_STRICT_MODULE_RWX # define debug_align(X) ALIGN(X, PAGE_SIZE) #else # define debug_align(X) (X) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 2b037f195473..010d578118d6 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3904,7 +3904,7 @@ void __noreturn do_task_dead(void) static inline void sched_submit_work(struct task_struct *tsk) { - if (!tsk->state || tsk_is_pi_blocked(tsk)) + if (!tsk->state) return; /* @@ -3920,6 +3920,9 @@ static inline void sched_submit_work(struct task_struct *tsk) preempt_enable_no_resched(); } + if (tsk_is_pi_blocked(tsk)) + return; + /* * If we are going to sleep and we have plugged IO queued, * make sure to submit it to avoid deadlocks. diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index 23fbbcc414d5..6e52b67b420e 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -1131,7 +1131,15 @@ static void psi_trigger_destroy(struct kref *ref) * deadlock while waiting for psi_poll_work to acquire trigger_lock */ if (kworker_to_destroy) { + /* + * After the RCU grace period has expired, the worker + * can no longer be found through group->poll_kworker. + * But it might have been already scheduled before + * that - deschedule it cleanly before destroying it. 
+ */ kthread_cancel_delayed_work_sync(&group->poll_work); + atomic_set(&group->poll_scheduled, 0); + kthread_destroy_worker(kworker_to_destroy); } kfree(t); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 738065f765ab..de1f15969e27 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -32,6 +32,7 @@ #include <linux/shmem_fs.h> #include <linux/oom.h> #include <linux/numa.h> +#include <linux/page_owner.h> #include <asm/tlb.h> #include <asm/pgalloc.h> @@ -2516,6 +2517,9 @@ static void __split_huge_page(struct page *page, struct list_head *list, } ClearPageCompound(head); + + split_page_owner(head, HPAGE_PMD_ORDER); + /* See comment in __split_huge_page_tail() */ if (PageAnon(head)) { /* Additional pin to swap cache */ diff --git a/mm/kasan/common.c b/mm/kasan/common.c index 2277b82902d8..95d16a42db6b 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -407,8 +407,14 @@ static inline bool shadow_invalid(u8 tag, s8 shadow_byte) if (IS_ENABLED(CONFIG_KASAN_GENERIC)) return shadow_byte < 0 || shadow_byte >= KASAN_SHADOW_SCALE_SIZE; - else - return tag != (u8)shadow_byte; + + /* else CONFIG_KASAN_SW_TAGS: */ + if ((u8)shadow_byte == KASAN_TAG_INVALID) + return true; + if ((tag != KASAN_TAG_KERNEL) && (tag != (u8)shadow_byte)) + return true; + + return false; } static bool __kasan_slab_free(struct kmem_cache *cache, void *object, diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 6f5c0c517c49..26e2999af608 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3260,6 +3260,60 @@ static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css, } } +static void memcg_flush_percpu_vmstats(struct mem_cgroup *memcg) +{ + unsigned long stat[MEMCG_NR_STAT]; + struct mem_cgroup *mi; + int node, cpu, i; + + for (i = 0; i < MEMCG_NR_STAT; i++) + stat[i] = 0; + + for_each_online_cpu(cpu) + for (i = 0; i < MEMCG_NR_STAT; i++) + stat[i] += raw_cpu_read(memcg->vmstats_percpu->stat[i]); + + for (mi = memcg; mi; mi = parent_mem_cgroup(mi)) + for (i = 0; i < MEMCG_NR_STAT; i++) + atomic_long_add(stat[i], &mi->vmstats[i]); + + for_each_node(node) { + struct mem_cgroup_per_node *pn = memcg->nodeinfo[node]; + struct mem_cgroup_per_node *pi; + + for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) + stat[i] = 0; + + for_each_online_cpu(cpu) + for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) + stat[i] += raw_cpu_read( + pn->lruvec_stat_cpu->count[i]); + + for (pi = pn; pi; pi = parent_nodeinfo(pi, node)) + for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) + atomic_long_add(stat[i], &pi->lruvec_stat[i]); + } +} + +static void memcg_flush_percpu_vmevents(struct mem_cgroup *memcg) +{ + unsigned long events[NR_VM_EVENT_ITEMS]; + struct mem_cgroup *mi; + int cpu, i; + + for (i = 0; i < NR_VM_EVENT_ITEMS; i++) + events[i] = 0; + + for_each_online_cpu(cpu) + for (i = 0; i < NR_VM_EVENT_ITEMS; i++) + events[i] += raw_cpu_read( + memcg->vmstats_percpu->events[i]); + + for (mi = memcg; mi; mi = parent_mem_cgroup(mi)) + for (i = 0; i < NR_VM_EVENT_ITEMS; i++) + atomic_long_add(events[i], &mi->vmevents[i]); +} + #ifdef CONFIG_MEMCG_KMEM static int memcg_online_kmem(struct mem_cgroup *memcg) { @@ -4682,6 +4736,12 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg) { int node; + /* + * Flush percpu vmstats and vmevents to guarantee the value correctness + * on parent's and all ancestor levels. 
+ */ + memcg_flush_percpu_vmstats(memcg); + memcg_flush_percpu_vmevents(memcg); for_each_node(node) free_mem_cgroup_per_node_info(memcg, node); free_percpu(memcg->vmstats_percpu); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 272c6de1bf4e..9c9194959271 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2238,27 +2238,12 @@ static int move_freepages(struct zone *zone, unsigned int order; int pages_moved = 0; -#ifndef CONFIG_HOLES_IN_ZONE - /* - * page_zone is not safe to call in this context when - * CONFIG_HOLES_IN_ZONE is set. This bug check is probably redundant - * anyway as we check zone boundaries in move_freepages_block(). - * Remove at a later date when no bug reports exist related to - * grouping pages by mobility - */ - VM_BUG_ON(pfn_valid(page_to_pfn(start_page)) && - pfn_valid(page_to_pfn(end_page)) && - page_zone(start_page) != page_zone(end_page)); -#endif for (page = start_page; page <= end_page;) { if (!pfn_valid_within(page_to_pfn(page))) { page++; continue; } - /* Make sure we are not inadvertently changing nodes */ - VM_BUG_ON_PAGE(page_to_nid(page) != zone_to_nid(zone), page); - if (!PageBuddy(page)) { /* * We assume that pages that could be isolated for @@ -2273,6 +2258,10 @@ static int move_freepages(struct zone *zone, continue; } + /* Make sure we are not inadvertently changing nodes */ + VM_BUG_ON_PAGE(page_to_nid(page) != zone_to_nid(zone), page); + VM_BUG_ON_PAGE(page_zone(page) != zone, page); + order = page_order(page); move_to_free_area(page, &zone->free_area[order], migratetype); page += 1 << order; diff --git a/mm/z3fold.c b/mm/z3fold.c index ed19d98c9dcd..e31cd9bd4ed5 100644 --- a/mm/z3fold.c +++ b/mm/z3fold.c @@ -41,6 +41,7 @@ #include <linux/workqueue.h> #include <linux/slab.h> #include <linux/spinlock.h> +#include <linux/wait.h> #include <linux/zpool.h> #include <linux/magic.h> @@ -145,6 +146,8 @@ struct z3fold_header { * @release_wq: workqueue for safe page release * @work: work_struct for safe page release * @inode: inode for z3fold pseudo filesystem + * @destroying: bool to stop migration once we start destruction + * @isolated: int to count the number of pages currently in isolation * * This structure is allocated at pool creation time and maintains metadata * pertaining to a particular z3fold pool. @@ -163,8 +166,11 @@ struct z3fold_pool { const struct zpool_ops *zpool_ops; struct workqueue_struct *compact_wq; struct workqueue_struct *release_wq; + struct wait_queue_head isolate_wait; struct work_struct work; struct inode *inode; + bool destroying; + int isolated; }; /* @@ -769,6 +775,7 @@ static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp, goto out_c; spin_lock_init(&pool->lock); spin_lock_init(&pool->stale_lock); + init_waitqueue_head(&pool->isolate_wait); pool->unbuddied = __alloc_percpu(sizeof(struct list_head)*NCHUNKS, 2); if (!pool->unbuddied) goto out_pool; @@ -808,6 +815,15 @@ out: return NULL; } +static bool pool_isolated_are_drained(struct z3fold_pool *pool) +{ + bool ret; + + spin_lock(&pool->lock); + ret = pool->isolated == 0; + spin_unlock(&pool->lock); + return ret; +} /** * z3fold_destroy_pool() - destroys an existing z3fold pool * @pool: the z3fold pool to be destroyed @@ -817,6 +833,22 @@ out: static void z3fold_destroy_pool(struct z3fold_pool *pool) { kmem_cache_destroy(pool->c_handle); + /* + * We set pool-> destroying under lock to ensure that + * z3fold_page_isolate() sees any changes to destroying. This way we + * avoid the need for any memory barriers. 
+ */ + + spin_lock(&pool->lock); + pool->destroying = true; + spin_unlock(&pool->lock); + + /* + * We need to ensure that no pages are being migrated while we destroy + * these workqueues, as migration can queue work on either of the + * workqueues. + */ + wait_event(pool->isolate_wait, !pool_isolated_are_drained(pool)); /* * We need to destroy pool->compact_wq before pool->release_wq, @@ -1307,6 +1339,28 @@ static u64 z3fold_get_pool_size(struct z3fold_pool *pool) return atomic64_read(&pool->pages_nr); } +/* + * z3fold_dec_isolated() expects to be called while pool->lock is held. + */ +static void z3fold_dec_isolated(struct z3fold_pool *pool) +{ + assert_spin_locked(&pool->lock); + VM_BUG_ON(pool->isolated <= 0); + pool->isolated--; + + /* + * If we have no more isolated pages, we have to see if + * z3fold_destroy_pool() is waiting for a signal. + */ + if (pool->isolated == 0 && waitqueue_active(&pool->isolate_wait)) + wake_up_all(&pool->isolate_wait); +} + +static void z3fold_inc_isolated(struct z3fold_pool *pool) +{ + pool->isolated++; +} + static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode) { struct z3fold_header *zhdr; @@ -1333,6 +1387,33 @@ static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode) spin_lock(&pool->lock); if (!list_empty(&page->lru)) list_del(&page->lru); + /* + * We need to check for destruction while holding pool->lock, as + * otherwise destruction could see 0 isolated pages, and + * proceed. + */ + if (unlikely(pool->destroying)) { + spin_unlock(&pool->lock); + /* + * If this page isn't stale, somebody else holds a + * reference to it. Let't drop our refcount so that they + * can call the release logic. + */ + if (unlikely(kref_put(&zhdr->refcount, + release_z3fold_page_locked))) { + /* + * If we get here we have kref problems, so we + * should freak out. 
+ */ + WARN(1, "Z3fold is experiencing kref problems\n"); + return false; + } + z3fold_page_unlock(zhdr); + return false; + } + + + z3fold_inc_isolated(pool); spin_unlock(&pool->lock); z3fold_page_unlock(zhdr); return true; @@ -1401,6 +1482,10 @@ static int z3fold_page_migrate(struct address_space *mapping, struct page *newpa queue_work_on(new_zhdr->cpu, pool->compact_wq, &new_zhdr->work); + spin_lock(&pool->lock); + z3fold_dec_isolated(pool); + spin_unlock(&pool->lock); + page_mapcount_reset(page); put_page(page); return 0; @@ -1420,10 +1505,14 @@ static void z3fold_page_putback(struct page *page) INIT_LIST_HEAD(&page->lru); if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) { atomic64_dec(&pool->pages_nr); + spin_lock(&pool->lock); + z3fold_dec_isolated(pool); + spin_unlock(&pool->lock); return; } spin_lock(&pool->lock); list_add(&page->lru, &pool->lru); + z3fold_dec_isolated(pool); spin_unlock(&pool->lock); z3fold_page_unlock(zhdr); } diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 57fbb7ced69f..08def3a0d200 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -54,6 +54,7 @@ #include <linux/mount.h> #include <linux/pseudo_fs.h> #include <linux/migrate.h> +#include <linux/wait.h> #include <linux/pagemap.h> #include <linux/fs.h> @@ -268,6 +269,10 @@ struct zs_pool { #ifdef CONFIG_COMPACTION struct inode *inode; struct work_struct free_work; + /* A wait queue for when migration races with async_free_zspage() */ + struct wait_queue_head migration_wait; + atomic_long_t isolated_pages; + bool destroying; #endif }; @@ -1862,6 +1867,31 @@ static void dec_zspage_isolation(struct zspage *zspage) zspage->isolated--; } +static void putback_zspage_deferred(struct zs_pool *pool, + struct size_class *class, + struct zspage *zspage) +{ + enum fullness_group fg; + + fg = putback_zspage(class, zspage); + if (fg == ZS_EMPTY) + schedule_work(&pool->free_work); + +} + +static inline void zs_pool_dec_isolated(struct zs_pool *pool) +{ + VM_BUG_ON(atomic_long_read(&pool->isolated_pages) <= 0); + atomic_long_dec(&pool->isolated_pages); + /* + * There's no possibility of racing, since wait_for_isolated_drain() + * checks the isolated count under &class->lock after enqueuing + * on migration_wait. + */ + if (atomic_long_read(&pool->isolated_pages) == 0 && pool->destroying) + wake_up_all(&pool->migration_wait); +} + static void replace_sub_page(struct size_class *class, struct zspage *zspage, struct page *newpage, struct page *oldpage) { @@ -1931,6 +1961,7 @@ static bool zs_page_isolate(struct page *page, isolate_mode_t mode) */ if (!list_empty(&zspage->list) && !is_zspage_isolated(zspage)) { get_zspage_mapping(zspage, &class_idx, &fullness); + atomic_long_inc(&pool->isolated_pages); remove_zspage(class, zspage, fullness); } @@ -2030,8 +2061,16 @@ static int zs_page_migrate(struct address_space *mapping, struct page *newpage, * Page migration is done so let's putback isolated zspage to * the list if @page is final isolated subpage in the zspage. */ - if (!is_zspage_isolated(zspage)) - putback_zspage(class, zspage); + if (!is_zspage_isolated(zspage)) { + /* + * We cannot race with zs_destroy_pool() here because we wait + * for isolation to hit zero before we start destroying. + * Also, we ensure that everyone can see pool->destroying before + * we start waiting. 
+ */ + putback_zspage_deferred(pool, class, zspage); + zs_pool_dec_isolated(pool); + } reset_page(page); put_page(page); @@ -2077,13 +2116,12 @@ static void zs_page_putback(struct page *page) spin_lock(&class->lock); dec_zspage_isolation(zspage); if (!is_zspage_isolated(zspage)) { - fg = putback_zspage(class, zspage); /* * Due to page_lock, we cannot free zspage immediately * so let's defer. */ - if (fg == ZS_EMPTY) - schedule_work(&pool->free_work); + putback_zspage_deferred(pool, class, zspage); + zs_pool_dec_isolated(pool); } spin_unlock(&class->lock); } @@ -2107,8 +2145,36 @@ static int zs_register_migration(struct zs_pool *pool) return 0; } +static bool pool_isolated_are_drained(struct zs_pool *pool) +{ + return atomic_long_read(&pool->isolated_pages) == 0; +} + +/* Function for resolving migration */ +static void wait_for_isolated_drain(struct zs_pool *pool) +{ + + /* + * We're in the process of destroying the pool, so there are no + * active allocations. zs_page_isolate() fails for completely free + * zspages, so we need only wait for the zs_pool's isolated + * count to hit zero. + */ + wait_event(pool->migration_wait, + pool_isolated_are_drained(pool)); +} + static void zs_unregister_migration(struct zs_pool *pool) { + pool->destroying = true; + /* + * We need a memory barrier here to ensure global visibility of + * pool->destroying. Thus pool->isolated pages will either be 0 in which + * case we don't care, or it will be > 0 and pool->destroying will + * ensure that we wake up once isolation hits 0. + */ + smp_mb(); + wait_for_isolated_drain(pool); /* This can block */ flush_work(&pool->free_work); iput(pool->inode); } @@ -2346,6 +2412,8 @@ struct zs_pool *zs_create_pool(const char *name) if (!pool->name) goto err; + init_waitqueue_head(&pool->migration_wait); + if (create_cache(pool)) goto err; diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 0b2df09b2554..78ae6e8c953d 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1496,7 +1496,7 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc, struct ceph_osds up, acting; bool force_resend = false; bool unpaused = false; - bool legacy_change; + bool legacy_change = false; bool split = false; bool sort_bitwise = ceph_osdmap_flag(osdc, CEPH_OSDMAP_SORTBITWISE); bool recovery_deletes = ceph_osdmap_flag(osdc, @@ -1584,15 +1584,14 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc, t->osd = acting.primary; } - if (unpaused || legacy_change || force_resend || - (split && con && CEPH_HAVE_FEATURE(con->peer_features, - RESEND_ON_SPLIT))) + if (unpaused || legacy_change || force_resend || split) ct_res = CALC_TARGET_NEED_RESEND; else ct_res = CALC_TARGET_NO_ACTION; out: - dout("%s t %p -> ct_res %d osd %d\n", __func__, t, ct_res, t->osd); + dout("%s t %p -> %d%d%d%d ct_res %d osd%d\n", __func__, t, unpaused, + legacy_change, force_resend, split, ct_res, t->osd); return ct_res; } diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c index f5597503c771..e9ef4ca6a655 100644 --- a/tools/hv/hv_kvp_daemon.c +++ b/tools/hv/hv_kvp_daemon.c @@ -809,7 +809,7 @@ kvp_get_ip_info(int family, char *if_name, int op, int sn_offset = 0; int error = 0; char *buffer; - struct hv_kvp_ipaddr_value *ip_buffer; + struct hv_kvp_ipaddr_value *ip_buffer = NULL; char cidr_mask[5]; /* /xyz */ int weight; int i; diff --git a/virt/kvm/arm/mmio.c b/virt/kvm/arm/mmio.c index a8a6a0c883f1..6af5c91337f2 100644 --- a/virt/kvm/arm/mmio.c +++ b/virt/kvm/arm/mmio.c @@ -86,6 +86,12 @@ int 
kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) unsigned int len; int mask; + /* Detect an already handled MMIO return */ + if (unlikely(!vcpu->mmio_needed)) + return 0; + + vcpu->mmio_needed = 0; + if (!run->mmio.is_write) { len = run->mmio.len; if (len > sizeof(unsigned long)) @@ -188,6 +194,7 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, run->mmio.is_write = is_write; run->mmio.phys_addr = fault_ipa; run->mmio.len = len; + vcpu->mmio_needed = 1; if (!ret) { /* We handled the access successfully in the kernel. */ diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c index bdbc297d06fb..e621b5d45b27 100644 --- a/virt/kvm/arm/vgic/vgic-init.c +++ b/virt/kvm/arm/vgic/vgic-init.c @@ -8,6 +8,7 @@ #include <linux/cpu.h> #include <linux/kvm_host.h> #include <kvm/arm_vgic.h> +#include <asm/kvm_emulate.h> #include <asm/kvm_mmu.h> #include "vgic.h" @@ -164,12 +165,18 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis) irq->vcpu = NULL; irq->target_vcpu = vcpu0; kref_init(&irq->refcount); - if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) { + switch (dist->vgic_model) { + case KVM_DEV_TYPE_ARM_VGIC_V2: irq->targets = 0; irq->group = 0; - } else { + break; + case KVM_DEV_TYPE_ARM_VGIC_V3: irq->mpidr = 0; irq->group = 1; + break; + default: + kfree(dist->spis); + return -EINVAL; } } return 0; @@ -209,7 +216,6 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) irq->intid = i; irq->vcpu = NULL; irq->target_vcpu = vcpu; - irq->targets = 1U << vcpu->vcpu_id; kref_init(&irq->refcount); if (vgic_irq_is_sgi(i)) { /* SGIs */ @@ -219,11 +225,6 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) /* PPIs */ irq->config = VGIC_CONFIG_LEVEL; } - - if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) - irq->group = 1; - else - irq->group = 0; } if (!irqchip_in_kernel(vcpu->kvm)) @@ -286,10 +287,19 @@ int vgic_init(struct kvm *kvm) for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) { struct vgic_irq *irq = &vgic_cpu->private_irqs[i]; - if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) + switch (dist->vgic_model) { + case KVM_DEV_TYPE_ARM_VGIC_V3: irq->group = 1; - else + irq->mpidr = kvm_vcpu_get_mpidr_aff(vcpu); + break; + case KVM_DEV_TYPE_ARM_VGIC_V2: irq->group = 0; + irq->targets = 1U << idx; + break; + default: + ret = -EINVAL; + goto out; + } } } |