44 files changed, 805 insertions, 391 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index 53cf3971dfc3..61360e27715f 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -32,5 +32,4 @@ config DRM_AMDGPU_GART_DEBUGFS Selecting this option creates a debugfs file to inspect the mapped pages. Uses more memory for housekeeping, enable only for debugging. -source "drivers/gpu/drm/amd/powerplay/Kconfig" source "drivers/gpu/drm/amd/acp/Kconfig" diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 9ec262d4b8a2..248a05d02917 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -111,14 +111,10 @@ amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o amdgpu-$(CONFIG_MMU_NOTIFIER) += amdgpu_mn.o -ifneq ($(CONFIG_DRM_AMD_POWERPLAY),) - include $(FULL_AMD_PATH)/powerplay/Makefile amdgpu-y += $(AMD_POWERPLAY_FILES) -endif - obj-$(CONFIG_DRM_AMDGPU)+= amdgpu.o CFLAGS_amdgpu_trace_points.o := -I$(src) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 235f3902643a..039b57e4644c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1943,6 +1943,7 @@ struct amdgpu_ip_block_status { bool valid; bool sw; bool hw; + bool late_initialized; bool hang; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index 22c11e7698c8..2e3a0543760d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -769,8 +769,10 @@ static void amdgpu_connector_destroy(struct drm_connector *connector) { struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); - if (amdgpu_connector->ddc_bus->has_aux) + if (amdgpu_connector->ddc_bus->has_aux) { drm_dp_aux_unregister(&amdgpu_connector->ddc_bus->aux); + amdgpu_connector->ddc_bus->has_aux = false; + } amdgpu_connector_free_edid(connector); kfree(amdgpu_connector->con_priv); drm_connector_unregister(connector); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 99a15cad6789..a58513f271e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1424,6 +1424,7 @@ static int amdgpu_late_init(struct amdgpu_device *adev) DRM_ERROR("late_init of IP block <%s> failed %d\n", adev->ip_blocks[i].funcs->name, r); return r; } + adev->ip_block_status[i].late_initialized = true; } } @@ -1469,8 +1470,11 @@ static int amdgpu_fini(struct amdgpu_device *adev) } for (i = adev->num_ip_blocks - 1; i >= 0; i--) { + if (!adev->ip_block_status[i].late_initialized) + continue; if (adev->ip_blocks[i].funcs->late_fini) adev->ip_blocks[i].funcs->late_fini((void *)adev); + adev->ip_block_status[i].late_initialized = false; } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index dbe89fb25694..71ed27eb3dde 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -174,7 +174,6 @@ module_param_named(sched_jobs, amdgpu_sched_jobs, int, 0444); MODULE_PARM_DESC(sched_hw_submission, "the max number of HW submissions (default 2)"); module_param_named(sched_hw_submission, amdgpu_sched_hw_submission, int, 0444); -#ifdef CONFIG_DRM_AMD_POWERPLAY MODULE_PARM_DESC(powerplay, "Powerplay component (1 = enable, 0 = disable, -1 = auto (default))"); 
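
A minimal standalone sketch of the init/teardown pairing the amdgpu_device.c hunks above establish with the new late_initialized flag (names illustrative, not the amdgpu API): teardown runs late_fini only for blocks whose late_init actually completed, in reverse order.

#include <stdbool.h>
#include <stddef.h>

struct ip_block {
	int (*late_init)(void);
	void (*late_fini)(void);
	bool late_initialized;
};

static int blocks_late_init(struct ip_block *b, size_t n)
{
	for (size_t i = 0; i < n; i++) {
		if (b[i].late_init && b[i].late_init())
			return -1;              /* caller unwinds via late_fini */
		b[i].late_initialized = true;   /* only marked on success */
	}
	return 0;
}

static void blocks_late_fini(struct ip_block *b, size_t n)
{
	for (size_t i = n; i-- > 0;) {          /* reverse order */
		if (!b[i].late_initialized)
			continue;               /* late_init never ran: skip */
		if (b[i].late_fini)
			b[i].late_fini();
		b[i].late_initialized = false;
	}
}
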
module_param_named(powerplay, amdgpu_powerplay, int, 0444); @@ -183,7 +182,6 @@ module_param_named(powercontainment, amdgpu_powercontainment, int, 0444); MODULE_PARM_DESC(ppfeaturemask, "all power features enabled (default))"); module_param_named(ppfeaturemask, amdgpu_pp_feature_mask, int, 0444); -#endif MODULE_PARM_DESC(sclkdeepsleep, "SCLK Deep Sleep (1 = enable (default), 0 = disable)"); module_param_named(sclkdeepsleep, amdgpu_sclk_deep_sleep_en, int, 0444); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c index 34bab616588c..91d367399956 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c @@ -220,6 +220,7 @@ void amdgpu_i2c_destroy(struct amdgpu_i2c_chan *i2c) { if (!i2c) return; + WARN_ON(i2c->has_aux); i2c_del_adapter(&i2c->adapter); kfree(i2c); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c index 68ad24101a36..7532ff822aa7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c @@ -42,7 +42,6 @@ static int amdgpu_powerplay_init(struct amdgpu_device *adev) amd_pp = &(adev->powerplay); if (adev->pp_enabled) { -#ifdef CONFIG_DRM_AMD_POWERPLAY struct amd_pp_init *pp_init; pp_init = kzalloc(sizeof(struct amd_pp_init), GFP_KERNEL); @@ -55,7 +54,6 @@ static int amdgpu_powerplay_init(struct amdgpu_device *adev) pp_init->device = amdgpu_cgs_create_device(adev); ret = amd_powerplay_init(pp_init, amd_pp); kfree(pp_init); -#endif } else { amd_pp->pp_handle = (void *)adev; @@ -97,7 +95,6 @@ static int amdgpu_pp_early_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int ret = 0; -#ifdef CONFIG_DRM_AMD_POWERPLAY switch (adev->asic_type) { case CHIP_POLARIS11: case CHIP_POLARIS10: @@ -120,9 +117,6 @@ static int amdgpu_pp_early_init(void *handle) adev->pp_enabled = false; break; } -#else - adev->pp_enabled = false; -#endif ret = amdgpu_powerplay_init(adev); if (ret) @@ -144,12 +138,11 @@ static int amdgpu_pp_late_init(void *handle) ret = adev->powerplay.ip_funcs->late_init( adev->powerplay.pp_handle); -#ifdef CONFIG_DRM_AMD_POWERPLAY if (adev->pp_enabled && adev->pm.dpm_enabled) { amdgpu_pm_sysfs_init(adev); amdgpu_dpm_dispatch_task(adev, AMD_PP_EVENT_COMPLETE_INIT, NULL, NULL); } -#endif + return ret; } @@ -162,10 +155,8 @@ static int amdgpu_pp_sw_init(void *handle) ret = adev->powerplay.ip_funcs->sw_init( adev->powerplay.pp_handle); -#ifdef CONFIG_DRM_AMD_POWERPLAY if (adev->pp_enabled) adev->pm.dpm_enabled = true; -#endif return ret; } @@ -216,7 +207,6 @@ static int amdgpu_pp_hw_fini(void *handle) static void amdgpu_pp_late_fini(void *handle) { -#ifdef CONFIG_DRM_AMD_POWERPLAY struct amdgpu_device *adev = (struct amdgpu_device *)handle; if (adev->pp_enabled) { @@ -227,7 +217,6 @@ static void amdgpu_pp_late_fini(void *handle) if (adev->powerplay.ip_funcs->late_fini) adev->powerplay.ip_funcs->late_fini( adev->powerplay.pp_handle); -#endif } static int amdgpu_pp_suspend(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 7a05f79818f1..cb3d252f3c78 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -273,7 +273,6 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev) amdgpu_bo_unreserve(*bo); - fw_offset = 0; for (i = 0; i < AMDGPU_UCODE_ID_MAXIMUM; i++) { ucode = &adev->firmware.ucode[i]; if (ucode->fw) { diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 3b03558ddb01..7fe8fd884f06 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -699,6 +699,20 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) case 0x05000009: /* clock table */ break; + case 0x0500000c: /* hw config */ + switch (p->adev->asic_type) { +#ifdef CONFIG_DRM_AMDGPU_CIK + case CHIP_KAVERI: + case CHIP_MULLINS: +#endif + case CHIP_CARRIZO: + break; + default: + r = -EINVAL; + goto out; + } + break; + case 0x03000001: /* encode */ r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 10, idx + 9, *size, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index bc4b22c6fc08..06f24322e7c3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -878,13 +878,13 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, * allocation size to the fragment size. */ - const uint64_t frag_align = 1 << AMDGPU_LOG2_PAGES_PER_FRAG; + /* SI and newer are optimized for 64KB */ + uint64_t frag_flags = AMDGPU_PTE_FRAG(AMDGPU_LOG2_PAGES_PER_FRAG); + uint64_t frag_align = 1 << AMDGPU_LOG2_PAGES_PER_FRAG; uint64_t frag_start = ALIGN(start, frag_align); uint64_t frag_end = end & ~(frag_align - 1); - uint32_t frag; - /* system pages are non continuously */ if (params->src || !(flags & AMDGPU_PTE_VALID) || (frag_start >= frag_end)) { @@ -893,10 +893,6 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, return; } - /* use more than 64KB fragment size if possible */ - frag = lower_32_bits(frag_start | frag_end); - frag = likely(frag) ? __ffs(frag) : 31; - /* handle the 4K area at the beginning */ if (start != frag_start) { amdgpu_vm_update_ptes(params, vm, start, frag_start, @@ -906,7 +902,7 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, /* handle the area in the middle */ amdgpu_vm_update_ptes(params, vm, frag_start, frag_end, dst, - flags | AMDGPU_PTE_FRAG(frag)); + flags | frag_flags); /* handle the 4K area at the end */ if (frag_end != end) { diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 678f5eb6cbc2..f264b8f17ad1 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -3159,6 +3159,7 @@ static int dce_v11_0_sw_fini(void *handle) dce_v11_0_afmt_fini(adev); + drm_mode_config_cleanup(adev->ddev); adev->mode_info.mode_config_initialized = false; return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c index a754f2522ba2..c2bd9f045532 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c @@ -99,15 +99,15 @@ static void dce_virtual_stop_mc_access(struct amdgpu_device *adev, struct amdgpu_mode_mc_save *save) { switch (adev->asic_type) { +#ifdef CONFIG_DRM_AMDGPU_CIK case CHIP_BONAIRE: case CHIP_HAWAII: case CHIP_KAVERI: case CHIP_KABINI: case CHIP_MULLINS: -#ifdef CONFIG_DRM_AMDGPU_CIK dce_v8_0_disable_dce(adev); -#endif break; +#endif case CHIP_FIJI: case CHIP_TONGA: dce_v10_0_disable_dce(adev); diff --git a/drivers/gpu/drm/amd/powerplay/Kconfig b/drivers/gpu/drm/amd/powerplay/Kconfig deleted file mode 100644 index af380335b425..000000000000 --- a/drivers/gpu/drm/amd/powerplay/Kconfig +++ /dev/null @@ -1,6 +0,0 @@ -config DRM_AMD_POWERPLAY - bool "Enable AMD powerplay component" - depends on DRM_AMDGPU - 
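
The amdgpu_vm.c hunks above drop the per-range fragment probe and always use the fixed 64KiB fragment hint; the range still splits into an unaligned 4KiB head, a fragment-aligned middle, and a 4KiB tail. A small self-contained sketch of that split (values arbitrary):

#include <stdint.h>
#include <stdio.h>

#define LOG2_PAGES_PER_FRAG 4                  /* 16 x 4KiB pages = 64KiB */

int main(void)
{
	uint64_t start = 3, end = 70;          /* PTE indices, example only */
	uint64_t frag_align = 1ULL << LOG2_PAGES_PER_FRAG;
	uint64_t frag_start = (start + frag_align - 1) & ~(frag_align - 1);
	uint64_t frag_end = end & ~(frag_align - 1);

	/* head/tail use plain 4KiB PTEs; the middle gets the
	 * AMDGPU_PTE_FRAG(LOG2_PAGES_PER_FRAG) flag */
	printf("head [%llu,%llu) mid [%llu,%llu) tail [%llu,%llu)\n",
	       (unsigned long long)start, (unsigned long long)frag_start,
	       (unsigned long long)frag_start, (unsigned long long)frag_end,
	       (unsigned long long)frag_end, (unsigned long long)end);
	return 0;       /* prints: head [3,16) mid [16,64) tail [64,70) */
}
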
default n - help - select this option will enable AMD powerplay component. diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 7f4e8adec8a8..bfb2efd8d4d4 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1786,15 +1786,6 @@ void i915_reset(struct drm_i915_private *dev_priv) goto error; } - /* - * rps/rc6 re-init is necessary to restore state lost after the - * reset and the re-install of gt irqs. Skip for ironlake per - * previous concerns that it doesn't respond well to some forms - * of re-init after reset. - */ - intel_sanitize_gt_powersave(dev_priv); - intel_autoenable_gt_powersave(dev_priv); - wakeup: wake_up_bit(&error->flags, I915_RESET_IN_PROGRESS); return; @@ -1872,7 +1863,17 @@ static int i915_pm_resume(struct device *kdev) /* freeze: before creating the hibernation_image */ static int i915_pm_freeze(struct device *kdev) { - return i915_pm_suspend(kdev); + int ret; + + ret = i915_pm_suspend(kdev); + if (ret) + return ret; + + ret = i915_gem_freeze(kdev_to_i915(kdev)); + if (ret) + return ret; + + return 0; } static int i915_pm_freeze_late(struct device *kdev) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 4dd307ed4336..8b9ee4e390c0 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1984,11 +1984,11 @@ struct drm_i915_private { struct vlv_s0ix_state vlv_s0ix_state; enum { - I915_SKL_SAGV_UNKNOWN = 0, - I915_SKL_SAGV_DISABLED, - I915_SKL_SAGV_ENABLED, - I915_SKL_SAGV_NOT_CONTROLLED - } skl_sagv_status; + I915_SAGV_UNKNOWN = 0, + I915_SAGV_DISABLED, + I915_SAGV_ENABLED, + I915_SAGV_NOT_CONTROLLED + } sagv_status; struct { /* @@ -2276,21 +2276,19 @@ struct drm_i915_gem_object { /** Record of address bit 17 of each page at last unbind. 
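
i915_pm_freeze() above now chains two steps and stops at the first failure; condensed, with stand-in names for i915_pm_suspend()/i915_gem_freeze():

static int do_suspend(void) { return 0; }     /* quiesce the device first */
static int do_gem_freeze(void) { return 0; }  /* then shrink for the image */

static int pm_freeze(void)
{
	int ret = do_suspend();
	if (ret)
		return ret;
	return do_gem_freeze();
}
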
*/ unsigned long *bit_17; - union { - /** for phy allocated objects */ - struct drm_dma_handle *phys_handle; - - struct i915_gem_userptr { - uintptr_t ptr; - unsigned read_only :1; - unsigned workers :4; + struct i915_gem_userptr { + uintptr_t ptr; + unsigned read_only :1; + unsigned workers :4; #define I915_GEM_USERPTR_MAX_WORKERS 15 - struct i915_mm_struct *mm; - struct i915_mmu_object *mmu_object; - struct work_struct *work; - } userptr; - }; + struct i915_mm_struct *mm; + struct i915_mmu_object *mmu_object; + struct work_struct *work; + } userptr; + + /** for phys allocated objects */ + struct drm_dma_handle *phys_handle; }; static inline struct drm_i915_gem_object * @@ -3076,6 +3074,7 @@ int i915_gem_wait_ioctl(struct drm_device *dev, void *data, void i915_gem_load_init(struct drm_device *dev); void i915_gem_load_cleanup(struct drm_device *dev); void i915_gem_load_init_fences(struct drm_i915_private *dev_priv); +int i915_gem_freeze(struct drm_i915_private *dev_priv); int i915_gem_freeze_late(struct drm_i915_private *dev_priv); void *i915_gem_object_alloc(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 2c8106758922..1418c1c522cb 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2616,8 +2616,6 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine) list_for_each_entry_continue(request, &engine->request_list, link) if (request->ctx == incomplete_ctx) reset_request(request); - - engine->i915->gt.active_engines &= ~intel_engine_flag(engine); } void i915_gem_reset(struct drm_i915_private *dev_priv) @@ -2628,9 +2626,15 @@ void i915_gem_reset(struct drm_i915_private *dev_priv) for_each_engine(engine, dev_priv) i915_gem_reset_engine(engine); - mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0); i915_gem_restore_fences(&dev_priv->drm); + + if (dev_priv->gt.awake) { + intel_sanitize_gt_powersave(dev_priv); + intel_enable_gt_powersave(dev_priv); + if (INTEL_GEN(dev_priv) >= 6) + gen6_rps_busy(dev_priv); + } } static void nop_submit_request(struct drm_i915_gem_request *request) @@ -4589,6 +4593,19 @@ void i915_gem_load_cleanup(struct drm_device *dev) rcu_barrier(); } +int i915_gem_freeze(struct drm_i915_private *dev_priv) +{ + intel_runtime_pm_get(dev_priv); + + mutex_lock(&dev_priv->drm.struct_mutex); + i915_gem_shrink_all(dev_priv); + mutex_unlock(&dev_priv->drm.struct_mutex); + + intel_runtime_pm_put(dev_priv); + + return 0; +} + int i915_gem_freeze_late(struct drm_i915_private *dev_priv) { struct drm_i915_gem_object *obj; @@ -4612,7 +4629,8 @@ int i915_gem_freeze_late(struct drm_i915_private *dev_priv) * the objects as well. 
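
The i915_drv.h hunk above moves userptr and phys_handle out of a union into separate fields, presumably because both can be live on one object; in a union, storing one member clobbers the other. A tiny illustration of that hazard:

#include <assert.h>
#include <stdint.h>

struct obj_union { union { void *phys_handle; uintptr_t userptr; }; };
struct obj_split { void *phys_handle; uintptr_t userptr; };

int main(void)
{
	struct obj_union u = { .phys_handle = (void *)0x1000 };
	u.userptr = 0x2000;     /* overwrites phys_handle: same storage */

	struct obj_split s = { .phys_handle = (void *)0x1000 };
	s.userptr = 0x2000;     /* independent field, nothing lost */
	assert(s.phys_handle == (void *)0x1000);
	return 0;
}
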
*/ - i915_gem_shrink_all(dev_priv); + mutex_lock(&dev_priv->drm.struct_mutex); + i915_gem_shrink(dev_priv, -1UL, I915_SHRINK_UNBOUND); for (p = phases; *p; p++) { list_for_each_entry(obj, *p, global_list) { @@ -4620,6 +4638,7 @@ int i915_gem_freeze_late(struct drm_i915_private *dev_priv) obj->base.write_domain = I915_GEM_DOMAIN_CPU; } } + mutex_unlock(&dev_priv->drm.struct_mutex); return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 33c85227643d..222796f5afb2 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -451,8 +451,8 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj, 0, ggtt->mappable_end, DRM_MM_SEARCH_DEFAULT, DRM_MM_CREATE_DEFAULT); - if (ret) - return ERR_PTR(ret); + if (ret) /* no inactive aperture space, use cpu reloc */ + return NULL; } else { ret = i915_vma_put_fence(vma); if (ret) { diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 40978bc12ceb..8832f8ec1583 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -328,6 +328,7 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) switch (state) { case FENCE_COMPLETE: + request->engine->last_submitted_seqno = request->fence.seqno; request->engine->submit_request(request); break; @@ -641,8 +642,8 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) &request->submitq); request->emitted_jiffies = jiffies; - request->previous_seqno = engine->last_submitted_seqno; - engine->last_submitted_seqno = request->fence.seqno; + request->previous_seqno = engine->last_pending_seqno; + engine->last_pending_seqno = request->fence.seqno; i915_gem_active_set(&engine->last_request, request); list_add_tail(&request->link, &engine->request_list); list_add_tail(&request->ring_link, &ring->request_list); diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 43358e18d34c..3106dcc06fe9 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -451,6 +451,18 @@ int i915_guc_wq_reserve(struct drm_i915_gem_request *request) return ret; } +void i915_guc_wq_unreserve(struct drm_i915_gem_request *request) +{ + const size_t wqi_size = sizeof(struct guc_wq_item); + struct i915_guc_client *gc = request->i915->guc.execbuf_client; + + GEM_BUG_ON(READ_ONCE(gc->wq_rsvd) < wqi_size); + + spin_lock(&gc->wq_lock); + gc->wq_rsvd -= wqi_size; + spin_unlock(&gc->wq_lock); +} + /* Construct a Work Item and append it to the GuC's Work Queue */ static void guc_wq_item_append(struct i915_guc_client *gc, struct drm_i915_gem_request *rq) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index c128fdbd24e4..3fc286cd1157 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -350,6 +350,9 @@ void gen6_reset_rps_interrupts(struct drm_i915_private *dev_priv) void gen6_enable_rps_interrupts(struct drm_i915_private *dev_priv) { + if (READ_ONCE(dev_priv->rps.interrupts_enabled)) + return; + spin_lock_irq(&dev_priv->irq_lock); WARN_ON_ONCE(dev_priv->rps.pm_iir); WARN_ON_ONCE(I915_READ(gen6_pm_iir(dev_priv)) & dev_priv->pm_rps_events); @@ -368,6 +371,9 @@ u32 gen6_sanitize_rps_pm_mask(struct drm_i915_private *dev_priv, u32 mask) void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv) { + if 
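
i915_guc_wq_unreserve() above gives request-construction error paths a way to return the workqueue space taken by i915_guc_wq_reserve(). A sketch of that reserve/rollback accounting, with a pthread mutex standing in for the client's wq_lock:

#include <assert.h>
#include <pthread.h>
#include <stddef.h>

#define WQI_SIZE 64   /* stand-in for sizeof(struct guc_wq_item) */

static pthread_mutex_t wq_lock = PTHREAD_MUTEX_INITIALIZER;
static size_t wq_rsvd, wq_size = 4096;

static int wq_reserve(void)
{
	int ret = 0;

	pthread_mutex_lock(&wq_lock);
	if (wq_rsvd + WQI_SIZE > wq_size)
		ret = -1;               /* no space in the shared queue */
	else
		wq_rsvd += WQI_SIZE;
	pthread_mutex_unlock(&wq_lock);
	return ret;
}

static void wq_unreserve(void)
{
	pthread_mutex_lock(&wq_lock);
	assert(wq_rsvd >= WQI_SIZE);
	wq_rsvd -= WQI_SIZE;            /* request failed: roll back */
	pthread_mutex_unlock(&wq_lock);
}
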
(!READ_ONCE(dev_priv->rps.interrupts_enabled)) + return; + spin_lock_irq(&dev_priv->irq_lock); dev_priv->rps.interrupts_enabled = false; @@ -2816,7 +2822,7 @@ semaphore_wait_to_signaller_ring(struct intel_engine_cs *engine, u32 ipehr, if (engine == signaller) continue; - if (offset == signaller->semaphore.signal_ggtt[engine->id]) + if (offset == signaller->semaphore.signal_ggtt[engine->hw_id]) return signaller; } } else { @@ -2826,13 +2832,13 @@ semaphore_wait_to_signaller_ring(struct intel_engine_cs *engine, u32 ipehr, if(engine == signaller) continue; - if (sync_bits == signaller->semaphore.mbox.wait[engine->id]) + if (sync_bits == signaller->semaphore.mbox.wait[engine->hw_id]) return signaller; } } - DRM_DEBUG_DRIVER("No signaller ring found for ring %i, ipehr 0x%08x, offset 0x%016llx\n", - engine->id, ipehr, offset); + DRM_DEBUG_DRIVER("No signaller ring found for %s, ipehr 0x%08x, offset 0x%016llx\n", + engine->name, ipehr, offset); return ERR_PTR(-ENODEV); } diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 9bad14d22c95..495611b7068d 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -578,6 +578,36 @@ int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine) return 0; } +static void cancel_fake_irq(struct intel_engine_cs *engine) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + + del_timer_sync(&b->hangcheck); + del_timer_sync(&b->fake_irq); + clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); +} + +void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + + cancel_fake_irq(engine); + spin_lock(&b->lock); + + __intel_breadcrumbs_disable_irq(b); + if (intel_engine_has_waiter(engine)) { + b->timeout = wait_timeout(); + __intel_breadcrumbs_enable_irq(b); + if (READ_ONCE(b->irq_posted)) + wake_up_process(b->first_wait->tsk); + } else { + /* sanitize the IMR and unmask any auxiliary interrupts */ + irq_disable(engine); + } + + spin_unlock(&b->lock); +} + void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) { struct intel_breadcrumbs *b = &engine->breadcrumbs; @@ -585,8 +615,7 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) if (!IS_ERR_OR_NULL(b->signaler)) kthread_stop(b->signaler); - del_timer_sync(&b->hangcheck); - del_timer_sync(&b->fake_irq); + cancel_fake_irq(engine); } unsigned int intel_kick_waiters(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index ad8d712ae84c..fbcfed63a76e 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3408,6 +3408,8 @@ static void skylake_update_primary_plane(struct drm_plane *plane, dst_w--; dst_h--; + intel_crtc->dspaddr_offset = surf_addr; + intel_crtc->adjusted_x = src_x; intel_crtc->adjusted_y = src_y; @@ -3629,6 +3631,7 @@ void intel_finish_reset(struct drm_i915_private *dev_priv) intel_runtime_pm_disable_interrupts(dev_priv); intel_runtime_pm_enable_interrupts(dev_priv); + intel_pps_unlock_regs_wa(dev_priv); intel_modeset_init_hw(dev); spin_lock_irq(&dev_priv->irq_lock); @@ -9509,6 +9512,24 @@ static void ironlake_compute_dpll(struct intel_crtc *intel_crtc, if (intel_crtc_has_dp_encoder(crtc_state)) dpll |= DPLL_SDVO_HIGH_SPEED; + /* + * The high speed IO clock is only really required for + * SDVO/HDMI/DP, but we also enable it for CRT to make it + * possible to share the DPLL between CRT and 
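
The gen6_{enable,disable}_rps_interrupts hunks above add unlocked early returns so repeated calls are cheap no-ops and only real state changes take irq_lock. The shape of that guard, using C11 atomics in place of READ_ONCE():

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

static pthread_mutex_t irq_lock = PTHREAD_MUTEX_INITIALIZER;
static atomic_bool rps_enabled;

static void rps_interrupts_enable(void)
{
	if (atomic_load_explicit(&rps_enabled, memory_order_relaxed))
		return;                 /* already enabled: skip the lock */

	pthread_mutex_lock(&irq_lock);
	/* ... unmask/program the PM interrupt registers here ... */
	atomic_store_explicit(&rps_enabled, true, memory_order_relaxed);
	pthread_mutex_unlock(&irq_lock);
}
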
HDMI. Enabling + * the clock needlessly does no real harm, except use up a + * bit of power potentially. + * + * We'll limit this to IVB with 3 pipes, since it has only two + * DPLLs and so DPLL sharing is the only way to get three pipes + * driving PCH ports at the same time. On SNB we could do this, + * and potentially avoid enabling the second DPLL, but it's not + * clear if it's a win or loss power wise. No point in doing + * this on ILK at all since it has a fixed DPLL<->pipe mapping. + */ + if (INTEL_INFO(dev_priv)->num_pipes == 3 && + intel_crtc_has_type(crtc_state, INTEL_OUTPUT_ANALOG)) + dpll |= DPLL_SDVO_HIGH_SPEED; + /* compute bitmask from p1 value */ dpll |= (1 << (crtc_state->dpll.p1 - 1)) << DPLL_FPA01_P1_POST_DIV_SHIFT; /* also FPA1 */ @@ -14364,8 +14385,8 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) * SKL workaround: bspec recommends we disable the SAGV when we * have more than one pipe enabled */ - if (IS_SKYLAKE(dev_priv) && !skl_can_enable_sagv(state)) - skl_disable_sagv(dev_priv); + if (!intel_can_enable_sagv(state)) + intel_disable_sagv(dev_priv); intel_modeset_verify_disabled(dev); } @@ -14422,9 +14443,8 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) intel_modeset_verify_crtc(crtc, old_crtc_state, crtc->state); } - if (IS_SKYLAKE(dev_priv) && intel_state->modeset && - skl_can_enable_sagv(state)) - skl_enable_sagv(dev_priv); + if (intel_state->modeset && intel_can_enable_sagv(state)) + intel_enable_sagv(dev_priv); drm_atomic_helper_commit_hw_done(state); diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index acd0c51f74d5..14a3cf0b7213 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -4337,7 +4337,7 @@ intel_dp_unset_edid(struct intel_dp *intel_dp) intel_dp->has_audio = false; } -static void +static enum drm_connector_status intel_dp_long_pulse(struct intel_connector *intel_connector) { struct drm_connector *connector = &intel_connector->base; @@ -4361,7 +4361,7 @@ intel_dp_long_pulse(struct intel_connector *intel_connector) else status = connector_status_disconnected; - if (status != connector_status_connected) { + if (status == connector_status_disconnected) { intel_dp->compliance_test_active = 0; intel_dp->compliance_test_type = 0; intel_dp->compliance_test_data = 0; @@ -4423,8 +4423,8 @@ intel_dp_long_pulse(struct intel_connector *intel_connector) intel_dp->aux.i2c_defer_count = 0; intel_dp_set_edid(intel_dp); - - status = connector_status_connected; + if (is_edp(intel_dp) || intel_connector->detect_edid) + status = connector_status_connected; intel_dp->detect_done = true; /* Try to read the source of the interrupt */ @@ -4443,12 +4443,11 @@ intel_dp_long_pulse(struct intel_connector *intel_connector) } out: - if ((status != connector_status_connected) && - (intel_dp->is_mst == false)) + if (status != connector_status_connected && !intel_dp->is_mst) intel_dp_unset_edid(intel_dp); intel_display_power_put(to_i915(dev), power_domain); - return; + return status; } static enum drm_connector_status @@ -4457,7 +4456,7 @@ intel_dp_detect(struct drm_connector *connector, bool force) struct intel_dp *intel_dp = intel_attached_dp(connector); struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); struct intel_encoder *intel_encoder = &intel_dig_port->base; - struct intel_connector *intel_connector = to_intel_connector(connector); + enum drm_connector_status status = connector->status; DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n", connector->base.id,
connector->name); @@ -4472,14 +4471,11 @@ intel_dp_detect(struct drm_connector *connector, bool force) /* If full detect is not performed yet, do a full detect */ if (!intel_dp->detect_done) - intel_dp_long_pulse(intel_dp->attached_connector); + status = intel_dp_long_pulse(intel_dp->attached_connector); intel_dp->detect_done = false; - if (is_edp(intel_dp) || intel_connector->detect_edid) - return connector_status_connected; - else - return connector_status_disconnected; + return status; } static void @@ -4831,36 +4827,34 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) port_name(intel_dig_port->port), long_hpd ? "long" : "short"); + if (long_hpd) { + intel_dp->detect_done = false; + return IRQ_NONE; + } + power_domain = intel_display_port_aux_power_domain(intel_encoder); intel_display_power_get(dev_priv, power_domain); - if (long_hpd) { - intel_dp_long_pulse(intel_dp->attached_connector); - if (intel_dp->is_mst) - ret = IRQ_HANDLED; - goto put_power; - - } else { - if (intel_dp->is_mst) { - if (intel_dp_check_mst_status(intel_dp) == -EINVAL) { - /* - * If we were in MST mode, and device is not - * there, get out of MST mode - */ - DRM_DEBUG_KMS("MST device may have disappeared %d vs %d\n", - intel_dp->is_mst, intel_dp->mst_mgr.mst_state); - intel_dp->is_mst = false; - drm_dp_mst_topology_mgr_set_mst(&intel_dp->mst_mgr, - intel_dp->is_mst); - goto put_power; - } + if (intel_dp->is_mst) { + if (intel_dp_check_mst_status(intel_dp) == -EINVAL) { + /* + * If we were in MST mode, and device is not + * there, get out of MST mode + */ + DRM_DEBUG_KMS("MST device may have disappeared %d vs %d\n", + intel_dp->is_mst, intel_dp->mst_mgr.mst_state); + intel_dp->is_mst = false; + drm_dp_mst_topology_mgr_set_mst(&intel_dp->mst_mgr, + intel_dp->is_mst); + intel_dp->detect_done = false; + goto put_power; } + } - if (!intel_dp->is_mst) { - if (!intel_dp_short_pulse(intel_dp)) { - intel_dp_long_pulse(intel_dp->attached_connector); - goto put_power; - } + if (!intel_dp->is_mst) { + if (!intel_dp_short_pulse(intel_dp)) { + intel_dp->detect_done = false; + goto put_power; } } diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index c26d18a574b6..1c59ca50c430 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -1694,21 +1694,32 @@ bool bxt_ddi_dp_set_dpll_hw_state(int clock, return bxt_ddi_set_dpll_hw_state(clock, &clk_div, dpll_hw_state); } +static bool +bxt_ddi_hdmi_set_dpll_hw_state(struct intel_crtc *intel_crtc, + struct intel_crtc_state *crtc_state, int clock, + struct intel_dpll_hw_state *dpll_hw_state) +{ + struct bxt_clk_div clk_div = { }; + + bxt_ddi_hdmi_pll_dividers(intel_crtc, crtc_state, clock, &clk_div); + + return bxt_ddi_set_dpll_hw_state(clock, &clk_div, dpll_hw_state); +} + static struct intel_shared_dpll * bxt_get_dpll(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state, struct intel_encoder *encoder) { - struct bxt_clk_div clk_div = {0}; - struct intel_dpll_hw_state dpll_hw_state = {0}; + struct intel_dpll_hw_state dpll_hw_state = { }; struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_digital_port *intel_dig_port; struct intel_shared_dpll *pll; int i, clock = crtc_state->port_clock; - if (encoder->type == INTEL_OUTPUT_HDMI - && !bxt_ddi_hdmi_pll_dividers(crtc, crtc_state, - clock, &clk_div)) + if (encoder->type == INTEL_OUTPUT_HDMI && + !bxt_ddi_hdmi_set_dpll_hw_state(crtc, crtc_state, clock, + &dpll_hw_state)) return NULL; if ((encoder->type 
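
After the intel_dp.c rework above, a long HPD pulse no longer runs the full detect from the interrupt path; it only clears detect_done so the next ->detect() performs the long pulse and returns its status. A condensed model of the new flow (enum values illustrative):

#include <stdbool.h>

enum det_status { ST_DISCONNECTED, ST_CONNECTED };

static bool detect_done;

static enum det_status long_pulse(void)
{
	/* full detect: DPCD read, EDID fetch, MST probe ... */
	return ST_CONNECTED;
}

static void hpd_pulse(bool is_long)
{
	if (is_long)
		detect_done = false;    /* defer heavy work to ->detect() */
	/* short pulses stay inline: link status, sink interrupts */
}

static enum det_status dp_detect(enum det_status cached)
{
	enum det_status status = cached;

	if (!detect_done)
		status = long_pulse();
	detect_done = false;
	return status;
}
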
== INTEL_OUTPUT_DP || diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 8fd16adf069b..a19ec06f9e42 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -263,6 +263,7 @@ struct intel_panel { bool enabled; bool combination_mode; /* gen 2/4 only */ bool active_low_pwm; + bool alternate_pwm_increment; /* lpt+ */ /* PWM chip */ bool util_pin_active_low; /* bxt+ */ @@ -1741,9 +1742,9 @@ void ilk_wm_get_hw_state(struct drm_device *dev); void skl_wm_get_hw_state(struct drm_device *dev); void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv, struct skl_ddb_allocation *ddb /* out */); -bool skl_can_enable_sagv(struct drm_atomic_state *state); -int skl_enable_sagv(struct drm_i915_private *dev_priv); -int skl_disable_sagv(struct drm_i915_private *dev_priv); +bool intel_can_enable_sagv(struct drm_atomic_state *state); +int intel_enable_sagv(struct drm_i915_private *dev_priv); +int intel_disable_sagv(struct drm_i915_private *dev_priv); bool skl_ddb_allocation_equals(const struct skl_ddb_allocation *old, const struct skl_ddb_allocation *new, enum pipe pipe); diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index e405f1080296..025e232a4205 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -210,9 +210,6 @@ void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno) void intel_engine_init_hangcheck(struct intel_engine_cs *engine) { memset(&engine->hangcheck, 0, sizeof(engine->hangcheck)); - clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); - if (intel_engine_has_waiter(engine)) - i915_queue_hangcheck(engine->i915); } static void intel_engine_init_requests(struct intel_engine_cs *engine) @@ -307,18 +304,6 @@ int intel_engine_init_common(struct intel_engine_cs *engine) return 0; } -void intel_engine_reset_irq(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - - spin_lock_irq(&dev_priv->irq_lock); - if (intel_engine_has_waiter(engine)) - engine->irq_enable(engine); - else - engine->irq_disable(engine); - spin_unlock_irq(&dev_priv->irq_lock); -} - /** * intel_engines_cleanup_common - cleans up the engine state created by * the common initializers.
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index b1ba86958811..5cdf7aa75be5 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -160,6 +160,7 @@ extern int intel_guc_resume(struct drm_device *dev); int i915_guc_submission_init(struct drm_i915_private *dev_priv); int i915_guc_submission_enable(struct drm_i915_private *dev_priv); int i915_guc_wq_reserve(struct drm_i915_gem_request *rq); +void i915_guc_wq_unreserve(struct drm_i915_gem_request *request); void i915_guc_submission_disable(struct drm_i915_private *dev_priv); void i915_guc_submission_fini(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 251143361f31..0adb879833ff 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -226,10 +226,16 @@ enum { /* Typical size of the average request (2 pipecontrols and a MI_BB) */ #define EXECLISTS_REQUEST_SIZE 64 /* bytes */ +#define WA_TAIL_DWORDS 2 + static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine); static int intel_lr_context_pin(struct i915_gem_context *ctx, struct intel_engine_cs *engine); +static void execlists_init_reg_state(u32 *reg_state, + struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + struct intel_ring *ring); /** * intel_sanitize_enable_execlists() - sanitize i915.enable_execlists @@ -621,6 +627,10 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request request->ring = ce->ring; + ret = intel_lr_context_pin(request->ctx, engine); + if (ret) + return ret; + if (i915.enable_guc_submission) { /* * Check that the GuC has space for the request before @@ -629,21 +639,17 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request */ ret = i915_guc_wq_reserve(request); if (ret) - return ret; + goto err_unpin; } - ret = intel_lr_context_pin(request->ctx, engine); - if (ret) - return ret; - ret = intel_ring_begin(request, 0); if (ret) - goto err_unpin; + goto err_unreserve; if (!ce->initialised) { ret = engine->init_context(request); if (ret) - goto err_unpin; + goto err_unreserve; ce->initialised = true; } @@ -658,6 +664,9 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request request->reserved_space -= EXECLISTS_REQUEST_SIZE; return 0; +err_unreserve: + if (i915.enable_guc_submission) + i915_guc_wq_unreserve(request); err_unpin: intel_lr_context_unpin(request->ctx, engine); return ret; @@ -708,7 +717,6 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx, { struct intel_context *ce = &ctx->engine[engine->id]; void *vaddr; - u32 *lrc_reg_state; int ret; lockdep_assert_held(&ctx->i915->drm.struct_mutex); @@ -727,17 +735,16 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx, goto unpin_vma; } - lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; - ret = intel_ring_pin(ce->ring); if (ret) goto unpin_map; intel_lr_context_descriptor_update(ctx, engine); - lrc_reg_state[CTX_RING_BUFFER_START+1] = + ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; + ce->lrc_reg_state[CTX_RING_BUFFER_START+1] = i915_ggtt_offset(ce->ring->vma); - ce->lrc_reg_state = lrc_reg_state; + ce->state->obj->dirty = true; /* Invalidate GuC TLB. 
*/ @@ -1231,7 +1238,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine) lrc_init_hws(engine); - intel_engine_reset_irq(engine); + intel_engine_reset_breadcrumbs(engine); I915_WRITE(RING_HWSTAM(engine->mmio_base), 0xffffffff); @@ -1289,8 +1296,21 @@ static void reset_common_ring(struct intel_engine_cs *engine, struct execlist_port *port = engine->execlist_port; struct intel_context *ce = &request->ctx->engine[engine->id]; + /* We want a simple context + ring to execute the breadcrumb update. + * We cannot rely on the context being intact across the GPU hang, + * so clear it and rebuild just what we need for the breadcrumb. + * All pending requests for this context will be zapped, and any + * future request will be after userspace has had the opportunity + * to recreate its own state. + */ + execlists_init_reg_state(ce->lrc_reg_state, + request->ctx, engine, ce->ring); + /* Move the RING_HEAD onto the breadcrumb, past the hanging batch */ + ce->lrc_reg_state[CTX_RING_BUFFER_START+1] = + i915_ggtt_offset(ce->ring->vma); ce->lrc_reg_state[CTX_RING_HEAD+1] = request->postfix; + request->ring->head = request->postfix; request->ring->last_retired_head = -1; intel_ring_update_space(request->ring); @@ -1310,6 +1330,9 @@ static void reset_common_ring(struct intel_engine_cs *engine, GEM_BUG_ON(request->ctx != port[0].request->ctx); port[0].count = 0; port[1].count = 0; + + /* Reset WaIdleLiteRestore:bdw,skl as well */ + request->tail = request->wa_tail - WA_TAIL_DWORDS * sizeof(u32); } static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) @@ -1547,7 +1570,6 @@ static void bxt_a_seqno_barrier(struct intel_engine_cs *engine) * used as a workaround for not being allowed to do lite * restore with HEAD==TAIL (WaIdleLiteRestore). */ -#define WA_TAIL_DWORDS 2 static int gen8_emit_request(struct drm_i915_gem_request *request) { @@ -1894,38 +1916,13 @@ static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine) return indirect_ctx_offset; } -static int -populate_lr_context(struct i915_gem_context *ctx, - struct drm_i915_gem_object *ctx_obj, - struct intel_engine_cs *engine, - struct intel_ring *ring) +static void execlists_init_reg_state(u32 *reg_state, + struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + struct intel_ring *ring) { - struct drm_i915_private *dev_priv = ctx->i915; - struct i915_hw_ppgtt *ppgtt = ctx->ppgtt; - void *vaddr; - u32 *reg_state; - int ret; - - if (!ppgtt) - ppgtt = dev_priv->mm.aliasing_ppgtt; - - ret = i915_gem_object_set_to_cpu_domain(ctx_obj, true); - if (ret) { - DRM_DEBUG_DRIVER("Could not set to CPU domain\n"); - return ret; - } - - vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB); - if (IS_ERR(vaddr)) { - ret = PTR_ERR(vaddr); - DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret); - return ret; - } - ctx_obj->dirty = true; - - /* The second page of the context object contains some fields which must - * be set up prior to the first execution. */ - reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; + struct drm_i915_private *dev_priv = engine->i915; + struct i915_hw_ppgtt *ppgtt = ctx->ppgtt ?: dev_priv->mm.aliasing_ppgtt; /* A context is actually a big batch buffer with several MI_LOAD_REGISTER_IMM * commands followed by (reg, value) pairs. The values we are setting here are @@ -1939,14 +1936,11 @@ populate_lr_context(struct i915_gem_context *ctx, _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH | CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | (HAS_RESOURCE_STREAMER(dev_priv) ? 
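
reset_common_ring() above rebuilds only what a post-hang context needs: a fresh register state, RING_HEAD parked on the breadcrumb past the hanging batch, and the WaIdleLiteRestore tail padding redone. A schematic of those three steps (field offsets illustrative, not the real CTX_* layout):

#include <stdint.h>

#define WA_TAIL_DWORDS 2

struct ring { uint32_t head; int last_retired_head; };
struct request {
	struct ring *ring;
	uint32_t postfix;       /* where this request's breadcrumb starts */
	uint32_t tail, wa_tail;
};

static void reset_ring_sketch(struct request *rq, uint32_t *reg_state,
			      uint32_t ring_start)
{
	/* 1. the image may be corrupt after the hang: rebuild defaults,
	 *    then restore the ring address */
	reg_state[0] = ring_start;      /* stands in for CTX_RING_BUFFER_START+1 */

	/* 2. resume past the hanging batch, at the breadcrumb */
	reg_state[1] = rq->postfix;     /* stands in for CTX_RING_HEAD+1 */
	rq->ring->head = rq->postfix;
	rq->ring->last_retired_head = -1;

	/* 3. re-apply the lite-restore workaround's tail adjustment */
	rq->tail = rq->wa_tail - WA_TAIL_DWORDS * sizeof(uint32_t);
}
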
- CTX_CTRL_RS_CTX_ENABLE : 0))); + CTX_CTRL_RS_CTX_ENABLE : 0))); ASSIGN_CTX_REG(reg_state, CTX_RING_HEAD, RING_HEAD(engine->mmio_base), 0); ASSIGN_CTX_REG(reg_state, CTX_RING_TAIL, RING_TAIL(engine->mmio_base), 0); - /* Ring buffer start address is not known until the buffer is pinned. - * It is written to the context image in execlists_update_context() - */ ASSIGN_CTX_REG(reg_state, CTX_RING_BUFFER_START, RING_START(engine->mmio_base), 0); ASSIGN_CTX_REG(reg_state, CTX_RING_BUFFER_CONTROL, @@ -2029,6 +2023,36 @@ populate_lr_context(struct i915_gem_context *ctx, ASSIGN_CTX_REG(reg_state, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, make_rpcs(dev_priv)); } +} + +static int +populate_lr_context(struct i915_gem_context *ctx, + struct drm_i915_gem_object *ctx_obj, + struct intel_engine_cs *engine, + struct intel_ring *ring) +{ + void *vaddr; + int ret; + + ret = i915_gem_object_set_to_cpu_domain(ctx_obj, true); + if (ret) { + DRM_DEBUG_DRIVER("Could not set to CPU domain\n"); + return ret; + } + + vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + ret = PTR_ERR(vaddr); + DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret); + return ret; + } + ctx_obj->dirty = true; + + /* The second page of the context object contains some fields which must + * be set up prior to the first execution. */ + + execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE, + ctx, engine, ring); i915_gem_object_unpin_map(ctx_obj); diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c index c10e9b0405e8..be4b4d546fd9 100644 --- a/drivers/gpu/drm/i915/intel_panel.c +++ b/drivers/gpu/drm/i915/intel_panel.c @@ -841,7 +841,7 @@ static void lpt_enable_backlight(struct intel_connector *connector) { struct drm_i915_private *dev_priv = to_i915(connector->base.dev); struct intel_panel *panel = &connector->panel; - u32 pch_ctl1, pch_ctl2; + u32 pch_ctl1, pch_ctl2, schicken; pch_ctl1 = I915_READ(BLC_PWM_PCH_CTL1); if (pch_ctl1 & BLM_PCH_PWM_ENABLE) { @@ -850,6 +850,22 @@ static void lpt_enable_backlight(struct intel_connector *connector) I915_WRITE(BLC_PWM_PCH_CTL1, pch_ctl1); } + if (HAS_PCH_LPT(dev_priv)) { + schicken = I915_READ(SOUTH_CHICKEN2); + if (panel->backlight.alternate_pwm_increment) + schicken |= LPT_PWM_GRANULARITY; + else + schicken &= ~LPT_PWM_GRANULARITY; + I915_WRITE(SOUTH_CHICKEN2, schicken); + } else { + schicken = I915_READ(SOUTH_CHICKEN1); + if (panel->backlight.alternate_pwm_increment) + schicken |= SPT_PWM_GRANULARITY; + else + schicken &= ~SPT_PWM_GRANULARITY; + I915_WRITE(SOUTH_CHICKEN1, schicken); + } + pch_ctl2 = panel->backlight.max << 16; I915_WRITE(BLC_PWM_PCH_CTL2, pch_ctl2); @@ -1242,10 +1258,10 @@ static u32 bxt_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz) */ static u32 spt_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz) { - struct drm_i915_private *dev_priv = to_i915(connector->base.dev); + struct intel_panel *panel = &connector->panel; u32 mul; - if (I915_READ(SOUTH_CHICKEN1) & SPT_PWM_GRANULARITY) + if (panel->backlight.alternate_pwm_increment) mul = 128; else mul = 16; @@ -1261,9 +1277,10 @@ static u32 spt_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz) static u32 lpt_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz) { struct drm_i915_private *dev_priv = to_i915(connector->base.dev); + struct intel_panel *panel = &connector->panel; u32 mul, clock; - if (I915_READ(SOUTH_CHICKEN2) & LPT_PWM_GRANULARITY) + if (panel->backlight.alternate_pwm_increment) mul = 16; else mul = 
128; @@ -1414,6 +1431,13 @@ static int lpt_setup_backlight(struct intel_connector *connector, enum pipe unus struct drm_i915_private *dev_priv = to_i915(connector->base.dev); struct intel_panel *panel = &connector->panel; u32 pch_ctl1, pch_ctl2, val; + bool alt; + + if (HAS_PCH_LPT(dev_priv)) + alt = I915_READ(SOUTH_CHICKEN2) & LPT_PWM_GRANULARITY; + else + alt = I915_READ(SOUTH_CHICKEN1) & SPT_PWM_GRANULARITY; + panel->backlight.alternate_pwm_increment = alt; pch_ctl1 = I915_READ(BLC_PWM_PCH_CTL1); panel->backlight.active_low_pwm = pch_ctl1 & BLM_PCH_POLARITY; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 2df06b703e3d..a2f751cd187a 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -2127,32 +2127,34 @@ static void intel_read_wm_latency(struct drm_device *dev, uint16_t wm[8]) GEN9_MEM_LATENCY_LEVEL_MASK; /* + * If a level n (n > 1) has a 0us latency, all levels m (m >= n) + * need to be disabled. We make sure to sanitize the values out + * of the punit to satisfy this requirement. + */ + for (level = 1; level <= max_level; level++) { + if (wm[level] == 0) { + for (i = level + 1; i <= max_level; i++) + wm[i] = 0; + break; + } + } + + /* * WaWmMemoryReadLatency:skl * * punit doesn't take into account the read latency so we need - * to add 2us to the various latency levels we retrieve from - * the punit. - * - W0 is a bit special in that it's the only level that - * can't be disabled if we want to have display working, so - * we always add 2us there. - * - For levels >=1, punit returns 0us latency when they are - * disabled, so we respect that and don't add 2us then - * - * Additionally, if a level n (n > 1) has a 0us latency, all - * levels m (m >= n) need to be disabled. We make sure to - * sanitize the values out of the punit to satisfy this - * requirement. + * to add 2us to the various latency levels we retrieve from the + * punit when level 0 response data is 0us. */ - wm[0] += 2; - for (level = 1; level <= max_level; level++) - if (wm[level] != 0) + if (wm[0] == 0) { + wm[0] += 2; + for (level = 1; level <= max_level; level++) { + if (wm[level] == 0) + break; wm[level] += 2; - else { - for (i = level + 1; i <= max_level; i++) - wm[i] = 0; - - break; } + } + } + } else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) { uint64_t sskpd = I915_READ64(MCH_SSKPD); @@ -2877,6 +2879,19 @@ skl_wm_plane_id(const struct intel_plane *plane) } } +static bool +intel_has_sagv(struct drm_i915_private *dev_priv) +{ + if (IS_KABYLAKE(dev_priv)) + return true; + + if (IS_SKYLAKE(dev_priv) && + dev_priv->sagv_status != I915_SAGV_NOT_CONTROLLED) + return true; + + return false; +} + /* * SAGV dynamically adjusts the system agent voltage and clock frequencies * depending on power and performance requirements. The display engine access @@ -2889,12 +2904,14 @@ skl_wm_plane_id(const struct intel_plane *plane) * - We're not using an interlaced display configuration */ int -skl_enable_sagv(struct drm_i915_private *dev_priv) +intel_enable_sagv(struct drm_i915_private *dev_priv) { int ret; - if (dev_priv->skl_sagv_status == I915_SKL_SAGV_NOT_CONTROLLED || - dev_priv->skl_sagv_status == I915_SKL_SAGV_ENABLED) + if (!intel_has_sagv(dev_priv)) + return 0; + + if (dev_priv->sagv_status == I915_SAGV_ENABLED) return 0; DRM_DEBUG_KMS("Enabling the SAGV\n"); @@ -2910,21 +2927,21 @@ skl_enable_sagv(struct drm_i915_private *dev_priv) * Some skl systems, pre-release machines in particular, * don't actually have an SAGV.
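
Pulled out of the intel_read_wm_latency() hunk above, the sanitize pass now runs before the +2us workaround: the first 0us level disables itself and everything deeper. Standalone:

#include <stdint.h>

static void sanitize_wm_latency(uint16_t wm[], int max_level)
{
	/* a 0us latency means "disabled"; all deeper levels must follow */
	for (int level = 1; level <= max_level; level++) {
		if (wm[level] == 0) {
			for (int i = level + 1; i <= max_level; i++)
				wm[i] = 0;
			break;
		}
	}
}
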
*/ - if (ret == -ENXIO) { + if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) { DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n"); - dev_priv->skl_sagv_status = I915_SKL_SAGV_NOT_CONTROLLED; + dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED; return 0; } else if (ret < 0) { DRM_ERROR("Failed to enable the SAGV\n"); return ret; } - dev_priv->skl_sagv_status = I915_SKL_SAGV_ENABLED; + dev_priv->sagv_status = I915_SAGV_ENABLED; return 0; } static int -skl_do_sagv_disable(struct drm_i915_private *dev_priv) +intel_do_sagv_disable(struct drm_i915_private *dev_priv) { int ret; uint32_t temp = GEN9_SAGV_DISABLE; @@ -2938,19 +2955,21 @@ skl_do_sagv_disable(struct drm_i915_private *dev_priv) } int -skl_disable_sagv(struct drm_i915_private *dev_priv) +intel_disable_sagv(struct drm_i915_private *dev_priv) { int ret, result; - if (dev_priv->skl_sagv_status == I915_SKL_SAGV_NOT_CONTROLLED || - dev_priv->skl_sagv_status == I915_SKL_SAGV_DISABLED) + if (!intel_has_sagv(dev_priv)) + return 0; + + if (dev_priv->sagv_status == I915_SAGV_DISABLED) return 0; DRM_DEBUG_KMS("Disabling the SAGV\n"); mutex_lock(&dev_priv->rps.hw_lock); /* bspec says to keep retrying for at least 1 ms */ - ret = wait_for(result = skl_do_sagv_disable(dev_priv), 1); + ret = wait_for(result = intel_do_sagv_disable(dev_priv), 1); mutex_unlock(&dev_priv->rps.hw_lock); if (ret == -ETIMEDOUT) { @@ -2962,20 +2981,20 @@ skl_disable_sagv(struct drm_i915_private *dev_priv) * Some skl systems, pre-release machines in particular, * don't actually have an SAGV. */ - if (result == -ENXIO) { + if (IS_SKYLAKE(dev_priv) && result == -ENXIO) { DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n"); - dev_priv->skl_sagv_status = I915_SKL_SAGV_NOT_CONTROLLED; + dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED; return 0; } else if (result < 0) { DRM_ERROR("Failed to disable the SAGV\n"); return result; } - dev_priv->skl_sagv_status = I915_SKL_SAGV_DISABLED; + dev_priv->sagv_status = I915_SAGV_DISABLED; return 0; } -bool skl_can_enable_sagv(struct drm_atomic_state *state) +bool intel_can_enable_sagv(struct drm_atomic_state *state) { struct drm_device *dev = state->dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -2984,6 +3003,9 @@ bool skl_can_enable_sagv(struct drm_atomic_state *state) enum pipe pipe; int level, plane; + if (!intel_has_sagv(dev_priv)) + return false; + /* * SKL workaround: bspec recommends we disable the SAGV when we have * more than one pipe enabled */ @@ -3472,29 +3494,14 @@ static uint32_t skl_wm_method1(uint32_t pixel_rate, uint8_t cpp, uint32_t latenc } static uint32_t skl_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal, - uint32_t horiz_pixels, uint8_t cpp, - uint64_t tiling, uint32_t latency) + uint32_t latency, uint32_t plane_blocks_per_line) { uint32_t ret; - uint32_t plane_bytes_per_line, plane_blocks_per_line; uint32_t wm_intermediate_val; if (latency == 0) return UINT_MAX; - plane_bytes_per_line = horiz_pixels * cpp; - - if (tiling == I915_FORMAT_MOD_Y_TILED || - tiling == I915_FORMAT_MOD_Yf_TILED) { - plane_bytes_per_line *= 4; - plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512); - plane_blocks_per_line /= 4; - } else if (tiling == DRM_FORMAT_MOD_NONE) { - plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512) + 1; - } else { - plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512); - } - wm_intermediate_val = latency * pixel_rate; ret = DIV_ROUND_UP(wm_intermediate_val, pipe_htotal * 1000) * plane_blocks_per_line; @@ -3545,6 +3552,7 @@ static int skl_compute_plane_wm(const struct
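
With the intel_*_sagv() rename above, platform support is gated in one place: Kabylake always has it, Skylake unless a previous mailbox attempt latched NOT_CONTROLLED. The resulting call pattern, reduced to plain C:

enum sagv_status { SAGV_UNKNOWN, SAGV_DISABLED, SAGV_ENABLED,
		   SAGV_NOT_CONTROLLED };

static enum sagv_status sagv;
static int is_skylake, is_kabylake;

static int has_sagv(void)
{
	if (is_kabylake)
		return 1;
	return is_skylake && sagv != SAGV_NOT_CONTROLLED;
}

static int enable_sagv(void)
{
	if (!has_sagv() || sagv == SAGV_ENABLED)
		return 0;               /* cheap no-op */
	/* mailbox write goes here; on Skylake an -ENXIO reply latches
	 * SAGV_NOT_CONTROLLED so every later call short-circuits */
	sagv = SAGV_ENABLED;
	return 0;
}
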
drm_i915_private *dev_priv, uint8_t cpp; uint32_t width = 0, height = 0; uint32_t plane_pixel_rate; + uint32_t y_tile_minimum, y_min_scanlines; if (latency == 0 || !cstate->base.active || !intel_pstate->base.visible) { *enabled = false; @@ -3560,38 +3568,51 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, cpp = drm_format_plane_cpp(fb->pixel_format, 0); plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate, intel_pstate); + if (intel_rotation_90_or_270(pstate->rotation)) { + int cpp = (fb->pixel_format == DRM_FORMAT_NV12) ? + drm_format_plane_cpp(fb->pixel_format, 1) : + drm_format_plane_cpp(fb->pixel_format, 0); + + switch (cpp) { + case 1: + y_min_scanlines = 16; + break; + case 2: + y_min_scanlines = 8; + break; + default: + WARN(1, "Unsupported pixel depth for rotation"); + case 4: + y_min_scanlines = 4; + break; + } + } else { + y_min_scanlines = 4; + } + + plane_bytes_per_line = width * cpp; + if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED || + fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) { + plane_blocks_per_line = + DIV_ROUND_UP(plane_bytes_per_line * y_min_scanlines, 512); + plane_blocks_per_line /= y_min_scanlines; + } else if (fb->modifier[0] == DRM_FORMAT_MOD_NONE) { + plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512) + + 1; + } else { + plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512); + } + method1 = skl_wm_method1(plane_pixel_rate, cpp, latency); method2 = skl_wm_method2(plane_pixel_rate, cstate->base.adjusted_mode.crtc_htotal, - width, - cpp, - fb->modifier[0], - latency); + latency, + plane_blocks_per_line); - plane_bytes_per_line = width * cpp; - plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512); + y_tile_minimum = plane_blocks_per_line * y_min_scanlines; if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED || fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) { - uint32_t min_scanlines = 4; - uint32_t y_tile_minimum; - if (intel_rotation_90_or_270(pstate->rotation)) { - int cpp = (fb->pixel_format == DRM_FORMAT_NV12) ? 
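
Worked numbers for the blocks-per-line computation the skl_compute_plane_wm() hunks above introduce, under assumed inputs (1920-wide 4bpp plane, Y-tiled, unrotated, so y_min_scanlines = 4):

#include <stdint.h>
#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	uint32_t width = 1920, cpp = 4;         /* assumed example plane */
	uint32_t y_min_scanlines = 4;           /* 4bpp, no 90/270 rotation */
	uint32_t bytes_per_line = width * cpp;  /* 7680 */

	/* Y-tiled: amortize a y_min_scanlines-tall stripe of 512B blocks,
	 * then divide back to a per-line figure, as in the hunk above */
	uint32_t blocks_per_line =
		DIV_ROUND_UP(bytes_per_line * y_min_scanlines, 512) /
		y_min_scanlines;                /* 60 / 4 = 15 */
	uint32_t y_tile_minimum = blocks_per_line * y_min_scanlines; /* 60 */

	printf("%u blocks/line, y_tile_minimum %u\n",
	       (unsigned)blocks_per_line, (unsigned)y_tile_minimum);
	return 0;
}
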
- drm_format_plane_cpp(fb->pixel_format, 1) : - drm_format_plane_cpp(fb->pixel_format, 0); - - switch (cpp) { - case 1: - min_scanlines = 16; - break; - case 2: - min_scanlines = 8; - break; - case 8: - WARN(1, "Unsupported pixel depth for rotation"); - } - } - y_tile_minimum = plane_blocks_per_line * min_scanlines; selected_result = max(method2, y_tile_minimum); } else { if ((ddb_allocation / plane_blocks_per_line) >= 1) @@ -3605,10 +3626,12 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, if (level >= 1 && level <= 7) { if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED || - fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) - res_lines += 4; - else + fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) { + res_blocks += y_tile_minimum; + res_lines += y_min_scanlines; + } else { res_blocks++; + } } if (res_blocks >= ddb_allocation || res_lines > 31) { @@ -3939,6 +3962,41 @@ pipes_modified(struct drm_atomic_state *state) return ret; } +int +skl_ddb_add_affected_planes(struct intel_crtc_state *cstate) +{ + struct drm_atomic_state *state = cstate->base.state; + struct drm_device *dev = state->dev; + struct drm_crtc *crtc = cstate->base.crtc; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_atomic_state *intel_state = to_intel_atomic_state(state); + struct skl_ddb_allocation *new_ddb = &intel_state->wm_results.ddb; + struct skl_ddb_allocation *cur_ddb = &dev_priv->wm.skl_hw.ddb; + struct drm_plane_state *plane_state; + struct drm_plane *plane; + enum pipe pipe = intel_crtc->pipe; + int id; + + WARN_ON(!drm_atomic_get_existing_crtc_state(state, crtc)); + + drm_for_each_plane_mask(plane, dev, crtc->state->plane_mask) { + id = skl_wm_plane_id(to_intel_plane(plane)); + + if (skl_ddb_entry_equal(&cur_ddb->plane[pipe][id], + &new_ddb->plane[pipe][id]) && + skl_ddb_entry_equal(&cur_ddb->y_plane[pipe][id], + &new_ddb->y_plane[pipe][id])) + continue; + + plane_state = drm_atomic_get_plane_state(state, plane); + if (IS_ERR(plane_state)) + return PTR_ERR(plane_state); + } + + return 0; +} + static int skl_compute_ddb(struct drm_atomic_state *state) { @@ -4003,7 +4061,7 @@ skl_compute_ddb(struct drm_atomic_state *state) if (ret) return ret; - ret = drm_atomic_add_affected_planes(state, &intel_crtc->base); + ret = skl_ddb_add_affected_planes(cstate); if (ret) return ret; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 7a74750076c5..ed9955dce156 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -564,7 +564,7 @@ static int init_ring_common(struct intel_engine_cs *engine) else intel_ring_setup_status_page(engine); - intel_engine_reset_irq(engine); + intel_engine_reset_breadcrumbs(engine); /* Enforce ordering by reading HEAD register back */ I915_READ_HEAD(engine); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 7f64d611159b..ec0b4a0c605d 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -328,6 +328,7 @@ struct intel_engine_cs { * inspecting request list. */ u32 last_submitted_seqno; + u32 last_pending_seqno; /* An RCU guarded pointer to the last request. 
No reference is * held to the request, users must carefully acquire a reference to @@ -492,7 +493,6 @@ int __intel_ring_space(int head, int tail, int size); void intel_ring_update_space(struct intel_ring *ring); void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno); -void intel_engine_reset_irq(struct intel_engine_cs *engine); void intel_engine_setup_common(struct intel_engine_cs *engine); int intel_engine_init_common(struct intel_engine_cs *engine); @@ -584,6 +584,7 @@ static inline bool intel_engine_wakeup(const struct intel_engine_cs *engine) return wakeup; } +void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); unsigned int intel_kick_waiters(struct drm_i915_private *i915); unsigned int intel_kick_signalers(struct drm_i915_private *i915); diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index a9b6c936aadd..ee2306a79747 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -796,10 +796,9 @@ __unclaimed_reg_debug(struct drm_i915_private *dev_priv, const bool read, const bool before) { - if (WARN(check_for_unclaimed_mmio(dev_priv), - "Unclaimed register detected %s %s register 0x%x\n", - before ? "before" : "after", - read ? "reading" : "writing to", + if (WARN(check_for_unclaimed_mmio(dev_priv) && !before, + "Unclaimed %s register 0x%x\n", + read ? "read from" : "write to", i915_mmio_reg_offset(reg))) i915.mmio_debug--; /* Only report the first N failures */ } diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c index a4e9f35da3a2..74f99bac08b1 100644 --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -1638,8 +1638,8 @@ static int avivo_crtc_do_set_base(struct drm_crtc *crtc, WREG32(AVIVO_D1MODE_VIEWPORT_SIZE + radeon_crtc->crtc_offset, (viewport_w << 16) | viewport_h); - /* set pageflip to happen anywhere in vblank interval */ - WREG32(AVIVO_D1MODE_MASTER_UPDATE_MODE + radeon_crtc->crtc_offset, 0); + /* set pageflip to happen only at start of vblank interval (front porch) */ + WREG32(AVIVO_D1MODE_MASTER_UPDATE_MODE + radeon_crtc->crtc_offset, 3); if (!atomic && fb && fb != crtc->primary->fb) { radeon_fb = to_radeon_framebuffer(fb); diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 890171f08987..b8ab30a7dd6d 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -321,16 +321,30 @@ void radeon_crtc_handle_vblank(struct radeon_device *rdev, int crtc_id) update_pending = radeon_page_flip_pending(rdev, crtc_id); /* Has the pageflip already completed in crtc, or is it certain - * to complete in this vblank? + * to complete in this vblank? GET_DISTANCE_TO_VBLANKSTART provides + * distance to start of "fudged earlier" vblank in vpos, distance to + * start of real vblank in hpos. vpos >= 0 && hpos < 0 means we are in + * the last few scanlines before start of real vblank, where the vblank + * irq can fire, so we have sampled update_pending a bit too early and + * know the flip will complete at leading edge of the upcoming real + * vblank. On pre-AVIVO hardware, flips also complete inside the real + * vblank, not only at leading edge, so if update_pending for hpos >= 0 + * == inside real vblank, the flip will complete almost immediately. 
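
radeon_crtc_handle_vblank() above keys completion off GET_DISTANCE_TO_VBLANKSTART: vpos >= 0 with hpos < 0 means the scanout is in the fudged lead-in scanlines before real vblank, so a pending flip is certain to complete at the upcoming vblank edge. As a predicate:

#include <stdbool.h>

/* vpos: distance to the "fudged earlier" vblank start
 * hpos: distance to the real vblank start (both can be negative) */
static bool flip_completes_this_vblank(int vpos, int hpos, bool is_avivo)
{
	if (vpos >= 0 && hpos < 0)
		return true;    /* in the lead-in just before real vblank */
	if (hpos >= 0 && !is_avivo)
		return true;    /* pre-AVIVO also completes inside vblank */
	return false;
}
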
+ * Note that this method of completion handling is still not 100% race + free, as we could execute before the radeon_flip_work_func managed + to run and set the RADEON_FLIP_SUBMITTED status, in which case we + no-op, but the flip still gets programmed into hw and completed during + vblank, leading to a delayed emission of the flip completion event. + This applies at least to pre-AVIVO hardware, where flips always + complete inside vblank, not only at the leading edge of vblank. */ if (update_pending && - (DRM_SCANOUTPOS_VALID & radeon_get_crtc_scanoutpos(rdev->ddev, - crtc_id, - USE_REAL_VBLANKSTART, - &vpos, &hpos, NULL, NULL, - &rdev->mode_info.crtcs[crtc_id]->base.hwmode)) && - ((vpos >= (99 * rdev->mode_info.crtcs[crtc_id]->base.hwmode.crtc_vdisplay)/100) || - (vpos < 0 && !ASIC_IS_AVIVO(rdev)))) { + (DRM_SCANOUTPOS_VALID & + radeon_get_crtc_scanoutpos(rdev->ddev, crtc_id, + GET_DISTANCE_TO_VBLANKSTART, + &vpos, &hpos, NULL, NULL, + &rdev->mode_info.crtcs[crtc_id]->base.hwmode)) && + ((vpos >= 0 && hpos < 0) || (hpos >= 0 && !ASIC_IS_AVIVO(rdev)))) { /* crtc didn't flip in this target vblank interval, * but flip is pending in crtc. Based on the current * scanout position we know that the current frame is @@ -438,16 +452,19 @@ static void radeon_flip_work_func(struct work_struct *__work) } /* Wait until we're out of the vertical blank period before the one - * targeted by the flip + * targeted by the flip. Always wait on pre-DCE4 to avoid races with + * flip completion handling from vblank irq, as these old asics don't + * have reliable pageflip completion interrupts. */ while (radeon_crtc->enabled && - (radeon_get_crtc_scanoutpos(dev, work->crtc_id, 0, - &vpos, &hpos, NULL, NULL, - &crtc->hwmode) + (radeon_get_crtc_scanoutpos(dev, work->crtc_id, 0, + &vpos, &hpos, NULL, NULL, + &crtc->hwmode) & (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_IN_VBLANK)) == - (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_IN_VBLANK) && - (int)(work->target_vblank - - dev->driver->get_vblank_counter(dev, work->crtc_id)) > 0) + (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_IN_VBLANK) && + (!ASIC_IS_AVIVO(rdev) || + ((int) (work->target_vblank - + dev->driver->get_vblank_counter(dev, work->crtc_id)) > 0))) usleep_range(1000, 2000); /* We borrow the event spin lock for protecting flip_status */ diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c index 76c55c5d11ec..c55d653aaf5f 100644 --- a/drivers/gpu/drm/radeon/rv515.c +++ b/drivers/gpu/drm/radeon/rv515.c @@ -406,8 +406,9 @@ void rv515_mc_resume(struct radeon_device *rdev, struct rv515_mc_save *save) for (i = 0; i < rdev->num_crtc; i++) { if (save->crtc_enabled[i]) { tmp = RREG32(AVIVO_D1MODE_MASTER_UPDATE_MODE + crtc_offsets[i]); - if ((tmp & 0x7) != 0) { + if ((tmp & 0x7) != 3) { tmp &= ~0x7; + tmp |= 0x3; WREG32(AVIVO_D1MODE_MASTER_UPDATE_MODE + crtc_offsets[i], tmp); } tmp = RREG32(AVIVO_D1GRPH_UPDATE + crtc_offsets[i]); diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index 2682f07d8f1e..7f08d681a74b 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -229,7 +229,7 @@ int vc4_crtc_get_scanoutpos(struct drm_device *dev, unsigned int crtc_id, * and need to make things up in an approximate but consistent way.
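Two idioms in the radeon hunks above are worth isolating: the (vpos, hpos) classification that GET_DISTANCE_TO_VBLANKSTART enables (vpos is the distance to the fudged, slightly earlier vblank start; hpos the distance to the real one), and the wrap-safe `(int)(target - current) > 0` vblank-counter comparison in the flip worker. A sketch under those assumptions:

    #include <stdbool.h>
    #include <stdint.h>

    /*
     * vpos >= 0 && hpos < 0 means "in the last scanlines before real
     * vblank": the irq fired a bit early and the flip will complete at
     * the upcoming leading edge. hpos >= 0 means inside real vblank,
     * where only pre-AVIVO parts still complete flips.
     */
    static bool flip_completes_this_vblank(int vpos, int hpos, bool is_avivo)
    {
        if (vpos >= 0 && hpos < 0)
            return true;  /* just before real vblank start */
        if (hpos >= 0 && !is_avivo)
            return true;  /* old asics flip anywhere inside vblank */
        return false;
    }

    /* Wrap-safe "target counter is still ahead of current" test. */
    static bool counter_ahead(uint32_t target, uint32_t current)
    {
        return (int32_t)(target - current) > 0;
    }

The signed-difference trick stays correct across 32-bit wraparound as long as the two counters are within 2^31 of each other, which vblank counts always are in practice.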
*/ ret |= DRM_SCANOUTPOS_IN_VBLANK; - vblank_lines = mode->crtc_vtotal - mode->crtc_vdisplay; + vblank_lines = mode->vtotal - mode->vdisplay; if (flags & DRM_CALLED_FROM_VBLIRQ) { /* @@ -378,7 +378,7 @@ static void vc4_crtc_mode_set_nofb(struct drm_crtc *crtc) struct drm_crtc_state *state = crtc->state; struct drm_display_mode *mode = &state->adjusted_mode; bool interlace = mode->flags & DRM_MODE_FLAG_INTERLACE; - u32 vactive = (mode->vdisplay >> (interlace ? 1 : 0)); + u32 pixel_rep = (mode->flags & DRM_MODE_FLAG_DBLCLK) ? 2 : 1; u32 format = PV_CONTROL_FORMAT_24; bool debug_dump_regs = false; int clock_select = vc4_get_clock_select(crtc); @@ -394,47 +394,65 @@ static void vc4_crtc_mode_set_nofb(struct drm_crtc *crtc) CRTC_WRITE(PV_CONTROL, 0); CRTC_WRITE(PV_HORZA, - VC4_SET_FIELD(mode->htotal - mode->hsync_end, + VC4_SET_FIELD((mode->htotal - + mode->hsync_end) * pixel_rep, PV_HORZA_HBP) | - VC4_SET_FIELD(mode->hsync_end - mode->hsync_start, + VC4_SET_FIELD((mode->hsync_end - + mode->hsync_start) * pixel_rep, PV_HORZA_HSYNC)); CRTC_WRITE(PV_HORZB, - VC4_SET_FIELD(mode->hsync_start - mode->hdisplay, + VC4_SET_FIELD((mode->hsync_start - + mode->hdisplay) * pixel_rep, PV_HORZB_HFP) | - VC4_SET_FIELD(mode->hdisplay, PV_HORZB_HACTIVE)); + VC4_SET_FIELD(mode->hdisplay * pixel_rep, PV_HORZB_HACTIVE)); CRTC_WRITE(PV_VERTA, - VC4_SET_FIELD(mode->vtotal - mode->vsync_end, + VC4_SET_FIELD(mode->crtc_vtotal - mode->crtc_vsync_end, PV_VERTA_VBP) | - VC4_SET_FIELD(mode->vsync_end - mode->vsync_start, + VC4_SET_FIELD(mode->crtc_vsync_end - mode->crtc_vsync_start, PV_VERTA_VSYNC)); CRTC_WRITE(PV_VERTB, - VC4_SET_FIELD(mode->vsync_start - mode->vdisplay, + VC4_SET_FIELD(mode->crtc_vsync_start - mode->crtc_vdisplay, PV_VERTB_VFP) | - VC4_SET_FIELD(vactive, PV_VERTB_VACTIVE)); + VC4_SET_FIELD(mode->crtc_vdisplay, PV_VERTB_VACTIVE)); if (interlace) { CRTC_WRITE(PV_VERTA_EVEN, - VC4_SET_FIELD(mode->vtotal - mode->vsync_end - 1, + VC4_SET_FIELD(mode->crtc_vtotal - + mode->crtc_vsync_end - 1, PV_VERTA_VBP) | - VC4_SET_FIELD(mode->vsync_end - mode->vsync_start, + VC4_SET_FIELD(mode->crtc_vsync_end - + mode->crtc_vsync_start, PV_VERTA_VSYNC)); CRTC_WRITE(PV_VERTB_EVEN, - VC4_SET_FIELD(mode->vsync_start - mode->vdisplay, + VC4_SET_FIELD(mode->crtc_vsync_start - + mode->crtc_vdisplay, PV_VERTB_VFP) | - VC4_SET_FIELD(vactive, PV_VERTB_VACTIVE)); + VC4_SET_FIELD(mode->crtc_vdisplay, PV_VERTB_VACTIVE)); + + /* We set up first field even mode for HDMI. VEC's + * NTSC mode would want first field odd instead, once + * we support it (to do so, set ODD_FIRST and put the + * delay in VSYNCD_EVEN instead). + */ + CRTC_WRITE(PV_V_CONTROL, + PV_VCONTROL_CONTINUOUS | + PV_VCONTROL_INTERLACE | + VC4_SET_FIELD(mode->htotal * pixel_rep / 2, + PV_VCONTROL_ODD_DELAY)); + CRTC_WRITE(PV_VSYNCD_EVEN, 0); + } else { + CRTC_WRITE(PV_V_CONTROL, PV_VCONTROL_CONTINUOUS); } - CRTC_WRITE(PV_HACT_ACT, mode->hdisplay); + CRTC_WRITE(PV_HACT_ACT, mode->hdisplay * pixel_rep); - CRTC_WRITE(PV_V_CONTROL, - PV_VCONTROL_CONTINUOUS | - (interlace ? PV_VCONTROL_INTERLACE : 0)); CRTC_WRITE(PV_CONTROL, VC4_SET_FIELD(format, PV_CONTROL_FORMAT) | VC4_SET_FIELD(vc4_get_fifo_full_level(format), PV_CONTROL_FIFO_LEVEL) | + VC4_SET_FIELD(pixel_rep - 1, PV_CONTROL_PIXEL_REP) | PV_CONTROL_CLR_AT_START | PV_CONTROL_TRIGGER_UNDERFLOW | PV_CONTROL_WAIT_HSTART | @@ -544,16 +562,6 @@ static bool vc4_crtc_mode_fixup(struct drm_crtc *crtc, return false; } - /* - * Interlaced video modes got CRTC_INTERLACE_HALVE_V applied when - * coming from user space. 
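The vc4_crtc_mode_set_nofb() rework above scales every horizontal timing by pixel_rep so that double-clocked (DRM_MODE_FLAG_DBLCLK) modes program doubled horizontal intervals, while the vertical fields switch to the interlace-adjusted crtc_* copies. A small sketch of the horizontal side; the htimings struct is hypothetical, standing in for the PV_HORZA/PV_HORZB fields:

    #include <stdbool.h>

    /* Hypothetical container for the PV horizontal timing fields. */
    struct htimings {
        unsigned int hactive;
        unsigned int hfp;   /* front porch */
        unsigned int hsync;
        unsigned int hbp;   /* back porch */
    };

    /*
     * For double-clocked modes each logical pixel is sent twice, so
     * every horizontal distance measured in pixel clocks doubles;
     * vertical timings are untouched by pixel repetition.
     */
    static struct htimings scale_htimings(unsigned int hdisplay,
                                          unsigned int hsync_start,
                                          unsigned int hsync_end,
                                          unsigned int htotal,
                                          bool dblclk)
    {
        unsigned int pixel_rep = dblclk ? 2 : 1;

        return (struct htimings){
            .hactive = hdisplay * pixel_rep,
            .hfp = (hsync_start - hdisplay) * pixel_rep,
            .hsync = (hsync_end - hsync_start) * pixel_rep,
            .hbp = (htotal - hsync_end) * pixel_rep,
        };
    }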
We don't want this, as it screws up - * vblank timestamping, so fix it up. - */ - drm_mode_set_crtcinfo(adjusted_mode, 0); - - DRM_DEBUG_KMS("[CRTC:%d] adjusted_mode :\n", crtc->base.id); - drm_mode_debug_printmodeline(adjusted_mode); - return true; } diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index 428e24919ef1..7c1e4d97486f 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -122,9 +122,16 @@ to_vc4_dev(struct drm_device *dev) struct vc4_bo { struct drm_gem_cma_object base; - /* seqno of the last job to render to this BO. */ + /* seqno of the last job to render using this BO. */ uint64_t seqno; + /* seqno of the last job to use the RCL to write to this BO. + * + * Note that this doesn't include binner overflow memory + * writes. + */ + uint64_t write_seqno; + /* List entry for the BO's position in either * vc4_exec_info->unref_list or vc4_dev->bo_cache.time_list */ @@ -216,6 +223,9 @@ struct vc4_exec_info { /* Sequence number for this bin/render job. */ uint64_t seqno; + /* Latest write_seqno of any BO that binning depends on. */ + uint64_t bin_dep_seqno; + /* Last current addresses the hardware was processing when the * hangcheck timer checked on us. */ @@ -230,6 +240,13 @@ struct vc4_exec_info { struct drm_gem_cma_object **bo; uint32_t bo_count; + /* List of BOs that are being written by the RCL. Other than + * the binner temporary storage, this is all the BOs written + * by the job. + */ + struct drm_gem_cma_object *rcl_write_bo[4]; + uint32_t rcl_write_bo_count; + /* Pointers for our position in vc4->job_list */ struct list_head head; @@ -307,18 +324,15 @@ struct vc4_exec_info { static inline struct vc4_exec_info * vc4_first_bin_job(struct vc4_dev *vc4) { - if (list_empty(&vc4->bin_job_list)) - return NULL; - return list_first_entry(&vc4->bin_job_list, struct vc4_exec_info, head); + return list_first_entry_or_null(&vc4->bin_job_list, + struct vc4_exec_info, head); } static inline struct vc4_exec_info * vc4_first_render_job(struct vc4_dev *vc4) { - if (list_empty(&vc4->render_job_list)) - return NULL; - return list_first_entry(&vc4->render_job_list, - struct vc4_exec_info, head); + return list_first_entry_or_null(&vc4->render_job_list, + struct vc4_exec_info, head); } static inline struct vc4_exec_info * diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index 77daea6cb866..47a095f392f8 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -467,6 +467,11 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno) list_for_each_entry(bo, &exec->unref_list, unref_head) { bo->seqno = seqno; } + + for (i = 0; i < exec->rcl_write_bo_count; i++) { + bo = to_vc4_bo(&exec->rcl_write_bo[i]->base); + bo->write_seqno = seqno; + } } /* Queues a struct vc4_exec_info for execution. If no job is @@ -669,6 +674,14 @@ vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec) goto fail; ret = vc4_validate_shader_recs(dev, exec); + if (ret) + goto fail; + + /* Block waiting on any previous rendering into the CS's VBO, + * IB, or textures, so that pixels are actually written by the + * time we try to read them. 
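The vc4 hunks above introduce per-BO write tracking: every BO written by a job's RCL records that job's seqno in write_seqno, and a later job folds the write_seqno of each BO its binner reads (index buffers, coordinate-shader textures, VBOs) into bin_dep_seqno, which is waited on at submit so those reads see completed pixels. A schematic version of the bookkeeping, with placeholder types:

    #include <stddef.h>
    #include <stdint.h>

    struct bo {
        uint64_t write_seqno; /* seqno of the last job that wrote this BO */
    };

    struct job {
        uint64_t seqno;
        uint64_t bin_dep_seqno; /* newest write the binner's reads depend on */
        struct bo **writes;     /* BOs this job's RCL writes */
        size_t nwrites;
    };

    /* Called while validating the bin CL: record a read dependency. */
    static void job_depends_on(struct job *job, const struct bo *bo)
    {
        if (bo->write_seqno > job->bin_dep_seqno)
            job->bin_dep_seqno = bo->write_seqno;
    }

    /* Called once the job gets its seqno: publish its writes. */
    static void job_publish_writes(struct job *job, uint64_t seqno)
    {
        job->seqno = seqno;
        for (size_t i = 0; i < job->nwrites; i++)
            job->writes[i]->write_seqno = seqno;
    }

In the patch, vc4_update_bo_seqnos() plays the publish role, while validate_indexed_prim_list(), reloc_tex() and the shader-record VBO walk play the depends-on role.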
+ */ + ret = vc4_wait_for_seqno(dev, exec->bin_dep_seqno, ~0ull, true); fail: drm_free_large(temp); diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index 68ad10634b29..c4cb2e26de32 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -62,6 +62,8 @@ struct vc4_hdmi { struct vc4_hdmi_encoder { struct vc4_encoder base; bool hdmi_monitor; + bool limited_rgb_range; + bool rgb_range_selectable; }; static inline struct vc4_hdmi_encoder * @@ -174,6 +176,9 @@ vc4_hdmi_connector_detect(struct drm_connector *connector, bool force) return connector_status_disconnected; } + if (drm_probe_ddc(vc4->hdmi->ddc)) + return connector_status_connected; + if (HDMI_READ(VC4_HDMI_HOTPLUG) & VC4_HDMI_HOTPLUG_CONNECTED) return connector_status_connected; else @@ -202,41 +207,22 @@ static int vc4_hdmi_connector_get_modes(struct drm_connector *connector) return -ENODEV; vc4_encoder->hdmi_monitor = drm_detect_hdmi_monitor(edid); + + if (edid && edid->input & DRM_EDID_INPUT_DIGITAL) { + vc4_encoder->rgb_range_selectable = + drm_rgb_quant_range_selectable(edid); + } + drm_mode_connector_update_edid_property(connector, edid); ret = drm_add_edid_modes(connector, edid); return ret; } -/* - * drm_helper_probe_single_connector_modes() applies drm_mode_set_crtcinfo to - * all modes with flag CRTC_INTERLACE_HALVE_V. We don't want this, as it - * screws up vblank timestamping for interlaced modes, so fix it up. - */ -static int vc4_hdmi_connector_probe_modes(struct drm_connector *connector, - uint32_t maxX, uint32_t maxY) -{ - struct drm_display_mode *mode; - int count; - - count = drm_helper_probe_single_connector_modes(connector, maxX, maxY); - if (count == 0) - return 0; - - DRM_DEBUG_KMS("[CONNECTOR:%d:%s] probed adapted modes :\n", - connector->base.id, connector->name); - list_for_each_entry(mode, &connector->modes, head) { - drm_mode_set_crtcinfo(mode, 0); - drm_mode_debug_printmodeline(mode); - } - - return count; -} - static const struct drm_connector_funcs vc4_hdmi_connector_funcs = { .dpms = drm_atomic_helper_connector_dpms, .detect = vc4_hdmi_connector_detect, - .fill_modes = vc4_hdmi_connector_probe_modes, + .fill_modes = drm_helper_probe_single_connector_modes, .destroy = vc4_hdmi_connector_destroy, .reset = drm_atomic_helper_connector_reset, .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, @@ -294,25 +280,143 @@ static const struct drm_encoder_funcs vc4_hdmi_encoder_funcs = { .destroy = vc4_hdmi_encoder_destroy, }; +static int vc4_hdmi_stop_packet(struct drm_encoder *encoder, + enum hdmi_infoframe_type type) +{ + struct drm_device *dev = encoder->dev; + struct vc4_dev *vc4 = to_vc4_dev(dev); + u32 packet_id = type - 0x80; + + HDMI_WRITE(VC4_HDMI_RAM_PACKET_CONFIG, + HDMI_READ(VC4_HDMI_RAM_PACKET_CONFIG) & ~BIT(packet_id)); + + return wait_for(!(HDMI_READ(VC4_HDMI_RAM_PACKET_STATUS) & + BIT(packet_id)), 100); +} + +static void vc4_hdmi_write_infoframe(struct drm_encoder *encoder, + union hdmi_infoframe *frame) +{ + struct drm_device *dev = encoder->dev; + struct vc4_dev *vc4 = to_vc4_dev(dev); + u32 packet_id = frame->any.type - 0x80; + u32 packet_reg = VC4_HDMI_GCP_0 + VC4_HDMI_PACKET_STRIDE * packet_id; + uint8_t buffer[VC4_HDMI_PACKET_STRIDE]; + ssize_t len, i; + int ret; + + WARN_ONCE(!(HDMI_READ(VC4_HDMI_RAM_PACKET_CONFIG) & + VC4_HDMI_RAM_PACKET_ENABLE), + "Packet RAM has to be on to store the packet."); + + len = hdmi_infoframe_pack(frame, buffer, sizeof(buffer)); + if (len < 0) + return; + + ret = 
vc4_hdmi_stop_packet(encoder, frame->any.type); + if (ret) { + DRM_ERROR("Failed to wait for infoframe to go idle: %d\n", ret); + return; + } + + for (i = 0; i < len; i += 7) { + HDMI_WRITE(packet_reg, + buffer[i + 0] << 0 | + buffer[i + 1] << 8 | + buffer[i + 2] << 16); + packet_reg += 4; + + HDMI_WRITE(packet_reg, + buffer[i + 3] << 0 | + buffer[i + 4] << 8 | + buffer[i + 5] << 16 | + buffer[i + 6] << 24); + packet_reg += 4; + } + + HDMI_WRITE(VC4_HDMI_RAM_PACKET_CONFIG, + HDMI_READ(VC4_HDMI_RAM_PACKET_CONFIG) | BIT(packet_id)); + ret = wait_for((HDMI_READ(VC4_HDMI_RAM_PACKET_STATUS) & + BIT(packet_id)), 100); + if (ret) + DRM_ERROR("Failed to wait for infoframe to start: %d\n", ret); +} + +static void vc4_hdmi_set_avi_infoframe(struct drm_encoder *encoder) +{ + struct vc4_hdmi_encoder *vc4_encoder = to_vc4_hdmi_encoder(encoder); + struct drm_crtc *crtc = encoder->crtc; + const struct drm_display_mode *mode = &crtc->state->adjusted_mode; + union hdmi_infoframe frame; + int ret; + + ret = drm_hdmi_avi_infoframe_from_display_mode(&frame.avi, mode); + if (ret < 0) { + DRM_ERROR("couldn't fill AVI infoframe\n"); + return; + } + + if (vc4_encoder->rgb_range_selectable) { + if (vc4_encoder->limited_rgb_range) { + frame.avi.quantization_range = + HDMI_QUANTIZATION_RANGE_LIMITED; + } else { + frame.avi.quantization_range = + HDMI_QUANTIZATION_RANGE_FULL; + } + } + + vc4_hdmi_write_infoframe(encoder, &frame); +} + +static void vc4_hdmi_set_spd_infoframe(struct drm_encoder *encoder) +{ + union hdmi_infoframe frame; + int ret; + + ret = hdmi_spd_infoframe_init(&frame.spd, "Broadcom", "Videocore"); + if (ret < 0) { + DRM_ERROR("couldn't fill SPD infoframe\n"); + return; + } + + frame.spd.sdi = HDMI_SPD_SDI_PC; + + vc4_hdmi_write_infoframe(encoder, &frame); +} + +static void vc4_hdmi_set_infoframes(struct drm_encoder *encoder) +{ + vc4_hdmi_set_avi_infoframe(encoder); + vc4_hdmi_set_spd_infoframe(encoder); +} + static void vc4_hdmi_encoder_mode_set(struct drm_encoder *encoder, struct drm_display_mode *unadjusted_mode, struct drm_display_mode *mode) { + struct vc4_hdmi_encoder *vc4_encoder = to_vc4_hdmi_encoder(encoder); struct drm_device *dev = encoder->dev; struct vc4_dev *vc4 = to_vc4_dev(dev); bool debug_dump_regs = false; bool hsync_pos = mode->flags & DRM_MODE_FLAG_PHSYNC; bool vsync_pos = mode->flags & DRM_MODE_FLAG_PVSYNC; - u32 vactive = (mode->vdisplay >> - ((mode->flags & DRM_MODE_FLAG_INTERLACE) ? 1 : 0)); - u32 verta = (VC4_SET_FIELD(mode->vsync_end - mode->vsync_start, + bool interlaced = mode->flags & DRM_MODE_FLAG_INTERLACE; + u32 pixel_rep = (mode->flags & DRM_MODE_FLAG_DBLCLK) ? 
2 : 1; + u32 verta = (VC4_SET_FIELD(mode->crtc_vsync_end - mode->crtc_vsync_start, VC4_HDMI_VERTA_VSP) | - VC4_SET_FIELD(mode->vsync_start - mode->vdisplay, + VC4_SET_FIELD(mode->crtc_vsync_start - mode->crtc_vdisplay, VC4_HDMI_VERTA_VFP) | - VC4_SET_FIELD(vactive, VC4_HDMI_VERTA_VAL)); + VC4_SET_FIELD(mode->crtc_vdisplay, VC4_HDMI_VERTA_VAL)); u32 vertb = (VC4_SET_FIELD(0, VC4_HDMI_VERTB_VSPO) | - VC4_SET_FIELD(mode->vtotal - mode->vsync_end, + VC4_SET_FIELD(mode->crtc_vtotal - mode->crtc_vsync_end, VC4_HDMI_VERTB_VBP)); + u32 vertb_even = (VC4_SET_FIELD(0, VC4_HDMI_VERTB_VSPO) | + VC4_SET_FIELD(mode->crtc_vtotal - + mode->crtc_vsync_end - + interlaced, + VC4_HDMI_VERTB_VBP)); + u32 csc_ctl; if (debug_dump_regs) { DRM_INFO("HDMI regs before:\n"); @@ -321,7 +425,8 @@ static void vc4_hdmi_encoder_mode_set(struct drm_encoder *encoder, HD_WRITE(VC4_HD_VID_CTL, 0); - clk_set_rate(vc4->hdmi->pixel_clock, mode->clock * 1000); + clk_set_rate(vc4->hdmi->pixel_clock, mode->clock * 1000 * + ((mode->flags & DRM_MODE_FLAG_DBLCLK) ? 2 : 1)); HDMI_WRITE(VC4_HDMI_SCHEDULER_CONTROL, HDMI_READ(VC4_HDMI_SCHEDULER_CONTROL) | @@ -331,29 +436,62 @@ static void vc4_hdmi_encoder_mode_set(struct drm_encoder *encoder, HDMI_WRITE(VC4_HDMI_HORZA, (vsync_pos ? VC4_HDMI_HORZA_VPOS : 0) | (hsync_pos ? VC4_HDMI_HORZA_HPOS : 0) | - VC4_SET_FIELD(mode->hdisplay, VC4_HDMI_HORZA_HAP)); + VC4_SET_FIELD(mode->hdisplay * pixel_rep, + VC4_HDMI_HORZA_HAP)); HDMI_WRITE(VC4_HDMI_HORZB, - VC4_SET_FIELD(mode->htotal - mode->hsync_end, + VC4_SET_FIELD((mode->htotal - + mode->hsync_end) * pixel_rep, VC4_HDMI_HORZB_HBP) | - VC4_SET_FIELD(mode->hsync_end - mode->hsync_start, + VC4_SET_FIELD((mode->hsync_end - + mode->hsync_start) * pixel_rep, VC4_HDMI_HORZB_HSP) | - VC4_SET_FIELD(mode->hsync_start - mode->hdisplay, + VC4_SET_FIELD((mode->hsync_start - + mode->hdisplay) * pixel_rep, VC4_HDMI_HORZB_HFP)); HDMI_WRITE(VC4_HDMI_VERTA0, verta); HDMI_WRITE(VC4_HDMI_VERTA1, verta); - HDMI_WRITE(VC4_HDMI_VERTB0, vertb); + HDMI_WRITE(VC4_HDMI_VERTB0, vertb_even); HDMI_WRITE(VC4_HDMI_VERTB1, vertb); HD_WRITE(VC4_HD_VID_CTL, (vsync_pos ? 0 : VC4_HD_VID_CTL_VSYNC_LOW) | (hsync_pos ? 0 : VC4_HD_VID_CTL_HSYNC_LOW)); + csc_ctl = VC4_SET_FIELD(VC4_HD_CSC_CTL_ORDER_BGR, + VC4_HD_CSC_CTL_ORDER); + + if (vc4_encoder->hdmi_monitor && drm_match_cea_mode(mode) > 1) { + /* CEA VICs other than #1 require limited range RGB + * output unless overridden by an AVI infoframe. + * Apply a colorspace conversion to squash 0-255 down + * to 16-235. The matrix here is: + * + * [ 0 0 0.8594 16] + * [ 0 0.8594 0 16] + * [ 0.8594 0 0 16] + * [ 0 0 0 1] + */ + csc_ctl |= VC4_HD_CSC_CTL_ENABLE; + csc_ctl |= VC4_HD_CSC_CTL_RGB2YCC; + csc_ctl |= VC4_SET_FIELD(VC4_HD_CSC_CTL_MODE_CUSTOM, + VC4_HD_CSC_CTL_MODE); + + HD_WRITE(VC4_HD_CSC_12_11, (0x000 << 16) | 0x000); + HD_WRITE(VC4_HD_CSC_14_13, (0x100 << 16) | 0x6e0); + HD_WRITE(VC4_HD_CSC_22_21, (0x6e0 << 16) | 0x000); + HD_WRITE(VC4_HD_CSC_24_23, (0x100 << 16) | 0x000); + HD_WRITE(VC4_HD_CSC_32_31, (0x000 << 16) | 0x6e0); + HD_WRITE(VC4_HD_CSC_34_33, (0x100 << 16) | 0x000); + vc4_encoder->limited_rgb_range = true; + } else { + vc4_encoder->limited_rgb_range = false; + } + /* The RGB order applies even when CSC is disabled.
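The CSC matrix above maps full-range RGB (0-255) into the 16-235 limited range: out = in * 0.8594 + 16. Reading the register values, 0x6e0 (1760) is plausibly 0.8594 in the block's fixed-point coefficient format (1760/2048 = 0.859375) and 0x100 the +16 offset, though the exact encoding is the hardware's; treat this as a reading of the matrix, not a spec. The arithmetic itself, as a sketch using the equivalent 220/256 scale with rounding:

    #include <stdint.h>

    /*
     * Full-range (0-255) to limited-range (16-235) RGB, as in the CSC
     * matrix above: scale by ~0.8594 (220/256) and add 16.
     */
    static uint8_t full_to_limited(uint8_t in)
    {
        return (uint8_t)(((in * 220u + 128u) >> 8) + 16u);
    }

Spot check: 0 maps to 16 and 255 maps to 235, the limited-range black and white levels.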
*/ - HD_WRITE(VC4_HD_CSC_CTL, VC4_SET_FIELD(VC4_HD_CSC_CTL_ORDER_BGR, - VC4_HD_CSC_CTL_ORDER)); + HD_WRITE(VC4_HD_CSC_CTL, csc_ctl); HDMI_WRITE(VC4_HDMI_FIFO_CTL, VC4_HDMI_FIFO_CTL_MASTER_SLAVE_N); @@ -368,6 +506,8 @@ static void vc4_hdmi_encoder_disable(struct drm_encoder *encoder) struct drm_device *dev = encoder->dev; struct vc4_dev *vc4 = to_vc4_dev(dev); + HDMI_WRITE(VC4_HDMI_RAM_PACKET_CONFIG, 0); + HDMI_WRITE(VC4_HDMI_TX_PHY_RESET_CTL, 0xf << 16); HD_WRITE(VC4_HD_VID_CTL, HD_READ(VC4_HD_VID_CTL) & ~VC4_HD_VID_CTL_ENABLE); @@ -394,7 +534,7 @@ static void vc4_hdmi_encoder_enable(struct drm_encoder *encoder) VC4_HDMI_SCHEDULER_CONTROL_MODE_HDMI); ret = wait_for(HDMI_READ(VC4_HDMI_SCHEDULER_CONTROL) & - VC4_HDMI_SCHEDULER_CONTROL_HDMI_ACTIVE, 1); + VC4_HDMI_SCHEDULER_CONTROL_HDMI_ACTIVE, 1000); WARN_ONCE(ret, "Timeout waiting for " "VC4_HDMI_SCHEDULER_CONTROL_HDMI_ACTIVE\n"); } else { @@ -406,7 +546,7 @@ static void vc4_hdmi_encoder_enable(struct drm_encoder *encoder) ~VC4_HDMI_SCHEDULER_CONTROL_MODE_HDMI); ret = wait_for(!(HDMI_READ(VC4_HDMI_SCHEDULER_CONTROL) & - VC4_HDMI_SCHEDULER_CONTROL_HDMI_ACTIVE), 1); + VC4_HDMI_SCHEDULER_CONTROL_HDMI_ACTIVE), 1000); WARN_ONCE(ret, "Timeout waiting for " "!VC4_HDMI_SCHEDULER_CONTROL_HDMI_ACTIVE\n"); } @@ -420,9 +560,10 @@ static void vc4_hdmi_encoder_enable(struct drm_encoder *encoder) HDMI_READ(VC4_HDMI_SCHEDULER_CONTROL) | VC4_HDMI_SCHEDULER_CONTROL_VERT_ALWAYS_KEEPOUT); - /* XXX: Set HDMI_RAM_PACKET_CONFIG (1 << 16) and set - * up the infoframe. - */ + HDMI_WRITE(VC4_HDMI_RAM_PACKET_CONFIG, + VC4_HDMI_RAM_PACKET_ENABLE); + + vc4_hdmi_set_infoframes(encoder); drift = HDMI_READ(VC4_HDMI_FIFO_CTL); drift &= VC4_HDMI_FIFO_VALID_WRITE_MASK; diff --git a/drivers/gpu/drm/vc4/vc4_regs.h b/drivers/gpu/drm/vc4/vc4_regs.h index 160942a9180e..1aa44c2db556 100644 --- a/drivers/gpu/drm/vc4/vc4_regs.h +++ b/drivers/gpu/drm/vc4/vc4_regs.h @@ -175,6 +175,8 @@ # define PV_CONTROL_CLR_AT_START BIT(14) # define PV_CONTROL_TRIGGER_UNDERFLOW BIT(13) # define PV_CONTROL_WAIT_HSTART BIT(12) +# define PV_CONTROL_PIXEL_REP_MASK VC4_MASK(5, 4) +# define PV_CONTROL_PIXEL_REP_SHIFT 4 # define PV_CONTROL_CLK_SELECT_DSI_VEC 0 # define PV_CONTROL_CLK_SELECT_DPI_SMI_HDMI 1 # define PV_CONTROL_CLK_SELECT_MASK VC4_MASK(3, 2) @@ -183,6 +185,9 @@ # define PV_CONTROL_EN BIT(0) #define PV_V_CONTROL 0x04 +# define PV_VCONTROL_ODD_DELAY_MASK VC4_MASK(22, 6) +# define PV_VCONTROL_ODD_DELAY_SHIFT 6 +# define PV_VCONTROL_ODD_FIRST BIT(5) # define PV_VCONTROL_INTERLACE BIT(4) # define PV_VCONTROL_CONTINUOUS BIT(1) # define PV_VCONTROL_VIDEN BIT(0) @@ -438,6 +443,8 @@ #define VC4_HDMI_RAM_PACKET_CONFIG 0x0a0 # define VC4_HDMI_RAM_PACKET_ENABLE BIT(16) +#define VC4_HDMI_RAM_PACKET_STATUS 0x0a4 + #define VC4_HDMI_HORZA 0x0c4 # define VC4_HDMI_HORZA_VPOS BIT(14) # define VC4_HDMI_HORZA_HPOS BIT(13) @@ -499,6 +506,9 @@ #define VC4_HDMI_TX_PHY_RESET_CTL 0x2c0 +#define VC4_HDMI_GCP_0 0x400 +#define VC4_HDMI_PACKET_STRIDE 0x24 + #define VC4_HD_M_CTL 0x00c # define VC4_HD_M_REGISTER_FILE_STANDBY (3 << 6) # define VC4_HD_M_RAM_STANDBY (3 << 4) @@ -528,10 +538,17 @@ # define VC4_HD_CSC_CTL_MODE_SHIFT 2 # define VC4_HD_CSC_CTL_MODE_RGB_TO_SD_YPRPB 0 # define VC4_HD_CSC_CTL_MODE_RGB_TO_HD_YPRPB 1 -# define VC4_HD_CSC_CTL_MODE_CUSTOM 2 +# define VC4_HD_CSC_CTL_MODE_CUSTOM 3 # define VC4_HD_CSC_CTL_RGB2YCC BIT(1) # define VC4_HD_CSC_CTL_ENABLE BIT(0) +#define VC4_HD_CSC_12_11 0x044 +#define VC4_HD_CSC_14_13 0x048 +#define VC4_HD_CSC_22_21 0x04c +#define VC4_HD_CSC_24_23 0x050 +#define VC4_HD_CSC_32_31 
0x054 +#define VC4_HD_CSC_34_33 0x058 + #define VC4_HD_FRAME_COUNT 0x068 /* HVS display list information. */ diff --git a/drivers/gpu/drm/vc4/vc4_render_cl.c b/drivers/gpu/drm/vc4/vc4_render_cl.c index 0f12418725e5..08886a309757 100644 --- a/drivers/gpu/drm/vc4/vc4_render_cl.c +++ b/drivers/gpu/drm/vc4/vc4_render_cl.c @@ -45,6 +45,8 @@ struct vc4_rcl_setup { struct drm_gem_cma_object *rcl; u32 next_offset; + + u32 next_write_bo_index; }; static inline void rcl_u8(struct vc4_rcl_setup *setup, u8 val) @@ -407,6 +409,8 @@ static int vc4_rcl_msaa_surface_setup(struct vc4_exec_info *exec, if (!*obj) return -EINVAL; + exec->rcl_write_bo[exec->rcl_write_bo_count++] = *obj; + if (surf->offset & 0xf) { DRM_ERROR("MSAA write must be 16b aligned.\n"); return -EINVAL; @@ -417,7 +421,8 @@ static int vc4_rcl_msaa_surface_setup(struct vc4_exec_info *exec, static int vc4_rcl_surface_setup(struct vc4_exec_info *exec, struct drm_gem_cma_object **obj, - struct drm_vc4_submit_rcl_surface *surf) + struct drm_vc4_submit_rcl_surface *surf, + bool is_write) { uint8_t tiling = VC4_GET_FIELD(surf->bits, VC4_LOADSTORE_TILE_BUFFER_TILING); @@ -440,6 +445,9 @@ static int vc4_rcl_surface_setup(struct vc4_exec_info *exec, if (!*obj) return -EINVAL; + if (is_write) + exec->rcl_write_bo[exec->rcl_write_bo_count++] = *obj; + if (surf->flags & VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) { if (surf == &exec->args->zs_write) { DRM_ERROR("general zs write may not be a full-res.\n"); @@ -542,6 +550,8 @@ vc4_rcl_render_config_surface_setup(struct vc4_exec_info *exec, if (!*obj) return -EINVAL; + exec->rcl_write_bo[exec->rcl_write_bo_count++] = *obj; + if (tiling > VC4_TILING_FORMAT_LT) { DRM_ERROR("Bad tiling format\n"); return -EINVAL; @@ -599,15 +609,18 @@ int vc4_get_rcl(struct drm_device *dev, struct vc4_exec_info *exec) if (ret) return ret; - ret = vc4_rcl_surface_setup(exec, &setup.color_read, &args->color_read); + ret = vc4_rcl_surface_setup(exec, &setup.color_read, &args->color_read, + false); if (ret) return ret; - ret = vc4_rcl_surface_setup(exec, &setup.zs_read, &args->zs_read); + ret = vc4_rcl_surface_setup(exec, &setup.zs_read, &args->zs_read, + false); if (ret) return ret; - ret = vc4_rcl_surface_setup(exec, &setup.zs_write, &args->zs_write); + ret = vc4_rcl_surface_setup(exec, &setup.zs_write, &args->zs_write, + true); if (ret) return ret; diff --git a/drivers/gpu/drm/vc4/vc4_validate.c b/drivers/gpu/drm/vc4/vc4_validate.c index 9ce1d0adf882..26503e307438 100644 --- a/drivers/gpu/drm/vc4/vc4_validate.c +++ b/drivers/gpu/drm/vc4/vc4_validate.c @@ -267,6 +267,9 @@ validate_indexed_prim_list(VALIDATE_ARGS) if (!ib) return -EINVAL; + exec->bin_dep_seqno = max(exec->bin_dep_seqno, + to_vc4_bo(&ib->base)->write_seqno); + if (offset > ib->base.size || (ib->base.size - offset) / index_size < length) { DRM_ERROR("IB access overflow (%d + %d*%d > %zd)\n", @@ -555,8 +558,7 @@ static bool reloc_tex(struct vc4_exec_info *exec, void *uniform_data_u, struct vc4_texture_sample_info *sample, - uint32_t texture_handle_index) - + uint32_t texture_handle_index, bool is_cs) { struct drm_gem_cma_object *tex; uint32_t p0 = *(uint32_t *)(uniform_data_u + sample->p_offset[0]); @@ -714,6 +716,11 @@ reloc_tex(struct vc4_exec_info *exec, *validated_p0 = tex->paddr + p0; + if (is_cs) { + exec->bin_dep_seqno = max(exec->bin_dep_seqno, + to_vc4_bo(&tex->base)->write_seqno); + } + return true; fail: DRM_INFO("Texture p0 at %d: 0x%08x\n", sample->p_offset[0], p0); @@ -835,7 +842,8 @@ validate_gl_shader_rec(struct drm_device *dev, if 
(!reloc_tex(exec, uniform_data_u, &validated_shader->texture_samples[tex], - texture_handles_u[tex])) { + texture_handles_u[tex], + i == 2)) { return -EINVAL; } } @@ -867,6 +875,9 @@ validate_gl_shader_rec(struct drm_device *dev, uint32_t stride = *(uint8_t *)(pkt_u + o + 5); uint32_t max_index; + exec->bin_dep_seqno = max(exec->bin_dep_seqno, + to_vc4_bo(&vbo->base)->write_seqno); + if (state->addr & 0x8) stride |= (*(uint32_t *)(pkt_u + 100 + i * 4)) & ~0xff; |
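One detail of the validation code above worth calling out: the index-buffer bounds test is written as `offset > ib->base.size || (ib->base.size - offset) / index_size < length` rather than the naive `offset + length * index_size > size`, so no intermediate expression can overflow no matter how large the userspace-supplied values are. The same pattern in isolation:

    #include <stdbool.h>
    #include <stdint.h>

    /*
     * Validate that `length` elements of `elem_size` bytes starting at
     * `offset` fit inside a buffer of `size` bytes, without ever
     * forming offset + length * elem_size (which could wrap around).
     */
    static bool range_fits(uint64_t size, uint64_t offset,
                           uint64_t elem_size, uint64_t length)
    {
        if (elem_size == 0)
            return false;
        if (offset > size)
            return false;
        return (size - offset) / elem_size >= length;
    }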