From 62659920cf2113b76607b87595dbebe2f5f8601c Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Thu, 29 Jan 2015 14:13:40 +0000 Subject: drm/i915/skl: Remove the check enforcing VCS2 to be gen8 only We already track this in the intel_info struct. Signed-off-by: Damien Lespiau Reviewed-by: Rodrigo Vivi [danvet: Make the commit message a bit less terse.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 0bd3976d88e1..e1036c8e3dd1 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2597,19 +2597,13 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev) } /** - * Initialize the second BSD ring for Broadwell GT3. - * It is noted that this only exists on Broadwell GT3. + * Initialize the second BSD ring (eg. 
Broadwell GT3, Skylake GT3) */ int intel_init_bsd2_ring_buffer(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; struct intel_engine_cs *ring = &dev_priv->ring[VCS2]; - if ((INTEL_INFO(dev)->gen != 8)) { - DRM_ERROR("No dual-BSD ring on non-BDW machine\n"); - return -EINVAL; - } - ring->name = "bsd2 ring"; ring->id = VCS2; -- cgit v1.2.3 From 3b10653178473c44e8909b5f23ab6f515fb63259 Mon Sep 17 00:00:00 2001 From: "Hoath, Nicholas" Date: Thu, 5 Feb 2015 10:47:16 +0000 Subject: drm/i915: ring w/a initialisation for gen 9 Add framework for gen 9 HW WAs v1: Changed SOC specific WA function to gen 9 common function (Req: Damien Lespiau) Signed-off-by: Nick Hoath Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index e1036c8e3dd1..fbfe65ac590b 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -873,6 +873,11 @@ static int chv_init_workarounds(struct intel_engine_cs *ring) return 0; } +static int gen9_init_workarounds(struct intel_engine_cs *ring) +{ + return 0; +} + int init_workarounds_ring(struct intel_engine_cs *ring) { struct drm_device *dev = ring->dev; @@ -888,6 +893,9 @@ int init_workarounds_ring(struct intel_engine_cs *ring) if (IS_CHERRYVIEW(dev)) return chv_init_workarounds(ring); + if (IS_GEN9(dev)) + return gen9_init_workarounds(ring); + return 0; } -- cgit v1.2.3 From ab0dfafefd85b0abf61f0efad769803bee8e49fb Mon Sep 17 00:00:00 2001 From: "Hoath, Nicholas" Date: Thu, 5 Feb 2015 10:47:18 +0000 Subject: drm/i915/gen9: Implement WaDisablePartialInstShootdown v2: Don't add WaDisableThreadStallDopClockGating as not SKL WA. 
(Found by Damien Lespiau) Signed-off-by: Nick Hoath Reviewed-by: Damien Lespiau [danvet: Bikeshed commit message a bit as per Damien's suggestions.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index fbfe65ac590b..b869f1c68753 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -875,6 +875,13 @@ static int chv_init_workarounds(struct intel_engine_cs *ring) static int gen9_init_workarounds(struct intel_engine_cs *ring) { + struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + + /* WaDisablePartialInstShootdown:skl */ + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, + PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); + return 0; } -- cgit v1.2.3 From 1de4582f98db7eafd12695139aed7df6a0a0ff3a Mon Sep 17 00:00:00 2001 From: Nick Hoath Date: Thu, 5 Feb 2015 10:47:19 +0000 Subject: drm/i915/gen9: Implement WaDisableDgMirrorFixInHalfSliceChicken5 Move WaDisableDgMirrorFixInHalfSliceChicken5 to gen9_init_workarounds v2: Added stepping check v3: Removed unused register bitmap Signed-off-by: Nick Hoath [danvet: Bikesheds.] 
Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 8 -------- drivers/gpu/drm/i915/intel_ringbuffer.c | 10 ++++++++++ 2 files changed, 10 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index bebefe79f7ce..2b89aacdda90 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -63,14 +63,6 @@ static void gen9_init_clock_gating(struct drm_device *dev) I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | GEN8_SDEUNIT_CLOCK_GATE_DISABLE); - /* - * WaDisableDgMirrorFixInHalfSliceChicken5:skl - * This is a pre-production w/a. - */ - I915_WRITE(GEN9_HALF_SLICE_CHICKEN5, - I915_READ(GEN9_HALF_SLICE_CHICKEN5) & - ~GEN9_DG_MIRROR_FIX_ENABLE); - /* Wa4x4STCOptimizationDisable:skl */ I915_WRITE(CACHE_MODE_1, _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE)); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index b869f1c68753..248db5157e02 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -882,6 +882,16 @@ static int gen9_init_workarounds(struct intel_engine_cs *ring) WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); + if (INTEL_REVID(dev) == SKL_REVID_A0) { + /* + * WaDisableDgMirrorFixInHalfSliceChicken5:skl + * This is a pre-production w/a. + */ + I915_WRITE(GEN9_HALF_SLICE_CHICKEN5, + I915_READ(GEN9_HALF_SLICE_CHICKEN5) & + ~GEN9_DG_MIRROR_FIX_ENABLE); + } + return 0; } -- cgit v1.2.3 From 8424171e135ce956ed2473493b061909199572c7 Mon Sep 17 00:00:00 2001 From: Nick Hoath Date: Thu, 5 Feb 2015 10:47:20 +0000 Subject: drm/i915/gen9: h/w w/a: syncing dependencies between camera and graphics This one doesn't have one of these nice cryptic names unfortunately. 
v2: Added missing register bitmap Signed-off-by: Nick Hoath Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_ringbuffer.c | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index cd3430f931ed..dab4c1ebbc08 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -6213,6 +6213,7 @@ enum skl_disp_power_wells { #define HALF_SLICE_CHICKEN3 0xe184 #define HSW_SAMPLE_C_PERFORMANCE (1<<9) #define GEN8_CENTROID_PIXEL_OPT_DIS (1<<8) +#define GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC (1<<5) #define GEN8_SAMPLER_POWER_BYPASS_DIS (1<<1) /* Audio */ diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 248db5157e02..909430ffa7fe 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -882,6 +882,10 @@ static int gen9_init_workarounds(struct intel_engine_cs *ring) WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); + /* Syncing dependencies between camera and graphics */ + WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, + GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC); + if (INTEL_REVID(dev) == SKL_REVID_A0) { /* * WaDisableDgMirrorFixInHalfSliceChicken5:skl -- cgit v1.2.3 From cac23df48af531168625f5510912d227c7ce6b8b Mon Sep 17 00:00:00 2001 From: Nick Hoath Date: Thu, 5 Feb 2015 10:47:22 +0000 Subject: drm/i915/gen9: Implement WaEnableYV12BugFixInHalfSliceChicken7 Move WaEnableYV12BugFixInHalfSliceChicken7 to gen9_init_workarounds v2: Add stepping check. 
Signed-off-by: Nick Hoath Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 3 +++ drivers/gpu/drm/i915/intel_ringbuffer.c | 6 ++++++ 2 files changed, 9 insertions(+) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index dab4c1ebbc08..4ee1964d2c7b 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -6216,6 +6216,9 @@ enum skl_disp_power_wells { #define GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC (1<<5) #define GEN8_SAMPLER_POWER_BYPASS_DIS (1<<1) +#define GEN9_HALF_SLICE_CHICKEN7 0xe194 +#define GEN9_ENABLE_YV12_BUGFIX (1<<4) + /* Audio */ #define G4X_AUD_VID_DID (dev_priv->info.display_mmio_offset + 0x62020) #define INTEL_AUDIO_DEVCL 0x808629FB diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 909430ffa7fe..2ab447c711bc 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -896,6 +896,12 @@ static int gen9_init_workarounds(struct intel_engine_cs *ring) ~GEN9_DG_MIRROR_FIX_ENABLE); } + if (INTEL_REVID(dev) >= SKL_REVID_C0) { + /* WaEnableYV12BugFixInHalfSliceChicken7:skl */ + WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, + GEN9_ENABLE_YV12_BUGFIX); + } + return 0; } -- cgit v1.2.3 From 1840481f536b40289b61c13f9111f30f4019e5ff Mon Sep 17 00:00:00 2001 From: "Hoath, Nicholas" Date: Thu, 5 Feb 2015 10:47:23 +0000 Subject: drm/i915/gen9: Implement Wa4x4STCOptimizationDisable Move Wa4x4STCOptimizationDisable to gen9_init_workarounds v2: rebase Signed-off-by: Nick Hoath Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 4 ---- drivers/gpu/drm/i915/intel_ringbuffer.c | 3 +++ 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 
874ec9f2023c..3c64810d6853 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -64,10 +64,6 @@ static void gen9_init_clock_gating(struct drm_device *dev) I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | GEN8_SDEUNIT_CLOCK_GATE_DISABLE); } - - /* Wa4x4STCOptimizationDisable:skl */ - I915_WRITE(CACHE_MODE_1, - _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE)); } static void i915_pineview_get_mem_freq(struct drm_device *dev) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 2ab447c711bc..e35b341c3cef 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -902,6 +902,9 @@ static int gen9_init_workarounds(struct intel_engine_cs *ring) GEN9_ENABLE_YV12_BUGFIX); } + /* Wa4x4STCOptimizationDisable:skl */ + WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE); + return 0; } -- cgit v1.2.3 From 13bea49c8b203b0d2eb789c6f91c03de4e09cf4d Mon Sep 17 00:00:00 2001 From: "Hoath, Nicholas" Date: Thu, 5 Feb 2015 10:47:24 +0000 Subject: drm/i915/gen9: Implement WaForceEnableNonCoherent v2: Don't add WaHdcDisableFetchWhenMasked. Add stepping check for WaForceEnableNonCoherent Signed-off-by: Nick Hoath Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index e35b341c3cef..573b80f0c153 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -902,6 +902,17 @@ static int gen9_init_workarounds(struct intel_engine_cs *ring) GEN9_ENABLE_YV12_BUGFIX); } + if (INTEL_REVID(dev) <= SKL_REVID_D0) { + /* + *Use Force Non-Coherent whenever executing a 3D context. 
This + * is a workaround for a possible hang in the unlikely event + * a TLB invalidation occurs during a PSD flush. + */ + /* WaForceEnableNonCoherent:skl */ + WA_SET_BIT_MASKED(HDC_CHICKEN0, + HDC_FORCE_NON_COHERENT); + } + /* Wa4x4STCOptimizationDisable:skl */ WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE); -- cgit v1.2.3 From e90fff154ecb4517bad4b015bbe2af4699e96dca Mon Sep 17 00:00:00 2001 From: Nick Hoath Date: Fri, 6 Feb 2015 11:30:03 +0000 Subject: drm/i915: gen 9 h/w w/a Fix stepping check Fixed the stepping check on WaDisableDgMirrorFixInHalfSliceChicken5 to be for the correct SOC (Skylake) Signed-off-by: Nick Hoath Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 573b80f0c153..fb71e33ac4d7 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -886,7 +886,8 @@ static int gen9_init_workarounds(struct intel_engine_cs *ring) WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC); - if (INTEL_REVID(dev) == SKL_REVID_A0) { + if (INTEL_REVID(dev) >= SKL_REVID_A0 && + INTEL_REVID(dev) <= SKL_REVID_B0) { /* * WaDisableDgMirrorFixInHalfSliceChicken5:skl * This is a pre-production w/a. -- cgit v1.2.3 From 35cb6f3b4ee352bff28d2541909e30f193788b52 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Tue, 10 Feb 2015 10:31:00 +0000 Subject: drm/i915/bdw: Implement WaForceContextSaveRestoreNonCoherent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v2: Reorder defines (Ben) v3: More bikesheds, this time re-ordering comments! (Chris) Reviewed-by: Ben Widawsky Reviewed-by: Ville Syrjälä Signed-off-by: Damien Lespiau [danvet: Resolve conflict.] 
Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 5 +++-- drivers/gpu/drm/i915/intel_ringbuffer.c | 8 +++++--- 2 files changed, 8 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 4ee1964d2c7b..f13e4e4f29e2 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -5260,9 +5260,10 @@ enum skl_disp_power_wells { /* GEN8 chicken */ #define HDC_CHICKEN0 0x7300 -#define HDC_FORCE_NON_COHERENT (1<<4) -#define HDC_DONOT_FETCH_MEM_WHEN_MASKED (1<<11) #define HDC_FENCE_DEST_SLM_DISABLE (1<<14) +#define HDC_DONOT_FETCH_MEM_WHEN_MASKED (1<<11) +#define HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT (1<<5) +#define HDC_FORCE_NON_COHERENT (1<<4) /* WaCatErrorRejectionIssue */ #define GEN7_SQ_CHICKEN_MBCUNIT_CONFIG 0x9030 diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index fb71e33ac4d7..d62681748b87 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -788,12 +788,14 @@ static int bdw_init_workarounds(struct intel_engine_cs *ring) * workaround for for a possible hang in the unlikely event a TLB * invalidation occurs during a PSD flush. */ - /* WaForceEnableNonCoherent:bdw */ - /* WaHdcDisableFetchWhenMasked:bdw */ - /* WaDisableFenceDestinationToSLM:bdw (GT3 pre-production) */ WA_SET_BIT_MASKED(HDC_CHICKEN0, + /* WaForceEnableNonCoherent:bdw */ HDC_FORCE_NON_COHERENT | + /* WaForceContextSaveRestoreNonCoherent:bdw */ + HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | + /* WaHdcDisableFetchWhenMasked:bdw */ HDC_DONOT_FETCH_MEM_WHEN_MASKED | + /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */ (IS_BDW_GT3(dev) ? 
HDC_FENCE_DEST_SLM_DISABLE : 0)); /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0: -- cgit v1.2.3 From af75f2691870c575030bbd42adf17457afbe7242 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Tue, 10 Feb 2015 19:32:17 +0000 Subject: drm/i915: Make intel_ring_setup_status_page() static This function is only used in intel_ringbuffer.c, so restrict it to that file. The function was moved around to avoid a forward declaration and group it with its user. Signed-off-by: Damien Lespiau [danvet: Squash in fixup from Wu Fengguang.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 124 ++++++++++++++++---------------- drivers/gpu/drm/i915/intel_ringbuffer.h | 1 - 2 files changed, 62 insertions(+), 63 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index d62681748b87..9ebc11e6bb49 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -502,6 +502,68 @@ static void ring_setup_phys_status_page(struct intel_engine_cs *ring) I915_WRITE(HWS_PGA, addr); } +static void intel_ring_setup_status_page(struct intel_engine_cs *ring) +{ + struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = ring->dev->dev_private; + u32 mmio = 0; + + /* The ring status page addresses are no longer next to the rest of + * the ring registers as of gen7. + */ + if (IS_GEN7(dev)) { + switch (ring->id) { + case RCS: + mmio = RENDER_HWS_PGA_GEN7; + break; + case BCS: + mmio = BLT_HWS_PGA_GEN7; + break; + /* + * VCS2 actually doesn't exist on Gen7. 
Only shut up + * gcc switch check warning + */ + case VCS2: + case VCS: + mmio = BSD_HWS_PGA_GEN7; + break; + case VECS: + mmio = VEBOX_HWS_PGA_GEN7; + break; + } + } else if (IS_GEN6(ring->dev)) { + mmio = RING_HWS_PGA_GEN6(ring->mmio_base); + } else { + /* XXX: gen8 returns to sanity */ + mmio = RING_HWS_PGA(ring->mmio_base); + } + + I915_WRITE(mmio, (u32)ring->status_page.gfx_addr); + POSTING_READ(mmio); + + /* + * Flush the TLB for this page + * + * FIXME: These two bits have disappeared on gen8, so a question + * arises: do we still need this and if so how should we go about + * invalidating the TLB? + */ + if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8) { + u32 reg = RING_INSTPM(ring->mmio_base); + + /* ring should be idle before issuing a sync flush*/ + WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0); + + I915_WRITE(reg, + _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE | + INSTPM_SYNC_FLUSH)); + if (wait_for((I915_READ(reg) & INSTPM_SYNC_FLUSH) == 0, + 1000)) + DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n", + ring->name); + } +} + static bool stop_ring(struct intel_engine_cs *ring) { struct drm_i915_private *dev_priv = to_i915(ring->dev); @@ -1438,68 +1500,6 @@ i8xx_ring_put_irq(struct intel_engine_cs *ring) spin_unlock_irqrestore(&dev_priv->irq_lock, flags); } -void intel_ring_setup_status_page(struct intel_engine_cs *ring) -{ - struct drm_device *dev = ring->dev; - struct drm_i915_private *dev_priv = ring->dev->dev_private; - u32 mmio = 0; - - /* The ring status page addresses are no longer next to the rest of - * the ring registers as of gen7. - */ - if (IS_GEN7(dev)) { - switch (ring->id) { - case RCS: - mmio = RENDER_HWS_PGA_GEN7; - break; - case BCS: - mmio = BLT_HWS_PGA_GEN7; - break; - /* - * VCS2 actually doesn't exist on Gen7. 
Only shut up - * gcc switch check warning - */ - case VCS2: - case VCS: - mmio = BSD_HWS_PGA_GEN7; - break; - case VECS: - mmio = VEBOX_HWS_PGA_GEN7; - break; - } - } else if (IS_GEN6(ring->dev)) { - mmio = RING_HWS_PGA_GEN6(ring->mmio_base); - } else { - /* XXX: gen8 returns to sanity */ - mmio = RING_HWS_PGA(ring->mmio_base); - } - - I915_WRITE(mmio, (u32)ring->status_page.gfx_addr); - POSTING_READ(mmio); - - /* - * Flush the TLB for this page - * - * FIXME: These two bits have disappeared on gen8, so a question - * arises: do we still need this and if so how should we go about - * invalidating the TLB? - */ - if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8) { - u32 reg = RING_INSTPM(ring->mmio_base); - - /* ring should be idle before issuing a sync flush*/ - WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0); - - I915_WRITE(reg, - _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE | - INSTPM_SYNC_FLUSH)); - if (wait_for((I915_READ(reg) & INSTPM_SYNC_FLUSH) == 0, - 1000)) - DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n", - ring->name); - } -} - static int bsd_ring_flush(struct intel_engine_cs *ring, u32 invalidate_domains, diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 714f3fdd57d2..b6c484fe7a59 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -425,7 +425,6 @@ int intel_init_blt_ring_buffer(struct drm_device *dev); int intel_init_vebox_ring_buffer(struct drm_device *dev); u64 intel_ring_get_active_head(struct intel_engine_cs *ring); -void intel_ring_setup_status_page(struct intel_engine_cs *ring); int init_workarounds_ring(struct intel_engine_cs *ring); -- cgit v1.2.3 From 8d2054941071d10c8bbe120dd2160be8ae21f267 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Mon, 9 Feb 2015 19:33:15 +0000 Subject: drm/i915/skl: Introduce a SKL specific init_workarounds() This function will host SKL-only W/As. 
Signed-off-by: Damien Lespiau Reviewed-by: Nick Hoath Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 9ebc11e6bb49..ad9d7eb86ef6 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -984,6 +984,13 @@ static int gen9_init_workarounds(struct intel_engine_cs *ring) return 0; } +static int skl_init_workarounds(struct intel_engine_cs *ring) +{ + gen9_init_workarounds(ring); + + return 0; +} + int init_workarounds_ring(struct intel_engine_cs *ring) { struct drm_device *dev = ring->dev; @@ -999,7 +1006,9 @@ int init_workarounds_ring(struct intel_engine_cs *ring) if (IS_CHERRYVIEW(dev)) return chv_init_workarounds(ring); - if (IS_GEN9(dev)) + if (IS_SKYLAKE(dev)) + return skl_init_workarounds(ring); + else if (IS_GEN9(dev)) return gen9_init_workarounds(ring); return 0; -- cgit v1.2.3 From 9370cd987e91d6d652ebe6d883fbc51b10df2403 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Mon, 9 Feb 2015 19:33:17 +0000 Subject: drm/i915/skl: Implement WaDisablePartialResolveInVc Signed-off-by: Damien Lespiau Reviewed-by: Nick Hoath Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_ringbuffer.c | 3 +++ 2 files changed, 4 insertions(+) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index b00d323095af..b610764768d7 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1483,6 +1483,7 @@ enum skl_disp_power_wells { #define CACHE_MODE_1 0x7004 /* IVB+ */ #define PIXEL_SUBSPAN_COLLECT_OPT_DISABLE (1<<6) #define GEN8_4x4_STC_OPTIMIZATION_DISABLE (1<<6) +#define GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE (1<<1) #define GEN6_BLITTER_ECOSKPD 
0x221d0 #define GEN6_BLITTER_LOCK_SHIFT 16 diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index ad9d7eb86ef6..29873ff2dd8d 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -981,6 +981,9 @@ static int gen9_init_workarounds(struct intel_engine_cs *ring) /* Wa4x4STCOptimizationDisable:skl */ WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE); + /* WaDisablePartialResolveInVc:skl */ + WA_SET_BIT_MASKED(CACHE_MODE_1, GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE); + return 0; } -- cgit v1.2.3 From e2db7071f14b7ac095a24448e9edd036ba332da3 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Mon, 9 Feb 2015 19:33:21 +0000 Subject: drm/i915/skl: Implement WaCcsTlbPrefetchDisable:skl Signed-off-by: Damien Lespiau Reviewed-by: Nick Hoath Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_ringbuffer.c | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index ecc14f558744..8c9e15073e38 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -6209,6 +6209,7 @@ enum skl_disp_power_wells { #define GEN9_HALF_SLICE_CHICKEN5 0xe188 #define GEN9_DG_MIRROR_FIX_ENABLE (1<<5) +#define GEN9_CCS_TLB_PREFETCH_ENABLE (1<<3) #define GEN8_ROW_CHICKEN 0xe4f0 #define PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE (1<<8) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 29873ff2dd8d..3c66d80d050a 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -984,6 +984,10 @@ static int gen9_init_workarounds(struct intel_engine_cs *ring) /* WaDisablePartialResolveInVc:skl */ WA_SET_BIT_MASKED(CACHE_MODE_1, GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE); + /* WaCcsTlbPrefetchDisable:skl */ + 
WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5, + GEN9_CCS_TLB_PREFETCH_ENABLE); + return 0; } -- cgit v1.2.3 From 65ca7514e21adbee25b8175fc909759c735d00ff Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Mon, 9 Feb 2015 19:33:22 +0000 Subject: drm/i915/skl: Implement WaBarrierPerformanceFixDisable Signed-off-by: Damien Lespiau Reviewed-by: Nick Hoath Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_ringbuffer.c | 7 +++++++ 2 files changed, 8 insertions(+) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 8c9e15073e38..39bdbf9688e4 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -5272,6 +5272,7 @@ enum skl_disp_power_wells { #define HDC_DONOT_FETCH_MEM_WHEN_MASKED (1<<11) #define HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT (1<<5) #define HDC_FORCE_NON_COHERENT (1<<4) +#define HDC_BARRIER_PERFORMANCE_DISABLE (1<<10) /* WaCatErrorRejectionIssue */ #define GEN7_SQ_CHICKEN_MBCUNIT_CONFIG 0x9030 diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 3c66d80d050a..dde0bec7aefd 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -934,6 +934,13 @@ static int chv_init_workarounds(struct intel_engine_cs *ring) GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4); + if (INTEL_REVID(dev) == SKL_REVID_C0 || + INTEL_REVID(dev) == SKL_REVID_D0) + /* WaBarrierPerformanceFixDisable:skl */ + WA_SET_BIT_MASKED(HDC_CHICKEN0, + HDC_FENCE_DEST_SLM_DISABLE | + HDC_BARRIER_PERFORMANCE_DISABLE); + return 0; } -- cgit v1.2.3 From 183c6daceb7efa512ba93efd95766c59f175bbcf Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Mon, 9 Feb 2015 19:33:11 +0000 Subject: drm/i915/skl: Implement WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken Signed-off-by: Damien Lespiau Reviewed-by: Nick Hoath Signed-off-by: Daniel Vetter --- 
drivers/gpu/drm/i915/i915_reg.h | 4 ++++ drivers/gpu/drm/i915/intel_ringbuffer.c | 8 ++++++++ 2 files changed, 12 insertions(+) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 39bdbf9688e4..0b522d3f529d 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -5242,12 +5242,16 @@ enum skl_disp_power_wells { /* GEN7 chicken */ #define GEN7_COMMON_SLICE_CHICKEN1 0x7010 # define GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC ((1<<10) | (1<<26)) +# define GEN9_RHWO_OPTIMIZATION_DISABLE (1<<14) #define COMMON_SLICE_CHICKEN2 0x7014 # define GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE (1<<0) #define HIZ_CHICKEN 0x7018 # define CHV_HZ_8X8_MODE_IN_1X (1<<15) +#define GEN9_SLICE_COMMON_ECO_CHICKEN0 0x7308 +#define DISABLE_PIXEL_MASK_CAMMING (1<<14) + #define GEN7_L3SQCREG1 0xB010 #define VLV_B0_WA_L3SQCREG1_VALUE 0x00D30000 diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index dde0bec7aefd..e9a85a575a1c 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -968,6 +968,14 @@ static int gen9_init_workarounds(struct intel_engine_cs *ring) ~GEN9_DG_MIRROR_FIX_ENABLE); } + if (IS_SKYLAKE(dev) && INTEL_REVID(dev) <= SKL_REVID_B0) { + /* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:skl */ + WA_SET_BIT_MASKED(GEN7_COMMON_SLICE_CHICKEN1, + GEN9_RHWO_OPTIMIZATION_DISABLE); + WA_SET_BIT_MASKED(GEN9_SLICE_COMMON_ECO_CHICKEN0, + DISABLE_PIXEL_MASK_CAMMING); + } + if (INTEL_REVID(dev) >= SKL_REVID_C0) { /* WaEnableYV12BugFixInHalfSliceChicken7:skl */ WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, -- cgit v1.2.3 From 35c8ce6ac5c2efedf76912c86b2021a6e4ec1655 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Wed, 11 Feb 2015 18:21:43 +0000 Subject: drm/i915/skl: Fix always true comparison in a revision id check It's always a good idea to keep static analysis happy (also because it prompts 
doing the check like I proposed :), this time smatch complains: drivers/gpu/drm/i915/intel_ringbuffer.c:891 gen9_init_workarounds() warn: always true condition '((->dev->pdev->revision) >= (0)) => (0-255 >= 0)' That's because revision is a u8. Tweak a bit the condition then. Cc: Nick Hoath Signed-off-by: Damien Lespiau Reviewed-by: Nick Hoath Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index e9a85a575a1c..ab8ce4ceed2e 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -957,8 +957,8 @@ static int gen9_init_workarounds(struct intel_engine_cs *ring) WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC); - if (INTEL_REVID(dev) >= SKL_REVID_A0 && - INTEL_REVID(dev) <= SKL_REVID_B0) { + if (INTEL_REVID(dev) == SKL_REVID_A0 || + INTEL_REVID(dev) == SKL_REVID_B0) { /* * WaDisableDgMirrorFixInHalfSliceChicken5:skl * This is a pre-production w/a. -- cgit v1.2.3 From a86eb582e3863daa00f6f60355d7fb91252d0e43 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Wed, 11 Feb 2015 18:21:44 +0000 Subject: drm/i915/skl: Use a LRI for WaDisableDgMirrorFixInHalfSliceChicken5 I have no idea how that crept in, but we need to do the write from the ring and this is a masked register. Two fixes in 1! 
Cc: Nick Hoath Signed-off-by: Damien Lespiau Reviewed-by: Nick Hoath Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index ab8ce4ceed2e..e758c0592675 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -959,13 +959,9 @@ static int gen9_init_workarounds(struct intel_engine_cs *ring) if (INTEL_REVID(dev) == SKL_REVID_A0 || INTEL_REVID(dev) == SKL_REVID_B0) { - /* - * WaDisableDgMirrorFixInHalfSliceChicken5:skl - * This is a pre-production w/a. - */ - I915_WRITE(GEN9_HALF_SLICE_CHICKEN5, - I915_READ(GEN9_HALF_SLICE_CHICKEN5) & - ~GEN9_DG_MIRROR_FIX_ENABLE); + /* WaDisableDgMirrorFixInHalfSliceChicken5:skl */ + WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5, + GEN9_DG_MIRROR_FIX_ENABLE); } if (IS_SKYLAKE(dev) && INTEL_REVID(dev) <= SKL_REVID_B0) { -- cgit v1.2.3 From d0bbbc4faf7bc1225ffd5d159fbe2c8dfef75333 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Mon, 9 Feb 2015 19:33:16 +0000 Subject: drm/i915/skl: Implement WaDisablePowerCompilerClockGating Signed-off-by: Damien Lespiau Reviewed-by: Nick Hoath Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 5 +++-- drivers/gpu/drm/i915/intel_ringbuffer.c | 8 ++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 5814f67ae86d..f67e290b5475 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -5256,8 +5256,9 @@ enum skl_disp_power_wells { #define COMMON_SLICE_CHICKEN2 0x7014 # define GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE (1<<0) -#define HIZ_CHICKEN 0x7018 -# define CHV_HZ_8X8_MODE_IN_1X (1<<15) +#define HIZ_CHICKEN 0x7018 +# define CHV_HZ_8X8_MODE_IN_1X 
(1<<15) +# define BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE (1<<3) #define GEN9_SLICE_COMMON_ECO_CHICKEN0 0x7308 #define DISABLE_PIXEL_MASK_CAMMING (1<<14) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index e758c0592675..4570fe172b79 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1004,8 +1004,16 @@ static int gen9_init_workarounds(struct intel_engine_cs *ring) static int skl_init_workarounds(struct intel_engine_cs *ring) { + struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + gen9_init_workarounds(ring); + /* WaDisablePowerCompilerClockGating:skl */ + if (INTEL_REVID(dev) == SKL_REVID_B0) + WA_SET_BIT_MASKED(HIZ_CHICKEN, + BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE); + return 0; } -- cgit v1.2.3 From b76687910693b1f6c32a3251a8291d67363bba34 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Sat, 14 Feb 2015 18:30:29 +0000 Subject: drm/i915/skl: Tune IZ hashing when subslices are unbalanced When one EU is disabled in a particular subslice, we can tune how the work is spread between subslices to improve EU utilization. v2: - Use a bitfield to record which subslice(s) has(have) 7 EUs. That will also make the machinery work if several subslices have 7 EUs. (Jeff Mcgee) - Only apply the different hashing algorithm if the slice is effectively unbalanced by checking there's a single subslice with 7 EUs. 
(Jeff Mcgee) v3: Fix typo in comment (Jeff Mcgee) Issue: VIZ-3845 Cc: Jeff Mcgee Reviewed-by: Jeff Mcgee Signed-off-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_dma.c | 17 ++++++++++--- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_reg.h | 2 ++ drivers/gpu/drm/i915/intel_ringbuffer.c | 45 ++++++++++++++++++++++++++++++++- 4 files changed, 62 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 9a365b40b50e..f9992ca11d10 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -650,13 +650,24 @@ static void intel_device_info_runtime_init(struct drm_device *dev) continue; for (ss = 0; ss < ss_max; ss++) { + u32 n_disabled; + if (ss_disable & (0x1 << ss)) /* skip disabled subslice */ continue; - info->eu_total += eu_max - - hweight8(eu_disable[s] >> - (ss * eu_max)); + n_disabled = hweight8(eu_disable[s] >> + (ss * eu_max)); + + /* + * Record which subslice(s) has(have) 7 EUs. we + * can tune the hash used to spread work among + * subslices if they are unbalanced. + */ + if (eu_max - n_disabled == 7) + info->subslice_7eu[s] |= 1 << ss; + + info->eu_total += eu_max - n_disabled; } } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 61d41abde2e9..4280d0b292da 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -700,6 +700,8 @@ struct intel_device_info { u8 subslice_per_slice; u8 eu_total; u8 eu_per_subslice; + /* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? 
*/ + u8 subslice_7eu[3]; u8 has_slice_pg:1; u8 has_subslice_pg:1; u8 has_eu_pg:1; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 5fab90c84c5d..c2124119692d 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1351,6 +1351,8 @@ enum skl_disp_power_wells { #define GEN6_WIZ_HASHING_16x4 GEN6_WIZ_HASHING(1, 0) #define GEN6_WIZ_HASHING_MASK GEN6_WIZ_HASHING(1, 1) #define GEN6_TD_FOUR_ROW_DISPATCH_DISABLE (1 << 5) +#define GEN9_IZ_HASHING_MASK(slice) (0x3 << (slice * 2)) +#define GEN9_IZ_HASHING(slice, val) ((val) << (slice * 2)) #define GFX_MODE 0x02520 #define GFX_MODE_GEN7 0x0229c diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 4570fe172b79..665985d5fcf4 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1002,6 +1002,49 @@ static int gen9_init_workarounds(struct intel_engine_cs *ring) return 0; } +static int skl_tune_iz_hashing(struct intel_engine_cs *ring) +{ + struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + u8 vals[3] = { 0, 0, 0 }; + unsigned int i; + + for (i = 0; i < 3; i++) { + u8 ss; + + /* + * Only consider slices where one, and only one, subslice has 7 + * EUs + */ + if (hweight8(dev_priv->info.subslice_7eu[i]) != 1) + continue; + + /* + * subslice_7eu[i] != 0 (because of the check above) and + * ss_max == 4 (maximum number of subslices possible per slice) + * + * -> 0 <= ss <= 3; + */ + ss = ffs(dev_priv->info.subslice_7eu[i]) - 1; + vals[i] = 3 - ss; + } + + if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0) + return 0; + + /* Tune IZ hashing. 
See intel_device_info_runtime_init() */ + WA_SET_FIELD_MASKED(GEN7_GT_MODE, + GEN9_IZ_HASHING_MASK(2) | + GEN9_IZ_HASHING_MASK(1) | + GEN9_IZ_HASHING_MASK(0), + GEN9_IZ_HASHING(2, vals[2]) | + GEN9_IZ_HASHING(1, vals[1]) | + GEN9_IZ_HASHING(0, vals[0])); + + return 0; +} + + static int skl_init_workarounds(struct intel_engine_cs *ring) { struct drm_device *dev = ring->dev; @@ -1014,7 +1057,7 @@ static int skl_init_workarounds(struct intel_engine_cs *ring) WA_SET_BIT_MASKED(HIZ_CHICKEN, BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE); - return 0; + return skl_tune_iz_hashing(ring); } int init_workarounds_ring(struct intel_engine_cs *ring) -- cgit v1.2.3 From 8e004efc16541e7f6e35673449195db5d1f92f40 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Fri, 13 Feb 2015 11:48:10 +0000 Subject: drm/i915: Rename 'flags' to 'dispatch_flags' for better code reading There is a flags word that is passed through the execbuffer code path all the way from initial decoding of the user parameters down to the very final dispatch buffer call. It is simply called 'flags'. Unfortunately, there are many other flags words floating around in the same blocks of code. Even more once the GPU scheduler arrives. This patch makes it more obvious exactly which flags word is which by renaming 'flags' to 'dispatch_flags'. Note that the bit definitions for this flags word already have an 'I915_DISPATCH_' prefix on them and so are not quite so ambiguous. OTC-Jira: VIZ-1587 Signed-off-by: John Harrison [danvet: Resolve conflict with Chris' rework of the bb parsing.] 
Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 25 +++++++++++---------- drivers/gpu/drm/i915/intel_lrc.c | 10 ++++----- drivers/gpu/drm/i915/intel_lrc.h | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 35 +++++++++++++++++------------- drivers/gpu/drm/i915/intel_ringbuffer.h | 4 ++-- 5 files changed, 41 insertions(+), 35 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 82636aa7052d..85a6adaba258 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1127,7 +1127,7 @@ i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file, struct drm_i915_gem_execbuffer2 *args, struct list_head *vmas, struct drm_i915_gem_object *batch_obj, - u64 exec_start, u32 flags) + u64 exec_start, u32 dispatch_flags) { struct drm_clip_rect *cliprects = NULL; struct drm_i915_private *dev_priv = dev->dev_private; @@ -1255,19 +1255,19 @@ i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file, ret = ring->dispatch_execbuffer(ring, exec_start, exec_len, - flags); + dispatch_flags); if (ret) goto error; } } else { ret = ring->dispatch_execbuffer(ring, exec_start, exec_len, - flags); + dispatch_flags); if (ret) return ret; } - trace_i915_gem_ring_dispatch(intel_ring_get_request(ring), flags); + trace_i915_gem_ring_dispatch(intel_ring_get_request(ring), dispatch_flags); i915_gem_execbuffer_move_to_active(vmas, ring); i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj); @@ -1342,7 +1342,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, struct i915_address_space *vm; const u32 ctx_id = i915_execbuffer2_get_context_id(*args); u64 exec_start = args->batch_start_offset; - u32 flags; + u32 dispatch_flags; int ret; bool need_relocs; @@ -1353,15 +1353,15 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, if (ret) return 
ret; - flags = 0; + dispatch_flags = 0; if (args->flags & I915_EXEC_SECURE) { if (!file->is_master || !capable(CAP_SYS_ADMIN)) return -EPERM; - flags |= I915_DISPATCH_SECURE; + dispatch_flags |= I915_DISPATCH_SECURE; } if (args->flags & I915_EXEC_IS_PINNED) - flags |= I915_DISPATCH_PINNED; + dispatch_flags |= I915_DISPATCH_PINNED; if ((args->flags & I915_EXEC_RING_MASK) > LAST_USER_RING) { DRM_DEBUG("execbuf with unknown ring: %d\n", @@ -1501,7 +1501,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, * this check when that is fixed. */ if (USES_FULL_PPGTT(dev)) - flags |= I915_DISPATCH_SECURE; + dispatch_flags |= I915_DISPATCH_SECURE; exec_start = 0; } @@ -1511,7 +1511,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, /* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure * batch" bit. Hence we need to pin secure batches into the global gtt. * hsw should have this fixed, but bdw mucks it up again. */ - if (flags & I915_DISPATCH_SECURE) { + if (dispatch_flags & I915_DISPATCH_SECURE) { /* * So on first glance it looks freaky that we pin the batch here * outside of the reservation loop. But: @@ -1531,7 +1531,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, exec_start += i915_gem_obj_offset(batch_obj, vm); ret = dev_priv->gt.do_execbuf(dev, file, ring, ctx, args, - &eb->vmas, batch_obj, exec_start, flags); + &eb->vmas, batch_obj, exec_start, + dispatch_flags); /* * FIXME: We crucially rely upon the active tracking for the (ppgtt) @@ -1539,7 +1540,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, * needs to be adjusted to also track the ggtt batch vma properly as * active. 
*/ - if (flags & I915_DISPATCH_SECURE) + if (dispatch_flags & I915_DISPATCH_SECURE) i915_gem_object_ggtt_unpin(batch_obj); err: /* the request owns the ref now */ diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 9ef5fcde1300..82c6aaf05803 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -620,7 +620,7 @@ static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf, * @vmas: list of vmas. * @batch_obj: the batchbuffer to submit. * @exec_start: batchbuffer start virtual address pointer. - * @flags: translated execbuffer call flags. + * @dispatch_flags: translated execbuffer call flags. * * This is the evil twin version of i915_gem_ringbuffer_submission. It abstracts * away the submission details of the execbuffer ioctl call. @@ -633,7 +633,7 @@ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file, struct drm_i915_gem_execbuffer2 *args, struct list_head *vmas, struct drm_i915_gem_object *batch_obj, - u64 exec_start, u32 flags) + u64 exec_start, u32 dispatch_flags) { struct drm_i915_private *dev_priv = dev->dev_private; struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf; @@ -706,7 +706,7 @@ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file, dev_priv->relative_constants_mode = instp_mode; } - ret = ring->emit_bb_start(ringbuf, ctx, exec_start, flags); + ret = ring->emit_bb_start(ringbuf, ctx, exec_start, dispatch_flags); if (ret) return ret; @@ -1163,9 +1163,9 @@ static int gen9_init_render_ring(struct intel_engine_cs *ring) static int gen8_emit_bb_start(struct intel_ringbuffer *ringbuf, struct intel_context *ctx, - u64 offset, unsigned flags) + u64 offset, unsigned dispatch_flags) { - bool ppgtt = !(flags & I915_DISPATCH_SECURE); + bool ppgtt = !(dispatch_flags & I915_DISPATCH_SECURE); int ret; ret = intel_logical_ring_begin(ringbuf, ctx, 4); diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h 
index 5dd0ecaf6128..adb731e49c57 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -84,7 +84,7 @@ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file, struct drm_i915_gem_execbuffer2 *args, struct list_head *vmas, struct drm_i915_gem_object *batch_obj, - u64 exec_start, u32 flags); + u64 exec_start, u32 dispatch_flags); u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj); void intel_lrc_irq_handler(struct intel_engine_cs *ring); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 665985d5fcf4..4a4a7aec0fc3 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1741,7 +1741,7 @@ gen8_ring_put_irq(struct intel_engine_cs *ring) static int i965_dispatch_execbuffer(struct intel_engine_cs *ring, u64 offset, u32 length, - unsigned flags) + unsigned dispatch_flags) { int ret; @@ -1752,7 +1752,8 @@ i965_dispatch_execbuffer(struct intel_engine_cs *ring, intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT | - (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965)); + (dispatch_flags & I915_DISPATCH_SECURE ? 
+ 0 : MI_BATCH_NON_SECURE_I965)); intel_ring_emit(ring, offset); intel_ring_advance(ring); @@ -1765,8 +1766,8 @@ i965_dispatch_execbuffer(struct intel_engine_cs *ring, #define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT) static int i830_dispatch_execbuffer(struct intel_engine_cs *ring, - u64 offset, u32 len, - unsigned flags) + u64 offset, u32 len, + unsigned dispatch_flags) { u32 cs_offset = ring->scratch.gtt_offset; int ret; @@ -1784,7 +1785,7 @@ i830_dispatch_execbuffer(struct intel_engine_cs *ring, intel_ring_emit(ring, MI_NOOP); intel_ring_advance(ring); - if ((flags & I915_DISPATCH_PINNED) == 0) { + if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) { if (len > I830_BATCH_LIMIT) return -ENOSPC; @@ -1816,7 +1817,8 @@ i830_dispatch_execbuffer(struct intel_engine_cs *ring, return ret; intel_ring_emit(ring, MI_BATCH_BUFFER); - intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE)); + intel_ring_emit(ring, offset | (dispatch_flags & I915_DISPATCH_SECURE ? + 0 : MI_BATCH_NON_SECURE)); intel_ring_emit(ring, offset + len - 8); intel_ring_emit(ring, MI_NOOP); intel_ring_advance(ring); @@ -1827,7 +1829,7 @@ i830_dispatch_execbuffer(struct intel_engine_cs *ring, static int i915_dispatch_execbuffer(struct intel_engine_cs *ring, u64 offset, u32 len, - unsigned flags) + unsigned dispatch_flags) { int ret; @@ -1836,7 +1838,8 @@ i915_dispatch_execbuffer(struct intel_engine_cs *ring, return ret; intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT); - intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE)); + intel_ring_emit(ring, offset | (dispatch_flags & I915_DISPATCH_SECURE ? 
+ 0 : MI_BATCH_NON_SECURE)); intel_ring_advance(ring); return 0; @@ -2395,9 +2398,10 @@ static int gen6_bsd_ring_flush(struct intel_engine_cs *ring, static int gen8_ring_dispatch_execbuffer(struct intel_engine_cs *ring, u64 offset, u32 len, - unsigned flags) + unsigned dispatch_flags) { - bool ppgtt = USES_PPGTT(ring->dev) && !(flags & I915_DISPATCH_SECURE); + bool ppgtt = USES_PPGTT(ring->dev) && + !(dispatch_flags & I915_DISPATCH_SECURE); int ret; ret = intel_ring_begin(ring, 4); @@ -2416,8 +2420,8 @@ gen8_ring_dispatch_execbuffer(struct intel_engine_cs *ring, static int hsw_ring_dispatch_execbuffer(struct intel_engine_cs *ring, - u64 offset, u32 len, - unsigned flags) + u64 offset, u32 len, + unsigned dispatch_flags) { int ret; @@ -2427,7 +2431,7 @@ hsw_ring_dispatch_execbuffer(struct intel_engine_cs *ring, intel_ring_emit(ring, MI_BATCH_BUFFER_START | - (flags & I915_DISPATCH_SECURE ? + (dispatch_flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW)); /* bit0-7 is the length on GEN6+ */ intel_ring_emit(ring, offset); @@ -2439,7 +2443,7 @@ hsw_ring_dispatch_execbuffer(struct intel_engine_cs *ring, static int gen6_ring_dispatch_execbuffer(struct intel_engine_cs *ring, u64 offset, u32 len, - unsigned flags) + unsigned dispatch_flags) { int ret; @@ -2449,7 +2453,8 @@ gen6_ring_dispatch_execbuffer(struct intel_engine_cs *ring, intel_ring_emit(ring, MI_BATCH_BUFFER_START | - (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965)); + (dispatch_flags & I915_DISPATCH_SECURE ? 
+ 0 : MI_BATCH_NON_SECURE_I965)); /* bit0-7 is the length on GEN6+ */ intel_ring_emit(ring, offset); intel_ring_advance(ring); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 39183fcbdcf3..8f3b49a23ccf 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -164,7 +164,7 @@ struct intel_engine_cs { u32 seqno); int (*dispatch_execbuffer)(struct intel_engine_cs *ring, u64 offset, u32 length, - unsigned flags); + unsigned dispatch_flags); #define I915_DISPATCH_SECURE 0x1 #define I915_DISPATCH_PINNED 0x2 void (*cleanup)(struct intel_engine_cs *ring); @@ -242,7 +242,7 @@ struct intel_engine_cs { u32 flush_domains); int (*emit_bb_start)(struct intel_ringbuffer *ringbuf, struct intel_context *ctx, - u64 offset, unsigned flags); + u64 offset, unsigned dispatch_flags); /** * List of objects currently involved in rendering from the -- cgit v1.2.3 From 98e1bd4ae68e0a122de21795c946ba36a8259f70 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Fri, 13 Feb 2015 11:48:12 +0000 Subject: drm/i915: Cache ringbuf pointer in request structure In execlist mode, the ringbuf is a function of the ring and context whereas in legacy mode, it is derived from the ring alone. Thus the calculation required to determine the ringbuf pointer from the ring (and context) also needs to test execlist mode or not. This is messy. Further, the request structure holds a pointer to both the ring and the context for which it was created. Thus, given a request, it is possible to derive the ringbuf in either legacy or execlist mode. Hence it is necessary to pass just the request in to all the low level functions rather than some combination of request, ring, context and ringbuf. However, rather than recalculating it each time, it is much simpler to just cache the ringbuf pointer in the request structure itself. 
Caching the pointer means the calculation is done once at request creation time and all further code can simply read it directly from the request structure. OTC-Jira: VIZ-5115 Signed-off-by: John Harrison [danvet: Drop contentless comment in lrc alloc request entirely. And spelling fix in the commit message.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 3 ++- drivers/gpu/drm/i915/i915_gem.c | 14 +------------- drivers/gpu/drm/i915/intel_lrc.c | 5 +---- drivers/gpu/drm/i915/intel_ringbuffer.c | 1 + 4 files changed, 5 insertions(+), 18 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ba0f5b690291..239a382c8b55 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2156,8 +2156,9 @@ struct drm_i915_gem_request { /** Position in the ringbuffer of the end of the whole request */ u32 tail; - /** Context related to this request */ + /** Context and ring buffer related to this request */ struct intel_context *ctx; + struct intel_ringbuffer *ringbuf; /** Batch buffer related to this request if any */ struct drm_i915_gem_object *batch_obj; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index f28f0dea6c96..14ca4cd5e6d7 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2763,7 +2763,6 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring) while (!list_empty(&ring->request_list)) { struct drm_i915_gem_request *request; - struct intel_ringbuffer *ringbuf; request = list_first_entry(&ring->request_list, struct drm_i915_gem_request, @@ -2774,23 +2773,12 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring) trace_i915_gem_request_retire(request); - /* This is one of the few common intersection points - * between legacy ringbuffer submission and execlists: - * we need to tell them apart in order to find the correct - * ringbuffer to which the 
request belongs to. - */ - if (i915.enable_execlists) { - struct intel_context *ctx = request->ctx; - ringbuf = ctx->engine[ring->id].ringbuf; - } else - ringbuf = ring->buffer; - /* We know the GPU must have read the request to have * sent us the seqno + interrupt, so use the position * of tail of the request to update the last known position * of the GPU head. */ - ringbuf->last_retired_head = request->postfix; + request->ringbuf->last_retired_head = request->postfix; i915_gem_free_request(request); } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 09af288da6d4..a1a2a61118ba 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -888,12 +888,9 @@ static int logical_ring_alloc_request(struct intel_engine_cs *ring, return ret; } - /* Hold a reference to the context this request belongs to - * (we will need it when the time comes to emit/retire the - * request). - */ request->ctx = ctx; i915_gem_context_reference(request->ctx); + request->ringbuf = ctx->engine[ring->id].ringbuf; ring->outstanding_lazy_request = request; return 0; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 4a4a7aec0fc3..94dc98b44adc 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2230,6 +2230,7 @@ intel_ring_alloc_request(struct intel_engine_cs *ring) kref_init(&request->ref); request->ring = ring; + request->ringbuf = ring->buffer; request->uniq = dev_private->request_uniq++; ret = i915_gem_get_seqno(ring->dev, &request->seqno); -- cgit v1.2.3 From dbef0f15b5c83231dacb214dbf9a6dba063ca21c Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Fri, 13 Feb 2015 17:23:46 -0200 Subject: drm/i915: add frontbuffer tracking to FBC Kill the blt/render tracking we currently have and use the frontbuffer tracking infrastructure. Don't enable things by default yet. v2: (Rodrigo) Fix small conflict on rebase and typo at subject. 
v3: (Paulo) Rebase on RENDER_CS change. v4: (Paulo) Rebase. v5: (Paulo) Simplify: flushes don't have origin (Daniel). Also rebase due to patch order changes. Signed-off-by: Rodrigo Vivi Signed-off-by: Paulo Zanoni Reviewed-by: Rodrigo Vivi Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 10 +--- drivers/gpu/drm/i915/intel_drv.h | 6 ++- drivers/gpu/drm/i915/intel_fbc.c | 91 +++++++++++++++++++------------- drivers/gpu/drm/i915/intel_frontbuffer.c | 14 +---- drivers/gpu/drm/i915/intel_ringbuffer.c | 41 +------------- drivers/gpu/drm/i915/intel_ringbuffer.h | 1 - 6 files changed, 65 insertions(+), 98 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 2858e013642f..032459a50457 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -805,6 +805,8 @@ struct i915_fbc { unsigned long uncompressed_size; unsigned threshold; unsigned int fb_id; + unsigned int possible_framebuffer_bits; + unsigned int busy_bits; struct intel_crtc *crtc; int y; @@ -817,14 +819,6 @@ struct i915_fbc { * possible. */ bool enabled; - /* On gen8 some rings cannont perform fbc clean operation so for now - * we are doing this on SW with mmio. - * This variable works in the opposite information direction - * of ring->fbc_dirty telling software on frontbuffer tracking - * to perform the cache clean on sw side. 
- */ - bool need_sw_cache_clean; - struct intel_fbc_work { struct delayed_work work; struct drm_crtc *crtc; diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 61a40ab61d6b..fbf81499b736 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1115,7 +1115,11 @@ bool intel_fbc_enabled(struct drm_device *dev); void intel_fbc_update(struct drm_device *dev); void intel_fbc_init(struct drm_i915_private *dev_priv); void intel_fbc_disable(struct drm_device *dev); -void bdw_fbc_sw_flush(struct drm_device *dev, u32 value); +void intel_fbc_invalidate(struct drm_i915_private *dev_priv, + unsigned int frontbuffer_bits, + enum fb_op_origin origin); +void intel_fbc_flush(struct drm_i915_private *dev_priv, + unsigned int frontbuffer_bits); /* intel_hdmi.c */ void intel_hdmi_init(struct drm_device *dev, int hdmi_reg, enum port port); diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index 618f7bdab0ba..9fcf446e95f5 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -174,29 +174,10 @@ static bool g4x_fbc_enabled(struct drm_device *dev) return I915_READ(DPFC_CONTROL) & DPFC_CTL_EN; } -static void snb_fbc_blit_update(struct drm_device *dev) +static void intel_fbc_nuke(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; - u32 blt_ecoskpd; - - /* Make sure blitter notifies FBC of writes */ - - /* Blitter is part of Media powerwell on VLV. 
No impact of - * his param in other platforms for now */ - intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA); - - blt_ecoskpd = I915_READ(GEN6_BLITTER_ECOSKPD); - blt_ecoskpd |= GEN6_BLITTER_FBC_NOTIFY << - GEN6_BLITTER_LOCK_SHIFT; - I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd); - blt_ecoskpd |= GEN6_BLITTER_FBC_NOTIFY; - I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd); - blt_ecoskpd &= ~(GEN6_BLITTER_FBC_NOTIFY << - GEN6_BLITTER_LOCK_SHIFT); - I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd); - POSTING_READ(GEN6_BLITTER_ECOSKPD); - - intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA); + I915_WRITE(MSG_FBC_REND_STATE, FBC_REND_NUKE); + POSTING_READ(MSG_FBC_REND_STATE); } static void ilk_fbc_enable(struct drm_crtc *crtc) @@ -239,9 +220,10 @@ static void ilk_fbc_enable(struct drm_crtc *crtc) I915_WRITE(SNB_DPFC_CTL_SA, SNB_CPU_FENCE_ENABLE | obj->fence_reg); I915_WRITE(DPFC_CPU_FENCE_OFFSET, crtc->y); - snb_fbc_blit_update(dev); } + intel_fbc_nuke(dev_priv); + DRM_DEBUG_KMS("enabled fbc on plane %c\n", plane_name(intel_crtc->plane)); } @@ -320,7 +302,7 @@ static void gen7_fbc_enable(struct drm_crtc *crtc) SNB_CPU_FENCE_ENABLE | obj->fence_reg); I915_WRITE(DPFC_CPU_FENCE_OFFSET, crtc->y); - snb_fbc_blit_update(dev); + intel_fbc_nuke(dev_priv); DRM_DEBUG_KMS("enabled fbc on plane %c\n", plane_name(intel_crtc->plane)); } @@ -340,19 +322,6 @@ bool intel_fbc_enabled(struct drm_device *dev) return dev_priv->fbc.enabled; } -void bdw_fbc_sw_flush(struct drm_device *dev, u32 value) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - - if (!IS_GEN8(dev)) - return; - - if (!intel_fbc_enabled(dev)) - return; - - I915_WRITE(MSG_FBC_REND_STATE, value); -} - static void intel_fbc_work_fn(struct work_struct *__work) { struct intel_fbc_work *work = @@ -685,6 +654,44 @@ out_disable: i915_gem_stolen_cleanup_compression(dev); } +void intel_fbc_invalidate(struct drm_i915_private *dev_priv, + unsigned int frontbuffer_bits, + enum fb_op_origin origin) +{ + struct drm_device 
*dev = dev_priv->dev; + unsigned int fbc_bits; + + if (origin == ORIGIN_GTT) + return; + + if (dev_priv->fbc.enabled) + fbc_bits = INTEL_FRONTBUFFER_PRIMARY(dev_priv->fbc.crtc->pipe); + else if (dev_priv->fbc.fbc_work) + fbc_bits = INTEL_FRONTBUFFER_PRIMARY( + to_intel_crtc(dev_priv->fbc.fbc_work->crtc)->pipe); + else + fbc_bits = dev_priv->fbc.possible_framebuffer_bits; + + dev_priv->fbc.busy_bits |= (fbc_bits & frontbuffer_bits); + + if (dev_priv->fbc.busy_bits) + intel_fbc_disable(dev); +} + +void intel_fbc_flush(struct drm_i915_private *dev_priv, + unsigned int frontbuffer_bits) +{ + struct drm_device *dev = dev_priv->dev; + + if (!dev_priv->fbc.busy_bits) + return; + + dev_priv->fbc.busy_bits &= ~frontbuffer_bits; + + if (!dev_priv->fbc.busy_bits) + intel_fbc_update(dev); +} + /** * intel_fbc_init - Initialize FBC * @dev_priv: the i915 device @@ -693,12 +700,22 @@ out_disable: */ void intel_fbc_init(struct drm_i915_private *dev_priv) { + enum pipe pipe; + if (!HAS_FBC(dev_priv)) { dev_priv->fbc.enabled = false; dev_priv->fbc.no_fbc_reason = FBC_UNSUPPORTED; return; } + for_each_pipe(dev_priv, pipe) { + dev_priv->fbc.possible_framebuffer_bits |= + INTEL_FRONTBUFFER_PRIMARY(pipe); + + if (IS_HASWELL(dev_priv) || INTEL_INFO(dev_priv)->gen >= 8) + break; + } + if (INTEL_INFO(dev_priv)->gen >= 7) { dev_priv->display.fbc_enabled = ilk_fbc_enabled; dev_priv->display.enable_fbc = gen7_fbc_enable; diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c b/drivers/gpu/drm/i915/intel_frontbuffer.c index 5da73f0124ce..0a1bac8ac72b 100644 --- a/drivers/gpu/drm/i915/intel_frontbuffer.c +++ b/drivers/gpu/drm/i915/intel_frontbuffer.c @@ -118,8 +118,6 @@ static void intel_mark_fb_busy(struct drm_device *dev, continue; intel_increase_pllclock(dev, pipe); - if (ring && intel_fbc_enabled(dev)) - ring->fbc_dirty = true; } } @@ -160,6 +158,7 @@ void intel_fb_obj_invalidate(struct drm_i915_gem_object *obj, intel_psr_invalidate(dev, obj->frontbuffer_bits); intel_edp_drrs_invalidate(dev, 
obj->frontbuffer_bits); + intel_fbc_invalidate(dev_priv, obj->frontbuffer_bits, origin); } /** @@ -187,16 +186,7 @@ void intel_frontbuffer_flush(struct drm_device *dev, intel_edp_drrs_flush(dev, frontbuffer_bits); intel_psr_flush(dev, frontbuffer_bits); - - /* - * FIXME: Unconditional fbc flushing here is a rather gross hack and - * needs to be reworked into a proper frontbuffer tracking scheme like - * psr employs. - */ - if (dev_priv->fbc.need_sw_cache_clean) { - dev_priv->fbc.need_sw_cache_clean = false; - bdw_fbc_sw_flush(dev, FBC_REND_CACHE_CLEAN); - } + intel_fbc_flush(dev_priv, frontbuffer_bits); } /** diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index cd79c3843452..e9858d2e92d0 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -317,29 +317,6 @@ gen7_render_ring_cs_stall_wa(struct intel_engine_cs *ring) return 0; } -static int gen7_ring_fbc_flush(struct intel_engine_cs *ring, u32 value) -{ - int ret; - - if (!ring->fbc_dirty) - return 0; - - ret = intel_ring_begin(ring, 6); - if (ret) - return ret; - /* WaFbcNukeOn3DBlt:ivb/hsw */ - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit(ring, MSG_FBC_REND_STATE); - intel_ring_emit(ring, value); - intel_ring_emit(ring, MI_STORE_REGISTER_MEM(1) | MI_SRM_LRM_GLOBAL_GTT); - intel_ring_emit(ring, MSG_FBC_REND_STATE); - intel_ring_emit(ring, ring->scratch.gtt_offset + 256); - intel_ring_advance(ring); - - ring->fbc_dirty = false; - return 0; -} - static int gen7_render_ring_flush(struct intel_engine_cs *ring, u32 invalidate_domains, u32 flush_domains) @@ -398,9 +375,6 @@ gen7_render_ring_flush(struct intel_engine_cs *ring, intel_ring_emit(ring, 0); intel_ring_advance(ring); - if (!invalidate_domains && flush_domains) - return gen7_ring_fbc_flush(ring, FBC_REND_NUKE); - return 0; } @@ -462,9 +436,6 @@ gen8_render_ring_flush(struct intel_engine_cs *ring, if (ret) return ret; - if (!invalidate_domains && 
flush_domains) - return gen7_ring_fbc_flush(ring, FBC_REND_NUKE); - return 0; } @@ -2477,7 +2448,6 @@ static int gen6_ring_flush(struct intel_engine_cs *ring, u32 invalidate, u32 flush) { struct drm_device *dev = ring->dev; - struct drm_i915_private *dev_priv = dev->dev_private; uint32_t cmd; int ret; @@ -2486,7 +2456,7 @@ static int gen6_ring_flush(struct intel_engine_cs *ring, return ret; cmd = MI_FLUSH_DW; - if (INTEL_INFO(ring->dev)->gen >= 8) + if (INTEL_INFO(dev)->gen >= 8) cmd += 1; /* We always require a command barrier so that subsequent @@ -2506,7 +2476,7 @@ static int gen6_ring_flush(struct intel_engine_cs *ring, cmd |= MI_INVALIDATE_TLB; intel_ring_emit(ring, cmd); intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT); - if (INTEL_INFO(ring->dev)->gen >= 8) { + if (INTEL_INFO(dev)->gen >= 8) { intel_ring_emit(ring, 0); /* upper addr */ intel_ring_emit(ring, 0); /* value */ } else { @@ -2515,13 +2485,6 @@ static int gen6_ring_flush(struct intel_engine_cs *ring, } intel_ring_advance(ring); - if (!invalidate && flush) { - if (IS_GEN7(dev)) - return gen7_ring_fbc_flush(ring, FBC_REND_CACHE_CLEAN); - else if (IS_BROADWELL(dev)) - dev_priv->fbc.need_sw_cache_clean = true; - } - return 0; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 8f3b49a23ccf..c761fe05ad6f 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -267,7 +267,6 @@ struct intel_engine_cs { */ struct drm_i915_gem_request *outstanding_lazy_request; bool gpu_caches_dirty; - bool fbc_dirty; wait_queue_head_t irq_queue; -- cgit v1.2.3 From 6e0b3f8d64252664f240f0cbd23e3d22ce3df001 Mon Sep 17 00:00:00 2001 From: kbuild test robot Date: Thu, 5 Mar 2015 22:03:08 +0800 Subject: drm/i915: fix simple_return.cocci warnings drivers/gpu/drm/i915/intel_ringbuffer.c:435:1-4: WARNING: end returns can be simpified Simplify a trivial if-return sequence. 
Possibly combine with a preceding function call. Generated by: scripts/coccinelle/misc/simple_return.cocci CC: Paulo Zanoni Signed-off-by: Fengguang Wu Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index e9858d2e92d0..441e2502b889 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -432,11 +432,7 @@ gen8_render_ring_flush(struct intel_engine_cs *ring, return ret; } - ret = gen8_emit_pipe_control(ring, flags, scratch_addr); - if (ret) - return ret; - - return 0; + return gen8_emit_pipe_control(ring, flags, scratch_addr); } static void ring_write_tail(struct intel_engine_cs *ring, -- cgit v1.2.3