summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/gt/intel_workarounds.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/gt/intel_workarounds.c')
-rw-r--r--drivers/gpu/drm/i915/gt/intel_workarounds.c290
1 files changed, 260 insertions, 30 deletions
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 5176ad1a3976..85d2bef51524 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -179,6 +179,12 @@ wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
}
static void
+wa_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr)
+{
+ wa_write_masked_or(wal, reg, clr, 0);
+}
+
+static void
wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
wa_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val);
@@ -485,25 +491,14 @@ static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine,
static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine,
struct i915_wa_list *wal)
{
- struct drm_i915_private *i915 = engine->i915;
-
/* WaForceContextSaveRestoreNonCoherent:cnl */
WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0,
HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);
- /* WaThrottleEUPerfToAvoidTDBackPressure:cnl(pre-prod) */
- if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
- WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5);
-
/* WaDisableReplayBufferBankArbitrationOptimization:cnl */
WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
- /* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */
- if (IS_CNL_REVID(i915, 0, CNL_REVID_B0))
- WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
- GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE);
-
/* WaPushConstantDereferenceHoldDisable:cnl */
WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);
@@ -698,6 +693,227 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq)
}
static void
+gen4_gt_workarounds_init(struct drm_i915_private *i915,
+ struct i915_wa_list *wal)
+{
+ /* WaDisable_RenderCache_OperationalFlush:gen4,ilk */
+ wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE);
+}
+
+static void
+g4x_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+{
+ gen4_gt_workarounds_init(i915, wal);
+
+ /* WaDisableRenderCachePipelinedFlush:g4x,ilk */
+ wa_masked_en(wal, CACHE_MODE_0, CM0_PIPELINED_RENDER_FLUSH_DISABLE);
+}
+
+static void
+ilk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+{
+ g4x_gt_workarounds_init(i915, wal);
+
+ wa_masked_en(wal, _3D_CHICKEN2, _3D_CHICKEN2_WM_READ_PIPELINED);
+}
+
+static void
+snb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+{
+ /* WaDisableHiZPlanesWhenMSAAEnabled:snb */
+ wa_masked_en(wal,
+ _3D_CHICKEN,
+ _3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB);
+
+ /* WaDisable_RenderCache_OperationalFlush:snb */
+ wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE);
+
+ /*
+ * BSpec recommends 8x4 when MSAA is used,
+ * however in practice 16x4 seems fastest.
+ *
+ * Note that PS/WM thread counts depend on the WIZ hashing
+ * disable bit, which we don't touch here, but it's good
+ * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
+ */
+ wa_add(wal,
+ GEN6_GT_MODE, 0,
+ _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4),
+ GEN6_WIZ_HASHING_16x4);
+
+ wa_masked_dis(wal, CACHE_MODE_0, CM0_STC_EVICT_DISABLE_LRA_SNB);
+
+ wa_masked_en(wal,
+ _3D_CHICKEN3,
+ /* WaStripsFansDisableFastClipPerformanceFix:snb */
+ _3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL |
+ /*
+ * Bspec says:
+ * "This bit must be set if 3DSTATE_CLIP clip mode is set
+ * to normal and 3DSTATE_SF number of SF output attributes
+ * is more than 16."
+ */
+ _3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH);
+}
+
+static void
+ivb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+{
+ /* WaDisableEarlyCull:ivb */
+ wa_masked_en(wal, _3D_CHICKEN3, _3D_CHICKEN_SF_DISABLE_OBJEND_CULL);
+
+ /* WaDisablePSDDualDispatchEnable:ivb */
+ if (IS_IVB_GT1(i915))
+ wa_masked_en(wal,
+ GEN7_HALF_SLICE_CHICKEN1,
+ GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
+
+ /* WaDisable_RenderCache_OperationalFlush:ivb */
+ wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE);
+
+ /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
+ wa_masked_dis(wal,
+ GEN7_COMMON_SLICE_CHICKEN1,
+ GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
+
+ /* WaApplyL3ControlAndL3ChickenMode:ivb */
+ wa_write(wal, GEN7_L3CNTLREG1, GEN7_WA_FOR_GEN7_L3_CONTROL);
+ wa_write(wal, GEN7_L3_CHICKEN_MODE_REGISTER, GEN7_WA_L3_CHICKEN_MODE);
+
+ /* WaForceL3Serialization:ivb */
+ wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE);
+
+ /*
+ * WaVSThreadDispatchOverride:ivb,vlv
+ *
+ * This actually overrides the dispatch
+ * mode for all thread types.
+ */
+ wa_write_masked_or(wal, GEN7_FF_THREAD_MODE,
+ GEN7_FF_SCHED_MASK,
+ GEN7_FF_TS_SCHED_HW |
+ GEN7_FF_VS_SCHED_HW |
+ GEN7_FF_DS_SCHED_HW);
+
+ if (0) { /* causes HiZ corruption on ivb:gt1 */
+ /* enable HiZ Raw Stall Optimization */
+ wa_masked_dis(wal, CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
+ }
+
+ /* WaDisable4x2SubspanOptimization:ivb */
+ wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);
+
+ /*
+ * BSpec recommends 8x4 when MSAA is used,
+ * however in practice 16x4 seems fastest.
+ *
+ * Note that PS/WM thread counts depend on the WIZ hashing
+ * disable bit, which we don't touch here, but it's good
+ * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
+ */
+ wa_add(wal, GEN7_GT_MODE, 0,
+ _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4),
+ GEN6_WIZ_HASHING_16x4);
+}
+
+static void
+vlv_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+{
+ /* WaDisableEarlyCull:vlv */
+ wa_masked_en(wal, _3D_CHICKEN3, _3D_CHICKEN_SF_DISABLE_OBJEND_CULL);
+
+ /* WaPsdDispatchEnable:vlv */
+ /* WaDisablePSDDualDispatchEnable:vlv */
+ wa_masked_en(wal,
+ GEN7_HALF_SLICE_CHICKEN1,
+ GEN7_MAX_PS_THREAD_DEP |
+ GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
+
+ /* WaDisable_RenderCache_OperationalFlush:vlv */
+ wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE);
+
+ /* WaForceL3Serialization:vlv */
+ wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE);
+
+ /*
+ * WaVSThreadDispatchOverride:ivb,vlv
+ *
+ * This actually overrides the dispatch
+ * mode for all thread types.
+ */
+ wa_write_masked_or(wal,
+ GEN7_FF_THREAD_MODE,
+ GEN7_FF_SCHED_MASK,
+ GEN7_FF_TS_SCHED_HW |
+ GEN7_FF_VS_SCHED_HW |
+ GEN7_FF_DS_SCHED_HW);
+
+ /*
+ * BSpec says this must be set, even though
+ * WaDisable4x2SubspanOptimization isn't listed for VLV.
+ */
+ wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);
+
+ /*
+ * BSpec recommends 8x4 when MSAA is used,
+ * however in practice 16x4 seems fastest.
+ *
+ * Note that PS/WM thread counts depend on the WIZ hashing
+ * disable bit, which we don't touch here, but it's good
+ * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
+ */
+ wa_add(wal, GEN7_GT_MODE, 0,
+ _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4),
+ GEN6_WIZ_HASHING_16x4);
+
+ /*
+ * WaIncreaseL3CreditsForVLVB0:vlv
+ * This is the hardware default actually.
+ */
+ wa_write(wal, GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE);
+}
+
+static void
+hsw_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+{
+ /* L3 caching of data atomics doesn't work -- disable it. */
+ wa_write(wal, HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
+
+ wa_add(wal,
+ HSW_ROW_CHICKEN3, 0,
+ _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE),
+ 0 /* XXX does this reg exist? */);
+
+ /* WaVSRefCountFullforceMissDisable:hsw */
+ wa_write_clr(wal, GEN7_FF_THREAD_MODE, GEN7_FF_VS_REF_CNT_FFME);
+
+ wa_masked_dis(wal,
+ CACHE_MODE_0_GEN7,
+ /* WaDisable_RenderCache_OperationalFlush:hsw */
+ RC_OP_FLUSH_ENABLE |
+ /* enable HiZ Raw Stall Optimization */
+ HIZ_RAW_STALL_OPT_DISABLE);
+
+ /* WaDisable4x2SubspanOptimization:hsw */
+ wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);
+
+ /*
+ * BSpec recommends 8x4 when MSAA is used,
+ * however in practice 16x4 seems fastest.
+ *
+ * Note that PS/WM thread counts depend on the WIZ hashing
+ * disable bit, which we don't touch here, but it's good
+ * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
+ */
+ wa_add(wal, GEN7_GT_MODE, 0,
+ _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4),
+ GEN6_WIZ_HASHING_16x4);
+
+ /* WaSampleCChickenBitEnable:hsw */
+ wa_masked_en(wal, HALF_SLICE_CHICKEN3, HSW_SAMPLE_C_PERFORMANCE);
+}
+
+static void
gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
/* WaDisableKillLogic:bxt,skl,kbl */
@@ -837,7 +1053,7 @@ wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
intel_uncore_read(&i915->uncore, GEN10_MIRROR_FUSE3) &
GEN10_L3BANK_MASK;
- DRM_DEBUG_DRIVER("L3 fuse = %x\n", l3_fuse);
+ drm_dbg(&i915->drm, "L3 fuse = %x\n", l3_fuse);
l3_en = ~(l3_fuse << GEN10_L3BANK_PAIR_COUNT | l3_fuse);
} else {
l3_en = ~0;
@@ -846,7 +1062,8 @@ wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
slice = fls(sseu->slice_mask) - 1;
subslice = fls(l3_en & intel_sseu_get_subslices(sseu, slice));
if (!subslice) {
- DRM_WARN("No common index found between subslice mask %x and L3 bank mask %x!\n",
+ drm_warn(&i915->drm,
+ "No common index found between subslice mask %x and L3 bank mask %x!\n",
intel_sseu_get_subslices(sseu, slice), l3_en);
subslice = fls(l3_en);
drm_WARN_ON(&i915->drm, !subslice);
@@ -861,7 +1078,7 @@ wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK;
}
- DRM_DEBUG_DRIVER("MCR slice/subslice = %x\n", mcr);
+ drm_dbg(&i915->drm, "MCR slice/subslice = %x\n", mcr);
wa_write_masked_or(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr);
}
@@ -871,12 +1088,6 @@ cnl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
wa_init_mcr(i915, wal);
- /* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
- if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
- wa_write_or(wal,
- GAMT_CHKN_BIT_REG,
- GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT);
-
/* WaInPlaceDecompressionHang:cnl */
wa_write_or(wal,
GEN9_GAMT_ECO_REG_RW_IA,
@@ -933,15 +1144,20 @@ icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
GAMT_CHKN_BIT_REG,
GAMT_CHKN_DISABLE_L3_COH_PIPE);
- /* Wa_1607087056:icl */
- wa_write_or(wal,
- SLICE_UNIT_LEVEL_CLKGATE,
- L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
+ /* Wa_1607087056:icl,ehl,jsl */
+ if (IS_ICELAKE(i915) ||
+ IS_EHL_REVID(i915, EHL_REVID_A0, EHL_REVID_A0)) {
+ wa_write_or(wal,
+ SLICE_UNIT_LEVEL_CLKGATE,
+ L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
+ }
}
static void
tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
+ wa_init_mcr(i915, wal);
+
/* Wa_1409420604:tgl */
if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0))
wa_write_or(wal,
@@ -974,6 +1190,20 @@ gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal)
bxt_gt_workarounds_init(i915, wal);
else if (IS_SKYLAKE(i915))
skl_gt_workarounds_init(i915, wal);
+ else if (IS_HASWELL(i915))
+ hsw_gt_workarounds_init(i915, wal);
+ else if (IS_VALLEYVIEW(i915))
+ vlv_gt_workarounds_init(i915, wal);
+ else if (IS_IVYBRIDGE(i915))
+ ivb_gt_workarounds_init(i915, wal);
+ else if (IS_GEN(i915, 6))
+ snb_gt_workarounds_init(i915, wal);
+ else if (IS_GEN(i915, 5))
+ ilk_gt_workarounds_init(i915, wal);
+ else if (IS_G4X(i915))
+ g4x_gt_workarounds_init(i915, wal);
+ else if (IS_GEN(i915, 4))
+ gen4_gt_workarounds_init(i915, wal);
else if (INTEL_GEN(i915) <= 8)
return;
else
@@ -1379,12 +1609,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
GEN7_FF_THREAD_MODE,
GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
- /*
- * Wa_1409085225:tgl
- * Wa_14010229206:tgl
- */
- wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH);
-
/* Wa_1408615072:tgl */
wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
VSUNIT_CLKGATE_DIS_TGL);
@@ -1402,6 +1626,12 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
wa_masked_en(wal,
GEN9_CS_DEBUG_MODE1,
FF_DOP_CLOCK_GATE_DISABLE);
+
+ /*
+ * Wa_1409085225:tgl
+ * Wa_14010229206:tgl
+ */
+ wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH);
}
if (IS_GEN(i915, 11)) {