From a5c89f7c43c12c592a882a0ec2a15e9df0011e80 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 4 May 2022 16:46:36 -0700 Subject: drm/i915/guc: Support programming the EU priority in the GuC descriptor In GuC submission mode the EU priority must be updated by the GuC rather than the driver as the GuC owns the programming of the context descriptor. Given that the GuC code uses the GuC priorities, we can't use a generic function using i915 priorities for both execlists and GuC submission. The existing function has therefore been pushed to the execlists back-end while a new one has been added for GuC. v2: correctly use the GuC prio. Cc: John Harrison Cc: Matt Roper Signed-off-by: Matthew Brost Signed-off-by: Aravind Iddamsetty Signed-off-by: Daniele Ceraolo Spurio Reviewed-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20220504234636.2119794-1-daniele.ceraolospurio@intel.com --- .../gpu/drm/i915/gt/intel_execlists_submission.c | 12 +++++++++++- drivers/gpu/drm/i915/gt/intel_lrc.h | 10 ---------- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 22 ++++++++++++++++++++++ 3 files changed, 33 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 86f7a9ac1c39..2b0266cab66b 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -661,6 +661,16 @@ static inline void execlists_schedule_out(struct i915_request *rq) i915_request_put(rq); } +static u32 map_i915_prio_to_lrc_desc_prio(int prio) +{ + if (prio > I915_PRIORITY_NORMAL) + return GEN12_CTX_PRIORITY_HIGH; + else if (prio < I915_PRIORITY_NORMAL) + return GEN12_CTX_PRIORITY_LOW; + else + return GEN12_CTX_PRIORITY_NORMAL; +} + static u64 execlists_update_context(struct i915_request *rq) { struct intel_context *ce = rq->context; @@ -669,7 +679,7 @@ static u64 execlists_update_context(struct i915_request *rq) desc = ce->lrc.desc; if (rq->engine->flags & I915_ENGINE_HAS_EU_PRIORITY) - desc |= lrc_desc_priority(rq_prio(rq)); + desc |= map_i915_prio_to_lrc_desc_prio(rq_prio(rq)); /* * WaIdleLiteRestore:bdw,skl diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h index 31be734010db..a390f0813c8b 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h @@ -111,16 +111,6 @@ enum { #define XEHP_SW_COUNTER_SHIFT 58 #define XEHP_SW_COUNTER_WIDTH 6 -static inline u32 lrc_desc_priority(int prio) -{ - if (prio > I915_PRIORITY_NORMAL) - return GEN12_CTX_PRIORITY_HIGH; - else if (prio < I915_PRIORITY_NORMAL) - return GEN12_CTX_PRIORITY_LOW; - else - return GEN12_CTX_PRIORITY_NORMAL; -} - static inline void lrc_runtime_start(struct intel_context *ce) { struct intel_context_stats *stats = &ce->stats; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 75291e9846c5..8bf8b6d588d4 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -2394,6 +2394,26 @@ static int guc_context_policy_init(struct intel_context *ce, bool loop) return ret; } +static u32 map_guc_prio_to_lrc_desc_prio(u8 prio) +{ + /* + * this matches the mapping we do in map_i915_prio_to_guc_prio() + * (e.g. 
prio < I915_PRIORITY_NORMAL maps to GUC_CLIENT_PRIORITY_NORMAL) + */ + switch (prio) { + default: + MISSING_CASE(prio); + fallthrough; + case GUC_CLIENT_PRIORITY_KMD_NORMAL: + return GEN12_CTX_PRIORITY_NORMAL; + case GUC_CLIENT_PRIORITY_NORMAL: + return GEN12_CTX_PRIORITY_LOW; + case GUC_CLIENT_PRIORITY_HIGH: + case GUC_CLIENT_PRIORITY_KMD_HIGH: + return GEN12_CTX_PRIORITY_HIGH; + } +} + static void prepare_context_registration_info(struct intel_context *ce, struct guc_ctxt_registration_info *info) { @@ -2420,6 +2440,8 @@ static void prepare_context_registration_info(struct intel_context *ce, */ info->hwlrca_lo = lower_32_bits(ce->lrc.lrca); info->hwlrca_hi = upper_32_bits(ce->lrc.lrca); + if (engine->flags & I915_ENGINE_HAS_EU_PRIORITY) + info->hwlrca_lo |= map_guc_prio_to_lrc_desc_prio(ce->guc_state.prio); info->flags = CONTEXT_REGISTRATION_FLAG_KMD; /* -- cgit v1.2.3 From 315241d2d9102a90f71bd6c9e7dd06a1c831a184 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Wed, 4 May 2022 13:48:13 -0700 Subject: drm/i915/huc: drop intel_huc_is_authenticated The function name is confusing, because it doesn't check the actual auth status in HW but the SW status. Given that there is only one user (the huc_auth function itself), just get rid of it and use the FW status checker directly. Signed-off-by: Daniele Ceraolo Spurio Reviewed-by: Alan Previn Link: https://patchwork.freedesktop.org/patch/msgid/20220504204816.2082588-2-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_huc.c | 2 +- drivers/gpu/drm/i915/gt/uc/intel_huc.h | 5 ----- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c index 556829de9c17..7b759b99cf3c 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c @@ -96,7 +96,7 @@ int intel_huc_auth(struct intel_huc *huc) struct intel_guc *guc = >->uc.guc; int ret; - GEM_BUG_ON(intel_huc_is_authenticated(huc)); + GEM_BUG_ON(intel_uc_fw_is_running(&huc->fw)); if (!intel_uc_fw_is_loaded(&huc->fw)) return -ENOEXEC; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.h b/drivers/gpu/drm/i915/gt/uc/intel_huc.h index 73ec670800f2..77d813840d76 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.h @@ -50,11 +50,6 @@ static inline bool intel_huc_is_used(struct intel_huc *huc) return intel_uc_fw_is_available(&huc->fw); } -static inline bool intel_huc_is_authenticated(struct intel_huc *huc) -{ - return intel_uc_fw_is_running(&huc->fw); -} - void intel_huc_load_status(struct intel_huc *huc, struct drm_printer *p); #endif -- cgit v1.2.3 From a7b516bd981f11feb0c9f5ee3d149855d48cb2c8 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Wed, 4 May 2022 13:48:14 -0700 Subject: drm/i915/huc: Add fetch support for gsc-loaded HuC binary On newer platforms (starting DG2 G10 B-step and G11 A-step), ownership of HuC loading has been moved from the GuC to the GSC. As part of the change, the header format of the HuC binary has been updated and does not match the GuC anymore. The GSC will perform all the required checks on the binary size, so we only need to check that the version matches. Note that since we still haven't added any gsc-loaded FWs, the loaded_via_gsc variable will always be kept to its initialization value of zero. 
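
For illustration only, a minimal sketch of the version check this patch boils down to for GSC-packaged blobs: read the 8-bit major/minor fields from dword 44 of the image (the HUC_GSC_VERSION_DW layout added to intel_uc_fw_abi.h at the bottom of this patch) and compare them against the versions the driver expects, as the generic fetch code already does for CSS-packaged firmware. The helper name and comparison shown here are placeholders, not the patch's code:

#include <linux/types.h>

/* Hypothetical helper, mirroring check_gsc_manifest() plus the existing
 * wanted-version comparison in intel_uc_fw_fetch(). */
static bool gsc_huc_version_ok(const u32 *blob, u8 want_major, u8 want_minor)
{
	u32 version = blob[44];			/* HUC_GSC_VERSION_DW */
	u8 major = version & 0xff;		/* HUC_GSC_MAJOR_VER_MASK */
	u8 minor = (version >> 16) & 0xff;	/* HUC_GSC_MINOR_VER_MASK */

	/* major must match exactly, a newer minor is acceptable */
	return major == want_major && minor >= want_minor;
}
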
v2: Add a note about loaded_via_gsc being zero (Alan) Signed-off-by: Daniele Ceraolo Spurio Reviewed-by: Alan Previn Link: https://patchwork.freedesktop.org/patch/msgid/20220504204816.2082588-3-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 99 +++++++++++++++++----------- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h | 2 + drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h | 9 +++ 3 files changed, 72 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index d078f884b5e3..9361532726d6 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -301,45 +301,31 @@ static void __force_fw_fetch_failures(struct intel_uc_fw *uc_fw, int e) } } -/** - * intel_uc_fw_fetch - fetch uC firmware - * @uc_fw: uC firmware - * - * Fetch uC firmware into GEM obj. - * - * Return: 0 on success, a negative errno code on failure. - */ -int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) +static int check_gsc_manifest(const struct firmware *fw, + struct intel_uc_fw *uc_fw) { - struct drm_i915_private *i915 = __uc_fw_to_gt(uc_fw)->i915; - struct device *dev = i915->drm.dev; - struct drm_i915_gem_object *obj; - const struct firmware *fw = NULL; - struct uc_css_header *css; - size_t size; - int err; + u32 *dw = (u32 *)fw->data; + u32 version = dw[HUC_GSC_VERSION_DW]; - GEM_BUG_ON(!i915->wopcm.size); - GEM_BUG_ON(!intel_uc_fw_is_enabled(uc_fw)); - - err = i915_inject_probe_error(i915, -ENXIO); - if (err) - goto fail; + uc_fw->major_ver_found = FIELD_GET(HUC_GSC_MAJOR_VER_MASK, version); + uc_fw->minor_ver_found = FIELD_GET(HUC_GSC_MINOR_VER_MASK, version); - __force_fw_fetch_failures(uc_fw, -EINVAL); - __force_fw_fetch_failures(uc_fw, -ESTALE); + return 0; +} - err = request_firmware(&fw, uc_fw->path, dev); - if (err) - goto fail; +static int check_ccs_header(struct drm_i915_private *i915, + const struct firmware *fw, + struct intel_uc_fw *uc_fw) +{ + struct uc_css_header *css; + size_t size; /* Check the size of the blob before examining buffer contents */ if (unlikely(fw->size < sizeof(struct uc_css_header))) { drm_warn(&i915->drm, "%s firmware %s: invalid size: %zu < %zu\n", intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, fw->size, sizeof(struct uc_css_header)); - err = -ENODATA; - goto fail; + return -ENODATA; } css = (struct uc_css_header *)fw->data; @@ -352,8 +338,7 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) "%s firmware %s: unexpected header size: %zu != %zu\n", intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, fw->size, sizeof(struct uc_css_header)); - err = -EPROTO; - goto fail; + return -EPROTO; } /* uCode size must calculated from other sizes */ @@ -368,8 +353,7 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) drm_warn(&i915->drm, "%s firmware %s: invalid size: %zu < %zu\n", intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, fw->size, size); - err = -ENOEXEC; - goto fail; + return -ENOEXEC; } /* Sanity check whether this fw is not larger than whole WOPCM memory */ @@ -378,8 +362,7 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) drm_warn(&i915->drm, "%s firmware %s: invalid size: %zu > %zu\n", intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, size, (size_t)i915->wopcm.size); - err = -E2BIG; - goto fail; + return -E2BIG; } /* Get version numbers from the CSS header */ @@ -388,6 +371,49 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) uc_fw->minor_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MINOR, css->sw_version); + if (uc_fw->type == INTEL_UC_FW_TYPE_GUC) + 
uc_fw->private_data_size = css->private_data_size; + + return 0; +} + +/** + * intel_uc_fw_fetch - fetch uC firmware + * @uc_fw: uC firmware + * + * Fetch uC firmware into GEM obj. + * + * Return: 0 on success, a negative errno code on failure. + */ +int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) +{ + struct drm_i915_private *i915 = __uc_fw_to_gt(uc_fw)->i915; + struct device *dev = i915->drm.dev; + struct drm_i915_gem_object *obj; + const struct firmware *fw = NULL; + int err; + + GEM_BUG_ON(!i915->wopcm.size); + GEM_BUG_ON(!intel_uc_fw_is_enabled(uc_fw)); + + err = i915_inject_probe_error(i915, -ENXIO); + if (err) + goto fail; + + __force_fw_fetch_failures(uc_fw, -EINVAL); + __force_fw_fetch_failures(uc_fw, -ESTALE); + + err = request_firmware(&fw, uc_fw->path, dev); + if (err) + goto fail; + + if (uc_fw->loaded_via_gsc) + err = check_gsc_manifest(fw, uc_fw); + else + err = check_ccs_header(i915, fw, uc_fw); + if (err) + goto fail; + if (uc_fw->major_ver_found != uc_fw->major_ver_wanted || uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) { drm_notice(&i915->drm, "%s firmware %s: unexpected version: %u.%u != %u.%u\n", @@ -400,9 +426,6 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) } } - if (uc_fw->type == INTEL_UC_FW_TYPE_GUC) - uc_fw->private_data_size = css->private_data_size; - if (HAS_LMEM(i915)) { obj = i915_gem_object_create_lmem_from_data(i915, fw->data, fw->size); if (!IS_ERR(obj)) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h index 3229018877d3..4f169035f504 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h @@ -102,6 +102,8 @@ struct intel_uc_fw { u32 ucode_size; u32 private_data_size; + + bool loaded_via_gsc; }; #ifdef CONFIG_DRM_I915_DEBUG_GUC diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h index e41ffc7a7fbc..b05e0e35b734 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h @@ -39,6 +39,11 @@ * 3. Length info of each component can be found in header, in dwords. * 4. Modulus and exponent key are not required by driver. They may not appear * in fw. So driver will load a truncated firmware in this case. + * + * Starting from DG2, the HuC is loaded by the GSC instead of i915. The GSC + * firmware performs all the required integrity checks, we just need to check + * the version. Note that the header for GSC-managed blobs is different from the + * CSS used for dma-loaded firmwares. */ struct uc_css_header { @@ -78,4 +83,8 @@ struct uc_css_header { } __packed; static_assert(sizeof(struct uc_css_header) == 128); +#define HUC_GSC_VERSION_DW 44 +#define HUC_GSC_MAJOR_VER_MASK (0xFF << 0) +#define HUC_GSC_MINOR_VER_MASK (0xFF << 16) + #endif /* _INTEL_UC_FW_ABI_H */ -- cgit v1.2.3 From 6f67930af78f10ac7a1a9ba81ec606a9bd07749f Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Wed, 4 May 2022 13:48:15 -0700 Subject: drm/i915/huc: Prepare for GSC-loaded HuC HuC loading via GSC is performed via a PXP command sent through the mei modules, so we need both MEI_GSC and MEI_PXP to be available. Given that the GSC will do both the transfer and the authentication, the legacy HuC loading paths can be safely skipped. Also note that the GSC-loaded HuC survives GT reset. v2: move the huc_is_authenticated() function to this patch. 
Signed-off-by: Daniele Ceraolo Spurio Reviewed-by: Alan Previn Link: https://patchwork.freedesktop.org/patch/msgid/20220504204816.2082588-4-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h | 1 + drivers/gpu/drm/i915/gt/uc/intel_huc.c | 95 +++++++++++++++++++++++++----- drivers/gpu/drm/i915/gt/uc/intel_huc.h | 6 ++ drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c | 5 +- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 11 +++- 5 files changed, 100 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h index 66027a42cda9..2516705b9f36 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h @@ -96,6 +96,7 @@ #define GUC_SHIM_CONTROL2 _MMIO(0xc068) #define GUC_IS_PRIVILEGED (1<<29) +#define GSC_LOADS_HUC (1<<30) #define GUC_SEND_INTERRUPT _MMIO(0xc4c8) #define GUC_SEND_TRIGGER (1<<0) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c index 7b759b99cf3c..c36e2bf9b0f2 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c @@ -6,6 +6,7 @@ #include #include "gt/intel_gt.h" +#include "intel_guc_reg.h" #include "intel_huc.h" #include "i915_drv.h" @@ -17,11 +18,15 @@ * capabilities by adding HuC specific commands to batch buffers. * * The kernel driver is only responsible for loading the HuC firmware and - * triggering its security authentication, which is performed by the GuC. For - * The GuC to correctly perform the authentication, the HuC binary must be - * loaded before the GuC one. Loading the HuC is optional; however, not using - * the HuC might negatively impact power usage and/or performance of media - * workloads, depending on the use-cases. + * triggering its security authentication, which is performed by the GuC on + * older platforms and by the GSC on newer ones. For the GuC to correctly + * perform the authentication, the HuC binary must be loaded before the GuC one. + * Loading the HuC is optional; however, not using the HuC might negatively + * impact power usage and/or performance of media workloads, depending on the + * use-cases. + * HuC must be reloaded on events that cause the WOPCM to lose its contents + * (S3/S4, FLR); GuC-authenticated HuC must also be reloaded on GuC/GT reset, + * while GSC-managed HuC will survive that. * * See https://github.com/intel/media-driver for the latest details on HuC * functionality. @@ -54,11 +59,51 @@ void intel_huc_init_early(struct intel_huc *huc) } } +#define HUC_LOAD_MODE_STRING(x) (x ? "GSC" : "legacy") +static int check_huc_loading_mode(struct intel_huc *huc) +{ + struct intel_gt *gt = huc_to_gt(huc); + bool fw_needs_gsc = intel_huc_is_loaded_by_gsc(huc); + bool hw_uses_gsc = false; + + /* + * The fuse for HuC load via GSC is only valid on platforms that have + * GuC deprivilege. 
+ */ + if (HAS_GUC_DEPRIVILEGE(gt->i915)) + hw_uses_gsc = intel_uncore_read(gt->uncore, GUC_SHIM_CONTROL2) & + GSC_LOADS_HUC; + + if (fw_needs_gsc != hw_uses_gsc) { + drm_err(>->i915->drm, + "mismatch between HuC FW (%s) and HW (%s) load modes\n", + HUC_LOAD_MODE_STRING(fw_needs_gsc), + HUC_LOAD_MODE_STRING(hw_uses_gsc)); + return -ENOEXEC; + } + + /* make sure we can access the GSC via the mei driver if we need it */ + if (!(IS_ENABLED(CONFIG_INTEL_MEI_PXP) && IS_ENABLED(CONFIG_INTEL_MEI_GSC)) && + fw_needs_gsc) { + drm_info(>->i915->drm, + "Can't load HuC due to missing MEI modules\n"); + return -EIO; + } + + drm_dbg(>->i915->drm, "GSC loads huc=%s\n", str_yes_no(fw_needs_gsc)); + + return 0; +} + int intel_huc_init(struct intel_huc *huc) { struct drm_i915_private *i915 = huc_to_gt(huc)->i915; int err; + err = check_huc_loading_mode(huc); + if (err) + goto out; + err = intel_uc_fw_init(&huc->fw); if (err) goto out; @@ -96,17 +141,20 @@ int intel_huc_auth(struct intel_huc *huc) struct intel_guc *guc = >->uc.guc; int ret; - GEM_BUG_ON(intel_uc_fw_is_running(&huc->fw)); - if (!intel_uc_fw_is_loaded(&huc->fw)) return -ENOEXEC; + /* GSC will do the auth */ + if (intel_huc_is_loaded_by_gsc(huc)) + return -ENODEV; + ret = i915_inject_probe_error(gt->i915, -ENXIO); if (ret) goto fail; - ret = intel_guc_auth_huc(guc, - intel_guc_ggtt_offset(guc, huc->fw.rsa_data)); + GEM_BUG_ON(intel_uc_fw_is_running(&huc->fw)); + + ret = intel_guc_auth_huc(guc, intel_guc_ggtt_offset(guc, huc->fw.rsa_data)); if (ret) { DRM_ERROR("HuC: GuC did not ack Auth request %d\n", ret); goto fail; @@ -133,6 +181,18 @@ fail: return ret; } +static bool huc_is_authenticated(struct intel_huc *huc) +{ + struct intel_gt *gt = huc_to_gt(huc); + intel_wakeref_t wakeref; + u32 status = 0; + + with_intel_runtime_pm(gt->uncore->rpm, wakeref) + status = intel_uncore_read(gt->uncore, huc->status.reg); + + return (status & huc->status.mask) == huc->status.value; +} + /** * intel_huc_check_status() - check HuC status * @huc: intel_huc structure @@ -150,10 +210,6 @@ fail: */ int intel_huc_check_status(struct intel_huc *huc) { - struct intel_gt *gt = huc_to_gt(huc); - intel_wakeref_t wakeref; - u32 status = 0; - switch (__intel_uc_fw_status(&huc->fw)) { case INTEL_UC_FIRMWARE_NOT_SUPPORTED: return -ENODEV; @@ -167,10 +223,17 @@ int intel_huc_check_status(struct intel_huc *huc) break; } - with_intel_runtime_pm(gt->uncore->rpm, wakeref) - status = intel_uncore_read(gt->uncore, huc->status.reg); + return huc_is_authenticated(huc); +} - return (status & huc->status.mask) == huc->status.value; +void intel_huc_update_auth_status(struct intel_huc *huc) +{ + if (!intel_uc_fw_is_loadable(&huc->fw)) + return; + + if (huc_is_authenticated(huc)) + intel_uc_fw_change_status(&huc->fw, + INTEL_UC_FIRMWARE_RUNNING); } /** diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.h b/drivers/gpu/drm/i915/gt/uc/intel_huc.h index 77d813840d76..d7e25b6e879e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.h @@ -27,6 +27,7 @@ int intel_huc_init(struct intel_huc *huc); void intel_huc_fini(struct intel_huc *huc); int intel_huc_auth(struct intel_huc *huc); int intel_huc_check_status(struct intel_huc *huc); +void intel_huc_update_auth_status(struct intel_huc *huc); static inline int intel_huc_sanitize(struct intel_huc *huc) { @@ -50,6 +51,11 @@ static inline bool intel_huc_is_used(struct intel_huc *huc) return intel_uc_fw_is_available(&huc->fw); } +static inline bool intel_huc_is_loaded_by_gsc(const struct intel_huc *huc) +{ + 
return huc->fw.loaded_via_gsc; +} + void intel_huc_load_status(struct intel_huc *huc, struct drm_printer *p); #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c index e5ef509c70e8..9d6ab1e01639 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c @@ -8,7 +8,7 @@ #include "i915_drv.h" /** - * intel_huc_fw_upload() - load HuC uCode to device + * intel_huc_fw_upload() - load HuC uCode to device via DMA transfer * @huc: intel_huc structure * * Called from intel_uc_init_hw() during driver load, resume from sleep and @@ -21,6 +21,9 @@ */ int intel_huc_fw_upload(struct intel_huc *huc) { + if (intel_huc_is_loaded_by_gsc(huc)) + return -ENODEV; + /* HW doesn't look at destination address for HuC, so set it to 0 */ return intel_uc_fw_upload(&huc->fw, 0, HUC_UKERNEL); } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 8c9ef690ac9d..0dce94f896a8 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -509,7 +509,16 @@ static int __uc_init_hw(struct intel_uc *uc) if (ret) goto err_log_capture; - intel_huc_auth(huc); + /* + * GSC-loaded HuC is authenticated by the GSC, so we don't need to + * trigger the auth here. However, given that the HuC loaded this way + * survive GT reset, we still need to update our SW bookkeeping to make + * sure it reflects the correct HW status. + */ + if (intel_huc_is_loaded_by_gsc(huc)) + intel_huc_update_auth_status(huc); + else + intel_huc_auth(huc); if (intel_uc_uses_guc_submission(uc)) intel_guc_submission_enable(guc); -- cgit v1.2.3 From 56ca3117f77a23a8b24e73e458bc85c11e5dea31 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Wed, 4 May 2022 13:48:16 -0700 Subject: drm/i915/huc: Don't fail the probe if HuC init fails The previous patch introduced new failure cases in the HuC init flow that can be hit by simply changing the config, so we want to avoid failing the probe in those scenarios. HuC load failure is already considered a non-fatal error and we have a way to report to userspace if the HuC is not available via a dedicated getparam, so no changes in expectation there. The error message in the HuC init code has also been lowered to info to avoid throwing error message for an expected behavior. 
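
For illustration only, a minimal user-space sketch of the dedicated getparam mentioned above (I915_PARAM_HUC_STATUS); the render node path is an assumption and error handling is kept minimal:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

int main(void)
{
	int fd = open("/dev/dri/renderD128", O_RDWR);
	int value = 0;
	struct drm_i915_getparam gp = {
		.param = I915_PARAM_HUC_STATUS,
		.value = &value,
	};

	if (fd < 0)
		return 1;

	/* The ioctl fails (e.g. -ENODEV) if HuC is not supported or not
	 * enabled; on success, a non-zero value means HuC is authenticated. */
	if (ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp) == 0)
		printf("HuC authenticated: %d\n", value);

	close(fd);
	return 0;
}
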
Signed-off-by: Daniele Ceraolo Spurio Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20220504204816.2082588-5-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_huc.c | 2 +- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 11 ++--------- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c index c36e2bf9b0f2..3bb8838e325a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c @@ -113,7 +113,7 @@ int intel_huc_init(struct intel_huc *huc) return 0; out: - i915_probe_error(i915, "failed with %d\n", err); + drm_info(&i915->drm, "HuC init failed with %d\n", err); return err; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 0dce94f896a8..ecf149c5fdb0 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -323,17 +323,10 @@ static int __uc_init(struct intel_uc *uc) if (ret) return ret; - if (intel_uc_uses_huc(uc)) { - ret = intel_huc_init(huc); - if (ret) - goto out_guc; - } + if (intel_uc_uses_huc(uc)) + intel_huc_init(huc); return 0; - -out_guc: - intel_guc_fini(guc); - return ret; } static void __uc_fini(struct intel_uc *uc) -- cgit v1.2.3 From e6c2db2be986158afb9991d9fa8a38fe65a88516 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 5 May 2022 12:00:06 +0100 Subject: drm/i915: Don't use DRM_DEBUG_WARN_ON for unexpected l3bank/mslice config DRM_DEBUG_WARN_ON should only be used when we are certain CI is guaranteed to exercise a certain code path, so in case of values coming from MMIO reads we cannot be sure CI will have all the possible SKUs and parts. Use drm_warn instead and move logging to init phase while at it. v2: * GEM_WARN_ON in intel_gt_get_valid_steering. Signed-off-by: Tvrtko Ursulin Cc: Matt Roper Cc: Jani Nikula Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20220505110007.943449-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_gt.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 53307ca0eed0..034182f85501 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -153,11 +153,14 @@ int intel_gt_init_mmio(struct intel_gt *gt) * An mslice is unavailable only if both the meml3 for the slice is * disabled *and* all of the DSS in the slice (quadrant) are disabled. */ - if (HAS_MSLICES(i915)) + if (HAS_MSLICES(i915)) { gt->info.mslice_mask = slicemask(gt, GEN_DSS_PER_MSLICE) | (intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3) & GEN12_MEML3_EN_MASK); + if (!gt->info.mslice_mask) /* should be impossible! */ + drm_warn(&i915->drm, "mslice mask all zero!\n"); + } if (IS_DG2(i915)) { gt->steering_table[MSLICE] = xehpsdv_mslice_steering_table; @@ -171,6 +174,8 @@ int intel_gt_init_mmio(struct intel_gt *gt) gt->info.l3bank_mask = ~intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3) & GEN10_L3BANK_MASK; + if (!gt->info.l3bank_mask) /* should be impossible! */ + drm_warn(&i915->drm, "L3 bank mask is all zero!\n"); } else if (HAS_MSLICES(i915)) { MISSING_CASE(INTEL_INFO(i915)->platform); } @@ -882,24 +887,20 @@ static void intel_gt_get_valid_steering(struct intel_gt *gt, { switch (type) { case L3BANK: - GEM_DEBUG_WARN_ON(!gt->info.l3bank_mask); /* should be impossible! 
*/ - *sliceid = 0; /* unused */ *subsliceid = __ffs(gt->info.l3bank_mask); break; case MSLICE: - GEM_DEBUG_WARN_ON(!gt->info.mslice_mask); /* should be impossible! */ - + GEM_WARN_ON(!HAS_MSLICES(gt->i915)); *sliceid = __ffs(gt->info.mslice_mask); *subsliceid = 0; /* unused */ break; case LNCF: - GEM_DEBUG_WARN_ON(!gt->info.mslice_mask); /* should be impossible! */ - /* * An LNCF is always present if its mslice is present, so we * can safely just steer to LNCF 0 in all cases. */ + GEM_WARN_ON(!HAS_MSLICES(gt->i915)); *sliceid = __ffs(gt->info.mslice_mask) << 1; *subsliceid = 0; /* unused */ break; -- cgit v1.2.3 From 91875c22a31be0bdf91d7ec651bb6b083b35ac37 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 5 May 2022 12:00:07 +0100 Subject: drm/i915: Don't use DRM_DEBUG_WARN_ON for ring unexpectedly not idle DRM_DEBUG_WARN_ON should only be used when we are certain CI is guaranteed to exercise a certain code path, so in case of values coming from MMIO reads we cannot be sure CI will have all the possible SKUs and parts, or that it will catch all possible error conditions. Use drm_warn instead. Signed-off-by: Tvrtko Ursulin Cc: Mika Kuoppala Cc: Jani Nikula Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20220505110007.943449-2-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_ring_submission.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index 5423bfd301ad..f8f279a195c0 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -117,7 +117,9 @@ static void flush_cs_tlb(struct intel_engine_cs *engine) return; /* ring should be idle before issuing a sync flush*/ - GEM_DEBUG_WARN_ON((ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0); + if ((ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0) + drm_warn(&engine->i915->drm, "%s not idle before sync flush!\n", + engine->name); ENGINE_WRITE_FW(engine, RING_INSTPM, _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE | -- cgit v1.2.3 From 09708b6d82ef473de91c49d90f35e38b0db463f5 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 6 May 2022 11:26:52 +0800 Subject: drm/i915/gt: Fix build error without CONFIG_PM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c: In function ‘act_freq_mhz_show’: drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c:276:20: error: implicit declaration of function ‘sysfs_gt_attribute_r_max_func’ [-Werror=implicit-function-declaration] 276 | u32 actual_freq = sysfs_gt_attribute_r_max_func(dev, attr, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Move sysfs_gt_attribute_* macros out of #ifdef block to fix this. 
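
For illustration only, a reduced example of the failure mode (placeholder names, not i915 code): a helper defined under #ifdef CONFIG_PM but used from code that is built unconditionally triggers the implicit-function-declaration error whenever CONFIG_PM is disabled, which is why the sysfs_gt_attribute_* macros have to live outside the #ifdef:

#ifdef CONFIG_PM
static int read_some_pm_counter(void)
{
	return 0;
}
#endif

int some_show_callback(void)
{
	/* implicit declaration of 'read_some_pm_counter' when CONFIG_PM=n */
	return read_some_pm_counter();
}
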
Fixes: 56a709cf7746 ("drm/i915/gt: Create per-tile RPS sysfs interfaces") Signed-off-by: YueHaibing Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20220506032652.1856-1-yuehaibing@huawei.com --- drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c index 26cbfa6477d1..e92990d514b2 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c @@ -17,7 +17,6 @@ #include "intel_rc6.h" #include "intel_rps.h" -#ifdef CONFIG_PM enum intel_gt_sysfs_op { INTEL_GT_SYSFS_MIN = 0, INTEL_GT_SYSFS_MAX, @@ -92,6 +91,7 @@ sysfs_gt_attribute_r_func(struct device *dev, struct device_attribute *attr, #define sysfs_gt_attribute_r_max_func(d, a, f) \ sysfs_gt_attribute_r_func(d, a, f, INTEL_GT_SYSFS_MAX) +#ifdef CONFIG_PM static u32 get_residency(struct intel_gt *gt, i915_reg_t reg) { intel_wakeref_t wakeref; -- cgit v1.2.3 From 222ff6db8a0dcb86f2bb65fc8656aec635a737a6 Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Thu, 5 May 2022 12:35:18 -0700 Subject: drm/i915: Drop has_gt_uc from device info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No need to have this parameter in intel_device_info struct as all platforms with graphics version 9 or newer has graphics microcontroller. As a side effect of the of removal this flag, it will not be printed in dmesg during driver load anymore and developers will have to rely on to check the macro and compare with platform being used and IP versions of it. Reviewed-by: Matt Roper Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20220505193524.276400-1-jose.souza@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_gpu_error.c | 2 +- drivers/gpu/drm/i915/i915_pci.c | 3 --- drivers/gpu/drm/i915/intel_device_info.h | 1 - 4 files changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 3ed9021c615d..d71ed8d272c5 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1350,7 +1350,7 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, */ #define HAS_FLAT_CCS(dev_priv) (INTEL_INFO(dev_priv)->has_flat_ccs) -#define HAS_GT_UC(dev_priv) (INTEL_INFO(dev_priv)->has_gt_uc) +#define HAS_GT_UC(dev_priv) (GRAPHICS_VER(dev_priv) >= 9) #define HAS_POOLED_EU(dev_priv) (INTEL_INFO(dev_priv)->has_pooled_eu) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 0512c66fa4f3..5bd9cb899852 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -2008,7 +2008,7 @@ __i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask, u32 du return ERR_PTR(-ENOMEM); } - if (INTEL_INFO(i915)->has_gt_uc) { + if (HAS_GT_UC(i915)) { error->gt->uc = gt_record_uc(error->gt, compress); if (error->gt->uc) { if (dump_flags & CORE_DUMP_FLAG_IS_GUC_CAPTURE) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 987bdeb090a5..a5f09a2f7472 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -644,7 +644,6 @@ static const struct intel_device_info chv_info = { GEN(9), \ GEN9_DEFAULT_PAGE_SIZES, \ .display.has_dmc = 1, \ - .has_gt_uc = 1, \ .display.has_hdcp = 1, \ .display.has_ipc = 1, \ .display.has_psr = 1, \ @@ -705,7 
+704,6 @@ static const struct intel_device_info skl_gt4_info = { .has_rps = true, \ .display.has_dp_mst = 1, \ .has_logical_ring_contexts = 1, \ - .has_gt_uc = 1, \ .dma_mask_size = 39, \ .ppgtt_type = INTEL_PPGTT_FULL, \ .ppgtt_size = 48, \ @@ -1008,7 +1006,6 @@ static const struct intel_device_info adl_p_info = { .has_64bit_reloc = 1, \ .has_flat_ccs = 1, \ .has_global_mocs = 1, \ - .has_gt_uc = 1, \ .has_llc = 1, \ .has_logical_ring_contexts = 1, \ .has_logical_ring_elsq = 1, \ diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index ec0b8095e7fa..93396c49488c 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -141,7 +141,6 @@ enum intel_ppgtt_type { func(has_4tile); \ func(has_flat_ccs); \ func(has_global_mocs); \ - func(has_gt_uc); \ func(has_heci_pxp); \ func(has_heci_gscfi); \ func(has_guc_deprivilege); \ -- cgit v1.2.3 From 218076abbcd647de46635d21331a34b814f90906 Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Thu, 5 May 2022 12:35:19 -0700 Subject: drm/i915: Drop has_rc6 from device info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No need to have this parameter in intel_device_info struct as all platforms with graphics version 6 or newer have software support for this feature. As a side effect of the of removal this flag, it will not be printed in dmesg during driver load anymore and developers will have to rely on to check the macro and compare with platform being used and IP versions of it. Reviewed-by: Matt Roper Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20220505193524.276400-2-jose.souza@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 3 ++- drivers/gpu/drm/i915/i915_pci.c | 8 -------- drivers/gpu/drm/i915/intel_device_info.h | 1 - 3 files changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d71ed8d272c5..8f48c3d90645 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1302,7 +1302,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_PSR2_SEL_FETCH(dev_priv) (DISPLAY_VER(dev_priv) >= 12) #define HAS_TRANSCODER(dev_priv, trans) ((INTEL_INFO(dev_priv)->display.cpu_transcoder_mask & BIT(trans)) != 0) -#define HAS_RC6(dev_priv) (INTEL_INFO(dev_priv)->has_rc6) +/* ilk does support rc6, but we do not implement [power] contexts */ +#define HAS_RC6(dev_priv) (GRAPHICS_VER(dev_priv) >= 6) #define HAS_RC6p(dev_priv) (INTEL_INFO(dev_priv)->has_rc6p) #define HAS_RC6pp(dev_priv) (false) /* HW was never validated */ diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index a5f09a2f7472..1973d8e047f0 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -376,8 +376,6 @@ static const struct intel_device_info gm45_info = { .platform_engine_mask = BIT(RCS0) | BIT(VCS0), \ .has_snoop = true, \ .has_coherent_ggtt = true, \ - /* ilk does support rc6, but we do not implement [power] contexts */ \ - .has_rc6 = 0, \ .dma_mask_size = 36, \ I9XX_PIPE_OFFSETS, \ I9XX_CURSOR_OFFSETS, \ @@ -407,7 +405,6 @@ static const struct intel_device_info ilk_m_info = { .platform_engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0), \ .has_coherent_ggtt = true, \ .has_llc = 1, \ - .has_rc6 = 1, \ .has_rc6p = 1, \ .has_rps = true, \ .dma_mask_size = 40, \ @@ -458,7 +455,6 @@ static const struct intel_device_info snb_m_gt2_info = { 
.platform_engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0), \ .has_coherent_ggtt = true, \ .has_llc = 1, \ - .has_rc6 = 1, \ .has_rc6p = 1, \ .has_reset_engine = true, \ .has_rps = true, \ @@ -518,7 +514,6 @@ static const struct intel_device_info vlv_info = { .display.pipe_mask = BIT(PIPE_A) | BIT(PIPE_B), .display.cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B), .has_runtime_pm = 1, - .has_rc6 = 1, .has_reset_engine = true, .has_rps = true, .display.has_gmch = 1, @@ -617,7 +612,6 @@ static const struct intel_device_info chv_info = { .platform_engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0), .has_64bit_reloc = 1, .has_runtime_pm = 1, - .has_rc6 = 1, .has_rps = true, .has_logical_ring_contexts = 1, .display.has_gmch = 1, @@ -700,7 +694,6 @@ static const struct intel_device_info skl_gt4_info = { .display.has_psr_hw_tracking = 1, \ .has_runtime_pm = 1, \ .display.has_dmc = 1, \ - .has_rc6 = 1, \ .has_rps = true, \ .display.has_dp_mst = 1, \ .has_logical_ring_contexts = 1, \ @@ -1010,7 +1003,6 @@ static const struct intel_device_info adl_p_info = { .has_logical_ring_contexts = 1, \ .has_logical_ring_elsq = 1, \ .has_mslices = 1, \ - .has_rc6 = 1, \ .has_reset_engine = 1, \ .has_rps = 1, \ .has_runtime_pm = 1, \ diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 93396c49488c..68af6f89a366 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -151,7 +151,6 @@ enum intel_ppgtt_type { func(has_mslices); \ func(has_pooled_eu); \ func(has_pxp); \ - func(has_rc6); \ func(has_rc6p); \ func(has_rps); \ func(has_runtime_pm); \ -- cgit v1.2.3 From 922abe4d19bd21b38298f3902674774b92a49293 Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Thu, 5 May 2022 12:35:20 -0700 Subject: drm/i915: Drop has_reset_engine from device info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No need to have this parameter in intel_device_info struct as all platforms with graphics version 7 or newer can reset engines. As a side effect of the of removal this flag, it will not be printed in dmesg during driver load anymore and developers will have to rely on to check the macro and compare with platform being used and IP versions of it. 
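
The remaining patches in this series apply the same conversion; a minimal sketch of the pattern with placeholder names (HAS_FOO/has_foo are not real i915 symbols), shown once here rather than per patch:

/* Before: capability stored per platform in intel_device_info and
 * printed to dmesg at probe time. */
#define HAS_FOO_OLD(i915)	(INTEL_INFO(i915)->has_foo)

/* After: capability derived from the IP version, so there is nothing
 * left to print and callers simply use the macro. */
#define HAS_FOO(i915)		(GRAPHICS_VER(i915) >= 7)
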
Reviewed-by: Matt Roper Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20220505193524.276400-3-jose.souza@intel.com --- drivers/gpu/drm/i915/gt/intel_reset.c | 2 +- drivers/gpu/drm/i915/i915_pci.c | 5 ----- drivers/gpu/drm/i915/intel_device_info.h | 1 - 3 files changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 5422a3b84bd4..894f17f8b4ce 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -699,7 +699,7 @@ bool intel_has_reset_engine(const struct intel_gt *gt) if (gt->i915->params.reset < 2) return false; - return INTEL_INFO(gt->i915)->has_reset_engine; + return GRAPHICS_VER(gt->i915) >= 7; } int intel_reset_guc(struct intel_gt *gt) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 1973d8e047f0..b86e67c69458 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -456,7 +456,6 @@ static const struct intel_device_info snb_m_gt2_info = { .has_coherent_ggtt = true, \ .has_llc = 1, \ .has_rc6p = 1, \ - .has_reset_engine = true, \ .has_rps = true, \ .dma_mask_size = 40, \ .ppgtt_type = INTEL_PPGTT_ALIASING, \ @@ -514,7 +513,6 @@ static const struct intel_device_info vlv_info = { .display.pipe_mask = BIT(PIPE_A) | BIT(PIPE_B), .display.cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B), .has_runtime_pm = 1, - .has_reset_engine = true, .has_rps = true, .display.has_gmch = 1, .display.has_hotplug = 1, @@ -618,7 +616,6 @@ static const struct intel_device_info chv_info = { .dma_mask_size = 39, .ppgtt_type = INTEL_PPGTT_FULL, .ppgtt_size = 32, - .has_reset_engine = 1, .has_snoop = true, .has_coherent_ggtt = false, .display_mmio_offset = VLV_DISPLAY_BASE, @@ -700,7 +697,6 @@ static const struct intel_device_info skl_gt4_info = { .dma_mask_size = 39, \ .ppgtt_type = INTEL_PPGTT_FULL, \ .ppgtt_size = 48, \ - .has_reset_engine = 1, \ .has_snoop = true, \ .has_coherent_ggtt = false, \ .display.has_ipc = 1, \ @@ -1003,7 +999,6 @@ static const struct intel_device_info adl_p_info = { .has_logical_ring_contexts = 1, \ .has_logical_ring_elsq = 1, \ .has_mslices = 1, \ - .has_reset_engine = 1, \ .has_rps = 1, \ .has_runtime_pm = 1, \ .ppgtt_size = 48, \ diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 68af6f89a366..53777e14fa85 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -137,7 +137,6 @@ enum intel_ppgtt_type { func(has_64k_pages); \ func(needs_compact_pt); \ func(gpu_reset_clobbers_display); \ - func(has_reset_engine); \ func(has_4tile); \ func(has_flat_ccs); \ func(has_global_mocs); \ -- cgit v1.2.3 From b6411373d3954c8fe4617c27f90f773108b0ab03 Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Thu, 5 May 2022 12:35:21 -0700 Subject: drm/i915: Drop has_logical_ring_elsq from device info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No need to have this parameter in intel_device_info struct as all platforms with graphics version 11 or newer has this feature. As a side effect of the of removal this flag, it will not be printed in dmesg during driver load anymore and developers will have to rely on to check the macro and compare with platform being used and IP versions of it. 
Reviewed-by: Matt Roper Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20220505193524.276400-4-jose.souza@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 3 +-- drivers/gpu/drm/i915/i915_pci.c | 4 +--- drivers/gpu/drm/i915/intel_device_info.h | 1 - 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 8f48c3d90645..bc6d8ccd662e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1241,8 +1241,7 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_LOGICAL_RING_CONTEXTS(dev_priv) \ (INTEL_INFO(dev_priv)->has_logical_ring_contexts) -#define HAS_LOGICAL_RING_ELSQ(dev_priv) \ - (INTEL_INFO(dev_priv)->has_logical_ring_elsq) +#define HAS_LOGICAL_RING_ELSQ(dev_priv) (GRAPHICS_VER(dev_priv) >= 11) #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index b86e67c69458..f60f9babdf2a 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -809,8 +809,7 @@ static const struct intel_device_info cml_gt2_info = { .dbuf.size = 2048, \ .dbuf.slice_mask = BIT(DBUF_S1) | BIT(DBUF_S2), \ .display.has_dsc = 1, \ - .has_coherent_ggtt = false, \ - .has_logical_ring_elsq = 1 + .has_coherent_ggtt = false static const struct intel_device_info icl_info = { GEN11_FEATURES, @@ -997,7 +996,6 @@ static const struct intel_device_info adl_p_info = { .has_global_mocs = 1, \ .has_llc = 1, \ .has_logical_ring_contexts = 1, \ - .has_logical_ring_elsq = 1, \ .has_mslices = 1, \ .has_rps = 1, \ .has_runtime_pm = 1, \ diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 53777e14fa85..1308752219db 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -146,7 +146,6 @@ enum intel_ppgtt_type { func(has_l3_dpf); \ func(has_llc); \ func(has_logical_ring_contexts); \ - func(has_logical_ring_elsq); \ func(has_mslices); \ func(has_pooled_eu); \ func(has_pxp); \ -- cgit v1.2.3 From efd01cd3c27636bc4840057a03839e54abaf11dc Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Thu, 5 May 2022 12:35:22 -0700 Subject: drm/i915: Drop has_ddi from device info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No need to have this parameter in intel_device_info struct as all platforms with display version 9 or newer, haswell or broadwell supports it. As a side effect of the of removal this flag, it will not be printed in dmesg during driver load anymore and developers will have to rely on to check the macro and compare with platform being used and IP versions of it. 
Reviewed-by: Matt Roper Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20220505193524.276400-5-jose.souza@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 4 +++- drivers/gpu/drm/i915/i915_pci.c | 3 --- drivers/gpu/drm/i915/intel_device_info.h | 1 - 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index bc6d8ccd662e..eddfbf5d3dee 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1293,7 +1293,9 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_DP20(dev_priv) (IS_DG2(dev_priv)) #define HAS_CDCLK_CRAWL(dev_priv) (INTEL_INFO(dev_priv)->display.has_cdclk_crawl) -#define HAS_DDI(dev_priv) (INTEL_INFO(dev_priv)->display.has_ddi) +#define HAS_DDI(dev_priv) (DISPLAY_VER(dev_priv) >= 9 || \ + IS_BROADWELL(dev_priv) || \ + IS_HASWELL(dev_priv)) #define HAS_FPGA_DBG_UNCLAIMED(dev_priv) (INTEL_INFO(dev_priv)->display.has_fpga_dbg) #define HAS_PSR(dev_priv) (INTEL_INFO(dev_priv)->display.has_psr) #define HAS_PSR_HW_TRACKING(dev_priv) \ diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index f60f9babdf2a..44a4723ff7f7 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -535,7 +535,6 @@ static const struct intel_device_info vlv_info = { .platform_engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0), \ .display.cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B) | \ BIT(TRANSCODER_C) | BIT(TRANSCODER_EDP), \ - .display.has_ddi = 1, \ .display.has_fpga_dbg = 1, \ .display.has_dp_mst = 1, \ .has_rc6p = 0 /* RC6p removed-by HSW */, \ @@ -683,7 +682,6 @@ static const struct intel_device_info skl_gt4_info = { BIT(TRANSCODER_C) | BIT(TRANSCODER_EDP) | \ BIT(TRANSCODER_DSI_A) | BIT(TRANSCODER_DSI_C), \ .has_64bit_reloc = 1, \ - .display.has_ddi = 1, \ .display.has_fpga_dbg = 1, \ .display.fbc_mask = BIT(INTEL_FBC_A), \ .display.has_hdcp = 1, \ @@ -932,7 +930,6 @@ static const struct intel_device_info adl_s_info = { .dbuf.size = 4096, \ .dbuf.slice_mask = BIT(DBUF_S1) | BIT(DBUF_S2) | BIT(DBUF_S3) | \ BIT(DBUF_S4), \ - .display.has_ddi = 1, \ .display.has_dmc = 1, \ .display.has_dp_mst = 1, \ .display.has_dsb = 1, \ diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 1308752219db..cc317a511fb5 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -162,7 +162,6 @@ enum intel_ppgtt_type { func(cursor_needs_physical); \ func(has_cdclk_crawl); \ func(has_dmc); \ - func(has_ddi); \ func(has_dp_mst); \ func(has_dsb); \ func(has_dsc); \ -- cgit v1.2.3 From eb86f645ab9b90c47de7ebe229feae7ac999421b Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Thu, 5 May 2022 12:35:23 -0700 Subject: drm/i915: Drop has_dp_mst from device info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No need to have this parameter in intel_device_info struct as the requirement to support it is the DDI support. As a side effect of the of removal this flag, it will not be printed in dmesg during driver load anymore and developers will have to rely on to check the macro and compare with platform being used and IP versions of it. 
Reviewed-by: Matt Roper Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20220505193524.276400-6-jose.souza@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_pci.c | 3 --- drivers/gpu/drm/i915/intel_device_info.h | 1 - 3 files changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index eddfbf5d3dee..cbe0b19af0a8 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1289,13 +1289,13 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_IPS(dev_priv) (IS_HSW_ULT(dev_priv) || IS_BROADWELL(dev_priv)) -#define HAS_DP_MST(dev_priv) (INTEL_INFO(dev_priv)->display.has_dp_mst) #define HAS_DP20(dev_priv) (IS_DG2(dev_priv)) #define HAS_CDCLK_CRAWL(dev_priv) (INTEL_INFO(dev_priv)->display.has_cdclk_crawl) #define HAS_DDI(dev_priv) (DISPLAY_VER(dev_priv) >= 9 || \ IS_BROADWELL(dev_priv) || \ IS_HASWELL(dev_priv)) +#define HAS_DP_MST(dev_priv) (HAS_DDI(dev_priv)) #define HAS_FPGA_DBG_UNCLAIMED(dev_priv) (INTEL_INFO(dev_priv)->display.has_fpga_dbg) #define HAS_PSR(dev_priv) (INTEL_INFO(dev_priv)->display.has_psr) #define HAS_PSR_HW_TRACKING(dev_priv) \ diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 44a4723ff7f7..ca6461ecb7de 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -536,7 +536,6 @@ static const struct intel_device_info vlv_info = { .display.cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B) | \ BIT(TRANSCODER_C) | BIT(TRANSCODER_EDP), \ .display.has_fpga_dbg = 1, \ - .display.has_dp_mst = 1, \ .has_rc6p = 0 /* RC6p removed-by HSW */, \ HSW_PIPE_OFFSETS, \ .has_runtime_pm = 1 @@ -690,7 +689,6 @@ static const struct intel_device_info skl_gt4_info = { .has_runtime_pm = 1, \ .display.has_dmc = 1, \ .has_rps = true, \ - .display.has_dp_mst = 1, \ .has_logical_ring_contexts = 1, \ .dma_mask_size = 39, \ .ppgtt_type = INTEL_PPGTT_FULL, \ @@ -931,7 +929,6 @@ static const struct intel_device_info adl_s_info = { .dbuf.slice_mask = BIT(DBUF_S1) | BIT(DBUF_S2) | BIT(DBUF_S3) | \ BIT(DBUF_S4), \ .display.has_dmc = 1, \ - .display.has_dp_mst = 1, \ .display.has_dsb = 1, \ .display.has_dsc = 1, \ .display.fbc_mask = BIT(INTEL_FBC_A), \ diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index cc317a511fb5..ffea8bff4b76 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -162,7 +162,6 @@ enum intel_ppgtt_type { func(cursor_needs_physical); \ func(has_cdclk_crawl); \ func(has_dmc); \ - func(has_dp_mst); \ func(has_dsb); \ func(has_dsc); \ func(has_fpga_dbg); \ -- cgit v1.2.3 From b15a7357a84f091fde8ce35bf2fd494150ad4bd0 Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Thu, 5 May 2022 12:35:24 -0700 Subject: drm/i915: Drop has_psr from device info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No need to have this parameter in intel_device_info struct as all platforms with display version 9 or newer has this feature. As a side effect of the of removal this flag, it will not be printed in dmesg during driver load anymore and developers will have to rely on to check the macro and compare with platform being used and IP versions of it. 
Reviewed-by: Matt Roper Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20220505193524.276400-7-jose.souza@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_pci.c | 3 --- drivers/gpu/drm/i915/intel_device_info.h | 1 - 3 files changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index cbe0b19af0a8..9d5fab8d2051 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1297,7 +1297,7 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, IS_HASWELL(dev_priv)) #define HAS_DP_MST(dev_priv) (HAS_DDI(dev_priv)) #define HAS_FPGA_DBG_UNCLAIMED(dev_priv) (INTEL_INFO(dev_priv)->display.has_fpga_dbg) -#define HAS_PSR(dev_priv) (INTEL_INFO(dev_priv)->display.has_psr) +#define HAS_PSR(dev_priv) (DISPLAY_VER(dev_priv) >= 9) #define HAS_PSR_HW_TRACKING(dev_priv) \ (INTEL_INFO(dev_priv)->display.has_psr_hw_tracking) #define HAS_PSR2_SEL_FETCH(dev_priv) (DISPLAY_VER(dev_priv) >= 12) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index ca6461ecb7de..1f320245c344 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -635,7 +635,6 @@ static const struct intel_device_info chv_info = { .display.has_dmc = 1, \ .display.has_hdcp = 1, \ .display.has_ipc = 1, \ - .display.has_psr = 1, \ .display.has_psr_hw_tracking = 1, \ .dbuf.size = 896 - 4, /* 4 blocks for bypass path allocation */ \ .dbuf.slice_mask = BIT(DBUF_S1) @@ -684,7 +683,6 @@ static const struct intel_device_info skl_gt4_info = { .display.has_fpga_dbg = 1, \ .display.fbc_mask = BIT(INTEL_FBC_A), \ .display.has_hdcp = 1, \ - .display.has_psr = 1, \ .display.has_psr_hw_tracking = 1, \ .has_runtime_pm = 1, \ .display.has_dmc = 1, \ @@ -936,7 +934,6 @@ static const struct intel_device_info adl_s_info = { .display.has_hdcp = 1, \ .display.has_hotplug = 1, \ .display.has_ipc = 1, \ - .display.has_psr = 1, \ .display.ver = 13, \ .display.pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D), \ .pipe_offsets = { \ diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index ffea8bff4b76..8702d1ae8fc0 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -172,7 +172,6 @@ enum intel_ppgtt_type { func(has_ipc); \ func(has_modular_fia); \ func(has_overlay); \ - func(has_psr); \ func(has_psr_hw_tracking); \ func(overlay_needs_physical); \ func(supports_tv); -- cgit v1.2.3 From 23dd74db02d75579d8d4eb0b88c7ad119e782269 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 29 Apr 2022 11:04:13 +0100 Subject: drm/i915: Enable THP on Icelake and beyond We have a statement from HW designers that the GPU read regression when using 2M pages was fixed from Icelake onwards, which was also confirmed by bencharking Eero did last year: """ When IOMMU is disabled, enabling THP causes following perf changes on TGL-H (GT1): 10-15% SynMark Batch[0-3] 5-10% MemBW GPU texture, SynMark ShMapVsm 3-5% SynMark TerrainFly* + Geom* + Fill* + CSCloth + Batch4 1-3% GpuTest Triangle, SynMark TexMem* + DeferredAA + Batch[5-7] + few others -7% MemBW GPU blend In the above 3D benchmark names, * means all the variants of tests with the same prefix. For example "SynMark TexMem*", means both TexMem128 & TexMem512 tests in the synthetic (Intel internal) SynMark test suite. 
In the (public, but proprietary) GfxBench & GLB(enchmark) test suites, there are both onscreen and offscreen variants of each test. Unless explicitly stated otherwise, numbers are for both variants. All tests are run with FullHD monitor. All tests are fullscreen except for GLB and GpuTest ones, which are run in 1/2 screen window (GpuTest triangle is run both in fullscreen and 1/2 screen window). """ Since the only regression is MemBW GPU blend, against many more gains, it sounds it is time to enable THP on Gen11+. Signed-off-by: Tvrtko Ursulin References: https://gitlab.freedesktop.org/drm/intel/-/issues/430 Cc: Joonas Lahtinen Cc: Matthew Auld Cc: Eero Tamminen Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20220429100414.647857-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gemfs.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gemfs.c b/drivers/gpu/drm/i915/gem/i915_gemfs.c index ee87874e59dc..c5a6bbc842fc 100644 --- a/drivers/gpu/drm/i915/gem/i915_gemfs.c +++ b/drivers/gpu/drm/i915/gem/i915_gemfs.c @@ -28,12 +28,14 @@ int i915_gemfs_init(struct drm_i915_private *i915) * * One example, although it is probably better with a per-file * control, is selecting huge page allocations ("huge=within_size"). - * However, we only do so to offset the overhead of iommu lookups - * due to bandwidth issues (slow reads) on Broadwell+. + * However, we only do so on platforms which benefit from it, or to + * offset the overhead of iommu lookups, where with latter it is a net + * win even on platforms which would otherwise see some performance + * regressions such a slow reads issue on Broadwell and Skylake. */ opts = NULL; - if (i915_vtd_active(i915)) { + if (GRAPHICS_VER(i915) >= 11 || i915_vtd_active(i915)) { if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { opts = huge_opt; drm_info(&i915->drm, @@ -41,7 +43,10 @@ int i915_gemfs_init(struct drm_i915_private *i915) opts); } else { drm_notice(&i915->drm, - "Transparent Hugepage support is recommended for optimal performance when IOMMU is enabled!\n"); + "Transparent Hugepage support is recommended for optimal performance%s\n", + GRAPHICS_VER(i915) >= 11 ? + " on this platform!" : + " when IOMMU is enabled!"); } } -- cgit v1.2.3 From b499914eb83765a27e3b43f216e9d1bdf4265418 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 29 Apr 2022 11:04:14 +0100 Subject: drm/i915: Only setup private tmpfs mount when needed and fix logging If i915 does not want to use huge pages there is a) no point in setting up the private mount and b) should former fail, it is misleading to log THP support is disabled in the caller, which does not even know if callee tried to enable it. Fix both by restructuring the flow in i915_gemfs_init and at the same time note the failure to set it up in all cases. 
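
For illustration only, the tmpfs behaviour that i915_gemfs_init() requests internally ("huge=within_size") can be reproduced from user space with an ordinary tmpfs mount; the mount point path is an assumption and the call needs CAP_SYS_ADMIN:

#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	/* Same option string the driver passes to vfs_kern_mount(). */
	if (mount("tmpfs", "/mnt/thp-tmpfs", "tmpfs", 0, "huge=within_size")) {
		perror("mount");
		return 1;
	}

	return 0;
}
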
Signed-off-by: Tvrtko Ursulin Cc: Joonas Lahtinen Cc: Matthew Auld Cc: Eero Tamminen Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20220429100414.647857-2-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 11 ++------ drivers/gpu/drm/i915/gem/i915_gemfs.c | 45 ++++++++++++++----------------- drivers/gpu/drm/i915/gem/i915_gemfs.h | 3 +-- 3 files changed, 23 insertions(+), 36 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index c2a3e388fcb4..955844f19193 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -671,17 +671,10 @@ fail: static int init_shmem(struct intel_memory_region *mem) { - int err; - - err = i915_gemfs_init(mem->i915); - if (err) { - DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled(%d).\n", - err); - } - + i915_gemfs_init(mem->i915); intel_memory_region_set_name(mem, "system"); - return 0; /* Don't error, we can simply fallback to the kernel mnt */ + return 0; /* We have fallback to the kernel mnt if gemfs init failed. */ } static int release_shmem(struct intel_memory_region *mem) diff --git a/drivers/gpu/drm/i915/gem/i915_gemfs.c b/drivers/gpu/drm/i915/gem/i915_gemfs.c index c5a6bbc842fc..46b9a17d6abc 100644 --- a/drivers/gpu/drm/i915/gem/i915_gemfs.c +++ b/drivers/gpu/drm/i915/gem/i915_gemfs.c @@ -11,16 +11,11 @@ #include "i915_gemfs.h" #include "i915_utils.h" -int i915_gemfs_init(struct drm_i915_private *i915) +void i915_gemfs_init(struct drm_i915_private *i915) { char huge_opt[] = "huge=within_size"; /* r/w */ struct file_system_type *type; struct vfsmount *gemfs; - char *opts; - - type = get_fs_type("tmpfs"); - if (!type) - return -ENODEV; /* * By creating our own shmemfs mountpoint, we can pass in @@ -34,29 +29,29 @@ int i915_gemfs_init(struct drm_i915_private *i915) * regressions such a slow reads issue on Broadwell and Skylake. */ - opts = NULL; - if (GRAPHICS_VER(i915) >= 11 || i915_vtd_active(i915)) { - if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { - opts = huge_opt; - drm_info(&i915->drm, - "Transparent Hugepage mode '%s'\n", - opts); - } else { - drm_notice(&i915->drm, - "Transparent Hugepage support is recommended for optimal performance%s\n", - GRAPHICS_VER(i915) >= 11 ? - " on this platform!" : - " when IOMMU is enabled!"); - } - } + if (GRAPHICS_VER(i915) < 11 && !i915_vtd_active(i915)) + return; + + if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) + goto err; - gemfs = vfs_kern_mount(type, SB_KERNMOUNT, type->name, opts); + type = get_fs_type("tmpfs"); + if (!type) + goto err; + + gemfs = vfs_kern_mount(type, SB_KERNMOUNT, type->name, huge_opt); if (IS_ERR(gemfs)) - return PTR_ERR(gemfs); + goto err; i915->mm.gemfs = gemfs; - - return 0; + drm_info(&i915->drm, "Using Transparent Hugepages\n"); + return; + +err: + drm_notice(&i915->drm, + "Transparent Hugepage support is recommended for optimal performance%s\n", + GRAPHICS_VER(i915) >= 11 ? " on this platform!" 
: + " when IOMMU is enabled!"); } void i915_gemfs_fini(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/gem/i915_gemfs.h b/drivers/gpu/drm/i915/gem/i915_gemfs.h index 2a1e59af3e4a..5d835e44c4f6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gemfs.h +++ b/drivers/gpu/drm/i915/gem/i915_gemfs.h @@ -9,8 +9,7 @@ struct drm_i915_private; -int i915_gemfs_init(struct drm_i915_private *i915); - +void i915_gemfs_init(struct drm_i915_private *i915); void i915_gemfs_fini(struct drm_i915_private *i915); #endif -- cgit v1.2.3 From 429e1fc1b2c257f35b6a1318eb3a1ffb80bc6640 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 2 May 2022 23:15:56 -0700 Subject: drm/i915/gem: Make drop_pages() return bool Commit e4e806253003 ("drm/i915: Change shrink ordering to use locking around unbinding.") changed the return type to int without changing the return values or their meaning to "0 is success". Move it back to boolean. Signed-off-by: Lucas De Marchi Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20220503061556.513175-1-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index 6a6ff98a8746..1030053571a2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -36,7 +36,7 @@ static bool can_release_pages(struct drm_i915_gem_object *obj) return swap_available() || obj->mm.madv == I915_MADV_DONTNEED; } -static int drop_pages(struct drm_i915_gem_object *obj, +static bool drop_pages(struct drm_i915_gem_object *obj, unsigned long shrink, bool trylock_vm) { unsigned long flags; -- cgit v1.2.3 From 9d67edba730c4663eb7d87771123c3fb86ba606d Mon Sep 17 00:00:00 2001 From: Ayaz A Siddiqui Date: Thu, 5 May 2022 14:38:03 -0700 Subject: drm/i915/pvc: Define MOCS table for PVC v2 (MattR): - Clarify comment above RING_CMD_CCTL programming. - Remove bspec reference from field definition. (Lucas) - Add WARN if we try to use a (presumably uninitialized) wb_index of 0. On most platforms 0 is an invalid MOCS entry and even on the ones where it isn't, it isn't the right setting for wb_index. 
(Lucas) Bspec: 45101, 72161 Cc: Lucas De Marchi Signed-off-by: Ayaz A Siddiqui Signed-off-by: Fei Yang Signed-off-by: Matt Roper Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20220505213812.3979301-4-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_types.h | 1 + drivers/gpu/drm/i915/gt/intel_mocs.c | 24 ++++++++++++++++++++++- drivers/gpu/drm/i915/gt/intel_workarounds.c | 30 +++++++++++++++++++++++------ drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_pci.c | 3 ++- drivers/gpu/drm/i915/intel_device_info.h | 1 + 6 files changed, 53 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index b06611c1d4ad..097e10291f2d 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -221,6 +221,7 @@ struct intel_gt { struct { u8 uc_index; + u8 wb_index; /* Only used on HAS_L3_CCS_READ() platforms */ } mocs; struct intel_pxp pxp; diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index c4c37585ae8c..c6ebe2781076 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -23,6 +23,7 @@ struct drm_i915_mocs_table { unsigned int n_entries; const struct drm_i915_mocs_entry *table; u8 uc_index; + u8 wb_index; /* Only used on HAS_L3_CCS_READ() platforms */ u8 unused_entries_index; }; @@ -47,6 +48,7 @@ struct drm_i915_mocs_table { /* Helper defines */ #define GEN9_NUM_MOCS_ENTRIES 64 /* 63-64 are reserved, but configured. */ +#define PVC_NUM_MOCS_ENTRIES 3 /* (e)LLC caching options */ /* @@ -394,6 +396,17 @@ static const struct drm_i915_mocs_entry dg2_mocs_table_g10_ax[] = { MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)), }; +static const struct drm_i915_mocs_entry pvc_mocs_table[] = { + /* Error */ + MOCS_ENTRY(0, 0, L3_3_WB), + + /* UC */ + MOCS_ENTRY(1, 0, L3_1_UC), + + /* WB */ + MOCS_ENTRY(2, 0, L3_3_WB), +}; + enum { HAS_GLOBAL_MOCS = BIT(0), HAS_ENGINE_MOCS = BIT(1), @@ -423,7 +436,14 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, memset(table, 0, sizeof(struct drm_i915_mocs_table)); table->unused_entries_index = I915_MOCS_PTE; - if (IS_DG2(i915)) { + if (IS_PONTEVECCHIO(i915)) { + table->size = ARRAY_SIZE(pvc_mocs_table); + table->table = pvc_mocs_table; + table->n_entries = PVC_NUM_MOCS_ENTRIES; + table->uc_index = 1; + table->wb_index = 2; + table->unused_entries_index = 2; + } else if (IS_DG2(i915)) { if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) { table->size = ARRAY_SIZE(dg2_mocs_table_g10_ax); table->table = dg2_mocs_table_g10_ax; @@ -622,6 +642,8 @@ void intel_set_mocs_index(struct intel_gt *gt) get_mocs_settings(gt->i915, &table); gt->mocs.uc_index = table.uc_index; + if (HAS_L3_CCS_READ(gt->i915)) + gt->mocs.wb_index = table.wb_index; } void intel_mocs_init(struct intel_gt *gt) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index a05c4b99b3fb..756807c4b405 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1994,19 +1994,37 @@ void intel_engine_apply_whitelist(struct intel_engine_cs *engine) static void engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { - u8 mocs; + u8 mocs_w, mocs_r; /* - * RING_CMD_CCTL are need to be programed to un-cached - * for memory writes and reads outputted by Command - * Streamers on Gen12 onward platforms. 
+ * RING_CMD_CCTL specifies the default MOCS entry that will be used + * by the command streamer when executing commands that don't have + * a way to explicitly specify a MOCS setting. The default should + * usually reference whichever MOCS entry corresponds to uncached + * behavior, although use of a WB cached entry is recommended by the + * spec in certain circumstances on specific platforms. */ if (GRAPHICS_VER(engine->i915) >= 12) { - mocs = engine->gt->mocs.uc_index; + mocs_r = engine->gt->mocs.uc_index; + mocs_w = engine->gt->mocs.uc_index; + + if (HAS_L3_CCS_READ(engine->i915) && + engine->class == COMPUTE_CLASS) { + mocs_r = engine->gt->mocs.wb_index; + + /* + * Even on the few platforms where MOCS 0 is a + * legitimate table entry, it's never the correct + * setting to use here; we can assume the MOCS init + * just forgot to initialize wb_index. + */ + drm_WARN_ON(&engine->i915->drm, mocs_r == 0); + } + wa_masked_field_set(wal, RING_CMD_CCTL(engine->mmio_base), CMD_CCTL_MOCS_MASK, - CMD_CCTL_MOCS_OVERRIDE(mocs, mocs)); + CMD_CCTL_MOCS_OVERRIDE(mocs_w, mocs_r)); } } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 9d5fab8d2051..9c0a8c86876a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1366,6 +1366,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_LSPCON(dev_priv) (IS_DISPLAY_VER(dev_priv, 9, 10)) +#define HAS_L3_CCS_READ(i915) (INTEL_INFO(i915)->has_l3_ccs_read) + /* DPF == dynamic parity feature */ #define HAS_L3_DPF(dev_priv) (INTEL_INFO(dev_priv)->has_l3_dpf) #define NUM_L3_SLICES(dev_priv) (IS_HSW_GT3(dev_priv) ? \ diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 1f320245c344..0e1d0493d3fe 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -1051,7 +1051,8 @@ static const struct intel_device_info ats_m_info = { #define XE_HPC_FEATURES \ XE_HP_FEATURES, \ - .dma_mask_size = 52 + .dma_mask_size = 52, \ + .has_l3_ccs_read = 1 __maybe_unused static const struct intel_device_info pvc_info = { diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 8702d1ae8fc0..55f40fa0ea49 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -143,6 +143,7 @@ enum intel_ppgtt_type { func(has_heci_pxp); \ func(has_heci_gscfi); \ func(has_guc_deprivilege); \ + func(has_l3_ccs_read); \ func(has_l3_dpf); \ func(has_llc); \ func(has_logical_ring_contexts); \ -- cgit v1.2.3 From 4de23dca7ec8dfb191ea80fbfe3f008d4ed52346 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 5 May 2022 14:38:04 -0700 Subject: drm/i915/pvc: Read correct RP_STATE_CAP register The SoC registers, including RP_STATE_CAP, have moved to a new location in GTTMMADR on Ponte Vecchio. We need to update the register offset accordingly. 
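A condensed sketch of the selection this change ends up with (mirroring the hunk below; the helper name is illustrative, and the final legacy fallback is not visible in the hunk, so it is only assumed here):

	static u32 read_rp_state_cap_sketch(struct intel_rps *rps)
	{
		struct drm_i915_private *i915 = rps_to_i915(rps);
		struct intel_uncore *uncore = rps_to_uncore(rps);

		if (IS_PONTEVECCHIO(i915))
			return intel_uncore_read(uncore, PVC_RP_STATE_CAP);	/* 0x281014 */
		if (IS_XEHPSDV(i915))
			return intel_uncore_read(uncore, XEHPSDV_RP_STATE_CAP);	/* 0x250014 */
		if (IS_GEN9_LP(i915))
			return intel_uncore_read(uncore, BXT_RP_STATE_CAP);	/* 0x138170 */

		return intel_uncore_read(uncore, GEN6_RP_STATE_CAP);	/* pre-existing path */
	}

Adding the new platform as the first branch keeps the existing XEHPSDV, GEN9_LP and legacy paths untouched.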
Cc: Rodrigo Vivi Signed-off-by: Matt Roper Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20220505213812.3979301-5-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_rps.c | 4 +++- drivers/gpu/drm/i915/i915_reg.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 3476a11f294c..3bd8415a0f1b 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -1075,7 +1075,9 @@ static u32 intel_rps_read_state_cap(struct intel_rps *rps) struct drm_i915_private *i915 = rps_to_i915(rps); struct intel_uncore *uncore = rps_to_uncore(rps); - if (IS_XEHPSDV(i915)) + if (IS_PONTEVECCHIO(i915)) + return intel_uncore_read(uncore, PVC_RP_STATE_CAP); + else if (IS_XEHPSDV(i915)) return intel_uncore_read(uncore, XEHPSDV_RP_STATE_CAP); else if (IS_GEN9_LP(i915)) return intel_uncore_read(uncore, BXT_RP_STATE_CAP); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index efcfe32cd8eb..f1a48d4578aa 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1845,6 +1845,7 @@ #define BXT_RP_STATE_CAP _MMIO(0x138170) #define GEN9_RP_STATE_LIMITS _MMIO(0x138148) #define XEHPSDV_RP_STATE_CAP _MMIO(0x250014) +#define PVC_RP_STATE_CAP _MMIO(0x281014) #define GT0_PERF_LIMIT_REASONS _MMIO(0x1381a8) #define GT0_PERF_LIMIT_REASONS_MASK 0xde3 -- cgit v1.2.3 From 6cd96877c7da6bc3a28ef0bcb3bc7470f4dd9aa6 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Thu, 5 May 2022 14:38:06 -0700 Subject: drm/i915/pvc: Reduce stack usage in reset selftest with extra blitter engine MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PVC adds extra blitter engines (in the following patch). The reset selftest has a local array on the stack which is sized by the number of engines. The increase pushes the size of this array to the point where it trips the 'stack too large' compile warning. This patch takes the allocation of the stack and makes it dynamic instead. v2 (MattR): - Minor cosmetic changes: re-sort definition and allocate using kmalloc_array(). 
(Tvrtko) Cc: Tvrtko Ursulin Signed-off-by: John Harrison Signed-off-by: Matt Roper Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20220505213812.3979301-7-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 83ff4c2e57c5..6493265d5f64 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -976,6 +976,7 @@ static int __igt_reset_engines(struct intel_gt *gt, { struct i915_gpu_error *global = >->i915->gpu_error; struct intel_engine_cs *engine, *other; + struct active_engine *threads; enum intel_engine_id id, tmp; struct hang h; int err = 0; @@ -996,8 +997,11 @@ static int __igt_reset_engines(struct intel_gt *gt, h.ctx->sched.priority = 1024; } + threads = kmalloc_array(I915_NUM_ENGINES, sizeof(*threads), GFP_KERNEL); + if (!threads) + return -ENOMEM; + for_each_engine(engine, gt, id) { - struct active_engine threads[I915_NUM_ENGINES] = {}; unsigned long device = i915_reset_count(global); unsigned long count = 0, reported; bool using_guc = intel_engine_uses_guc(engine); @@ -1016,7 +1020,7 @@ static int __igt_reset_engines(struct intel_gt *gt, break; } - memset(threads, 0, sizeof(threads)); + memset(threads, 0, sizeof(*threads) * I915_NUM_ENGINES); for_each_engine(other, gt, tmp) { struct task_struct *tsk; @@ -1236,6 +1240,7 @@ unwind: break; } } + kfree(threads); if (intel_gt_is_wedged(gt)) err = -EIO; -- cgit v1.2.3 From 93d9e0453e2bb599e0bcced1b914f9b4010180a1 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 5 May 2022 14:38:07 -0700 Subject: drm/i915/gvt: Use intel_engine_mask_t for ring mask When i915 adds additional PVC blitter instances (in an upcoming patch), the definition of VECS0 will change from bit(10) to bit(18), causing GVT's R_ALL mask to overflow the u16 storage that's currently used. Let's replace the u16 with an intel_engine_mask_t to ensure we avoid this. Cc: Tvrtko Ursulin Cc: Zhi Wang Signed-off-by: Matt Roper Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20220505213812.3979301-8-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 2459213b6c87..efad8552d6e6 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -428,7 +428,7 @@ struct cmd_info { #define R_VECS BIT(VECS0) #define R_ALL (R_RCS | R_VCS | R_BCS | R_VECS) /* rings that support this cmd: BLT/RCS/VCS/VECS */ - u16 rings; + intel_engine_mask_t rings; /* devices that support this cmd: SNB/IVB/HSW/... */ u16 devices; -- cgit v1.2.3 From 69f8afdb45e7775840693bce42da79d9c22c2e83 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 5 May 2022 14:38:08 -0700 Subject: drm/i915/pvc: Engine definitions for new copy engines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds the basic definitions needed to support new copy engines. Also updating the cmd_info to accommodate new engines, as the engine id's of legacy engines have been changed. v2: - Add _BCS(n) definition, similar to other engines. (Tvrtko) - Add I915_MAX_BCS definition, similar to other engnes. (Prathap) - Move GVT change to avoid u16 overflow to its own patch. 
(Tvrtko) Original-author: CQ Tang Cc: Tvrtko Ursulin Cc: Prathap Kumar Valsan Signed-off-by: Matt Roper Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20220505213812.3979301-9-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 56 ++++++++++++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_engine_types.h | 12 +++++- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 8 ++++ drivers/gpu/drm/i915/i915_reg.h | 8 ++++ 4 files changed, 83 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 14c6ddbbfde8..4532c3ea9ace 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -71,6 +71,62 @@ static const struct engine_info intel_engines[] = { { .graphics_ver = 6, .base = BLT_RING_BASE } }, }, + [BCS1] = { + .class = COPY_ENGINE_CLASS, + .instance = 1, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHPC_BCS1_RING_BASE } + }, + }, + [BCS2] = { + .class = COPY_ENGINE_CLASS, + .instance = 2, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHPC_BCS2_RING_BASE } + }, + }, + [BCS3] = { + .class = COPY_ENGINE_CLASS, + .instance = 3, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHPC_BCS3_RING_BASE } + }, + }, + [BCS4] = { + .class = COPY_ENGINE_CLASS, + .instance = 4, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHPC_BCS4_RING_BASE } + }, + }, + [BCS5] = { + .class = COPY_ENGINE_CLASS, + .instance = 5, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHPC_BCS5_RING_BASE } + }, + }, + [BCS6] = { + .class = COPY_ENGINE_CLASS, + .instance = 6, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHPC_BCS6_RING_BASE } + }, + }, + [BCS7] = { + .class = COPY_ENGINE_CLASS, + .instance = 7, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHPC_BCS7_RING_BASE } + }, + }, + [BCS8] = { + .class = COPY_ENGINE_CLASS, + .instance = 8, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHPC_BCS8_RING_BASE } + }, + }, [VCS0] = { .class = VIDEO_DECODE_CLASS, .instance = 0, diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 298f2cc7a879..2286f96f5f87 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -35,7 +35,7 @@ #define OTHER_CLASS 4 #define COMPUTE_CLASS 5 #define MAX_ENGINE_CLASS 5 -#define MAX_ENGINE_INSTANCE 7 +#define MAX_ENGINE_INSTANCE 8 #define I915_MAX_SLICES 3 #define I915_MAX_SUBSLICES 8 @@ -99,6 +99,7 @@ struct i915_ctx_workarounds { #define I915_MAX_SFC (I915_MAX_VCS / 2) #define I915_MAX_CCS 4 #define I915_MAX_RCS 1 +#define I915_MAX_BCS 9 /* * Engine IDs definitions. 
@@ -107,6 +108,15 @@ struct i915_ctx_workarounds { enum intel_engine_id { RCS0 = 0, BCS0, + BCS1, + BCS2, + BCS3, + BCS4, + BCS5, + BCS6, + BCS7, + BCS8, +#define _BCS(n) (BCS0 + (n)) VCS0, VCS1, VCS2, diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index a0a49c16babd..aa2c0974b02c 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -1476,6 +1476,14 @@ #define GEN11_KCR (19) #define GEN11_GTPM (16) #define GEN11_BCS (15) +#define XEHPC_BCS1 (14) +#define XEHPC_BCS2 (13) +#define XEHPC_BCS3 (12) +#define XEHPC_BCS4 (11) +#define XEHPC_BCS5 (10) +#define XEHPC_BCS6 (9) +#define XEHPC_BCS7 (8) +#define XEHPC_BCS8 (23) #define GEN12_CCS3 (7) #define GEN12_CCS2 (6) #define GEN12_CCS1 (5) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index f1a48d4578aa..117f9d909e95 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -976,6 +976,14 @@ #define GEN12_COMPUTE2_RING_BASE 0x1e000 #define GEN12_COMPUTE3_RING_BASE 0x26000 #define BLT_RING_BASE 0x22000 +#define XEHPC_BCS1_RING_BASE 0x3e0000 +#define XEHPC_BCS2_RING_BASE 0x3e2000 +#define XEHPC_BCS3_RING_BASE 0x3e4000 +#define XEHPC_BCS4_RING_BASE 0x3e6000 +#define XEHPC_BCS5_RING_BASE 0x3e8000 +#define XEHPC_BCS6_RING_BASE 0x3ea000 +#define XEHPC_BCS7_RING_BASE 0x3ec000 +#define XEHPC_BCS8_RING_BASE 0x3ee000 #define DG1_GSC_HECI1_BASE 0x00258000 #define DG1_GSC_HECI2_BASE 0x00259000 #define DG2_GSC_HECI1_BASE 0x00373000 -- cgit v1.2.3 From 500d7135c924024ed2e5e62b03dd9b3b6257fa10 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 5 May 2022 14:38:09 -0700 Subject: drm/i915/pvc: Interrupt support for new copy engines Add the interrupt handler support for new copy engines. Bspec: 54030 Original-author: CQ Tang Signed-off-by: Matt Roper Reviewed-by: Stuart Summers Link: https://patchwork.freedesktop.org/patch/msgid/20220505213812.3979301-10-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_irq.c | 16 ++++++++++++++++ drivers/gpu/drm/i915/gt/intel_gt_regs.h | 4 ++++ 2 files changed, 20 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c index 88b4becfcb17..3a72d4fd0214 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c @@ -193,6 +193,14 @@ void gen11_gt_irq_reset(struct intel_gt *gt) /* Restore masks irqs on RCS, BCS, VCS and VECS engines. */ intel_uncore_write(uncore, GEN11_RCS0_RSVD_INTR_MASK, ~0); intel_uncore_write(uncore, GEN11_BCS_RSVD_INTR_MASK, ~0); + if (HAS_ENGINE(gt, BCS1) || HAS_ENGINE(gt, BCS2)) + intel_uncore_write(uncore, XEHPC_BCS1_BCS2_INTR_MASK, ~0); + if (HAS_ENGINE(gt, BCS3) || HAS_ENGINE(gt, BCS4)) + intel_uncore_write(uncore, XEHPC_BCS3_BCS4_INTR_MASK, ~0); + if (HAS_ENGINE(gt, BCS5) || HAS_ENGINE(gt, BCS6)) + intel_uncore_write(uncore, XEHPC_BCS5_BCS6_INTR_MASK, ~0); + if (HAS_ENGINE(gt, BCS7) || HAS_ENGINE(gt, BCS8)) + intel_uncore_write(uncore, XEHPC_BCS7_BCS8_INTR_MASK, ~0); intel_uncore_write(uncore, GEN11_VCS0_VCS1_INTR_MASK, ~0); intel_uncore_write(uncore, GEN11_VCS2_VCS3_INTR_MASK, ~0); if (HAS_ENGINE(gt, VCS4) || HAS_ENGINE(gt, VCS5)) @@ -248,6 +256,14 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt) /* Unmask irqs on RCS, BCS, VCS and VECS engines. 
*/ intel_uncore_write(uncore, GEN11_RCS0_RSVD_INTR_MASK, ~smask); intel_uncore_write(uncore, GEN11_BCS_RSVD_INTR_MASK, ~smask); + if (HAS_ENGINE(gt, BCS1) || HAS_ENGINE(gt, BCS2)) + intel_uncore_write(uncore, XEHPC_BCS1_BCS2_INTR_MASK, ~dmask); + if (HAS_ENGINE(gt, BCS3) || HAS_ENGINE(gt, BCS4)) + intel_uncore_write(uncore, XEHPC_BCS3_BCS4_INTR_MASK, ~dmask); + if (HAS_ENGINE(gt, BCS5) || HAS_ENGINE(gt, BCS6)) + intel_uncore_write(uncore, XEHPC_BCS5_BCS6_INTR_MASK, ~dmask); + if (HAS_ENGINE(gt, BCS7) || HAS_ENGINE(gt, BCS8)) + intel_uncore_write(uncore, XEHPC_BCS7_BCS8_INTR_MASK, ~dmask); intel_uncore_write(uncore, GEN11_VCS0_VCS1_INTR_MASK, ~dmask); intel_uncore_write(uncore, GEN11_VCS2_VCS3_INTR_MASK, ~dmask); if (HAS_ENGINE(gt, VCS4) || HAS_ENGINE(gt, VCS5)) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index aa2c0974b02c..fe09288a3145 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -1529,6 +1529,10 @@ #define GEN11_GUNIT_CSME_INTR_MASK _MMIO(0x1900f4) #define GEN12_CCS0_CCS1_INTR_MASK _MMIO(0x190100) #define GEN12_CCS2_CCS3_INTR_MASK _MMIO(0x190104) +#define XEHPC_BCS1_BCS2_INTR_MASK _MMIO(0x190110) +#define XEHPC_BCS3_BCS4_INTR_MASK _MMIO(0x190114) +#define XEHPC_BCS5_BCS6_INTR_MASK _MMIO(0x190118) +#define XEHPC_BCS7_BCS8_INTR_MASK _MMIO(0x19011c) #define GEN12_SFC_DONE(n) _MMIO(0x1cc000 + (n) * 0x1000) -- cgit v1.2.3 From 8caaf7ad659da9b757781d5f08ce0bf98801931e Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 5 May 2022 14:38:10 -0700 Subject: drm/i915/pvc: Reset support for new copy engines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the reset support for new copy engines in PVC. Bspec: 52549 Original-author: CQ Tang Signed-off-by: Matt Roper Reviewed-by: José Roberto de Souza Reviewed-by: Stuart Summers Link: https://patchwork.freedesktop.org/patch/msgid/20220505213812.3979301-11-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 8 ++++++ drivers/gpu/drm/i915/gt/intel_gt_regs.h | 44 ++++++++++++++++++------------- 2 files changed, 34 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 4532c3ea9ace..c6e93db134b1 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -390,6 +390,14 @@ static u32 get_reset_domain(u8 ver, enum intel_engine_id id) static const u32 engine_reset_domains[] = { [RCS0] = GEN11_GRDOM_RENDER, [BCS0] = GEN11_GRDOM_BLT, + [BCS1] = XEHPC_GRDOM_BLT1, + [BCS2] = XEHPC_GRDOM_BLT2, + [BCS3] = XEHPC_GRDOM_BLT3, + [BCS4] = XEHPC_GRDOM_BLT4, + [BCS5] = XEHPC_GRDOM_BLT5, + [BCS6] = XEHPC_GRDOM_BLT6, + [BCS7] = XEHPC_GRDOM_BLT7, + [BCS8] = XEHPC_GRDOM_BLT8, [VCS0] = GEN11_GRDOM_MEDIA, [VCS1] = GEN11_GRDOM_MEDIA2, [VCS2] = GEN11_GRDOM_MEDIA3, diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index fe09288a3145..98ede9c93f00 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -597,24 +597,32 @@ /* GEN11 changed all bit defs except for FULL & RENDER */ #define GEN11_GRDOM_FULL GEN6_GRDOM_FULL #define GEN11_GRDOM_RENDER GEN6_GRDOM_RENDER -#define GEN11_GRDOM_BLT (1 << 2) -#define GEN11_GRDOM_GUC (1 << 3) -#define GEN11_GRDOM_MEDIA (1 << 5) -#define GEN11_GRDOM_MEDIA2 (1 << 6) -#define GEN11_GRDOM_MEDIA3 (1 << 7) -#define GEN11_GRDOM_MEDIA4 (1 << 8) -#define 
GEN11_GRDOM_MEDIA5 (1 << 9) -#define GEN11_GRDOM_MEDIA6 (1 << 10) -#define GEN11_GRDOM_MEDIA7 (1 << 11) -#define GEN11_GRDOM_MEDIA8 (1 << 12) -#define GEN11_GRDOM_VECS (1 << 13) -#define GEN11_GRDOM_VECS2 (1 << 14) -#define GEN11_GRDOM_VECS3 (1 << 15) -#define GEN11_GRDOM_VECS4 (1 << 16) -#define GEN11_GRDOM_SFC0 (1 << 17) -#define GEN11_GRDOM_SFC1 (1 << 18) -#define GEN11_GRDOM_SFC2 (1 << 19) -#define GEN11_GRDOM_SFC3 (1 << 20) +#define XEHPC_GRDOM_BLT8 REG_BIT(31) +#define XEHPC_GRDOM_BLT7 REG_BIT(30) +#define XEHPC_GRDOM_BLT6 REG_BIT(29) +#define XEHPC_GRDOM_BLT5 REG_BIT(28) +#define XEHPC_GRDOM_BLT4 REG_BIT(27) +#define XEHPC_GRDOM_BLT3 REG_BIT(26) +#define XEHPC_GRDOM_BLT2 REG_BIT(25) +#define XEHPC_GRDOM_BLT1 REG_BIT(24) +#define GEN11_GRDOM_SFC3 REG_BIT(20) +#define GEN11_GRDOM_SFC2 REG_BIT(19) +#define GEN11_GRDOM_SFC1 REG_BIT(18) +#define GEN11_GRDOM_SFC0 REG_BIT(17) +#define GEN11_GRDOM_VECS4 REG_BIT(16) +#define GEN11_GRDOM_VECS3 REG_BIT(15) +#define GEN11_GRDOM_VECS2 REG_BIT(14) +#define GEN11_GRDOM_VECS REG_BIT(13) +#define GEN11_GRDOM_MEDIA8 REG_BIT(12) +#define GEN11_GRDOM_MEDIA7 REG_BIT(11) +#define GEN11_GRDOM_MEDIA6 REG_BIT(10) +#define GEN11_GRDOM_MEDIA5 REG_BIT(9) +#define GEN11_GRDOM_MEDIA4 REG_BIT(8) +#define GEN11_GRDOM_MEDIA3 REG_BIT(7) +#define GEN11_GRDOM_MEDIA2 REG_BIT(6) +#define GEN11_GRDOM_MEDIA REG_BIT(5) +#define GEN11_GRDOM_GUC REG_BIT(3) +#define GEN11_GRDOM_BLT REG_BIT(2) #define GEN11_VCS_SFC_RESET_BIT(instance) (GEN11_GRDOM_SFC0 << ((instance) >> 1)) #define GEN11_VECS_SFC_RESET_BIT(instance) (GEN11_GRDOM_SFC0 << (instance)) -- cgit v1.2.3 From 1a1a5a315ee805bec457fd214250c088efadb50b Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 5 May 2022 14:38:11 -0700 Subject: drm/i915/pvc: skip all copy engines from aux table invalidate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As we have more copy engines now, mask all of them from aux table invalidate. v2 (MattR): - Use I915_MAX_BCS to determine mask rather than hardcoding BCS8. (Prathap) Cc: Prathap Kumar Valsan Signed-off-by: Lucas De Marchi Signed-off-by: Matt Roper Reviewed-by: José Roberto de Souza Reviewed-by: Prathap Kumar Valsan Link: https://patchwork.freedesktop.org/patch/msgid/20220505213812.3979301-12-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/gen8_engine_cs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c index 3e13960615bd..daa1a61972f4 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -272,7 +272,8 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode) if (!HAS_FLAT_CCS(rq->engine->i915) && (rq->engine->class == VIDEO_DECODE_CLASS || rq->engine->class == VIDEO_ENHANCEMENT_CLASS)) { - aux_inv = rq->engine->mask & ~BIT(BCS0); + aux_inv = rq->engine->mask & + ~GENMASK(_BCS(I915_MAX_BCS - 1), BCS0); if (aux_inv) cmd += 4; } -- cgit v1.2.3 From ad5f74f3420183052532a220edd9a37aba92724a Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 5 May 2022 14:38:12 -0700 Subject: drm/i915/pvc: read fuses for link copy engines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The new Link Copy engines in PVC may be fused off according to the mslice_mask. Each bit of the MEML3_EN_MASK we read from the GEN10_MIRROR_FUSE3 register disables a pair of link copy engines. 
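For readers unfamiliar with the fuse layout, a worked example of the rule described above (the loop itself is in the diff below; the mask value here is made up purely for illustration):

	/* GEN12_MEML3_EN_MASK field extracted from GEN10_MIRROR_FUSE3 */
	unsigned long meml3_mask = 0x5;	/* binary 0101: quads 0 and 2 present */

	/* clear bit 1  ->  BCS3 and BCS4 fused off
	 * clear bit 3  ->  BCS7 and BCS8 fused off
	 * BCS1/BCS2 and BCS5/BCS6 remain available
	 */

Each fused-off quad q therefore removes the engine pair BCS(2q+1) and BCS(2q+2), which is exactly the pair the GENMASK(_BCS(instance + 1), _BCS(instance)) expression in the patch computes before clearing it from the engine mask.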
v2 (Tvrtko): - Minor cosmetic changes: s/u8/unsigned long/, use instance local variable. (Tvrtko) Bspec: 44483 Cc: Matt Roper Cc: Tvrtko Ursulin Signed-off-by: Lucas De Marchi Signed-off-by: Matt Roper Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20220505213812.3979301-13-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index c6e93db134b1..1adbf34c3632 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -686,6 +686,34 @@ static void engine_mask_apply_compute_fuses(struct intel_gt *gt) } } +static void engine_mask_apply_copy_fuses(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + struct intel_gt_info *info = >->info; + unsigned long meml3_mask; + unsigned long quad; + + meml3_mask = intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3); + meml3_mask = REG_FIELD_GET(GEN12_MEML3_EN_MASK, meml3_mask); + + /* + * Link Copy engines may be fused off according to meml3_mask. Each + * bit is a quad that houses 2 Link Copy and two Sub Copy engines. + */ + for_each_clear_bit(quad, &meml3_mask, GEN12_MAX_MSLICES) { + unsigned int instance = quad * 2 + 1; + intel_engine_mask_t mask = GENMASK(_BCS(instance + 1), + _BCS(instance)); + + if (mask & info->engine_mask) { + drm_dbg(&i915->drm, "bcs%u fused off\n", instance); + drm_dbg(&i915->drm, "bcs%u fused off\n", instance + 1); + + info->engine_mask &= ~mask; + } + } +} + /* * Determine which engines are fused off in our particular hardware. * Note that we have a catch-22 situation where we need to be able to access @@ -768,6 +796,7 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt) GEM_BUG_ON(vebox_mask != VEBOX_MASK(gt)); engine_mask_apply_compute_fuses(gt); + engine_mask_apply_copy_fuses(gt); return info->engine_mask; } -- cgit v1.2.3 From 303760aa914b7f5ac9602dbb4b471a2ad52eeb3e Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Mon, 25 Apr 2022 17:30:45 -0700 Subject: i915/guc/reset: Make __guc_reset_context aware of guilty engines There are 2 ways an engine can get reset in i915 and the method of reset affects how KMD labels a context as guilty/innocent. (1) GuC initiated engine-reset: GuC resets a hung engine and notifies KMD. The context that hung on the engine is marked guilty and all other contexts are innocent. The innocent contexts are resubmitted. (2) GT based reset: When an engine heartbeat fails to tick, KMD initiates a gt/chip reset. All active contexts are marked as guilty and discarded. In order to correctly mark the contexts as guilty/innocent, pass a mask of engines that were reset to __guc_reset_context. 
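The core of the change is how guilt is now derived per context; a minimal sketch of the test (the full version is in __guc_reset_context() in the diff below):

	/* A request is blamed only if it had actually started executing
	 * AND its engine is in the mask of engines that were reset. */
	bool guilty = i915_request_started(rq) && (stalled & ce->engine->mask);

A full GT reset passes ALL_ENGINES for 'stalled', so every started request is blamed, while the GuC-initiated path (guc_context_replay) passes only ce->engine->mask and leaves contexts on other engines innocent.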
Fixes: eb5e7da736f3 ("drm/i915/guc: Reset implementation for new GuC interface") Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Alan Previn Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20220426003045.3929439-1-umesh.nerlige.ramappa@intel.com --- drivers/gpu/drm/i915/gt/intel_reset.c | 2 +- drivers/gpu/drm/i915/gt/uc/intel_guc.h | 2 +- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 16 ++++++++-------- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 2 +- drivers/gpu/drm/i915/gt/uc/intel_uc.h | 2 +- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 894f17f8b4ce..11bf33f1f772 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -808,7 +808,7 @@ static int gt_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask) __intel_engine_reset(engine, stalled_mask & engine->mask); local_bh_enable(); - intel_uc_reset(>->uc, true); + intel_uc_reset(>->uc, ALL_ENGINES); intel_ggtt_restore_fences(gt->ggtt); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 3f3373f68123..966e69a8b1c1 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -443,7 +443,7 @@ int intel_guc_global_policies_update(struct intel_guc *guc); void intel_guc_context_ban(struct intel_context *ce, struct i915_request *rq); void intel_guc_submission_reset_prepare(struct intel_guc *guc); -void intel_guc_submission_reset(struct intel_guc *guc, bool stalled); +void intel_guc_submission_reset(struct intel_guc *guc, intel_engine_mask_t stalled); void intel_guc_submission_reset_finish(struct intel_guc *guc); void intel_guc_submission_cancel_requests(struct intel_guc *guc); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 8bf8b6d588d4..5a1dfacf24ea 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1654,9 +1654,9 @@ __unwind_incomplete_requests(struct intel_context *ce) spin_unlock_irqrestore(&sched_engine->lock, flags); } -static void __guc_reset_context(struct intel_context *ce, bool stalled) +static void __guc_reset_context(struct intel_context *ce, intel_engine_mask_t stalled) { - bool local_stalled; + bool guilty; struct i915_request *rq; unsigned long flags; u32 head; @@ -1684,7 +1684,7 @@ static void __guc_reset_context(struct intel_context *ce, bool stalled) if (!intel_context_is_pinned(ce)) goto next_context; - local_stalled = false; + guilty = false; rq = intel_context_find_active_request(ce); if (!rq) { head = ce->ring->tail; @@ -1692,14 +1692,14 @@ static void __guc_reset_context(struct intel_context *ce, bool stalled) } if (i915_request_started(rq)) - local_stalled = true; + guilty = stalled & ce->engine->mask; GEM_BUG_ON(i915_active_is_idle(&ce->active)); head = intel_ring_wrap(ce->ring, rq->head); - __i915_request_reset(rq, local_stalled && stalled); + __i915_request_reset(rq, guilty); out_replay: - guc_reset_state(ce, head, local_stalled && stalled); + guc_reset_state(ce, head, guilty); next_context: if (i != number_children) ce = list_next_entry(ce, parallel.child_link); @@ -1709,7 +1709,7 @@ next_context: intel_context_put(parent); } -void intel_guc_submission_reset(struct intel_guc *guc, bool stalled) +void intel_guc_submission_reset(struct intel_guc *guc, intel_engine_mask_t stalled) { struct intel_context *ce; 
unsigned long index; @@ -4228,7 +4228,7 @@ static void guc_context_replay(struct intel_context *ce) { struct i915_sched_engine *sched_engine = ce->engine->sched_engine; - __guc_reset_context(ce, true); + __guc_reset_context(ce, ce->engine->mask); tasklet_hi_schedule(&sched_engine->tasklet); } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index ecf149c5fdb0..3c3527cb0007 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -597,7 +597,7 @@ sanitize: __uc_sanitize(uc); } -void intel_uc_reset(struct intel_uc *uc, bool stalled) +void intel_uc_reset(struct intel_uc *uc, intel_engine_mask_t stalled) { struct intel_guc *guc = &uc->guc; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_uc.h index 866b462821c0..a8f38c2c60e2 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h @@ -42,7 +42,7 @@ void intel_uc_driver_late_release(struct intel_uc *uc); void intel_uc_driver_remove(struct intel_uc *uc); void intel_uc_init_mmio(struct intel_uc *uc); void intel_uc_reset_prepare(struct intel_uc *uc); -void intel_uc_reset(struct intel_uc *uc, bool stalled); +void intel_uc_reset(struct intel_uc *uc, intel_engine_mask_t stalled); void intel_uc_reset_finish(struct intel_uc *uc); void intel_uc_cancel_requests(struct intel_uc *uc); void intel_uc_suspend(struct intel_uc *uc); -- cgit v1.2.3 From 18fb42db05a0b93ab5dd5eab5315e50eaa3ca620 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 13 May 2022 08:51:36 +0100 Subject: drm/i915: Fix CFI violation with show_dynamic_id() When an attribute group is created with sysfs_create_group(), the ->sysfs_ops() callback is set to kobj_sysfs_ops, which sets the ->show() callback to kobj_attr_show(). kobj_attr_show() uses container_of() to get the ->show() callback from the attribute it was passed, meaning the ->show() callback needs to be the same type as the ->show() callback in 'struct kobj_attribute'. However, show_dynamic_id() has the type of the ->show() callback in 'struct device_attribute', which causes a CFI violation when opening the 'id' sysfs node under drm/card0/metrics. This happens to work because the layout of 'struct kobj_attribute' and 'struct device_attribute' are the same, so the container_of() cast happens to allow the ->show() callback to still work. Change the type of show_dynamic_id() to match the ->show() callback in 'struct kobj_attributes' and update the type of sysfs_metric_id to match, which resolves the CFI violation. 
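To make the type mismatch concrete, these are the two callback shapes involved (a sketch; only the second matches what kobj_attr_show() actually calls through):

	/* device_attribute flavour the sysfs node was wired up with */
	ssize_t show(struct device *dev, struct device_attribute *attr, char *buf);

	/* kobj_attribute flavour that kobj_attr_show() expects */
	ssize_t show(struct kobject *kobj, struct kobj_attribute *attr, char *buf);

The two structures happen to share a layout, so the container_of() in kobj_attr_show() appeared to work, but the indirect call through the wrong function type is precisely what Control Flow Integrity rejects.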
Fixes: f89823c21224 ("drm/i915/perf: Implement I915_PERF_ADD/REMOVE_CONFIG interface") Signed-off-by: Nathan Chancellor Reviewed-by: Kees Cook Reviewed-by: Sami Tolvanen Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20220513075136.1027007-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_perf.c | 4 ++-- drivers/gpu/drm/i915/i915_perf_types.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 0a9c3fcc09b1..1577ab6754db 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -4050,8 +4050,8 @@ addr_err: return ERR_PTR(err); } -static ssize_t show_dynamic_id(struct device *dev, - struct device_attribute *attr, +static ssize_t show_dynamic_id(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) { struct i915_oa_config *oa_config = diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h index 473a3c0544bb..05cb9a335a97 100644 --- a/drivers/gpu/drm/i915/i915_perf_types.h +++ b/drivers/gpu/drm/i915/i915_perf_types.h @@ -55,7 +55,7 @@ struct i915_oa_config { struct attribute_group sysfs_metric; struct attribute *attrs[2]; - struct device_attribute sysfs_metric_id; + struct kobj_attribute sysfs_metric_id; struct kref ref; struct rcu_head rcu; -- cgit v1.2.3 From ca10b9d60f8c9556720bad8b1ec7d522e353a01d Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Thu, 5 May 2022 22:41:42 -0700 Subject: drm/i915/guc/rc: Use i915_probe_error instead of drm_error To avoid false positives in error injection cases. Signed-off-by: Vinay Belgaumkar Reviewed-by: Alan Previn Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20220506054142.5025-1-vinay.belgaumkar@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c index e00661fb0853..8f8dd05835c5 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c @@ -49,7 +49,6 @@ static int guc_action_control_gucrc(struct intel_guc *guc, bool enable) static int __guc_rc_control(struct intel_guc *guc, bool enable) { struct intel_gt *gt = guc_to_gt(guc); - struct drm_device *drm = &guc_to_gt(guc)->i915->drm; int ret; if (!intel_uc_uses_guc_rc(>->uc)) @@ -60,8 +59,8 @@ static int __guc_rc_control(struct intel_guc *guc, bool enable) ret = guc_action_control_gucrc(guc, enable); if (ret) { - drm_err(drm, "Failed to %s GuC RC (%pe)\n", - str_enable_disable(enable), ERR_PTR(ret)); + i915_probe_error(guc_to_gt(guc)->i915, "Failed to %s GuC RC (%pe)\n", + str_enable_disable(enable), ERR_PTR(ret)); return ret; } -- cgit v1.2.3 From d158367c31f0f87567d2e8a0955287dc005d40e5 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Wed, 11 May 2022 17:37:44 +0200 Subject: drm/i915: return -EIO on lmem setup failure Caller of setup_lmem() ignores -ENODEV but failing to setup lmem on dGPU isn't ignorable error. 
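A sketch of why the errno choice matters (the caller shape below paraphrases the behaviour described in the commit message and is not copied from the actual probe code):

	mem = setup_lmem(gt);
	if (IS_ERR(mem) && PTR_ERR(mem) == -ENODEV)
		mem = NULL;	/* treated as "region not present", probe continues */

Returning -EIO for genuine setup failures on a dGPU ensures the error propagates and fails driver load instead of being silently swallowed by the -ENODEV special case.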
Signed-off-by: Nirmoy Das Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20220511153746.14142-1-nirmoy.das@intel.com --- drivers/gpu/drm/i915/gt/intel_region_lmem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c index f5111c0a0060..5a7c403d718a 100644 --- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c +++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c @@ -108,7 +108,7 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt) flat_ccs_base = (flat_ccs_base >> XEHPSDV_CCS_BASE_SHIFT) * SZ_64K; if (GEM_WARN_ON(lmem_size < flat_ccs_base)) - return ERR_PTR(-ENODEV); + return ERR_PTR(-EIO); tile_stolen = lmem_size - flat_ccs_base; @@ -131,7 +131,7 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt) io_start = pci_resource_start(pdev, 2); io_size = min(pci_resource_len(pdev, 2), lmem_size); if (!io_size) - return ERR_PTR(-ENODEV); + return ERR_PTR(-EIO); min_page_size = HAS_64K_PAGES(i915) ? I915_GTT_PAGE_SIZE_64K : I915_GTT_PAGE_SIZE_4K; -- cgit v1.2.3 From 8f6de23184452793e60945a26ac40db435d7798d Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Wed, 11 May 2022 17:37:45 +0200 Subject: drm/i915: determine lmem_size properly Determine lmem_size using ADDR_RANGE register so that lmem_setup() works on platform with small-bar as well. Signed-off-by: Nirmoy Das Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20220511153746.14142-2-nirmoy.das@intel.com --- drivers/gpu/drm/i915/gt/intel_region_lmem.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c index 5a7c403d718a..cd105ec10429 100644 --- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c +++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c @@ -101,9 +101,13 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt) return ERR_PTR(-ENODEV); if (HAS_FLAT_CCS(i915)) { + resource_size_t lmem_range; u64 tile_stolen, flat_ccs_base; - lmem_size = pci_resource_len(pdev, 2); + lmem_range = intel_gt_read_register(&i915->gt0, XEHPSDV_TILE0_ADDR_RANGE) & 0xFFFF; + lmem_size = lmem_range >> XEHPSDV_TILE_LMEM_RANGE_SHIFT; + lmem_size *= SZ_1G; + flat_ccs_base = intel_gt_read_register(gt, XEHPSDV_FLAT_CCS_BASE_ADDR); flat_ccs_base = (flat_ccs_base >> XEHPSDV_CCS_BASE_SHIFT) * SZ_64K; -- cgit v1.2.3 From 9e97c46f832d4669b4e52cde5ad0bd43423504eb Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Wed, 11 May 2022 17:37:46 +0200 Subject: drm/i915: gracefully error out on platform with small-bar Currently we just fatally crash during module load if we encounter small-BAR configuration on DG2. We have most of the support already, but we lack the final uAPI bits to tie it all together. 
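The condition being detected, shown in isolation (the real check is in the hunk below; pdev and lmem_size come from the surrounding setup_lmem()):

	/* "small BAR": the CPU-visible PCI BAR 2 window is smaller than the
	 * device's total local memory, so not all of LMEM is CPU-mappable. */
	if (pci_resource_len(pdev, 2) < lmem_size)
		return ERR_PTR(-EINVAL);

Until the remaining uAPI pieces land, refusing to load with a clear message is preferable to the previous fatal crash during module load.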
Signed-off-by: Nirmoy Das Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld [mauld: reword the commit and error message slightly] Link: https://patchwork.freedesktop.org/patch/msgid/20220511153746.14142-3-nirmoy.das@intel.com --- drivers/gpu/drm/i915/gt/intel_region_lmem.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c index cd105ec10429..e9c12e0d6f59 100644 --- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c +++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c @@ -111,6 +111,12 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt) flat_ccs_base = intel_gt_read_register(gt, XEHPSDV_FLAT_CCS_BASE_ADDR); flat_ccs_base = (flat_ccs_base >> XEHPSDV_CCS_BASE_SHIFT) * SZ_64K; + /* FIXME: Remove this when we have small-bar enabled */ + if (pci_resource_len(pdev, 2) < lmem_size) { + drm_err(&i915->drm, "System requires small-BAR support, which is currently unsupported on this kernel\n"); + return ERR_PTR(-EINVAL); + } + if (GEM_WARN_ON(lmem_size < flat_ccs_base)) return ERR_PTR(-EIO); -- cgit v1.2.3 From e180a7b218487065efd9a3f05eac5de7de128e19 Mon Sep 17 00:00:00 2001 From: Alan Previn Date: Fri, 6 May 2022 21:58:47 -0700 Subject: drm/i915/guc: Remove unnecessary GuC err capture noise GuC error capture blurts some debug messages about empty register lists for certain register types on engines during firmware initialization. These are not errors or warnings, so get rid of them. Signed-off-by: Alan Previn Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20220507045847.862261-2-alan.previn.teres.alexis@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c | 77 +------------------------- 1 file changed, 2 insertions(+), 75 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c index c4e25966d3e9..97a32e610c30 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c @@ -420,72 +420,6 @@ guc_capture_get_device_reglist(struct intel_guc *guc) return default_lists; } -static const char * -__stringify_owner(u32 owner) -{ - switch (owner) { - case GUC_CAPTURE_LIST_INDEX_PF: - return "PF"; - case GUC_CAPTURE_LIST_INDEX_VF: - return "VF"; - default: - return "unknown"; - } - - return ""; -} - -static const char * -__stringify_type(u32 type) -{ - switch (type) { - case GUC_CAPTURE_LIST_TYPE_GLOBAL: - return "Global"; - case GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS: - return "Class"; - case GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE: - return "Instance"; - default: - return "unknown"; - } - - return ""; -} - -static const char * -__stringify_engclass(u32 class) -{ - switch (class) { - case GUC_RENDER_CLASS: - return "Render"; - case GUC_VIDEO_CLASS: - return "Video"; - case GUC_VIDEOENHANCE_CLASS: - return "VideoEnhance"; - case GUC_BLITTER_CLASS: - return "Blitter"; - case GUC_COMPUTE_CLASS: - return "Compute"; - default: - return "unknown"; - } - - return ""; -} - -static void -guc_capture_warn_with_list_info(struct drm_i915_private *i915, char *msg, - u32 owner, u32 type, u32 classid) -{ - if (type == GUC_CAPTURE_LIST_TYPE_GLOBAL) - drm_dbg(&i915->drm, "GuC-capture: %s for %s %s-Registers.\n", msg, - __stringify_owner(owner), __stringify_type(type)); - else - drm_dbg(&i915->drm, "GuC-capture: %s for %s %s-Registers on %s-Engine\n", msg, - __stringify_owner(owner), __stringify_type(type), - __stringify_engclass(classid)); 
-} - static int guc_capture_list_init(struct intel_guc *guc, u32 owner, u32 type, u32 classid, struct guc_mmio_reg *ptr, u16 num_entries) @@ -501,11 +435,8 @@ guc_capture_list_init(struct intel_guc *guc, u32 owner, u32 type, u32 classid, return -ENODEV; match = guc_capture_get_one_list(reglists, owner, type, classid); - if (!match) { - guc_capture_warn_with_list_info(i915, "Missing register list init", owner, type, - classid); + if (!match) return -ENODATA; - } for (i = 0; i < num_entries && i < match->num_regs; ++i) { ptr[i].offset = match->list[i].reg.reg; @@ -556,7 +487,6 @@ int intel_guc_capture_getlistsize(struct intel_guc *guc, u32 owner, u32 type, u32 classid, size_t *size) { - struct drm_i915_private *i915 = guc_to_gt(guc)->i915; struct intel_guc_state_capture *gc = guc->capture; struct __guc_capture_ads_cache *cache = &gc->ads_cache[owner][type][classid]; int num_regs; @@ -570,11 +500,8 @@ intel_guc_capture_getlistsize(struct intel_guc *guc, u32 owner, u32 type, u32 cl } num_regs = guc_cap_list_num_regs(gc, owner, type, classid); - if (!num_regs) { - guc_capture_warn_with_list_info(i915, "Missing register list size", - owner, type, classid); + if (!num_regs) return -ENODATA; - } *size = PAGE_ALIGN((sizeof(struct guc_debug_capture_list)) + (num_regs * sizeof(struct guc_mmio_reg))); -- cgit v1.2.3 From a50794f26f52c66cb793d5d392f5f19bc2962cdd Mon Sep 17 00:00:00 2001 From: Ramalingam C Date: Mon, 2 May 2022 19:45:08 +0530 Subject: uapi/drm/i915: Document memory residency and Flat-CCS capability of obj Capture the impact of memory region preference list of the objects, on their memory residency and Flat-CCS capability. v2: Fix the Flat-CCS capability of an obj with {lmem, smem} preference list [Thomas] v3: Reworded the doc [Matt] v4: Fixed Typos and spelling mistakes [Tvrtko, Joonas] Signed-off-by: Ramalingam C cc: Matthew Auld cc: Thomas Hellstrom cc: Daniel Vetter cc: Jon Bloomfield cc: Lionel Landwerlin cc: Kenneth Graunke cc: mesa-dev@lists.freedesktop.org cc: Jordan Justen cc: Tony Ye Reviewed-by: Matthew Auld Acked-by: Jordan Justen Link: https://patchwork.freedesktop.org/patch/msgid/20220502141508.2327-1-ramalingam.c@intel.com --- include/uapi/drm/i915_drm.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index a2def7b27009..de49b68b4fc8 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -3443,6 +3443,22 @@ struct drm_i915_gem_create_ext { * At which point we get the object handle in &drm_i915_gem_create_ext.handle, * along with the final object size in &drm_i915_gem_create_ext.size, which * should account for any rounding up, if required. + * + * Note that userspace has no means of knowing the current backing region + * for objects where @num_regions is larger than one. The kernel will only + * ensure that the priority order of the @regions array is honoured, either + * when initially placing the object, or when moving memory around due to + * memory pressure + * + * On Flat-CCS capable HW, compression is supported for the objects residing + * in I915_MEMORY_CLASS_DEVICE. When such objects (compressed) have other + * memory class in @regions and migrated (by i915, due to memory + * constraints) to the non I915_MEMORY_CLASS_DEVICE region, then i915 needs to + * decompress the content. But i915 doesn't have the required information to + * decompress the userspace compressed objects. 
+ * + * So i915 supports Flat-CCS, on the objects which can reside only on + * I915_MEMORY_CLASS_DEVICE regions. */ struct drm_i915_gem_create_ext_memory_regions { /** @base: Extension link. See struct i915_user_extension. */ -- cgit v1.2.3 From 451374eef622fca6f00eeeda89aaccb45a30a149 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 11 May 2022 13:52:19 +0200 Subject: drm/i915: Use i915_gem_object_ggtt_pin_ww for reloc_iomap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When removing short term pins, I've changed the the batch buffer pinning for relocation to use __i915_vma_pin, because i915_gem_object_ggtt_pin_ww was destroying the old vma. This caused regressions, because the functions are not identical. Fix the regressions by calling i915_gem_object_ggtt_pin_ww() again on ggtt-only platforms, but only if the batch can be pinned without being moved. Fixes: b5cfe6f7a6e1 ("drm/i915: Remove short-term pins from execbuf, v6.") Cc: Matthew Auld Reported-by: Mateusz Jończyk Tested-by: Hans de Goede Signed-off-by: Maarten Lankhorst Acked-by: Matthew Auld Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/5806 Link: https://patchwork.freedesktop.org/patch/msgid/20220511115219.46507-1-maarten.lankhorst@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index b3383e047505..c326bd2b444f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -1251,14 +1251,12 @@ static void *reloc_iomap(struct i915_vma *batch, * Only attempt to pin the batch buffer to ggtt if the current batch * is not inside ggtt, or the batch buffer is not misplaced. */ - if (!i915_is_ggtt(batch->vm)) { + if (!i915_is_ggtt(batch->vm) || + !i915_vma_misplaced(batch, 0, 0, PIN_MAPPABLE)) { vma = i915_gem_object_ggtt_pin_ww(obj, &eb->ww, NULL, 0, 0, PIN_MAPPABLE | PIN_NONBLOCK /* NOWARN */ | PIN_NOEVICT); - } else if (i915_vma_is_map_and_fenceable(batch)) { - __i915_vma_pin(batch); - vma = batch; } if (vma == ERR_PTR(-EDEADLK)) -- cgit v1.2.3 From 1ade30812abfdd1c161a155fd54b0dd594c217ee Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Tue, 10 May 2022 16:04:47 +0200 Subject: drm/i915/gt: Fix use of static in macro mismatch The INTEL_GT_RPS_SYSFS_ATTR was creating to different structures but. When called with the "static" keyword this is affecting only the first structure, while the second is created as non static. Move the static keyword inside the macros to affect both the structures. 
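A sketch of the expansion problem being fixed (one attribute shown for illustration):

	/* Old macro + caller:
	 *     static INTEL_GT_RPS_SYSFS_ATTR_RO(act_freq_mhz);
	 * expands to two declarations, but the caller's 'static' binds only
	 * to the first of them:
	 */
	static struct device_attribute dev_attr_gt_act_freq_mhz =
		__ATTR(gt_act_freq_mhz, 0444, act_freq_mhz_show, NULL);
	struct device_attribute dev_attr_rps_act_freq_mhz =		/* silently non-static */
		__ATTR(rps_act_freq_mhz, 0444, act_freq_mhz_show, NULL);

Moving 'static' inside the macro makes both generated structures static, which is what the callers intended all along.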
Reported-by: Jani Nikula Fixes: 56a709cf77468 ("drm/i915/gt: Create per-tile RPS sysfs interfaces") Signed-off-by: Andi Shyti Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20220510140447.80200-1-andi.shyti@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c index e92990d514b2..f76b6cf8040e 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c @@ -457,22 +457,23 @@ static ssize_t vlv_rpe_freq_mhz_show(struct device *dev, } #define INTEL_GT_RPS_SYSFS_ATTR(_name, _mode, _show, _store) \ - struct device_attribute dev_attr_gt_##_name = __ATTR(gt_##_name, _mode, _show, _store); \ - struct device_attribute dev_attr_rps_##_name = __ATTR(rps_##_name, _mode, _show, _store) + static struct device_attribute dev_attr_gt_##_name = __ATTR(gt_##_name, _mode, _show, _store); \ + static struct device_attribute dev_attr_rps_##_name = __ATTR(rps_##_name, _mode, _show, _store) #define INTEL_GT_RPS_SYSFS_ATTR_RO(_name) \ INTEL_GT_RPS_SYSFS_ATTR(_name, 0444, _name##_show, NULL) #define INTEL_GT_RPS_SYSFS_ATTR_RW(_name) \ INTEL_GT_RPS_SYSFS_ATTR(_name, 0644, _name##_show, _name##_store) -static INTEL_GT_RPS_SYSFS_ATTR_RO(act_freq_mhz); -static INTEL_GT_RPS_SYSFS_ATTR_RO(cur_freq_mhz); -static INTEL_GT_RPS_SYSFS_ATTR_RW(boost_freq_mhz); -static INTEL_GT_RPS_SYSFS_ATTR_RO(RP0_freq_mhz); -static INTEL_GT_RPS_SYSFS_ATTR_RO(RP1_freq_mhz); -static INTEL_GT_RPS_SYSFS_ATTR_RO(RPn_freq_mhz); -static INTEL_GT_RPS_SYSFS_ATTR_RW(max_freq_mhz); -static INTEL_GT_RPS_SYSFS_ATTR_RW(min_freq_mhz); +/* The below macros generate static structures */ +INTEL_GT_RPS_SYSFS_ATTR_RO(act_freq_mhz); +INTEL_GT_RPS_SYSFS_ATTR_RO(cur_freq_mhz); +INTEL_GT_RPS_SYSFS_ATTR_RW(boost_freq_mhz); +INTEL_GT_RPS_SYSFS_ATTR_RO(RP0_freq_mhz); +INTEL_GT_RPS_SYSFS_ATTR_RO(RP1_freq_mhz); +INTEL_GT_RPS_SYSFS_ATTR_RO(RPn_freq_mhz); +INTEL_GT_RPS_SYSFS_ATTR_RW(max_freq_mhz); +INTEL_GT_RPS_SYSFS_ATTR_RW(min_freq_mhz); static DEVICE_ATTR_RO(vlv_rpe_freq_mhz); -- cgit v1.2.3 From 962bd34bb457f6353f333ce234c3fd34cad1c00a Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 18 May 2022 14:33:15 +0300 Subject: drm/i915/uc: Fix undefined behavior due to shift overflowing the constant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix: In file included from :0:0: drivers/gpu/drm/i915/gt/uc/intel_guc.c: In function ‘intel_guc_send_mmio’: ././include/linux/compiler_types.h:352:38: error: call to ‘__compiletime_assert_1047’ \ declared with attribute error: FIELD_PREP: mask is not constant _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) and other build errors due to shift overflowing values. See https://lore.kernel.org/r/YkwQ6%2BtIH8GQpuct@zn.tnic for the gory details as to why it triggers with older gccs only. 
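The underlying C subtlety, reduced to two lines (the mask value is taken from the GUC_CTB_MSG_0_FENCE change below; the OLD/NEW macro names are only for side-by-side comparison):

	#define FENCE_MASK_OLD	(0xffff  << 16)	/* 0xffff is a signed int; the shift overflows into bit 31: UB */
	#define FENCE_MASK_NEW	(0xffffU << 16)	/* unsigned: well-defined 0xffff0000 and a proper constant mask for FIELD_PREP */

The same one-character 'U' fix applies to GUC_HXG_MSG_0_ORIGIN (0x1U << 31), GS_AUTH_STATUS_MASK and the HOST2GUC self-config key mask in the hunks below.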
v2 by Jani: - Drop the i915_reg.h changes Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Cc: Ruiqi GONG Cc: Randy Dunlap Signed-off-by: Borislav Petkov Signed-off-by: Jani Nikula Reviewed-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20220518113315.1305027-2-jani.nikula@intel.com --- drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h | 2 +- drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h | 2 +- drivers/gpu/drm/i915/gt/uc/abi/guc_messages_abi.h | 2 +- drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h index be9ac47fa9d0..4ef9990ed7f8 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h @@ -50,7 +50,7 @@ #define HOST2GUC_SELF_CFG_REQUEST_MSG_LEN (GUC_HXG_REQUEST_MSG_MIN_LEN + 3u) #define HOST2GUC_SELF_CFG_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 -#define HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_KEY (0xffff << 16) +#define HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_KEY (0xffffU << 16) #define HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_LEN (0xffff << 0) #define HOST2GUC_SELF_CFG_REQUEST_MSG_2_VALUE32 GUC_HXG_REQUEST_MSG_n_DATAn #define HOST2GUC_SELF_CFG_REQUEST_MSG_3_VALUE64 GUC_HXG_REQUEST_MSG_n_DATAn diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h index c9086a600bce..df83c1cc7c7a 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h @@ -82,7 +82,7 @@ static_assert(sizeof(struct guc_ct_buffer_desc) == 64); #define GUC_CTB_HDR_LEN 1u #define GUC_CTB_MSG_MIN_LEN GUC_CTB_HDR_LEN #define GUC_CTB_MSG_MAX_LEN 256u -#define GUC_CTB_MSG_0_FENCE (0xffff << 16) +#define GUC_CTB_MSG_0_FENCE (0xffffU << 16) #define GUC_CTB_MSG_0_FORMAT (0xf << 12) #define GUC_CTB_FORMAT_HXG 0u #define GUC_CTB_MSG_0_RESERVED (0xf << 8) diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_messages_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_messages_abi.h index 29ac823acd4c..7d5ba4d97d70 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_messages_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_messages_abi.h @@ -40,7 +40,7 @@ */ #define GUC_HXG_MSG_MIN_LEN 1u -#define GUC_HXG_MSG_0_ORIGIN (0x1 << 31) +#define GUC_HXG_MSG_0_ORIGIN (0x1U << 31) #define GUC_HXG_ORIGIN_HOST 0u #define GUC_HXG_ORIGIN_GUC 1u #define GUC_HXG_MSG_0_TYPE (0x7 << 28) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h index 2516705b9f36..8dc063f087eb 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h @@ -28,7 +28,7 @@ #define GS_MIA_HALT_REQUESTED (0x02 << GS_MIA_SHIFT) #define GS_MIA_ISR_ENTRY (0x04 << GS_MIA_SHIFT) #define GS_AUTH_STATUS_SHIFT 30 -#define GS_AUTH_STATUS_MASK (0x03 << GS_AUTH_STATUS_SHIFT) +#define GS_AUTH_STATUS_MASK (0x03U << GS_AUTH_STATUS_SHIFT) #define GS_AUTH_STATUS_BAD (0x01 << GS_AUTH_STATUS_SHIFT) #define GS_AUTH_STATUS_GOOD (0x02 << GS_AUTH_STATUS_SHIFT) -- cgit v1.2.3 From 411d44d754739a371999412606b28af1d72a210b Mon Sep 17 00:00:00 2001 From: Swathi Dhanavanthri Date: Tue, 17 May 2022 14:29:05 -0700 Subject: drm/i915/dg2: Add workaround 22014600077 Signed-off-by: Swathi Dhanavanthri Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: 
https://patchwork.freedesktop.org/patch/msgid/20220517212905.24212-1-swathi.dhanavanthri@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 1 + drivers/gpu/drm/i915/gt/intel_workarounds.c | 10 ++++++++++ 2 files changed, 11 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 98ede9c93f00..7246eb870c7e 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -1068,6 +1068,7 @@ #define GEN9_ENABLE_GPGPU_PREEMPTION REG_BIT(2) #define GEN10_CACHE_MODE_SS _MMIO(0xe420) +#define ENABLE_EU_COUNT_FOR_TDL_FLUSH REG_BIT(10) #define ENABLE_PREFETCH_INTO_IC REG_BIT(3) #define FLOAT_BLEND_OPTIMIZATION_ENABLE REG_BIT(4) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 756807c4b405..73b59ea6fd3b 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -2178,6 +2178,16 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) wa_write_or(wal, GEN12_MERT_MOD_CTRL, FORCE_MISS_FTLB); } + if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_B0, STEP_FOREVER) || + IS_DG2_G10(i915)) { + /* Wa_22014600077:dg2 */ + wa_add(wal, GEN10_CACHE_MODE_SS, 0, + _MASKED_BIT_ENABLE(ENABLE_EU_COUNT_FOR_TDL_FLUSH), + 0 /* Wa_14012342262 :write-only reg, so skip + verification */, + true); + } + if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) || IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) { /* -- cgit v1.2.3 From 7f73b371710edaee1f40d834096c07c62e51b184 Mon Sep 17 00:00:00 2001 From: Swathi Dhanavanthri Date: Tue, 17 May 2022 13:13:38 -0700 Subject: drm/i915/dg2: Extend Wa_22010954014 to DG2-G11 and DG2-G12 Signed-off-by: Swathi Dhanavanthri Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20220517201338.7291-1-swathi.dhanavanthri@intel.com --- drivers/gpu/drm/i915/intel_pm.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 594ab59e4991..b3a5c3f5ce14 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7494,10 +7494,9 @@ static void xehpsdv_init_clock_gating(struct drm_i915_private *dev_priv) static void dg2_init_clock_gating(struct drm_i915_private *i915) { - /* Wa_22010954014:dg2_g10 */ - if (IS_DG2_G10(i915)) - intel_uncore_rmw(&i915->uncore, XEHP_CLOCK_GATE_DIS, 0, - SGSI_SIDECLK_DIS); + /* Wa_22010954014:dg2 */ + intel_uncore_rmw(&i915->uncore, XEHP_CLOCK_GATE_DIS, 0, + SGSI_SIDECLK_DIS); /* * Wa_14010733611:dg2_g10 -- cgit v1.2.3 From 9602efab9f8652ef39dc2789edcd26c3d1d3f901 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 19 May 2022 10:07:56 +0100 Subject: Revert "drm/i915: Drop has_psr from device info" This reverts commit b15a7357a84f091fde8ce35bf2fd494150ad4bd0. 
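Context for this and the following reverts, not part of the patches themselves: each one restores a feature bit in struct intel_device_info and makes the corresponding HAS_*() macro read that bit again instead of deriving the answer from a version check. A reduced, hypothetical sketch of the pattern with invented names:

#define EXAMPLE_FLAGS(func) \
	func(has_psr) \
	func(has_dp_mst)

struct example_device_info {
#define DEFINE_FLAG(name) unsigned int name : 1;
	EXAMPLE_FLAGS(DEFINE_FLAG)
#undef DEFINE_FLAG
};

/*
 * The query helper then reads the static per-platform table rather than
 * comparing DISPLAY_VER()/GRAPHICS_VER():
 */
#define EXAMPLE_HAS_PSR(info)	((info)->has_psr)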
Signed-off-by: Tvrtko Ursulin Acked-by: Joonas Lahtinen Acked-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20220519090802.1294691-2-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_pci.c | 3 +++ drivers/gpu/drm/i915/intel_device_info.h | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 9c0a8c86876a..52695e7f60d2 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1297,7 +1297,7 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, IS_HASWELL(dev_priv)) #define HAS_DP_MST(dev_priv) (HAS_DDI(dev_priv)) #define HAS_FPGA_DBG_UNCLAIMED(dev_priv) (INTEL_INFO(dev_priv)->display.has_fpga_dbg) -#define HAS_PSR(dev_priv) (DISPLAY_VER(dev_priv) >= 9) +#define HAS_PSR(dev_priv) (INTEL_INFO(dev_priv)->display.has_psr) #define HAS_PSR_HW_TRACKING(dev_priv) \ (INTEL_INFO(dev_priv)->display.has_psr_hw_tracking) #define HAS_PSR2_SEL_FETCH(dev_priv) (DISPLAY_VER(dev_priv) >= 12) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 0e1d0493d3fe..5ceca6d99f26 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -635,6 +635,7 @@ static const struct intel_device_info chv_info = { .display.has_dmc = 1, \ .display.has_hdcp = 1, \ .display.has_ipc = 1, \ + .display.has_psr = 1, \ .display.has_psr_hw_tracking = 1, \ .dbuf.size = 896 - 4, /* 4 blocks for bypass path allocation */ \ .dbuf.slice_mask = BIT(DBUF_S1) @@ -683,6 +684,7 @@ static const struct intel_device_info skl_gt4_info = { .display.has_fpga_dbg = 1, \ .display.fbc_mask = BIT(INTEL_FBC_A), \ .display.has_hdcp = 1, \ + .display.has_psr = 1, \ .display.has_psr_hw_tracking = 1, \ .has_runtime_pm = 1, \ .display.has_dmc = 1, \ @@ -934,6 +936,7 @@ static const struct intel_device_info adl_s_info = { .display.has_hdcp = 1, \ .display.has_hotplug = 1, \ .display.has_ipc = 1, \ + .display.has_psr = 1, \ .display.ver = 13, \ .display.pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D), \ .pipe_offsets = { \ diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 55f40fa0ea49..9dbaa2861fa1 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -173,6 +173,7 @@ enum intel_ppgtt_type { func(has_ipc); \ func(has_modular_fia); \ func(has_overlay); \ + func(has_psr); \ func(has_psr_hw_tracking); \ func(overlay_needs_physical); \ func(supports_tv); -- cgit v1.2.3 From e91eec9128c2ad9eab9cf9d7c17f8034b6a86c4c Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 19 May 2022 10:07:57 +0100 Subject: Revert "drm/i915: Drop has_dp_mst from device info" This reverts commit eb86f645ab9b90c47de7ebe229feae7ac999421b. 
Signed-off-by: Tvrtko Ursulin Acked-by: Joonas Lahtinen Acked-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20220519090802.1294691-3-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_pci.c | 3 +++ drivers/gpu/drm/i915/intel_device_info.h | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 52695e7f60d2..680b7ff8c557 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1289,13 +1289,13 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_IPS(dev_priv) (IS_HSW_ULT(dev_priv) || IS_BROADWELL(dev_priv)) +#define HAS_DP_MST(dev_priv) (INTEL_INFO(dev_priv)->display.has_dp_mst) #define HAS_DP20(dev_priv) (IS_DG2(dev_priv)) #define HAS_CDCLK_CRAWL(dev_priv) (INTEL_INFO(dev_priv)->display.has_cdclk_crawl) #define HAS_DDI(dev_priv) (DISPLAY_VER(dev_priv) >= 9 || \ IS_BROADWELL(dev_priv) || \ IS_HASWELL(dev_priv)) -#define HAS_DP_MST(dev_priv) (HAS_DDI(dev_priv)) #define HAS_FPGA_DBG_UNCLAIMED(dev_priv) (INTEL_INFO(dev_priv)->display.has_fpga_dbg) #define HAS_PSR(dev_priv) (INTEL_INFO(dev_priv)->display.has_psr) #define HAS_PSR_HW_TRACKING(dev_priv) \ diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 5ceca6d99f26..5870f7daa72e 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -536,6 +536,7 @@ static const struct intel_device_info vlv_info = { .display.cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B) | \ BIT(TRANSCODER_C) | BIT(TRANSCODER_EDP), \ .display.has_fpga_dbg = 1, \ + .display.has_dp_mst = 1, \ .has_rc6p = 0 /* RC6p removed-by HSW */, \ HSW_PIPE_OFFSETS, \ .has_runtime_pm = 1 @@ -689,6 +690,7 @@ static const struct intel_device_info skl_gt4_info = { .has_runtime_pm = 1, \ .display.has_dmc = 1, \ .has_rps = true, \ + .display.has_dp_mst = 1, \ .has_logical_ring_contexts = 1, \ .dma_mask_size = 39, \ .ppgtt_type = INTEL_PPGTT_FULL, \ @@ -929,6 +931,7 @@ static const struct intel_device_info adl_s_info = { .dbuf.slice_mask = BIT(DBUF_S1) | BIT(DBUF_S2) | BIT(DBUF_S3) | \ BIT(DBUF_S4), \ .display.has_dmc = 1, \ + .display.has_dp_mst = 1, \ .display.has_dsb = 1, \ .display.has_dsc = 1, \ .display.fbc_mask = BIT(INTEL_FBC_A), \ diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 9dbaa2861fa1..6194dcc18e4f 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -163,6 +163,7 @@ enum intel_ppgtt_type { func(cursor_needs_physical); \ func(has_cdclk_crawl); \ func(has_dmc); \ + func(has_dp_mst); \ func(has_dsb); \ func(has_dsc); \ func(has_fpga_dbg); \ -- cgit v1.2.3 From 9d8d5a39173244a1394cf84a93447be6f82b361d Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 19 May 2022 10:07:58 +0100 Subject: Revert "drm/i915: Drop has_ddi from device info" This reverts commit efd01cd3c27636bc4840057a03839e54abaf11dc. 
Signed-off-by: Tvrtko Ursulin Acked-by: Joonas Lahtinen Acked-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20220519090802.1294691-4-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_drv.h | 4 +--- drivers/gpu/drm/i915/i915_pci.c | 3 +++ drivers/gpu/drm/i915/intel_device_info.h | 1 + 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 680b7ff8c557..906a76b7d827 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1293,9 +1293,7 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_DP20(dev_priv) (IS_DG2(dev_priv)) #define HAS_CDCLK_CRAWL(dev_priv) (INTEL_INFO(dev_priv)->display.has_cdclk_crawl) -#define HAS_DDI(dev_priv) (DISPLAY_VER(dev_priv) >= 9 || \ - IS_BROADWELL(dev_priv) || \ - IS_HASWELL(dev_priv)) +#define HAS_DDI(dev_priv) (INTEL_INFO(dev_priv)->display.has_ddi) #define HAS_FPGA_DBG_UNCLAIMED(dev_priv) (INTEL_INFO(dev_priv)->display.has_fpga_dbg) #define HAS_PSR(dev_priv) (INTEL_INFO(dev_priv)->display.has_psr) #define HAS_PSR_HW_TRACKING(dev_priv) \ diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 5870f7daa72e..e245a8208c64 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -535,6 +535,7 @@ static const struct intel_device_info vlv_info = { .platform_engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0), \ .display.cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B) | \ BIT(TRANSCODER_C) | BIT(TRANSCODER_EDP), \ + .display.has_ddi = 1, \ .display.has_fpga_dbg = 1, \ .display.has_dp_mst = 1, \ .has_rc6p = 0 /* RC6p removed-by HSW */, \ @@ -682,6 +683,7 @@ static const struct intel_device_info skl_gt4_info = { BIT(TRANSCODER_C) | BIT(TRANSCODER_EDP) | \ BIT(TRANSCODER_DSI_A) | BIT(TRANSCODER_DSI_C), \ .has_64bit_reloc = 1, \ + .display.has_ddi = 1, \ .display.has_fpga_dbg = 1, \ .display.fbc_mask = BIT(INTEL_FBC_A), \ .display.has_hdcp = 1, \ @@ -930,6 +932,7 @@ static const struct intel_device_info adl_s_info = { .dbuf.size = 4096, \ .dbuf.slice_mask = BIT(DBUF_S1) | BIT(DBUF_S2) | BIT(DBUF_S3) | \ BIT(DBUF_S4), \ + .display.has_ddi = 1, \ .display.has_dmc = 1, \ .display.has_dp_mst = 1, \ .display.has_dsb = 1, \ diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 6194dcc18e4f..9e2600b86139 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -163,6 +163,7 @@ enum intel_ppgtt_type { func(cursor_needs_physical); \ func(has_cdclk_crawl); \ func(has_dmc); \ + func(has_ddi); \ func(has_dp_mst); \ func(has_dsb); \ func(has_dsc); \ -- cgit v1.2.3 From 3d6c72b7fdd2429ca1d4d690618bf65050380b48 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 19 May 2022 10:07:59 +0100 Subject: Revert "drm/i915: Drop has_logical_ring_elsq from device info" This reverts commit b6411373d3954c8fe4617c27f90f773108b0ab03. 
Signed-off-by: Tvrtko Ursulin Acked-by: Joonas Lahtinen Acked-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20220519090802.1294691-5-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_drv.h | 3 ++- drivers/gpu/drm/i915/i915_pci.c | 4 +++- drivers/gpu/drm/i915/intel_device_info.h | 1 + 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 906a76b7d827..95bf99f7f7a5 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1241,7 +1241,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_LOGICAL_RING_CONTEXTS(dev_priv) \ (INTEL_INFO(dev_priv)->has_logical_ring_contexts) -#define HAS_LOGICAL_RING_ELSQ(dev_priv) (GRAPHICS_VER(dev_priv) >= 11) +#define HAS_LOGICAL_RING_ELSQ(dev_priv) \ + (INTEL_INFO(dev_priv)->has_logical_ring_elsq) #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index e245a8208c64..d5e2df985119 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -809,7 +809,8 @@ static const struct intel_device_info cml_gt2_info = { .dbuf.size = 2048, \ .dbuf.slice_mask = BIT(DBUF_S1) | BIT(DBUF_S2), \ .display.has_dsc = 1, \ - .has_coherent_ggtt = false + .has_coherent_ggtt = false, \ + .has_logical_ring_elsq = 1 static const struct intel_device_info icl_info = { GEN11_FEATURES, @@ -996,6 +997,7 @@ static const struct intel_device_info adl_p_info = { .has_global_mocs = 1, \ .has_llc = 1, \ .has_logical_ring_contexts = 1, \ + .has_logical_ring_elsq = 1, \ .has_mslices = 1, \ .has_rps = 1, \ .has_runtime_pm = 1, \ diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 9e2600b86139..ffcd88d13bbf 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -147,6 +147,7 @@ enum intel_ppgtt_type { func(has_l3_dpf); \ func(has_llc); \ func(has_logical_ring_contexts); \ + func(has_logical_ring_elsq); \ func(has_mslices); \ func(has_pooled_eu); \ func(has_pxp); \ -- cgit v1.2.3 From b409db082da6b76ad2b759a1a48d9402eee4b942 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 19 May 2022 10:08:00 +0100 Subject: Revert "drm/i915: Drop has_reset_engine from device info" This reverts commit 922abe4d19bd21b38298f3902674774b92a49293. 
Signed-off-by: Tvrtko Ursulin Acked-by: Joonas Lahtinen Acked-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20220519090802.1294691-6-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_reset.c | 2 +- drivers/gpu/drm/i915/i915_pci.c | 5 +++++ drivers/gpu/drm/i915/intel_device_info.h | 1 + 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 11bf33f1f772..a5338c3fde7a 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -699,7 +699,7 @@ bool intel_has_reset_engine(const struct intel_gt *gt) if (gt->i915->params.reset < 2) return false; - return GRAPHICS_VER(gt->i915) >= 7; + return INTEL_INFO(gt->i915)->has_reset_engine; } int intel_reset_guc(struct intel_gt *gt) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index d5e2df985119..3e514e34bd1d 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -456,6 +456,7 @@ static const struct intel_device_info snb_m_gt2_info = { .has_coherent_ggtt = true, \ .has_llc = 1, \ .has_rc6p = 1, \ + .has_reset_engine = true, \ .has_rps = true, \ .dma_mask_size = 40, \ .ppgtt_type = INTEL_PPGTT_ALIASING, \ @@ -513,6 +514,7 @@ static const struct intel_device_info vlv_info = { .display.pipe_mask = BIT(PIPE_A) | BIT(PIPE_B), .display.cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B), .has_runtime_pm = 1, + .has_reset_engine = true, .has_rps = true, .display.has_gmch = 1, .display.has_hotplug = 1, @@ -616,6 +618,7 @@ static const struct intel_device_info chv_info = { .dma_mask_size = 39, .ppgtt_type = INTEL_PPGTT_FULL, .ppgtt_size = 32, + .has_reset_engine = 1, .has_snoop = true, .has_coherent_ggtt = false, .display_mmio_offset = VLV_DISPLAY_BASE, @@ -697,6 +700,7 @@ static const struct intel_device_info skl_gt4_info = { .dma_mask_size = 39, \ .ppgtt_type = INTEL_PPGTT_FULL, \ .ppgtt_size = 48, \ + .has_reset_engine = 1, \ .has_snoop = true, \ .has_coherent_ggtt = false, \ .display.has_ipc = 1, \ @@ -999,6 +1003,7 @@ static const struct intel_device_info adl_p_info = { .has_logical_ring_contexts = 1, \ .has_logical_ring_elsq = 1, \ .has_mslices = 1, \ + .has_reset_engine = 1, \ .has_rps = 1, \ .has_runtime_pm = 1, \ .ppgtt_size = 48, \ diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index ffcd88d13bbf..b22004814e88 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -137,6 +137,7 @@ enum intel_ppgtt_type { func(has_64k_pages); \ func(needs_compact_pt); \ func(gpu_reset_clobbers_display); \ + func(has_reset_engine); \ func(has_4tile); \ func(has_flat_ccs); \ func(has_global_mocs); \ -- cgit v1.2.3 From fdbec9ff669d83bf863ca7e657af6a9e4c949565 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 19 May 2022 10:08:01 +0100 Subject: Revert "drm/i915: Drop has_rc6 from device info" This reverts commit 218076abbcd647de46635d21331a34b814f90906. 
Signed-off-by: Tvrtko Ursulin Acked-by: Joonas Lahtinen Acked-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20220519090802.1294691-7-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_drv.h | 3 +-- drivers/gpu/drm/i915/i915_pci.c | 8 ++++++++ drivers/gpu/drm/i915/intel_device_info.h | 1 + 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 95bf99f7f7a5..63451e94fd63 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1302,8 +1302,7 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_PSR2_SEL_FETCH(dev_priv) (DISPLAY_VER(dev_priv) >= 12) #define HAS_TRANSCODER(dev_priv, trans) ((INTEL_INFO(dev_priv)->display.cpu_transcoder_mask & BIT(trans)) != 0) -/* ilk does support rc6, but we do not implement [power] contexts */ -#define HAS_RC6(dev_priv) (GRAPHICS_VER(dev_priv) >= 6) +#define HAS_RC6(dev_priv) (INTEL_INFO(dev_priv)->has_rc6) #define HAS_RC6p(dev_priv) (INTEL_INFO(dev_priv)->has_rc6p) #define HAS_RC6pp(dev_priv) (false) /* HW was never validated */ diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 3e514e34bd1d..80803ab6e938 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -376,6 +376,8 @@ static const struct intel_device_info gm45_info = { .platform_engine_mask = BIT(RCS0) | BIT(VCS0), \ .has_snoop = true, \ .has_coherent_ggtt = true, \ + /* ilk does support rc6, but we do not implement [power] contexts */ \ + .has_rc6 = 0, \ .dma_mask_size = 36, \ I9XX_PIPE_OFFSETS, \ I9XX_CURSOR_OFFSETS, \ @@ -405,6 +407,7 @@ static const struct intel_device_info ilk_m_info = { .platform_engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0), \ .has_coherent_ggtt = true, \ .has_llc = 1, \ + .has_rc6 = 1, \ .has_rc6p = 1, \ .has_rps = true, \ .dma_mask_size = 40, \ @@ -455,6 +458,7 @@ static const struct intel_device_info snb_m_gt2_info = { .platform_engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0), \ .has_coherent_ggtt = true, \ .has_llc = 1, \ + .has_rc6 = 1, \ .has_rc6p = 1, \ .has_reset_engine = true, \ .has_rps = true, \ @@ -514,6 +518,7 @@ static const struct intel_device_info vlv_info = { .display.pipe_mask = BIT(PIPE_A) | BIT(PIPE_B), .display.cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B), .has_runtime_pm = 1, + .has_rc6 = 1, .has_reset_engine = true, .has_rps = true, .display.has_gmch = 1, @@ -612,6 +617,7 @@ static const struct intel_device_info chv_info = { .platform_engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0), .has_64bit_reloc = 1, .has_runtime_pm = 1, + .has_rc6 = 1, .has_rps = true, .has_logical_ring_contexts = 1, .display.has_gmch = 1, @@ -694,6 +700,7 @@ static const struct intel_device_info skl_gt4_info = { .display.has_psr_hw_tracking = 1, \ .has_runtime_pm = 1, \ .display.has_dmc = 1, \ + .has_rc6 = 1, \ .has_rps = true, \ .display.has_dp_mst = 1, \ .has_logical_ring_contexts = 1, \ @@ -1003,6 +1010,7 @@ static const struct intel_device_info adl_p_info = { .has_logical_ring_contexts = 1, \ .has_logical_ring_elsq = 1, \ .has_mslices = 1, \ + .has_rc6 = 1, \ .has_reset_engine = 1, \ .has_rps = 1, \ .has_runtime_pm = 1, \ diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index b22004814e88..20de6974406a 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -152,6 +152,7 @@ enum intel_ppgtt_type { func(has_mslices); \ 
func(has_pooled_eu); \ func(has_pxp); \ + func(has_rc6); \ func(has_rc6p); \ func(has_rps); \ func(has_runtime_pm); \ -- cgit v1.2.3 From 39921e5f00f5a98ff9cb3229937ca339e8d9c9c6 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 19 May 2022 10:08:02 +0100 Subject: Revert "drm/i915: Drop has_gt_uc from device info" This reverts commit 222ff6db8a0dcb86f2bb65fc8656aec635a737a6. Signed-off-by: Tvrtko Ursulin Acked-by: Joonas Lahtinen Acked-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20220519090802.1294691-8-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_gpu_error.c | 2 +- drivers/gpu/drm/i915/i915_pci.c | 3 +++ drivers/gpu/drm/i915/intel_device_info.h | 1 + 4 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 63451e94fd63..72394a11baf9 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1350,7 +1350,7 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, */ #define HAS_FLAT_CCS(dev_priv) (INTEL_INFO(dev_priv)->has_flat_ccs) -#define HAS_GT_UC(dev_priv) (GRAPHICS_VER(dev_priv) >= 9) +#define HAS_GT_UC(dev_priv) (INTEL_INFO(dev_priv)->has_gt_uc) #define HAS_POOLED_EU(dev_priv) (INTEL_INFO(dev_priv)->has_pooled_eu) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 5bd9cb899852..0512c66fa4f3 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -2008,7 +2008,7 @@ __i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask, u32 du return ERR_PTR(-ENOMEM); } - if (HAS_GT_UC(i915)) { + if (INTEL_INFO(i915)->has_gt_uc) { error->gt->uc = gt_record_uc(error->gt, compress); if (error->gt->uc) { if (dump_flags & CORE_DUMP_FLAG_IS_GUC_CAPTURE) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 80803ab6e938..664a218030be 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -644,6 +644,7 @@ static const struct intel_device_info chv_info = { GEN(9), \ GEN9_DEFAULT_PAGE_SIZES, \ .display.has_dmc = 1, \ + .has_gt_uc = 1, \ .display.has_hdcp = 1, \ .display.has_ipc = 1, \ .display.has_psr = 1, \ @@ -704,6 +705,7 @@ static const struct intel_device_info skl_gt4_info = { .has_rps = true, \ .display.has_dp_mst = 1, \ .has_logical_ring_contexts = 1, \ + .has_gt_uc = 1, \ .dma_mask_size = 39, \ .ppgtt_type = INTEL_PPGTT_FULL, \ .ppgtt_size = 48, \ @@ -1006,6 +1008,7 @@ static const struct intel_device_info adl_p_info = { .has_64bit_reloc = 1, \ .has_flat_ccs = 1, \ .has_global_mocs = 1, \ + .has_gt_uc = 1, \ .has_llc = 1, \ .has_logical_ring_contexts = 1, \ .has_logical_ring_elsq = 1, \ diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 20de6974406a..c4ea5c4abfc6 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -141,6 +141,7 @@ enum intel_ppgtt_type { func(has_4tile); \ func(has_flat_ccs); \ func(has_global_mocs); \ + func(has_gt_uc); \ func(has_heci_pxp); \ func(has_heci_gscfi); \ func(has_guc_deprivilege); \ -- cgit v1.2.3 From d63ddca7c58132257fd01e3a4feca41b3bac3089 Mon Sep 17 00:00:00 2001 From: Bommu Krishnaiah Date: Mon, 16 May 2022 10:20:15 +0200 Subject: drm/i915: Update tiled blits selftest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update the selftest to include Tile 4 mode and switch to 
Tile 4 on platforms that supports Tile 4 but no Tile Y and vice versa. Also switch to XY_FAST_COPY_BLT on platforms that supports it. v4: update commit message to reflect the code changes properly. v3: add a function to find X-tile availability for a platform. v2: disable Tile X for iGPU in fastblit and fix checkpath --strict warnings. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/5879 Signed-off-by: Bommu Krishnaiah Co-developed-by: Nirmoy Das Signed-off-by: Nirmoy Das Reviewed-by: Zbigniew Kempczyński Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20220516082015.32020-1-nirmoy.das@intel.com --- .../drm/i915/gem/selftests/i915_gem_client_blt.c | 250 +++++++++++++++++---- drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 22 ++ 2 files changed, 227 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c index ddd0772fd828..3cfc621ef363 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c @@ -6,6 +6,7 @@ #include "i915_selftest.h" #include "gt/intel_context.h" +#include "gt/intel_engine_regs.h" #include "gt/intel_engine_user.h" #include "gt/intel_gpu_commands.h" #include "gt/intel_gt.h" @@ -18,10 +19,71 @@ #include "huge_gem_object.h" #include "mock_context.h" +#define OW_SIZE 16 /* in bytes */ +#define F_SUBTILE_SIZE 64 /* in bytes */ +#define F_TILE_WIDTH 128 /* in bytes */ +#define F_TILE_HEIGHT 32 /* in pixels */ +#define F_SUBTILE_WIDTH OW_SIZE /* in bytes */ +#define F_SUBTILE_HEIGHT 4 /* in pixels */ + +static int linear_x_y_to_ftiled_pos(int x, int y, u32 stride, int bpp) +{ + int tile_base; + int tile_x, tile_y; + int swizzle, subtile; + int pixel_size = bpp / 8; + int pos; + + /* + * Subtile remapping for F tile. Note that map[a]==b implies map[b]==a + * so we can use the same table to tile and until. + */ + static const u8 f_subtile_map[] = { + 0, 1, 2, 3, 8, 9, 10, 11, + 4, 5, 6, 7, 12, 13, 14, 15, + 16, 17, 18, 19, 24, 25, 26, 27, + 20, 21, 22, 23, 28, 29, 30, 31, + 32, 33, 34, 35, 40, 41, 42, 43, + 36, 37, 38, 39, 44, 45, 46, 47, + 48, 49, 50, 51, 56, 57, 58, 59, + 52, 53, 54, 55, 60, 61, 62, 63 + }; + + x *= pixel_size; + /* + * Where does the 4k tile start (in bytes)? This is the same for Y and + * F so we can use the Y-tile algorithm to get to that point. 
+ */ + tile_base = + y / F_TILE_HEIGHT * stride * F_TILE_HEIGHT + + x / F_TILE_WIDTH * 4096; + + /* Find pixel within tile */ + tile_x = x % F_TILE_WIDTH; + tile_y = y % F_TILE_HEIGHT; + + /* And figure out the subtile within the 4k tile */ + subtile = tile_y / F_SUBTILE_HEIGHT * 8 + tile_x / F_SUBTILE_WIDTH; + + /* Swizzle the subtile number according to the bspec diagram */ + swizzle = f_subtile_map[subtile]; + + /* Calculate new position */ + pos = tile_base + + swizzle * F_SUBTILE_SIZE + + tile_y % F_SUBTILE_HEIGHT * OW_SIZE + + tile_x % F_SUBTILE_WIDTH; + + GEM_BUG_ON(!IS_ALIGNED(pos, pixel_size)); + + return pos / pixel_size * 4; +} + enum client_tiling { CLIENT_TILING_LINEAR, CLIENT_TILING_X, CLIENT_TILING_Y, + CLIENT_TILING_4, CLIENT_NUM_TILING_TYPES }; @@ -45,6 +107,36 @@ struct tiled_blits { u32 height; }; +static bool supports_x_tiling(const struct drm_i915_private *i915) +{ + int gen = GRAPHICS_VER(i915); + + if (gen < 12) + return true; + + if (!HAS_LMEM(i915) || IS_DG1(i915)) + return false; + + return true; +} + +static bool fast_blit_ok(const struct blit_buffer *buf) +{ + int gen = GRAPHICS_VER(buf->vma->vm->i915); + + if (gen < 9) + return false; + + if (gen < 12) + return true; + + /* filter out platforms with unsupported X-tile support in fastblit */ + if (buf->tiling == CLIENT_TILING_X && !supports_x_tiling(buf->vma->vm->i915)) + return false; + + return true; +} + static int prepare_blit(const struct tiled_blits *t, struct blit_buffer *dst, struct blit_buffer *src, @@ -59,51 +151,103 @@ static int prepare_blit(const struct tiled_blits *t, if (IS_ERR(cs)) return PTR_ERR(cs); - *cs++ = MI_LOAD_REGISTER_IMM(1); - *cs++ = i915_mmio_reg_offset(BCS_SWCTRL); - cmd = (BCS_SRC_Y | BCS_DST_Y) << 16; - if (src->tiling == CLIENT_TILING_Y) - cmd |= BCS_SRC_Y; - if (dst->tiling == CLIENT_TILING_Y) - cmd |= BCS_DST_Y; - *cs++ = cmd; - - cmd = MI_FLUSH_DW; - if (ver >= 8) - cmd++; - *cs++ = cmd; - *cs++ = 0; - *cs++ = 0; - *cs++ = 0; - - cmd = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (8 - 2); - if (ver >= 8) - cmd += 2; - - src_pitch = t->width * 4; - if (src->tiling) { - cmd |= XY_SRC_COPY_BLT_SRC_TILED; - src_pitch /= 4; - } + if (fast_blit_ok(dst) && fast_blit_ok(src)) { + struct intel_gt *gt = t->ce->engine->gt; + u32 src_tiles = 0, dst_tiles = 0; + u32 src_4t = 0, dst_4t = 0; + + /* Need to program BLIT_CCTL if it is not done previously + * before using XY_FAST_COPY_BLT + */ + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(BLIT_CCTL(t->ce->engine->mmio_base)); + *cs++ = (BLIT_CCTL_SRC_MOCS(gt->mocs.uc_index) | + BLIT_CCTL_DST_MOCS(gt->mocs.uc_index)); + + src_pitch = t->width; /* in dwords */ + if (src->tiling == CLIENT_TILING_4) { + src_tiles = XY_FAST_COPY_BLT_D0_SRC_TILE_MODE(YMAJOR); + src_4t = XY_FAST_COPY_BLT_D1_SRC_TILE4; + } else if (src->tiling == CLIENT_TILING_Y) { + src_tiles = XY_FAST_COPY_BLT_D0_SRC_TILE_MODE(YMAJOR); + } else if (src->tiling == CLIENT_TILING_X) { + src_tiles = XY_FAST_COPY_BLT_D0_SRC_TILE_MODE(TILE_X); + } else { + src_pitch *= 4; /* in bytes */ + } - dst_pitch = t->width * 4; - if (dst->tiling) { - cmd |= XY_SRC_COPY_BLT_DST_TILED; - dst_pitch /= 4; - } + dst_pitch = t->width; /* in dwords */ + if (dst->tiling == CLIENT_TILING_4) { + dst_tiles = XY_FAST_COPY_BLT_D0_DST_TILE_MODE(YMAJOR); + dst_4t = XY_FAST_COPY_BLT_D1_DST_TILE4; + } else if (dst->tiling == CLIENT_TILING_Y) { + dst_tiles = XY_FAST_COPY_BLT_D0_DST_TILE_MODE(YMAJOR); + } else if (dst->tiling == CLIENT_TILING_X) { + dst_tiles = XY_FAST_COPY_BLT_D0_DST_TILE_MODE(TILE_X); 
+ } else { + dst_pitch *= 4; /* in bytes */ + } - *cs++ = cmd; - *cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | dst_pitch; - *cs++ = 0; - *cs++ = t->height << 16 | t->width; - *cs++ = lower_32_bits(dst->vma->node.start); - if (use_64b_reloc) + *cs++ = GEN9_XY_FAST_COPY_BLT_CMD | (10 - 2) | + src_tiles | dst_tiles; + *cs++ = src_4t | dst_4t | BLT_DEPTH_32 | dst_pitch; + *cs++ = 0; + *cs++ = t->height << 16 | t->width; + *cs++ = lower_32_bits(dst->vma->node.start); *cs++ = upper_32_bits(dst->vma->node.start); - *cs++ = 0; - *cs++ = src_pitch; - *cs++ = lower_32_bits(src->vma->node.start); - if (use_64b_reloc) + *cs++ = 0; + *cs++ = src_pitch; + *cs++ = lower_32_bits(src->vma->node.start); *cs++ = upper_32_bits(src->vma->node.start); + } else { + if (ver >= 6) { + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(BCS_SWCTRL); + cmd = (BCS_SRC_Y | BCS_DST_Y) << 16; + if (src->tiling == CLIENT_TILING_Y) + cmd |= BCS_SRC_Y; + if (dst->tiling == CLIENT_TILING_Y) + cmd |= BCS_DST_Y; + *cs++ = cmd; + + cmd = MI_FLUSH_DW; + if (ver >= 8) + cmd++; + *cs++ = cmd; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + } + + cmd = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (8 - 2); + if (ver >= 8) + cmd += 2; + + src_pitch = t->width * 4; + if (src->tiling) { + cmd |= XY_SRC_COPY_BLT_SRC_TILED; + src_pitch /= 4; + } + + dst_pitch = t->width * 4; + if (dst->tiling) { + cmd |= XY_SRC_COPY_BLT_DST_TILED; + dst_pitch /= 4; + } + + *cs++ = cmd; + *cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | dst_pitch; + *cs++ = 0; + *cs++ = t->height << 16 | t->width; + *cs++ = lower_32_bits(dst->vma->node.start); + if (use_64b_reloc) + *cs++ = upper_32_bits(dst->vma->node.start); + *cs++ = 0; + *cs++ = src_pitch; + *cs++ = lower_32_bits(src->vma->node.start); + if (use_64b_reloc) + *cs++ = upper_32_bits(src->vma->node.start); + } *cs++ = MI_BATCH_BUFFER_END; @@ -181,7 +325,13 @@ static int tiled_blits_create_buffers(struct tiled_blits *t, t->buffers[i].vma = vma; t->buffers[i].tiling = - i915_prandom_u32_max_state(CLIENT_TILING_Y + 1, prng); + i915_prandom_u32_max_state(CLIENT_NUM_TILING_TYPES, prng); + + /* Platforms support either TileY or Tile4, not both */ + if (HAS_4TILE(i915) && t->buffers[i].tiling == CLIENT_TILING_Y) + t->buffers[i].tiling = CLIENT_TILING_4; + else if (!HAS_4TILE(i915) && t->buffers[i].tiling == CLIENT_TILING_4) + t->buffers[i].tiling = CLIENT_TILING_Y; } return 0; @@ -206,7 +356,8 @@ static u64 swizzle_bit(unsigned int bit, u64 offset) static u64 tiled_offset(const struct intel_gt *gt, u64 v, unsigned int stride, - enum client_tiling tiling) + enum client_tiling tiling, + int x_pos, int y_pos) { unsigned int swizzle; u64 x, y; @@ -216,7 +367,12 @@ static u64 tiled_offset(const struct intel_gt *gt, y = div64_u64_rem(v, stride, &x); - if (tiling == CLIENT_TILING_X) { + if (tiling == CLIENT_TILING_4) { + v = linear_x_y_to_ftiled_pos(x_pos, y_pos, stride, 32); + + /* no swizzling for f-tiling */ + swizzle = I915_BIT_6_SWIZZLE_NONE; + } else if (tiling == CLIENT_TILING_X) { v = div64_u64_rem(y, 8, &y) * stride * 8; v += y * 512; v += div64_u64_rem(x, 512, &x) << 12; @@ -259,6 +415,7 @@ static const char *repr_tiling(enum client_tiling tiling) case CLIENT_TILING_LINEAR: return "linear"; case CLIENT_TILING_X: return "X"; case CLIENT_TILING_Y: return "Y"; + case CLIENT_TILING_4: return "F"; default: return "unknown"; } } @@ -284,7 +441,7 @@ static int verify_buffer(const struct tiled_blits *t, } else { u64 v = tiled_offset(buf->vma->vm->gt, p * 4, t->width * 4, - buf->tiling); + buf->tiling, x, y); if (vaddr[v / 
sizeof(*vaddr)] != buf->start_val + p) ret = -EINVAL; @@ -504,6 +661,9 @@ static int tiled_blits_bounce(struct tiled_blits *t, struct rnd_state *prng) if (err) return err; + /* Simulating GTT eviction of the same buffer / layout */ + t->buffers[2].tiling = t->buffers[0].tiling; + /* Reposition so that we overlap the old addresses, and slightly off */ err = tiled_blit(t, &t->buffers[2], t->hole + t->align, diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h index 556bca3be804..246ab8f7bf57 100644 --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h @@ -236,6 +236,28 @@ #define XY_FAST_COLOR_BLT_DW 16 #define XY_FAST_COLOR_BLT_MOCS_MASK GENMASK(27, 21) #define XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT 31 + +#define XY_FAST_COPY_BLT_D0_SRC_TILING_MASK REG_GENMASK(21, 20) +#define XY_FAST_COPY_BLT_D0_DST_TILING_MASK REG_GENMASK(14, 13) +#define XY_FAST_COPY_BLT_D0_SRC_TILE_MODE(mode) \ + REG_FIELD_PREP(XY_FAST_COPY_BLT_D0_SRC_TILING_MASK, mode) +#define XY_FAST_COPY_BLT_D0_DST_TILE_MODE(mode) \ + REG_FIELD_PREP(XY_FAST_COPY_BLT_D0_DST_TILING_MASK, mode) +#define LINEAR 0 +#define TILE_X 0x1 +#define XMAJOR 0x1 +#define YMAJOR 0x2 +#define TILE_64 0x3 +#define XY_FAST_COPY_BLT_D1_SRC_TILE4 REG_BIT(31) +#define XY_FAST_COPY_BLT_D1_DST_TILE4 REG_BIT(30) +#define BLIT_CCTL_SRC_MOCS_MASK REG_GENMASK(6, 0) +#define BLIT_CCTL_DST_MOCS_MASK REG_GENMASK(14, 8) +/* Note: MOCS value = (index << 1) */ +#define BLIT_CCTL_SRC_MOCS(idx) \ + REG_FIELD_PREP(BLIT_CCTL_SRC_MOCS_MASK, (idx) << 1) +#define BLIT_CCTL_DST_MOCS(idx) \ + REG_FIELD_PREP(BLIT_CCTL_DST_MOCS_MASK, (idx) << 1) + #define SRC_COPY_BLT_CMD (2 << 29 | 0x43 << 22) #define GEN9_XY_FAST_COPY_BLT_CMD (2 << 29 | 0x42 << 22) #define XY_SRC_COPY_BLT_CMD (2 << 29 | 0x53 << 22) -- cgit v1.2.3 From d32e8ed918ba7384fda9055ebb31b89b3eadb517 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 10 May 2022 23:02:24 -0700 Subject: drm/i915/uncore: Reorganize and document shadow and forcewake tables Let's reorganize some of the forcewake/shadow handling in intel_uncore.c and consolidate the cargo-cult comments on each table into more general comments that apply to all tables. We'll probably move forcewake handling to its own dedicated file in the near future and further enhance this with true kerneldoc. But this is a good intermediate step to help clarify the behavior a bit. Cc: Stuart Summers Signed-off-by: Matt Roper Reviewed-by: Matt Atwood Link: https://patchwork.freedesktop.org/patch/msgid/20220511060228.1179450-2-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/intel_uncore.c | 124 +++++++++++++++++++++++------------- 1 file changed, 79 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 83517a703eb6..632327f62dfa 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -938,36 +938,32 @@ find_fw_domain(struct intel_uncore *uncore, u32 offset) return entry->domains; } -#define GEN_FW_RANGE(s, e, d) \ - { .start = (s), .end = (e), .domains = (d) } - -/* *Must* be sorted by offset ranges! See intel_fw_table_check(). 
*/ -static const struct intel_forcewake_range __vlv_fw_ranges[] = { - GEN_FW_RANGE(0x2000, 0x3fff, FORCEWAKE_RENDER), - GEN_FW_RANGE(0x5000, 0x7fff, FORCEWAKE_RENDER), - GEN_FW_RANGE(0xb000, 0x11fff, FORCEWAKE_RENDER), - GEN_FW_RANGE(0x12000, 0x13fff, FORCEWAKE_MEDIA), - GEN_FW_RANGE(0x22000, 0x23fff, FORCEWAKE_MEDIA), - GEN_FW_RANGE(0x2e000, 0x2ffff, FORCEWAKE_RENDER), - GEN_FW_RANGE(0x30000, 0x3ffff, FORCEWAKE_MEDIA), -}; - -#define __fwtable_reg_read_fw_domains(uncore, offset) \ -({ \ - enum forcewake_domains __fwd = 0; \ - if (NEEDS_FORCE_WAKE((offset))) \ - __fwd = find_fw_domain(uncore, offset); \ - __fwd; \ -}) +/* + * Shadowed register tables describe special register ranges that i915 is + * allowed to write to without acquiring forcewake. If these registers' power + * wells are down, the hardware will save values written by i915 to a shadow + * copy and automatically transfer them into the real register the next time + * the power well is woken up. Shadowing only applies to writes; forcewake + * must still be acquired when reading from registers in these ranges. + * + * The documentation for shadowed registers is somewhat spotty on older + * platforms. However missing registers from these lists is non-fatal; it just + * means we'll wake up the hardware for some register accesses where we didn't + * really need to. + * + * The ranges listed in these tables must be sorted by offset. + * + * When adding new tables here, please also add them to + * intel_shadow_table_check() in selftests/intel_uncore.c so that they will be + * scanned for obvious mistakes or typos by the selftests. + */ -/* *Must* be sorted by offset! See intel_shadow_table_check(). */ static const struct i915_range gen8_shadowed_regs[] = { { .start = 0x2030, .end = 0x2030 }, { .start = 0xA008, .end = 0xA00C }, { .start = 0x12030, .end = 0x12030 }, { .start = 0x1a030, .end = 0x1a030 }, { .start = 0x22030, .end = 0x22030 }, - /* TODO: Other registers are not yet used */ }; static const struct i915_range gen11_shadowed_regs[] = { @@ -1107,11 +1103,70 @@ gen6_reg_write_fw_domains(struct intel_uncore *uncore, i915_reg_t reg) return FORCEWAKE_RENDER; } +#define __fwtable_reg_read_fw_domains(uncore, offset) \ +({ \ + enum forcewake_domains __fwd = 0; \ + if (NEEDS_FORCE_WAKE((offset))) \ + __fwd = find_fw_domain(uncore, offset); \ + __fwd; \ +}) + +#define __fwtable_reg_write_fw_domains(uncore, offset) \ +({ \ + enum forcewake_domains __fwd = 0; \ + const u32 __offset = (offset); \ + if (NEEDS_FORCE_WAKE((__offset)) && !is_shadowed(uncore, __offset)) \ + __fwd = find_fw_domain(uncore, __offset); \ + __fwd; \ +}) + +#define GEN_FW_RANGE(s, e, d) \ + { .start = (s), .end = (e), .domains = (d) } + +/* + * All platforms' forcewake tables below must be sorted by offset ranges. + * Furthermore, new forcewake tables added should be "watertight" and have + * no gaps between ranges. + * + * When there are multiple consecutive ranges listed in the bspec with + * the same forcewake domain, it is customary to combine them into a single + * row in the tables below to keep the tables small and lookups fast. + * Likewise, reserved/unused ranges may be combined with the preceding and/or + * following ranges since the driver will never be making MMIO accesses in + * those ranges. + * + * For example, if the bspec were to list: + * + * ... + * 0x1000 - 0x1fff: GT + * 0x2000 - 0x2cff: GT + * 0x2d00 - 0x2fff: unused/reserved + * 0x3000 - 0xffff: GT + * ... 
+ * + * these could all be represented by a single line in the code: + * + * GEN_FW_RANGE(0x1000, 0xffff, FORCEWAKE_GT) + * + * When adding new forcewake tables here, please also add them to + * intel_uncore_mock_selftests in selftests/intel_uncore.c so that they will be + * scanned for obvious mistakes or typos by the selftests. + */ + static const struct intel_forcewake_range __gen6_fw_ranges[] = { GEN_FW_RANGE(0x0, 0x3ffff, FORCEWAKE_RENDER), }; -/* *Must* be sorted by offset ranges! See intel_fw_table_check(). */ +static const struct intel_forcewake_range __vlv_fw_ranges[] = { + GEN_FW_RANGE(0x2000, 0x3fff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x5000, 0x7fff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0xb000, 0x11fff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x12000, 0x13fff, FORCEWAKE_MEDIA), + GEN_FW_RANGE(0x22000, 0x23fff, FORCEWAKE_MEDIA), + GEN_FW_RANGE(0x2e000, 0x2ffff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x30000, 0x3ffff, FORCEWAKE_MEDIA), +}; + static const struct intel_forcewake_range __chv_fw_ranges[] = { GEN_FW_RANGE(0x2000, 0x3fff, FORCEWAKE_RENDER), GEN_FW_RANGE(0x4000, 0x4fff, FORCEWAKE_RENDER | FORCEWAKE_MEDIA), @@ -1131,16 +1186,6 @@ static const struct intel_forcewake_range __chv_fw_ranges[] = { GEN_FW_RANGE(0x30000, 0x37fff, FORCEWAKE_MEDIA), }; -#define __fwtable_reg_write_fw_domains(uncore, offset) \ -({ \ - enum forcewake_domains __fwd = 0; \ - const u32 __offset = (offset); \ - if (NEEDS_FORCE_WAKE((__offset)) && !is_shadowed(uncore, __offset)) \ - __fwd = find_fw_domain(uncore, __offset); \ - __fwd; \ -}) - -/* *Must* be sorted by offset ranges! See intel_fw_table_check(). */ static const struct intel_forcewake_range __gen9_fw_ranges[] = { GEN_FW_RANGE(0x0, 0xaff, FORCEWAKE_GT), GEN_FW_RANGE(0xb00, 0x1fff, 0), /* uncore range */ @@ -1176,7 +1221,6 @@ static const struct intel_forcewake_range __gen9_fw_ranges[] = { GEN_FW_RANGE(0x30000, 0x3ffff, FORCEWAKE_MEDIA), }; -/* *Must* be sorted by offset ranges! See intel_fw_table_check(). */ static const struct intel_forcewake_range __gen11_fw_ranges[] = { GEN_FW_RANGE(0x0, 0x1fff, 0), /* uncore range */ GEN_FW_RANGE(0x2000, 0x26ff, FORCEWAKE_RENDER), @@ -1215,14 +1259,6 @@ static const struct intel_forcewake_range __gen11_fw_ranges[] = { GEN_FW_RANGE(0x1d4000, 0x1dbfff, 0) }; -/* - * *Must* be sorted by offset ranges! See intel_fw_table_check(). - * - * Note that the spec lists several reserved/unused ranges that don't - * actually contain any registers. In the table below we'll combine those - * reserved ranges with either the preceding or following range to keep the - * table small and lookups fast. - */ static const struct intel_forcewake_range __gen12_fw_ranges[] = { GEN_FW_RANGE(0x0, 0x1fff, 0), /* 0x0 - 0xaff: reserved @@ -1327,8 +1363,6 @@ static const struct intel_forcewake_range __gen12_fw_ranges[] = { /* * Graphics IP version 12.55 brings a slight change to the 0xd800 range, * switching it from the GT domain to the render domain. - * - * *Must* be sorted by offset ranges! See intel_fw_table_check(). */ #define XEHP_FWRANGES(FW_RANGE_D800) \ GEN_FW_RANGE(0x0, 0x1fff, 0), /* \ -- cgit v1.2.3 From fb289464f695f9d913479b4ebae9e39b057f8531 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 10 May 2022 23:02:25 -0700 Subject: drm/i915/pvc: Add forcewake support Add PVC's forcewake ranges. v2: - Drop replicated comment completely; move general cleanup of the documentation to a separate patch. 
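For readers unfamiliar with how the sorted GEN_FW_RANGE() tables are consumed: the driver looks the MMIO offset up in the table (a binary search in practice, via find_fw_domain() shown earlier). A simplified, self-contained sketch of such a lookup, with trimmed-down types and invented names rather than the driver's exact code:

struct fw_range { unsigned int start, end, domains; };

static unsigned int lookup_fw_domains(const struct fw_range *table,
				      unsigned int count, unsigned int offset)
{
	unsigned int lo = 0, hi = count;

	while (lo < hi) {
		unsigned int mid = lo + (hi - lo) / 2;

		if (offset > table[mid].end)
			lo = mid + 1;
		else if (offset < table[mid].start)
			hi = mid;
		else
			return table[mid].domains; /* 0 means no forcewake needed */
	}

	return 0; /* offset not covered by the table */
}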
Bspec: 67609 Cc: Daniele Ceraolo Spurio Cc: Stuart Summers Signed-off-by: Matt Roper Reviewed-by: Matt Atwood Link: https://patchwork.freedesktop.org/patch/msgid/20220511060228.1179450-3-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/intel_uncore.c | 142 +++++++++++++++++++++++++- drivers/gpu/drm/i915/selftests/intel_uncore.c | 2 + 2 files changed, 143 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 632327f62dfa..33304eb987e4 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1076,6 +1076,45 @@ static const struct i915_range dg2_shadowed_regs[] = { { .start = 0x1F8510, .end = 0x1F8550 }, }; +static const struct i915_range pvc_shadowed_regs[] = { + { .start = 0x2030, .end = 0x2030 }, + { .start = 0x2510, .end = 0x2550 }, + { .start = 0xA008, .end = 0xA00C }, + { .start = 0xA188, .end = 0xA188 }, + { .start = 0xA278, .end = 0xA278 }, + { .start = 0xA540, .end = 0xA56C }, + { .start = 0xC4C8, .end = 0xC4C8 }, + { .start = 0xC4E0, .end = 0xC4E0 }, + { .start = 0xC600, .end = 0xC600 }, + { .start = 0xC658, .end = 0xC658 }, + { .start = 0x22030, .end = 0x22030 }, + { .start = 0x22510, .end = 0x22550 }, + { .start = 0x1C0030, .end = 0x1C0030 }, + { .start = 0x1C0510, .end = 0x1C0550 }, + { .start = 0x1C4030, .end = 0x1C4030 }, + { .start = 0x1C4510, .end = 0x1C4550 }, + { .start = 0x1C8030, .end = 0x1C8030 }, + { .start = 0x1C8510, .end = 0x1C8550 }, + { .start = 0x1D0030, .end = 0x1D0030 }, + { .start = 0x1D0510, .end = 0x1D0550 }, + { .start = 0x1D4030, .end = 0x1D4030 }, + { .start = 0x1D4510, .end = 0x1D4550 }, + { .start = 0x1D8030, .end = 0x1D8030 }, + { .start = 0x1D8510, .end = 0x1D8550 }, + { .start = 0x1E0030, .end = 0x1E0030 }, + { .start = 0x1E0510, .end = 0x1E0550 }, + { .start = 0x1E4030, .end = 0x1E4030 }, + { .start = 0x1E4510, .end = 0x1E4550 }, + { .start = 0x1E8030, .end = 0x1E8030 }, + { .start = 0x1E8510, .end = 0x1E8550 }, + { .start = 0x1F0030, .end = 0x1F0030 }, + { .start = 0x1F0510, .end = 0x1F0550 }, + { .start = 0x1F4030, .end = 0x1F4030 }, + { .start = 0x1F4510, .end = 0x1F4550 }, + { .start = 0x1F8030, .end = 0x1F8030 }, + { .start = 0x1F8510, .end = 0x1F8550 }, +}; + static int mmio_range_cmp(u32 key, const struct i915_range *range) { if (key < range->start) @@ -1524,6 +1563,103 @@ static const struct intel_forcewake_range __dg2_fw_ranges[] = { XEHP_FWRANGES(FORCEWAKE_RENDER) }; +static const struct intel_forcewake_range __pvc_fw_ranges[] = { + GEN_FW_RANGE(0x0, 0xaff, 0), + GEN_FW_RANGE(0xb00, 0xbff, FORCEWAKE_GT), + GEN_FW_RANGE(0xc00, 0xfff, 0), + GEN_FW_RANGE(0x1000, 0x1fff, FORCEWAKE_GT), + GEN_FW_RANGE(0x2000, 0x26ff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x2700, 0x2fff, FORCEWAKE_GT), + GEN_FW_RANGE(0x3000, 0x3fff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x4000, 0x813f, FORCEWAKE_GT), /* + 0x4000 - 0x4aff: gt + 0x4b00 - 0x4fff: reserved + 0x5000 - 0x51ff: gt + 0x5200 - 0x52ff: reserved + 0x5300 - 0x53ff: gt + 0x5400 - 0x7fff: reserved + 0x8000 - 0x813f: gt */ + GEN_FW_RANGE(0x8140, 0x817f, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x8180, 0x81ff, 0), + GEN_FW_RANGE(0x8200, 0x94cf, FORCEWAKE_GT), /* + 0x8200 - 0x82ff: gt + 0x8300 - 0x84ff: reserved + 0x8500 - 0x887f: gt + 0x8880 - 0x8a7f: reserved + 0x8a80 - 0x8aff: gt + 0x8b00 - 0x8fff: reserved + 0x9000 - 0x947f: gt + 0x9480 - 0x94cf: reserved */ + GEN_FW_RANGE(0x94d0, 0x955f, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x9560, 0x967f, 0), /* + 0x9560 - 0x95ff: always on + 0x9600 - 0x967f: reserved */ + 
GEN_FW_RANGE(0x9680, 0x97ff, FORCEWAKE_RENDER), /* + 0x9680 - 0x96ff: render + 0x9700 - 0x97ff: reserved */ + GEN_FW_RANGE(0x9800, 0xcfff, FORCEWAKE_GT), /* + 0x9800 - 0xb4ff: gt + 0xb500 - 0xbfff: reserved + 0xc000 - 0xcfff: gt */ + GEN_FW_RANGE(0xd000, 0xd3ff, 0), + GEN_FW_RANGE(0xd400, 0xdbff, FORCEWAKE_GT), + GEN_FW_RANGE(0xdc00, 0xdcff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0xdd00, 0xde7f, FORCEWAKE_GT), /* + 0xdd00 - 0xddff: gt + 0xde00 - 0xde7f: reserved */ + GEN_FW_RANGE(0xde80, 0xe8ff, FORCEWAKE_RENDER), /* + 0xde80 - 0xdeff: render + 0xdf00 - 0xe1ff: reserved + 0xe200 - 0xe7ff: render + 0xe800 - 0xe8ff: reserved */ + GEN_FW_RANGE(0xe900, 0x11fff, FORCEWAKE_GT), /* + 0xe900 - 0xe9ff: gt + 0xea00 - 0xebff: reserved + 0xec00 - 0xffff: gt + 0x10000 - 0x11fff: reserved */ + GEN_FW_RANGE(0x12000, 0x12fff, 0), /* + 0x12000 - 0x127ff: always on + 0x12800 - 0x12fff: reserved */ + GEN_FW_RANGE(0x13000, 0x23fff, FORCEWAKE_GT), /* + 0x13000 - 0x135ff: gt + 0x13600 - 0x147ff: reserved + 0x14800 - 0x153ff: gt + 0x15400 - 0x19fff: reserved + 0x1a000 - 0x1ffff: gt + 0x20000 - 0x21fff: reserved + 0x22000 - 0x23fff: gt */ + GEN_FW_RANGE(0x24000, 0x2417f, 0), /* + 24000 - 0x2407f: always on + 24080 - 0x2417f: reserved */ + GEN_FW_RANGE(0x24180, 0x3ffff, FORCEWAKE_GT), /* + 0x24180 - 0x241ff: gt + 0x24200 - 0x251ff: reserved + 0x25200 - 0x252ff: gt + 0x25300 - 0x25fff: reserved + 0x26000 - 0x27fff: gt + 0x28000 - 0x2ffff: reserved + 0x30000 - 0x3ffff: gt */ + GEN_FW_RANGE(0x40000, 0x1bffff, 0), + GEN_FW_RANGE(0x1c0000, 0x1c3fff, FORCEWAKE_MEDIA_VDBOX0), /* + 0x1c0000 - 0x1c2bff: VD0 + 0x1c2c00 - 0x1c2cff: reserved + 0x1c2d00 - 0x1c2dff: VD0 + 0x1c2e00 - 0x1c3eff: reserved + 0x1c3f00 - 0x1c3fff: VD0 */ + GEN_FW_RANGE(0x1c4000, 0x1cffff, FORCEWAKE_MEDIA_VDBOX1), /* + 0x1c4000 - 0x1c6aff: VD1 + 0x1c6b00 - 0x1c7eff: reserved + 0x1c7f00 - 0x1c7fff: VD1 + 0x1c8000 - 0x1cffff: reserved */ + GEN_FW_RANGE(0x1d0000, 0x23ffff, FORCEWAKE_MEDIA_VDBOX2), /* + 0x1d0000 - 0x1d2aff: VD2 + 0x1d2b00 - 0x1d3eff: reserved + 0x1d3f00 - 0x1d3fff: VD2 + 0x1d4000 - 0x23ffff: reserved */ + GEN_FW_RANGE(0x240000, 0x3dffff, 0), + GEN_FW_RANGE(0x3e0000, 0x3effff, FORCEWAKE_GT), +}; + static void ilk_dummy_write(struct intel_uncore *uncore) { @@ -2159,7 +2295,11 @@ static int uncore_forcewake_init(struct intel_uncore *uncore) ASSIGN_READ_MMIO_VFUNCS(uncore, fwtable); - if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) { + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 60)) { + ASSIGN_FW_DOMAINS_TABLE(uncore, __pvc_fw_ranges); + ASSIGN_SHADOW_TABLE(uncore, pvc_shadowed_regs); + ASSIGN_WRITE_MMIO_VFUNCS(uncore, fwtable); + } else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) { ASSIGN_FW_DOMAINS_TABLE(uncore, __dg2_fw_ranges); ASSIGN_SHADOW_TABLE(uncore, dg2_shadowed_regs); ASSIGN_WRITE_MMIO_VFUNCS(uncore, fwtable); diff --git a/drivers/gpu/drm/i915/selftests/intel_uncore.c b/drivers/gpu/drm/i915/selftests/intel_uncore.c index cdd196783535..fda9bb79c049 100644 --- a/drivers/gpu/drm/i915/selftests/intel_uncore.c +++ b/drivers/gpu/drm/i915/selftests/intel_uncore.c @@ -69,6 +69,7 @@ static int intel_shadow_table_check(void) { gen11_shadowed_regs, ARRAY_SIZE(gen11_shadowed_regs) }, { gen12_shadowed_regs, ARRAY_SIZE(gen12_shadowed_regs) }, { dg2_shadowed_regs, ARRAY_SIZE(dg2_shadowed_regs) }, + { pvc_shadowed_regs, ARRAY_SIZE(pvc_shadowed_regs) }, }; const struct i915_range *range; unsigned int i, j; @@ -115,6 +116,7 @@ int intel_uncore_mock_selftests(void) { __gen11_fw_ranges, ARRAY_SIZE(__gen11_fw_ranges), true }, { __gen12_fw_ranges, 
ARRAY_SIZE(__gen12_fw_ranges), true }, { __xehp_fw_ranges, ARRAY_SIZE(__xehp_fw_ranges), true }, + { __pvc_fw_ranges, ARRAY_SIZE(__pvc_fw_ranges), true }, }; int err, i; -- cgit v1.2.3 From 1eb31338994889ac34c4f841e5d54bf10111741b Mon Sep 17 00:00:00 2001 From: Stuart Summers Date: Tue, 10 May 2022 23:02:26 -0700 Subject: drm/i915/pvc: Remove additional 3D flags from PIPE_CONTROL Although we already strip 3D-specific flags from PIPE_CONTROL instructions when submitting to a compute engine, there are some additional flags that need to be removed when the platform as a whole lacks a 3D pipeline. Add those restrictions here. v2: - Replace LACKS_3D_PIPELINE checks with !HAS_3D_PIPELINE and add has_3d_pipeline to all platforms except PVC. (Lucas) Bspec: 47112 Cc: Lucas De Marchi Signed-off-by: Stuart Summers Signed-off-by: Matt Roper Reviewed-by: Matt Atwood Link: https://patchwork.freedesktop.org/patch/msgid/20220511060228.1179450-4-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/gen8_engine_cs.c | 18 ++++++++++++------ drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 15 +++++++++++++-- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_pci.c | 10 ++++++++++ drivers/gpu/drm/i915/intel_device_info.h | 1 + 5 files changed, 38 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c index daa1a61972f4..98645797962f 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -197,8 +197,10 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode) flags |= PIPE_CONTROL_CS_STALL; - if (engine->class == COMPUTE_CLASS) - flags &= ~PIPE_CONTROL_3D_FLAGS; + if (!HAS_3D_PIPELINE(engine->i915)) + flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS; + else if (engine->class == COMPUTE_CLASS) + flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS; cs = intel_ring_begin(rq, 6); if (IS_ERR(cs)) @@ -227,8 +229,10 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode) flags |= PIPE_CONTROL_CS_STALL; - if (engine->class == COMPUTE_CLASS) - flags &= ~PIPE_CONTROL_3D_FLAGS; + if (!HAS_3D_PIPELINE(engine->i915)) + flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS; + else if (engine->class == COMPUTE_CLASS) + flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS; if (!HAS_FLAT_CCS(rq->engine->i915)) count = 8 + 4; @@ -717,8 +721,10 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) /* Wa_1409600907 */ flags |= PIPE_CONTROL_DEPTH_STALL; - if (rq->engine->class == COMPUTE_CLASS) - flags &= ~PIPE_CONTROL_3D_FLAGS; + if (!HAS_3D_PIPELINE(rq->engine->i915)) + flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS; + else if (rq->engine->class == COMPUTE_CLASS) + flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS; cs = gen12_emit_ggtt_write_rcs(cs, rq->fence.seqno, diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h index 246ab8f7bf57..d4e9702d3c8e 100644 --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h @@ -310,8 +310,11 @@ #define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1<<0) #define PIPE_CONTROL_GLOBAL_GTT (1<<2) /* in addr dword */ -/* 3D-related flags can't be set on compute engine */ -#define PIPE_CONTROL_3D_FLAGS (\ +/* + * 3D-related flags that can't be set on _engines_ that lack access to the 3D + * pipeline (i.e., CCS engines). 
+ */ +#define PIPE_CONTROL_3D_ENGINE_FLAGS (\ PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | \ PIPE_CONTROL_DEPTH_CACHE_FLUSH | \ PIPE_CONTROL_TILE_CACHE_FLUSH | \ @@ -322,6 +325,14 @@ PIPE_CONTROL_VF_CACHE_INVALIDATE | \ PIPE_CONTROL_GLOBAL_SNAPSHOT_RESET) +/* 3D-related flags that can't be set on _platforms_ that lack a 3D pipeline */ +#define PIPE_CONTROL_3D_ARCH_FLAGS ( \ + PIPE_CONTROL_3D_ENGINE_FLAGS | \ + PIPE_CONTROL_INDIRECT_STATE_DISABLE | \ + PIPE_CONTROL_FLUSH_ENABLE | \ + PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | \ + PIPE_CONTROL_DC_FLUSH_ENABLE) + #define MI_MATH(x) MI_INSTR(0x1a, (x) - 1) #define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2)) /* Opcodes for MI_MATH_INSTR */ diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c5fc402d9c50..11ef106dba93 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1405,6 +1405,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_MBUS_JOINING(i915) (IS_ALDERLAKE_P(i915)) +#define HAS_3D_PIPELINE(i915) (INTEL_INFO(i915)->has_3d_pipeline) + /* i915_gem.c */ void i915_gem_init_early(struct drm_i915_private *dev_priv); void i915_gem_cleanup_early(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index b9f474d3bec4..269d7c8f2f81 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -171,6 +171,7 @@ .display.overlay_needs_physical = 1, \ .display.has_gmch = 1, \ .gpu_reset_clobbers_display = true, \ + .has_3d_pipeline = 1, \ .hws_needs_physical = 1, \ .unfenced_needs_alignment = 1, \ .platform_engine_mask = BIT(RCS0), \ @@ -190,6 +191,7 @@ .display.has_overlay = 1, \ .display.overlay_needs_physical = 1, \ .display.has_gmch = 1, \ + .has_3d_pipeline = 1, \ .gpu_reset_clobbers_display = true, \ .hws_needs_physical = 1, \ .unfenced_needs_alignment = 1, \ @@ -232,6 +234,7 @@ static const struct intel_device_info i865g_info = { .display.has_gmch = 1, \ .gpu_reset_clobbers_display = true, \ .platform_engine_mask = BIT(RCS0), \ + .has_3d_pipeline = 1, \ .has_snoop = true, \ .has_coherent_ggtt = true, \ .dma_mask_size = 32, \ @@ -323,6 +326,7 @@ static const struct intel_device_info pnv_m_info = { .display.has_gmch = 1, \ .gpu_reset_clobbers_display = true, \ .platform_engine_mask = BIT(RCS0), \ + .has_3d_pipeline = 1, \ .has_snoop = true, \ .has_coherent_ggtt = true, \ .dma_mask_size = 36, \ @@ -374,6 +378,7 @@ static const struct intel_device_info gm45_info = { .display.cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B), \ .display.has_hotplug = 1, \ .platform_engine_mask = BIT(RCS0) | BIT(VCS0), \ + .has_3d_pipeline = 1, \ .has_snoop = true, \ .has_coherent_ggtt = true, \ /* ilk does support rc6, but we do not implement [power] contexts */ \ @@ -405,6 +410,7 @@ static const struct intel_device_info ilk_m_info = { .display.has_hotplug = 1, \ .display.fbc_mask = BIT(INTEL_FBC_A), \ .platform_engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0), \ + .has_3d_pipeline = 1, \ .has_coherent_ggtt = true, \ .has_llc = 1, \ .has_rc6 = 1, \ @@ -456,6 +462,7 @@ static const struct intel_device_info snb_m_gt2_info = { .display.has_hotplug = 1, \ .display.fbc_mask = BIT(INTEL_FBC_A), \ .platform_engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0), \ + .has_3d_pipeline = 1, \ .has_coherent_ggtt = true, \ .has_llc = 1, \ .has_rc6 = 1, \ @@ -692,6 +699,7 @@ static const struct intel_device_info skl_gt4_info = { .display.cpu_transcoder_mask = BIT(TRANSCODER_A) | 
BIT(TRANSCODER_B) | \ BIT(TRANSCODER_C) | BIT(TRANSCODER_EDP) | \ BIT(TRANSCODER_DSI_A) | BIT(TRANSCODER_DSI_C), \ + .has_3d_pipeline = 1, \ .has_64bit_reloc = 1, \ .display.has_ddi = 1, \ .display.has_fpga_dbg = 1, \ @@ -1005,6 +1013,7 @@ static const struct intel_device_info adl_p_info = { .graphics.rel = 50, \ XE_HP_PAGE_SIZES, \ .dma_mask_size = 46, \ + .has_3d_pipeline = 1, \ .has_64bit_reloc = 1, \ .has_flat_ccs = 1, \ .has_global_mocs = 1, \ @@ -1080,6 +1089,7 @@ static const struct intel_device_info ats_m_info = { #define XE_HPC_FEATURES \ XE_HP_FEATURES, \ .dma_mask_size = 52, \ + .has_3d_pipeline = 0, \ .has_l3_ccs_read = 1 __maybe_unused diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index a134914237dd..4e1c80966ab5 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -143,6 +143,7 @@ enum intel_ppgtt_type { func(needs_compact_pt); \ func(gpu_reset_clobbers_display); \ func(has_reset_engine); \ + func(has_3d_pipeline); \ func(has_4tile); \ func(has_flat_ccs); \ func(has_global_mocs); \ -- cgit v1.2.3 From e41388d508a50ddb2be5c2676f1992353ca2f155 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 10 May 2022 23:02:27 -0700 Subject: drm/i915/pvc: Add new BCS engines to GuC engine list Initialize ADS system info to reflect the availability of new BCS engines Original-author: CQ Tang Cc: Stuart Summers Cc: John Harrison Signed-off-by: Matt Roper Reviewed-by: Matt Atwood Link: https://patchwork.freedesktop.org/patch/msgid/20220511060228.1179450-5-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 2 +- drivers/gpu/drm/i915/i915_drv.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index 3eabf4cf8eec..bb197610fd5b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -457,7 +457,7 @@ static void fill_engine_enable_masks(struct intel_gt *gt, { info_map_write(info_map, engine_enabled_masks[GUC_RENDER_CLASS], RCS_MASK(gt)); info_map_write(info_map, engine_enabled_masks[GUC_COMPUTE_CLASS], CCS_MASK(gt)); - info_map_write(info_map, engine_enabled_masks[GUC_BLITTER_CLASS], 1); + info_map_write(info_map, engine_enabled_masks[GUC_BLITTER_CLASS], BCS_MASK(gt)); info_map_write(info_map, engine_enabled_masks[GUC_VIDEO_CLASS], VDBOX_MASK(gt)); info_map_write(info_map, engine_enabled_masks[GUC_VIDEOENHANCE_CLASS], VEBOX_MASK(gt)); } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 11ef106dba93..74b3caccd839 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1223,6 +1223,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, }) #define RCS_MASK(gt) \ ENGINE_INSTANCES_MASK(gt, RCS0, I915_MAX_RCS) +#define BCS_MASK(gt) \ + ENGINE_INSTANCES_MASK(gt, BCS0, I915_MAX_BCS) #define VDBOX_MASK(gt) \ ENGINE_INSTANCES_MASK(gt, VCS0, I915_MAX_VCS) #define VEBOX_MASK(gt) \ -- cgit v1.2.3 From a4f263f46961b1229b10ca3a98f0be618a9d47ac Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Tue, 10 May 2022 23:02:28 -0700 Subject: drm/i915/guc: XEHPSDV and PVC do not use HuC Disable HuC loading since it is not used on these platforms. 
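For readers following along, a minimal standalone sketch of the mask arithmetic this change relies on. The flag names are the ones used in the diff below; the bit positions and the userspace-style main() are assumptions made purely for illustration and are not taken from the driver:

#include <stdio.h>

/* illustrative only -- bit values assumed, not copied from i915_params */
#define ENABLE_GUC_SUBMISSION	(1U << 0)
#define ENABLE_GUC_LOAD_HUC	(1U << 1)

int main(void)
{
	unsigned int enable_guc = ENABLE_GUC_LOAD_HUC | ENABLE_GUC_SUBMISSION;
	int is_pvc_or_xehpsdv = 1;	/* pretend the probed device is one of these */

	if (is_pvc_or_xehpsdv)
		enable_guc &= ~ENABLE_GUC_LOAD_HUC;	/* HuC load dropped, GuC submission kept */

	printf("GuC submission %s, HuC load %s\n",
	       enable_guc & ENABLE_GUC_SUBMISSION ? "on" : "off",
	       enable_guc & ENABLE_GUC_LOAD_HUC ? "on" : "off");
	return 0;
}
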
Cc: Stuart Summers Signed-off-by: Daniele Ceraolo Spurio Signed-off-by: Matt Roper Reviewed-by: Matt Atwood Link: https://patchwork.freedesktop.org/patch/msgid/20220511060228.1179450-6-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 3c3527cb0007..f2e7c82985ef 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -45,6 +45,10 @@ static void uc_expand_default_options(struct intel_uc *uc) /* Default: enable HuC authentication and GuC submission */ i915->params.enable_guc = ENABLE_GUC_LOAD_HUC | ENABLE_GUC_SUBMISSION; + + /* XEHPSDV and PVC do not use HuC */ + if (IS_XEHPSDV(i915) || IS_PONTEVECCHIO(i915)) + i915->params.enable_guc &= ~ENABLE_GUC_LOAD_HUC; } /* Reset GuC providing us with fresh state for both GuC and HuC. -- cgit v1.2.3 From 837c72b23a57e15dd363d5f3f3f268c49c128740 Mon Sep 17 00:00:00 2001 From: Balasubramani Vivekanandan Date: Mon, 23 May 2022 13:21:16 +0530 Subject: drm/i915/hwconfig: Report no hwconfig support on ADL-N ADL-N being a subplatform of ADL-P, it lacks support for hwconfig table. Explicit check added to skip ADL-N. Signed-off-by: Balasubramani Vivekanandan Reviewed-by: Matt Roper Reviewed-by: Andrzej Hajda Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20220523075116.207677-1-balasubramani.vivekanandan@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c index 79c66b6b51a3..5aaa3948de74 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c @@ -94,7 +94,7 @@ static int guc_hwconfig_fill_buffer(struct intel_guc *guc, struct intel_hwconfig static bool has_table(struct drm_i915_private *i915) { - if (IS_ALDERLAKE_P(i915)) + if (IS_ALDERLAKE_P(i915) && !IS_ADLP_N(i915)) return true; if (IS_DG2(i915)) return true; -- cgit v1.2.3 From 16e214d4aef2d600ef94e845530a28ce795b8fcc Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 24 May 2022 16:59:06 -0700 Subject: drm/i915/hwconfig: Future-proof platform checks PVC also has a hwconfig table. Actually the current expectation is that all future platforms will have hwconfig, so let's just change the condition to an IP version check so that we don't need to keep updating this for each new platform that shows up. 
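As a small aside on why a single integer comparison is enough to future-proof this check: IP_VER() packs the version and release into one value (to the best of my understanding it is (ver << 8 | rel) in the driver), so any platform at or above 12.55 satisfies the condition. The sketch below restates that packing as an assumption, with made-up sample values:

#include <stdio.h>

/* assumed packing, mirroring the driver's IP_VER definition */
#define IP_VER(ver, rel)	((ver) << 8 | (rel))

int main(void)
{
	unsigned int dg2 = IP_VER(12, 55);	/* 0x0c37 */
	unsigned int pvc = IP_VER(12, 60);	/* 0x0c3c */

	/* both pass the new ">= IP_VER(12, 55)" hwconfig condition */
	printf("dg2 %d, pvc %d\n", dg2 >= IP_VER(12, 55), pvc >= IP_VER(12, 55));
	return 0;
}
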
Cc: John Harrison Cc: Radhakrishna Sripada Signed-off-by: Matt Roper Reviewed-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20220524235906.529771-1-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c index 5aaa3948de74..4781fccc2687 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c @@ -96,7 +96,7 @@ static bool has_table(struct drm_i915_private *i915) { if (IS_ALDERLAKE_P(i915) && !IS_ADLP_N(i915)) return true; - if (IS_DG2(i915)) + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) return true; return false; -- cgit v1.2.3 From 3304033a1e69cd81a2044b4422f0d7e593afb4e6 Mon Sep 17 00:00:00 2001 From: Alan Previn Date: Thu, 10 Mar 2022 16:43:11 -0800 Subject: drm/i915/reset: Fix error_state_read ptr + offset use Fix our pointer offset usage in error_state_read when there is no i915_gpu_coredump but buf offset is non-zero. This fixes a kernel page fault can happen when multiple tests are running concurrently in a loop and one is producing engine resets and consuming the i915 error_state dump while the other is forcing full GT resets. (takes a while to trigger). The dmesg call trace: [ 5590.803000] BUG: unable to handle page fault for address: ffffffffa0b0e000 [ 5590.803009] #PF: supervisor read access in kernel mode [ 5590.803013] #PF: error_code(0x0000) - not-present page [ 5590.803016] PGD 5814067 P4D 5814067 PUD 5815063 PMD 109de4067 PTE 0 [ 5590.803022] Oops: 0000 [#1] PREEMPT SMP NOPTI [ 5590.803026] CPU: 5 PID: 13656 Comm: i915_hangman Tainted: G U 5.17.0-rc5-ups69-guc-err-capt-rev6+ #136 [ 5590.803033] Hardware name: Intel Corporation Alder Lake Client Platform/AlderLake-M LP4x RVP, BIOS ADLPFWI1.R00. 
3031.A02.2201171222 01/17/2022 [ 5590.803039] RIP: 0010:memcpy_erms+0x6/0x10 [ 5590.803045] Code: fe ff ff cc eb 1e 0f 1f 00 48 89 f8 48 89 d1 48 c1 e9 03 83 e2 07 f3 48 a5 89 d1 f3 a4 c3 66 0f 1f 44 00 00 48 89 f8 48 89 d1 a4 c3 0f 1f 80 00 00 00 00 48 89 f8 48 83 fa 20 72 7e 40 38 fe [ 5590.803054] RSP: 0018:ffffc90003a8fdf0 EFLAGS: 00010282 [ 5590.803057] RAX: ffff888107ee9000 RBX: ffff888108cb1a00 RCX: 0000000000000f8f [ 5590.803061] RDX: 0000000000001000 RSI: ffffffffa0b0e000 RDI: ffff888107ee9071 [ 5590.803065] RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000001 [ 5590.803069] R10: 0000000000000001 R11: 0000000000000002 R12: 0000000000000019 [ 5590.803073] R13: 0000000000174fff R14: 0000000000001000 R15: ffff888107ee9000 [ 5590.803077] FS: 00007f62a99bee80(0000) GS:ffff88849f880000(0000) knlGS:0000000000000000 [ 5590.803082] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 5590.803085] CR2: ffffffffa0b0e000 CR3: 000000010a1a8004 CR4: 0000000000770ee0 [ 5590.803089] PKRU: 55555554 [ 5590.803091] Call Trace: [ 5590.803093] [ 5590.803096] error_state_read+0xa1/0xd0 [i915] [ 5590.803175] kernfs_fop_read_iter+0xb2/0x1b0 [ 5590.803180] new_sync_read+0x116/0x1a0 [ 5590.803185] vfs_read+0x114/0x1b0 [ 5590.803189] ksys_read+0x63/0xe0 [ 5590.803193] do_syscall_64+0x38/0xc0 [ 5590.803197] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 5590.803201] RIP: 0033:0x7f62aaea5912 [ 5590.803204] Code: c0 e9 b2 fe ff ff 50 48 8d 3d 5a b9 0c 00 e8 05 19 02 00 0f 1f 44 00 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 0f 05 <48> 3d 00 f0 ff ff 77 56 c3 0f 1f 44 00 00 48 83 ec 28 48 89 54 24 [ 5590.803213] RSP: 002b:00007fff5b659ae8 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [ 5590.803218] RAX: ffffffffffffffda RBX: 0000000000100000 RCX: 00007f62aaea5912 [ 5590.803221] RDX: 000000000008b000 RSI: 00007f62a8c4000f RDI: 0000000000000006 [ 5590.803225] RBP: 00007f62a8bcb00f R08: 0000000000200010 R09: 0000000000101000 [ 5590.803229] R10: 0000000000000001 R11: 0000000000000246 R12: 0000000000000006 [ 5590.803233] R13: 0000000000075000 R14: 00007f62a8acb010 R15: 0000000000200000 [ 5590.803238] [ 5590.803240] Modules linked in: i915 ttm drm_buddy drm_dp_helper drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops prime_numbers nfnetlink br_netfilter overlay mei_pxp mei_hdcp x86_pkg_temp_thermal coretemp kvm_intel snd_hda_codec_hdmi snd_hda_intel snd_intel_dspcfg snd_hda_codec snd_hwdep snd_hda_core snd_pcm mei_me mei fuse ip_tables x_tables crct10dif_pclmul e1000e crc32_pclmul ptp i2c_i801 ghash_clmulni_intel i2c_smbus pps_core [last unloa ded: ttm] [ 5590.803277] CR2: ffffffffa0b0e000 [ 5590.803280] ---[ end trace 0000000000000000 ]--- Fixes: 0e39037b3165 ("drm/i915: Cache the error string") Signed-off-by: Alan Previn Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20220311004311.514198-2-alan.previn.teres.alexis@intel.com --- drivers/gpu/drm/i915/i915_sysfs.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index 8521daba212a..a4e3b6dbb231 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -166,7 +166,14 @@ static ssize_t error_state_read(struct file *filp, struct kobject *kobj, struct device *kdev = kobj_to_dev(kobj); struct drm_i915_private *i915 = kdev_minor_to_i915(kdev); struct i915_gpu_coredump *gpu; - ssize_t ret; + ssize_t ret = 0; + + /* + * FIXME: Concurrent clients triggering 
resets and reading + clearing + * dumps can cause inconsistent sysfs reads when a user calls in with a + * non-zero offset to complete a prior partial read but the + * gpu_coredump has been cleared or replaced. + */ gpu = i915_first_error_state(i915); if (IS_ERR(gpu)) { @@ -178,8 +185,10 @@ static ssize_t error_state_read(struct file *filp, struct kobject *kobj, const char *str = "No error state collected\n"; size_t len = strlen(str); - ret = min_t(size_t, count, len - off); - memcpy(buf, str + off, ret); + if (off < len) { + ret = min_t(size_t, count, len - off); + memcpy(buf, str + off, ret); + } } return ret; -- cgit v1.2.3 From 26be7cd8aacdd3f0429834e78e3166286779f083 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Wed, 25 May 2022 06:19:17 -0700 Subject: drm/i915/gt: Add media freq factor to per-gt sysfs Expose new sysfs to program and retrieve media freq factor. Factor values of 0 (dynamic), 0.5 and 1.0 are supported via a u8.8 fixed point representation (corresponding to integer values of 0, 128 and 256 respectively). Media freq factor is converted to media_ratio_mode for GuC. It is programmed into GuC using H2G SLPC interface. It is retrieved from GuC through a register read. A cached media_ratio_mode is maintained to preserve set values across GuC resets. This patch adds the following sysfs files to gt/gtN sysfs: * media_freq_factor * media_freq_factor.scale v2: Minor wording change in drm_warn (Tvrtko) Cc: Tvrtko Ursulin Signed-off-by: Dale B Stimson Signed-off-by: Ashutosh Dixit Reviewed-by: Andi Shyti Reviewed-by: Rodrigo Vivi Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/7ad7578335d8af9cba047b4bcf33d1887453d2e1.1653484574.git.ashutosh.dixit@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 1 + drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c | 130 +++++++++++++++++++++ .../gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h | 6 + drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 20 ++++ drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h | 1 + drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h | 3 + 6 files changed, 161 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 7246eb870c7e..b4642dcc192f 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -740,6 +740,7 @@ #define GEN6_AGGRESSIVE_TURBO (0 << 15) #define GEN9_SW_REQ_UNSLICE_RATIO_SHIFT 23 #define GEN9_IGNORE_SLICE_RATIO (0 << 0) +#define GEN12_MEDIA_FREQ_RATIO REG_BIT(13) #define GEN6_RC_VIDEO_FREQ _MMIO(0xa00c) #define GEN6_RC_CTL_RC6pp_ENABLE (1 << 16) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c index f76b6cf8040e..081a17f5ca33 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c @@ -558,6 +558,128 @@ static const struct attribute *freq_attrs[] = { NULL }; +/* + * Scaling for multipliers (aka frequency factors). + * The format of the value in the register is u8.8. + * + * The presentation to userspace is inspired by the perf event framework. + * See: + * Documentation/ABI/testing/sysfs-bus-event_source-devices-events + * for description of: + * /sys/bus/event_source/devices//events/.scale + * + * Summary: Expose two sysfs files for each multiplier. + * + * 1. File contains a raw hardware value. + * 2. File .scale contains the multiplicative scale factor to be + * used by userspace to compute the actual value. 
+ * + * So userspace knows that to get the frequency_factor it multiplies the + * provided value by the specified scale factor and vice-versa. + * + * That way there is no precision loss in the kernel interface and API + * is future proof should one day the hardware register change to u16.u16, + * on some platform. (Or any other fixed point representation.) + * + * Example: + * File contains the value 2.5, represented as u8.8 0x0280, which + * is comprised of: + * - an integer part of 2 + * - a fractional part of 0x80 (representing 0x80 / 2^8 == 0x80 / 256). + * File .scale contains a string representation of floating point + * value 0.00390625 (which is (1 / 256)). + * Userspace computes the actual value: + * 0x0280 * 0.00390625 -> 2.5 + * or converts an actual value to the value to be written into : + * 2.5 / 0.00390625 -> 0x0280 + */ + +#define U8_8_VAL_MASK 0xffff +#define U8_8_SCALE_TO_VALUE "0.00390625" + +static ssize_t freq_factor_scale_show(struct device *dev, + struct device_attribute *attr, + char *buff) +{ + return sysfs_emit(buff, "%s\n", U8_8_SCALE_TO_VALUE); +} + +static u32 media_ratio_mode_to_factor(u32 mode) +{ + /* 0 -> 0, 1 -> 256, 2 -> 128 */ + return !mode ? mode : 256 / mode; +} + +static ssize_t media_freq_factor_show(struct device *dev, + struct device_attribute *attr, + char *buff) +{ + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name); + struct intel_guc_slpc *slpc = >->uc.guc.slpc; + intel_wakeref_t wakeref; + u32 mode; + + /* + * Retrieve media_ratio_mode from GEN6_RPNSWREQ bit 13 set by + * GuC. GEN6_RPNSWREQ:13 value 0 represents 1:2 and 1 represents 1:1 + */ + if (IS_XEHPSDV(gt->i915) && + slpc->media_ratio_mode == SLPC_MEDIA_RATIO_MODE_DYNAMIC_CONTROL) { + /* + * For XEHPSDV dynamic mode GEN6_RPNSWREQ:13 does not contain + * the media_ratio_mode, just return the cached media ratio + */ + mode = slpc->media_ratio_mode; + } else { + with_intel_runtime_pm(gt->uncore->rpm, wakeref) + mode = intel_uncore_read(gt->uncore, GEN6_RPNSWREQ); + mode = REG_FIELD_GET(GEN12_MEDIA_FREQ_RATIO, mode) ? 
+ SLPC_MEDIA_RATIO_MODE_FIXED_ONE_TO_ONE : + SLPC_MEDIA_RATIO_MODE_FIXED_ONE_TO_TWO; + } + + return sysfs_emit(buff, "%u\n", media_ratio_mode_to_factor(mode)); +} + +static ssize_t media_freq_factor_store(struct device *dev, + struct device_attribute *attr, + const char *buff, size_t count) +{ + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name); + struct intel_guc_slpc *slpc = >->uc.guc.slpc; + u32 factor, mode; + int err; + + err = kstrtou32(buff, 0, &factor); + if (err) + return err; + + for (mode = SLPC_MEDIA_RATIO_MODE_DYNAMIC_CONTROL; + mode <= SLPC_MEDIA_RATIO_MODE_FIXED_ONE_TO_TWO; mode++) + if (factor == media_ratio_mode_to_factor(mode)) + break; + + if (mode > SLPC_MEDIA_RATIO_MODE_FIXED_ONE_TO_TWO) + return -EINVAL; + + err = intel_guc_slpc_set_media_ratio_mode(slpc, mode); + if (!err) { + slpc->media_ratio_mode = mode; + DRM_DEBUG("Set slpc->media_ratio_mode to %d", mode); + } + return err ?: count; +} + +static DEVICE_ATTR_RW(media_freq_factor); +static struct device_attribute dev_attr_media_freq_factor_scale = + __ATTR(media_freq_factor.scale, 0444, freq_factor_scale_show, NULL); + +static const struct attribute *media_perf_power_attrs[] = { + &dev_attr_media_freq_factor.attr, + &dev_attr_media_freq_factor_scale.attr, + NULL +}; + static int intel_sysfs_rps_init(struct intel_gt *gt, struct kobject *kobj, const struct attribute * const *attrs) { @@ -599,4 +721,12 @@ void intel_gt_sysfs_pm_init(struct intel_gt *gt, struct kobject *kobj) drm_warn(>->i915->drm, "failed to create gt%u throttle sysfs files (%pe)", gt->info.id, ERR_PTR(ret)); + + if (HAS_MEDIA_RATIO_MODE(gt->i915) && intel_uc_uses_guc_slpc(>->uc)) { + ret = sysfs_create_files(kobj, media_perf_power_attrs); + if (ret) + drm_warn(>->i915->drm, + "failed to create gt%u media_perf_power_attrs sysfs (%pe)\n", + gt->info.id, ERR_PTR(ret)); + } } diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h index 62cb4254a77a..4c840a2639dc 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h @@ -122,6 +122,12 @@ enum slpc_param_id { SLPC_MAX_PARAM = 32, }; +enum slpc_media_ratio_mode { + SLPC_MEDIA_RATIO_MODE_DYNAMIC_CONTROL = 0, + SLPC_MEDIA_RATIO_MODE_FIXED_ONE_TO_ONE = 1, + SLPC_MEDIA_RATIO_MODE_FIXED_ONE_TO_TWO = 2, +}; + enum slpc_event_id { SLPC_EVENT_RESET = 0, SLPC_EVENT_SHUTDOWN = 1, diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index 1db833da42df..2df31af70d63 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -260,6 +260,7 @@ int intel_guc_slpc_init(struct intel_guc_slpc *slpc) slpc->boost_freq = 0; atomic_set(&slpc->num_waiters, 0); slpc->num_boosts = 0; + slpc->media_ratio_mode = SLPC_MEDIA_RATIO_MODE_DYNAMIC_CONTROL; mutex_init(&slpc->lock); INIT_WORK(&slpc->boost_work, slpc_boost_work); @@ -506,6 +507,22 @@ int intel_guc_slpc_get_min_freq(struct intel_guc_slpc *slpc, u32 *val) return ret; } +int intel_guc_slpc_set_media_ratio_mode(struct intel_guc_slpc *slpc, u32 val) +{ + struct drm_i915_private *i915 = slpc_to_i915(slpc); + intel_wakeref_t wakeref; + int ret = 0; + + if (!HAS_MEDIA_RATIO_MODE(i915)) + return -ENODEV; + + with_intel_runtime_pm(&i915->runtime_pm, wakeref) + ret = slpc_set_param(slpc, + SLPC_PARAM_MEDIA_FF_RATIO_MODE, + val); + return ret; +} + void intel_guc_pm_intrmsk_enable(struct intel_gt *gt) { u32 pm_intrmsk_mbz = 0; @@ 
-654,6 +671,9 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc) return ret; } + /* Set cached media freq ratio mode */ + intel_guc_slpc_set_media_ratio_mode(slpc, slpc->media_ratio_mode); + return 0; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h index 0caa8fee3c04..82a98f78f96c 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h @@ -38,6 +38,7 @@ int intel_guc_slpc_set_boost_freq(struct intel_guc_slpc *slpc, u32 val); int intel_guc_slpc_get_max_freq(struct intel_guc_slpc *slpc, u32 *val); int intel_guc_slpc_get_min_freq(struct intel_guc_slpc *slpc, u32 *val); int intel_guc_slpc_print_info(struct intel_guc_slpc *slpc, struct drm_printer *p); +int intel_guc_slpc_set_media_ratio_mode(struct intel_guc_slpc *slpc, u32 val); void intel_guc_pm_intrmsk_enable(struct intel_gt *gt); void intel_guc_slpc_boost(struct intel_guc_slpc *slpc); void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h index bf5b9a563c09..73d208123528 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h @@ -29,6 +29,9 @@ struct intel_guc_slpc { u32 min_freq_softlimit; u32 max_freq_softlimit; + /* cached media ratio mode */ + u32 media_ratio_mode; + /* Protects set/reset of boost freq * and value of num_waiters */ -- cgit v1.2.3 From 6a735552f44d7a29a9e629cdc3b5c3ac2b4021e6 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Wed, 25 May 2022 06:19:18 -0700 Subject: drm/i915/pcode: Init pcode on different gt's Extend pcode initialization to pcode on different gt's. Cc: Tvrtko Ursulin Cc: Jani Nikula Signed-off-by: Ashutosh Dixit Reviewed-by: Tvrtko Ursulin Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/4de1e4fd71a2971549c5cfb185412f190f15e235.1653484574.git.ashutosh.dixit@intel.com --- drivers/gpu/drm/i915/i915_driver.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index b47746152d97..d26dcca7e654 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -520,6 +520,22 @@ mask_err: return ret; } +static int i915_pcode_init(struct drm_i915_private *i915) +{ + struct intel_gt *gt; + int id, ret; + + for_each_gt(gt, i915, id) { + ret = intel_pcode_init(gt->uncore); + if (ret) { + drm_err(>->i915->drm, "gt%d: intel_pcode_init failed %d\n", id, ret); + return ret; + } + } + + return 0; +} + /** * i915_driver_hw_probe - setup state requiring device access * @dev_priv: device private @@ -629,7 +645,7 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) intel_opregion_setup(dev_priv); - ret = intel_pcode_init(&dev_priv->uncore); + ret = i915_pcode_init(dev_priv); if (ret) goto err_msi; @@ -1251,7 +1267,7 @@ static int i915_drm_resume(struct drm_device *dev) disable_rpm_wakeref_asserts(&dev_priv->runtime_pm); - ret = intel_pcode_init(&dev_priv->uncore); + ret = i915_pcode_init(dev_priv); if (ret) return ret; -- cgit v1.2.3 From 9d15dd1bb3e7de4dd31a225977813dda2748253e Mon Sep 17 00:00:00 2001 From: Dale B Stimson Date: Wed, 25 May 2022 06:19:19 -0700 Subject: drm/i915/gt: Add media RP0/RPn to per-gt sysfs Retrieve RP0 and RPn freq for media IP from PCODE and display in per-gt sysfs. 
This patch adds the following files to gt/gtN sysfs: * media_RP0_freq_mhz * media_RPn_freq_mhz v2: Fixed commit author (Rodrigo) v3: Convert to new uncore interface for pcode functions v4: Adapt to intel_pcode.* function rename v5: #include "intel_pcode.h" in alphabetical order (Tvrtko) Cc: Tvrtko Ursulin Signed-off-by: Dale B Stimson Signed-off-by: Ashutosh Dixit Reviewed-by: Andi Shyti Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/45e34127a79e808f6582db8afb77f2f728a446e6.1653484574.git.ashutosh.dixit@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c | 47 +++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_reg.h | 8 +++++ 2 files changed, 55 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c index 081a17f5ca33..ae8a8f725f01 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c @@ -14,6 +14,7 @@ #include "intel_gt_regs.h" #include "intel_gt_sysfs.h" #include "intel_gt_sysfs_pm.h" +#include "intel_pcode.h" #include "intel_rc6.h" #include "intel_rps.h" @@ -670,13 +671,59 @@ static ssize_t media_freq_factor_store(struct device *dev, return err ?: count; } +static ssize_t media_RP0_freq_mhz_show(struct device *dev, + struct device_attribute *attr, + char *buff) +{ + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name); + u32 val; + int err; + + err = snb_pcode_read_p(gt->uncore, XEHPSDV_PCODE_FREQUENCY_CONFIG, + PCODE_MBOX_FC_SC_READ_FUSED_P0, + PCODE_MBOX_DOMAIN_MEDIAFF, &val); + + if (err) + return err; + + /* Fused media RP0 read from pcode is in units of 50 MHz */ + val *= GT_FREQUENCY_MULTIPLIER; + + return sysfs_emit(buff, "%u\n", val); +} + +static ssize_t media_RPn_freq_mhz_show(struct device *dev, + struct device_attribute *attr, + char *buff) +{ + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name); + u32 val; + int err; + + err = snb_pcode_read_p(gt->uncore, XEHPSDV_PCODE_FREQUENCY_CONFIG, + PCODE_MBOX_FC_SC_READ_FUSED_PN, + PCODE_MBOX_DOMAIN_MEDIAFF, &val); + + if (err) + return err; + + /* Fused media RPn read from pcode is in units of 50 MHz */ + val *= GT_FREQUENCY_MULTIPLIER; + + return sysfs_emit(buff, "%u\n", val); +} + static DEVICE_ATTR_RW(media_freq_factor); static struct device_attribute dev_attr_media_freq_factor_scale = __ATTR(media_freq_factor.scale, 0444, freq_factor_scale_show, NULL); +static DEVICE_ATTR_RO(media_RP0_freq_mhz); +static DEVICE_ATTR_RO(media_RPn_freq_mhz); static const struct attribute *media_perf_power_attrs[] = { &dev_attr_media_freq_factor.attr, &dev_attr_media_freq_factor_scale.attr, + &dev_attr_media_RP0_freq_mhz.attr, + &dev_attr_media_RPn_freq_mhz.attr, NULL }; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index d1806dcee668..213f02d58fc8 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -6767,6 +6767,14 @@ #define DG1_UNCORE_GET_INIT_STATUS 0x0 #define DG1_UNCORE_INIT_STATUS_COMPLETE 0x1 #define GEN12_PCODE_READ_SAGV_BLOCK_TIME_US 0x23 +#define XEHPSDV_PCODE_FREQUENCY_CONFIG 0x6e /* xehpsdv, pvc */ +/* XEHPSDV_PCODE_FREQUENCY_CONFIG sub-commands (param1) */ +#define PCODE_MBOX_FC_SC_READ_FUSED_P0 0x0 +#define PCODE_MBOX_FC_SC_READ_FUSED_PN 0x1 +/* PCODE_MBOX_DOMAIN_* - mailbox domain IDs */ +/* XEHPSDV_PCODE_FREQUENCY_CONFIG param2 */ +#define PCODE_MBOX_DOMAIN_NONE 0x0 +#define PCODE_MBOX_DOMAIN_MEDIAFF 0x3 #define GEN6_PCODE_DATA _MMIO(0x138128) #define 
GEN6_PCODE_FREQ_IA_RATIO_SHIFT 8 #define GEN6_PCODE_FREQ_RING_RATIO_SHIFT 16 -- cgit v1.2.3 From 69d6bf5c3754ffc491896632438417d1cedc2c68 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Wed, 25 May 2022 06:19:20 -0700 Subject: drm/i915/gt: Fix memory leaks in per-gt sysfs All kmalloc'd kobjects need a kobject_put() to free memory. For example in previous code, kobj_gt_release() never gets called. The requirement of kobject_put() now results in a slightly different code organization. v2: s/gtn/gt/ (Andi) Fixes: b770bcfae9ad ("drm/i915/gt: create per-tile sysfs interface") Signed-off-by: Ashutosh Dixit Reviewed-by: Andi Shyti Acked-by: Andrzej Hajda Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/a6f6686517c85fba61a0c45097f5bb4fe7e257fb.1653484574.git.ashutosh.dixit@intel.com --- drivers/gpu/drm/i915/gt/intel_gt.c | 1 + drivers/gpu/drm/i915/gt/intel_gt_sysfs.c | 29 ++++++++++++----------------- drivers/gpu/drm/i915/gt/intel_gt_sysfs.h | 6 +----- drivers/gpu/drm/i915/gt/intel_gt_types.h | 3 +++ drivers/gpu/drm/i915/i915_sysfs.c | 2 ++ 5 files changed, 19 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 034182f85501..0a3931c011c6 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -790,6 +790,7 @@ void intel_gt_driver_unregister(struct intel_gt *gt) { intel_wakeref_t wakeref; + intel_gt_sysfs_unregister(gt); intel_rps_driver_unregister(>->rps); intel_gsc_fini(>->gsc); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c index 8ec8bc660c8c..9e4ebf53379b 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c @@ -24,7 +24,7 @@ bool is_object_gt(struct kobject *kobj) static struct intel_gt *kobj_to_gt(struct kobject *kobj) { - return container_of(kobj, struct kobj_gt, base)->gt; + return container_of(kobj, struct intel_gt, sysfs_gt); } struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev, @@ -72,9 +72,9 @@ static struct attribute *id_attrs[] = { }; ATTRIBUTE_GROUPS(id); +/* A kobject needs a release() method even if it does nothing */ static void kobj_gt_release(struct kobject *kobj) { - kfree(kobj); } static struct kobj_type kobj_gt_type = { @@ -85,8 +85,6 @@ static struct kobj_type kobj_gt_type = { void intel_gt_sysfs_register(struct intel_gt *gt) { - struct kobj_gt *kg; - /* * We need to make things right with the * ABI compatibility. 
The files were originally @@ -98,25 +96,22 @@ void intel_gt_sysfs_register(struct intel_gt *gt) if (gt_is_root(gt)) intel_gt_sysfs_pm_init(gt, gt_get_parent_obj(gt)); - kg = kzalloc(sizeof(*kg), GFP_KERNEL); - if (!kg) + /* init and xfer ownership to sysfs tree */ + if (kobject_init_and_add(>->sysfs_gt, &kobj_gt_type, + gt->i915->sysfs_gt, "gt%d", gt->info.id)) goto exit_fail; - kobject_init(&kg->base, &kobj_gt_type); - kg->gt = gt; - - /* xfer ownership to sysfs tree */ - if (kobject_add(&kg->base, gt->i915->sysfs_gt, "gt%d", gt->info.id)) - goto exit_kobj_put; - - intel_gt_sysfs_pm_init(gt, &kg->base); + intel_gt_sysfs_pm_init(gt, >->sysfs_gt); return; -exit_kobj_put: - kobject_put(&kg->base); - exit_fail: + kobject_put(>->sysfs_gt); drm_warn(>->i915->drm, "failed to initialize gt%d sysfs root\n", gt->info.id); } + +void intel_gt_sysfs_unregister(struct intel_gt *gt) +{ + kobject_put(>->sysfs_gt); +} diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h index 9471b26752cf..a99aa7e8b01a 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h @@ -13,11 +13,6 @@ struct intel_gt; -struct kobj_gt { - struct kobject base; - struct intel_gt *gt; -}; - bool is_object_gt(struct kobject *kobj); struct drm_i915_private *kobj_to_i915(struct kobject *kobj); @@ -28,6 +23,7 @@ intel_gt_create_kobj(struct intel_gt *gt, const char *name); void intel_gt_sysfs_register(struct intel_gt *gt); +void intel_gt_sysfs_unregister(struct intel_gt *gt); struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev, const char *name); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index 097e10291f2d..993f003dad1d 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -225,6 +225,9 @@ struct intel_gt { } mocs; struct intel_pxp pxp; + + /* gt/gtN sysfs */ + struct kobject sysfs_gt; }; enum intel_gt_scratch_field { diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index a4e3b6dbb231..1e2750210831 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -268,4 +268,6 @@ void i915_teardown_sysfs(struct drm_i915_private *dev_priv) device_remove_bin_file(kdev, &dpf_attrs_1); device_remove_bin_file(kdev, &dpf_attrs); + + kobject_put(dev_priv->sysfs_gt); } -- cgit v1.2.3 From 420a07b841d03f6a436d8c06571c69aa5c783897 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Wed, 25 May 2022 11:59:55 +0200 Subject: drm/i915: Individualize fences before adding to dma_resv obj _i915_vma_move_to_active() can receive > 1 fences for multiple batch buffers submission. Because dma_resv_add_fence() can only accept one fence at a time, change _i915_vma_move_to_active() to be aware of multiple fences so that it can add individual fences to the dma resv object. v6: fix multi-line comment. v5: remove double fence reservation for batch VMAs. v4: Reserve fences for composite_fence on multi-batch contexts and also reserve fence slots to composite_fence for each VMAs. v3: dma_resv_reserve_fences is not cumulative so pass num_fences. v2: make sure to reserve enough fence slots before adding. 
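Condensed from the diff below, a sketch (not a drop-in) of the resulting pattern: count the component fences first, reserve exactly that many slots in one call since dma_resv_reserve_fences() is not cumulative, and only then attach each fence, so no allocation happens after the request has been prepared:

	struct dma_fence *curr;
	enum dma_resv_usage usage = DMA_RESV_USAGE_READ;	/* or _WRITE for EXEC_OBJECT_WRITE */
	int idx, err;

	dma_fence_array_for_each(curr, idx, fence)
		;	/* after the loop, idx holds the number of component fences */

	err = dma_resv_reserve_fences(vma->obj->base.resv, idx);
	if (unlikely(err))
		return err;

	dma_fence_array_for_each(curr, idx, fence)
		dma_resv_add_fence(vma->obj->base.resv, curr, usage);
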
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/5614 Fixes: 544460c33821 ("drm/i915: Multi-BB execbuf") Cc: # v5.16+ Signed-off-by: Nirmoy Das Reviewed-by: Matthew Auld Reviewed-by: Andrzej Hajda Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20220525095955.15371-1-nirmoy.das@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 3 +- drivers/gpu/drm/i915/i915_vma.c | 48 +++++++++++++++----------- 2 files changed, 30 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index c326bd2b444f..30fe847c6664 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -999,7 +999,8 @@ static int eb_validate_vmas(struct i915_execbuffer *eb) } } - err = dma_resv_reserve_fences(vma->obj->base.resv, 1); + /* Reserve enough slots to accommodate composite fences */ + err = dma_resv_reserve_fences(vma->obj->base.resv, eb->num_batches); if (err) return err; diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 4f6db539571a..0bffb70b3c5f 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -23,6 +23,7 @@ */ #include +#include #include #include "display/intel_frontbuffer.h" @@ -1823,6 +1824,21 @@ int _i915_vma_move_to_active(struct i915_vma *vma, if (unlikely(err)) return err; + /* + * Reserve fences slot early to prevent an allocation after preparing + * the workload and associating fences with dma_resv. + */ + if (fence && !(flags & __EXEC_OBJECT_NO_RESERVE)) { + struct dma_fence *curr; + int idx; + + dma_fence_array_for_each(curr, idx, fence) + ; + err = dma_resv_reserve_fences(vma->obj->base.resv, idx); + if (unlikely(err)) + return err; + } + if (flags & EXEC_OBJECT_WRITE) { struct intel_frontbuffer *front; @@ -1832,31 +1848,23 @@ int _i915_vma_move_to_active(struct i915_vma *vma, i915_active_add_request(&front->write, rq); intel_frontbuffer_put(front); } + } - if (!(flags & __EXEC_OBJECT_NO_RESERVE)) { - err = dma_resv_reserve_fences(vma->obj->base.resv, 1); - if (unlikely(err)) - return err; - } + if (fence) { + struct dma_fence *curr; + enum dma_resv_usage usage; + int idx; - if (fence) { - dma_resv_add_fence(vma->obj->base.resv, fence, - DMA_RESV_USAGE_WRITE); + obj->read_domains = 0; + if (flags & EXEC_OBJECT_WRITE) { + usage = DMA_RESV_USAGE_WRITE; obj->write_domain = I915_GEM_DOMAIN_RENDER; - obj->read_domains = 0; - } - } else { - if (!(flags & __EXEC_OBJECT_NO_RESERVE)) { - err = dma_resv_reserve_fences(vma->obj->base.resv, 1); - if (unlikely(err)) - return err; + } else { + usage = DMA_RESV_USAGE_READ; } - if (fence) { - dma_resv_add_fence(vma->obj->base.resv, fence, - DMA_RESV_USAGE_READ); - obj->write_domain = 0; - } + dma_fence_array_for_each(curr, idx, fence) + dma_resv_add_fence(vma->obj->base.resv, curr, usage); } if (flags & EXEC_OBJECT_NEEDS_FENCE && vma->fence) -- cgit v1.2.3 From 8b449f1c44d1f921240da6a3e7fc4030966abbff Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 27 May 2022 09:33:47 -0700 Subject: drm/i915/pvc: Extract stepping information from PCI revid For PVC, the base die and compute tile have separate stepping values that we need to track; we'll use the existing graphics_step field to represent the compute tile stepping and add a new 'basedie_step' field. 
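The next paragraph and the diff below describe how those two steppings are encoded in the PCI revision ID (base die in bits 5:3, compute tile in bits 2:0). As a worked illustration of that decode, using a made-up revid value:

#include <stdio.h>

int main(void)
{
	unsigned int revid = 0x2b;			/* hypothetical PCI revid */
	unsigned int bd_subid = (revid >> 3) & 0x7;	/* PVC_BD_REVID, bits 5:3 -> 0x5 */
	unsigned int ct_subid = revid & 0x7;		/* PVC_CT_REVID, bits 2:0 -> 0x3 */

	/* per the tables in the diff: subid 0x5 is base die B3, subid 0x3 is compute tile A0 */
	printf("base die subid 0x%x, compute tile subid 0x%x\n", bd_subid, ct_subid);
	return 0;
}
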
Unlike past platforms, steppings for these components are represented by specific bitfields within the PCI revision ID, and we shouldn't make assumptions about the non-CT, non-BD bits staying 0. Let's update our stepping code accordingly. Bspec: 44484 Signed-off-by: Matt Roper Reviewed-by: Matt Atwood Link: https://patchwork.freedesktop.org/patch/msgid/20220527163348.1936146-2-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 13 ++++++++ drivers/gpu/drm/i915/intel_step.c | 70 ++++++++++++++++++++++++++++++++++++++- drivers/gpu/drm/i915/intel_step.h | 4 ++- 3 files changed, 85 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 74b3caccd839..ec1b3484fdaf 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -943,6 +943,7 @@ static inline struct intel_gt *to_gt(struct drm_i915_private *i915) #define INTEL_DISPLAY_STEP(__i915) (RUNTIME_INFO(__i915)->step.display_step) #define INTEL_GRAPHICS_STEP(__i915) (RUNTIME_INFO(__i915)->step.graphics_step) #define INTEL_MEDIA_STEP(__i915) (RUNTIME_INFO(__i915)->step.media_step) +#define INTEL_BASEDIE_STEP(__i915) (RUNTIME_INFO(__i915)->step.basedie_step) #define IS_DISPLAY_STEP(__i915, since, until) \ (drm_WARN_ON(&(__i915)->drm, INTEL_DISPLAY_STEP(__i915) == STEP_NONE), \ @@ -956,6 +957,10 @@ static inline struct intel_gt *to_gt(struct drm_i915_private *i915) (drm_WARN_ON(&(__i915)->drm, INTEL_MEDIA_STEP(__i915) == STEP_NONE), \ INTEL_MEDIA_STEP(__i915) >= (since) && INTEL_MEDIA_STEP(__i915) < (until)) +#define IS_BASEDIE_STEP(__i915, since, until) \ + (drm_WARN_ON(&(__i915)->drm, INTEL_BASEDIE_STEP(__i915) == STEP_NONE), \ + INTEL_BASEDIE_STEP(__i915) >= (since) && INTEL_BASEDIE_STEP(__i915) < (until)) + static __always_inline unsigned int __platform_mask_index(const struct intel_runtime_info *info, enum intel_platform p) @@ -1208,6 +1213,14 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, (IS_DG2(__i915) && \ IS_DISPLAY_STEP(__i915, since, until)) +#define IS_PVC_BD_STEP(__i915, since, until) \ + (IS_PONTEVECCHIO(__i915) && \ + IS_BASEDIE_STEP(__i915, since, until)) + +#define IS_PVC_CT_STEP(__i915, since, until) \ + (IS_PONTEVECCHIO(__i915) && \ + IS_GRAPHICS_STEP(__i915, since, until)) + #define IS_LP(dev_priv) (INTEL_INFO(dev_priv)->is_lp) #define IS_GEN9_LP(dev_priv) (GRAPHICS_VER(dev_priv) == 9 && IS_LP(dev_priv)) #define IS_GEN9_BC(dev_priv) (GRAPHICS_VER(dev_priv) == 9 && !IS_LP(dev_priv)) diff --git a/drivers/gpu/drm/i915/intel_step.c b/drivers/gpu/drm/i915/intel_step.c index 74e8e4680028..42b3133d8387 100644 --- a/drivers/gpu/drm/i915/intel_step.c +++ b/drivers/gpu/drm/i915/intel_step.c @@ -135,6 +135,8 @@ static const struct intel_step_info adlp_n_revids[] = { [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_D0 }, }; +static void pvc_step_init(struct drm_i915_private *i915, int pci_revid); + void intel_step_init(struct drm_i915_private *i915) { const struct intel_step_info *revids = NULL; @@ -142,7 +144,10 @@ void intel_step_init(struct drm_i915_private *i915) int revid = INTEL_REVID(i915); struct intel_step_info step = {}; - if (IS_DG2_G10(i915)) { + if (IS_PONTEVECCHIO(i915)) { + pvc_step_init(i915, revid); + return; + } else if (IS_DG2_G10(i915)) { revids = dg2_g10_revid_step_tbl; size = ARRAY_SIZE(dg2_g10_revid_step_tbl); } else if (IS_DG2_G11(i915)) { @@ -235,6 +240,69 @@ void intel_step_init(struct drm_i915_private *i915) RUNTIME_INFO(i915)->step = step; } +#define PVC_BD_REVID GENMASK(5, 3) +#define PVC_CT_REVID 
GENMASK(2, 0) + +static const int pvc_bd_subids[] = { + [0x0] = STEP_A0, + [0x3] = STEP_B0, + [0x4] = STEP_B1, + [0x5] = STEP_B3, +}; + +static const int pvc_ct_subids[] = { + [0x3] = STEP_A0, + [0x5] = STEP_B0, + [0x6] = STEP_B1, + [0x7] = STEP_C0, +}; + +static int +pvc_step_lookup(struct drm_i915_private *i915, const char *type, + const int *table, int size, int subid) +{ + if (subid < size && table[subid] != STEP_NONE) + return table[subid]; + + drm_warn(&i915->drm, "Unknown %s id 0x%02x\n", type, subid); + + /* + * As on other platforms, try to use the next higher ID if we land on a + * gap in the table. + */ + while (subid < size && table[subid] == STEP_NONE) + subid++; + + if (subid < size) { + drm_dbg(&i915->drm, "Using steppings for %s id 0x%02x\n", + type, subid); + return table[subid]; + } + + drm_dbg(&i915->drm, "Using future steppings\n"); + return STEP_FUTURE; +} + +/* + * PVC needs special handling since we don't lookup the + * revid in a table, but rather specific bitfields within + * the revid for various components. + */ +static void pvc_step_init(struct drm_i915_private *i915, int pci_revid) +{ + int ct_subid, bd_subid; + + bd_subid = FIELD_GET(PVC_BD_REVID, pci_revid); + ct_subid = FIELD_GET(PVC_CT_REVID, pci_revid); + + RUNTIME_INFO(i915)->step.basedie_step = + pvc_step_lookup(i915, "Base Die", pvc_bd_subids, + ARRAY_SIZE(pvc_bd_subids), bd_subid); + RUNTIME_INFO(i915)->step.graphics_step = + pvc_step_lookup(i915, "Compute Tile", pvc_ct_subids, + ARRAY_SIZE(pvc_ct_subids), ct_subid); +} + #define STEP_NAME_CASE(name) \ case STEP_##name: \ return #name; diff --git a/drivers/gpu/drm/i915/intel_step.h b/drivers/gpu/drm/i915/intel_step.h index d71a99bd5179..a6b12bfa9744 100644 --- a/drivers/gpu/drm/i915/intel_step.h +++ b/drivers/gpu/drm/i915/intel_step.h @@ -11,9 +11,10 @@ struct drm_i915_private; struct intel_step_info { - u8 graphics_step; + u8 graphics_step; /* Represents the compute tile on Xe_HPC */ u8 display_step; u8 media_step; + u8 basedie_step; }; #define STEP_ENUM_VAL(name) STEP_##name, @@ -25,6 +26,7 @@ struct intel_step_info { func(B0) \ func(B1) \ func(B2) \ + func(B3) \ func(C0) \ func(C1) \ func(D0) \ -- cgit v1.2.3 From ce581ae142e24deb27905840fb720fee9b38d7a4 Mon Sep 17 00:00:00 2001 From: Stuart Summers Date: Fri, 27 May 2022 09:33:48 -0700 Subject: drm/i915/pvc: Add initial PVC workarounds Bspec: 64027 Signed-off-by: Stuart Summers Signed-off-by: Matt Roper Reviewed-by: Matt Atwood Link: https://patchwork.freedesktop.org/patch/msgid/20220527163348.1936146-3-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_regs.h | 5 ++- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 3 +- drivers/gpu/drm/i915/gt/intel_workarounds.c | 61 ++++++++++++++++++++++++----- drivers/gpu/drm/i915/intel_pm.c | 16 +++++++- 4 files changed, 73 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_regs.h b/drivers/gpu/drm/i915/gt/intel_engine_regs.h index 75a0c55c5aa5..44de10cf7837 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_regs.h @@ -196,6 +196,7 @@ #define RING_CTX_TIMESTAMP(base) _MMIO((base) + 0x3a8) /* gen8+ */ #define RING_PREDICATE_RESULT(base) _MMIO((base) + 0x3b8) #define RING_FORCE_TO_NONPRIV(base, i) _MMIO(((base) + 0x4D0) + (i) * 4) +#define RING_FORCE_TO_NONPRIV_DENY REG_BIT(30) #define RING_FORCE_TO_NONPRIV_ADDRESS_MASK REG_GENMASK(25, 2) #define RING_FORCE_TO_NONPRIV_ACCESS_RW (0 << 28) /* CFL+ & Gen11+ */ #define RING_FORCE_TO_NONPRIV_ACCESS_RD (1 << 28) @@ -208,7 +209,9 
@@ #define RING_FORCE_TO_NONPRIV_RANGE_64 (3 << 0) #define RING_FORCE_TO_NONPRIV_RANGE_MASK (3 << 0) #define RING_FORCE_TO_NONPRIV_MASK_VALID \ - (RING_FORCE_TO_NONPRIV_RANGE_MASK | RING_FORCE_TO_NONPRIV_ACCESS_MASK) + (RING_FORCE_TO_NONPRIV_RANGE_MASK | \ + RING_FORCE_TO_NONPRIV_ACCESS_MASK | \ + RING_FORCE_TO_NONPRIV_DENY) #define RING_MAX_NONPRIV_SLOTS 12 #define RING_EXECLIST_SQ_CONTENTS(base) _MMIO((base) + 0x510) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index b4642dcc192f..58e9b464d564 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -1070,8 +1070,9 @@ #define GEN10_CACHE_MODE_SS _MMIO(0xe420) #define ENABLE_EU_COUNT_FOR_TDL_FLUSH REG_BIT(10) -#define ENABLE_PREFETCH_INTO_IC REG_BIT(3) +#define DISABLE_ECC REG_BIT(5) #define FLOAT_BLEND_OPTIMIZATION_ENABLE REG_BIT(4) +#define ENABLE_PREFETCH_INTO_IC REG_BIT(3) #define EU_PERF_CNTL0 _MMIO(0xe458) #define EU_PERF_CNTL4 _MMIO(0xe45c) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 73b59ea6fd3b..a604bc7c0701 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -776,7 +776,9 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, if (engine->class != RENDER_CLASS) goto done; - if (IS_DG2(i915)) + if (IS_PONTEVECCHIO(i915)) + ; /* noop; none at this time */ + else if (IS_DG2(i915)) dg2_ctx_workarounds_init(engine, wal); else if (IS_XEHPSDV(i915)) ; /* noop; none at this time */ @@ -1494,7 +1496,9 @@ gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal) { struct drm_i915_private *i915 = gt->i915; - if (IS_DG2(i915)) + if (IS_PONTEVECCHIO(i915)) + ; /* none yet */ + else if (IS_DG2(i915)) dg2_gt_workarounds_init(gt, wal); else if (IS_XEHPSDV(i915)) xehpsdv_gt_workarounds_init(gt, wal); @@ -1924,6 +1928,32 @@ static void dg2_whitelist_build(struct intel_engine_cs *engine) } } +static void blacklist_trtt(struct intel_engine_cs *engine) +{ + struct i915_wa_list *w = &engine->whitelist; + + /* + * Prevent read/write access to [0x4400, 0x4600) which covers + * the TRTT range across all engines. Note that normally userspace + * cannot access the other engines' trtt control, but for simplicity + * we cover the entire range on each engine. 
+ */ + whitelist_reg_ext(w, _MMIO(0x4400), + RING_FORCE_TO_NONPRIV_DENY | + RING_FORCE_TO_NONPRIV_RANGE_64); + whitelist_reg_ext(w, _MMIO(0x4500), + RING_FORCE_TO_NONPRIV_DENY | + RING_FORCE_TO_NONPRIV_RANGE_64); +} + +static void pvc_whitelist_build(struct intel_engine_cs *engine) +{ + allow_read_ctx_timestamp(engine); + + /* Wa_16014440446:pvc */ + blacklist_trtt(engine); +} + void intel_engine_init_whitelist(struct intel_engine_cs *engine) { struct drm_i915_private *i915 = engine->i915; @@ -1931,7 +1961,9 @@ void intel_engine_init_whitelist(struct intel_engine_cs *engine) wa_init_start(w, "whitelist", engine->name); - if (IS_DG2(i915)) + if (IS_PONTEVECCHIO(i915)) + pvc_whitelist_build(engine); + else if (IS_DG2(i915)) dg2_whitelist_build(engine); else if (IS_XEHPSDV(i915)) xehpsdv_whitelist_build(engine); @@ -2041,9 +2073,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) struct drm_i915_private *i915 = engine->i915; if (IS_DG2(i915)) { - /* Wa_14015227452:dg2 */ - wa_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE); - /* Wa_1509235366:dg2 */ wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS | GLOBAL_INVALIDATION_MODE); @@ -2611,6 +2640,15 @@ xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) } } +static void +ccs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) +{ + if (IS_PVC_CT_STEP(engine->i915, STEP_A0, STEP_C0)) { + /* Wa_14014999345:pvc */ + wa_masked_en(wal, GEN10_CACHE_MODE_SS, DISABLE_ECC); + } +} + /* * The workarounds in this function apply to shared registers in * the general render reset domain that aren't tied to a @@ -2657,8 +2695,11 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li GLOBAL_INVALIDATION_MODE); } - if (IS_DG2(i915)) { - /* Wa_22014226127:dg2 */ + if (IS_DG2(i915) || IS_PONTEVECCHIO(i915)) { + /* Wa_14015227452:dg2,pvc */ + wa_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE); + + /* Wa_22014226127:dg2,pvc */ wa_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE); } } @@ -2679,7 +2720,9 @@ engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) general_render_compute_wa_init(engine, wal); - if (engine->class == RENDER_CLASS) + if (engine->class == COMPUTE_CLASS) + ccs_engine_wa_init(engine, wal); + else if (engine->class == RENDER_CLASS) rcs_engine_wa_init(engine, wal); else xcs_engine_wa_init(engine, wal); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 42db41c8e3b3..7a3f023d39e9 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7526,6 +7526,17 @@ static void dg2_init_clock_gating(struct drm_i915_private *i915) SGR_DIS | SGGI_DIS); } +static void pvc_init_clock_gating(struct drm_i915_private *dev_priv) +{ + /* Wa_14012385139:pvc */ + if (IS_PVC_BD_STEP(dev_priv, STEP_A0, STEP_B0)) + intel_uncore_rmw(&dev_priv->uncore, XEHP_CLOCK_GATE_DIS, 0, SGR_DIS); + + /* Wa_22010954014:pvc */ + if (IS_PVC_BD_STEP(dev_priv, STEP_A0, STEP_B0)) + intel_uncore_rmw(&dev_priv->uncore, XEHP_CLOCK_GATE_DIS, 0, SGSI_SIDECLK_DIS); +} + static void cnp_init_clock_gating(struct drm_i915_private *dev_priv) { if (!HAS_PCH_CNP(dev_priv)) @@ -7942,6 +7953,7 @@ static const struct drm_i915_clock_gating_funcs platform##_clock_gating_funcs = .init_clock_gating = platform##_init_clock_gating, \ } +CG_FUNCS(pvc); CG_FUNCS(dg2); CG_FUNCS(xehpsdv); CG_FUNCS(adlp); @@ -7980,7 +7992,9 @@ 
CG_FUNCS(nop); */ void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv) { - if (IS_DG2(dev_priv)) + if (IS_PONTEVECCHIO(dev_priv)) + dev_priv->clock_gating_funcs = &pvc_clock_gating_funcs; + else if (IS_DG2(dev_priv)) dev_priv->clock_gating_funcs = &dg2_clock_gating_funcs; else if (IS_XEHPSDV(dev_priv)) dev_priv->clock_gating_funcs = &xehpsdv_clock_gating_funcs; -- cgit v1.2.3 From 935a3c66eb9b66426d4be9b54666c77dbe7c32eb Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 1 Jun 2022 08:07:20 -0700 Subject: drm/i915/xehp: Use separate sseu init function Xe_HP has enough fundamental differences from previous platforms that it makes sense to use a separate SSEU init function to keep things straightforward and easy to understand. We'll also add a has_xehp_dss flag to the SSEU structure that will be used by other upcoming changes. v2: - Add has_xehp_dss flag Signed-off-by: Matt Roper Acked-by: Tvrtko Ursulin Reviewed-by: Balasubramani Vivekanandan Link: https://patchwork.freedesktop.org/patch/msgid/20220601150725.521468-2-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_sseu.c | 86 ++++++++++++++++++++---------------- drivers/gpu/drm/i915/gt/intel_sseu.h | 5 +++ 2 files changed, 54 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index fdd25691beda..b5fd479a7b85 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -169,13 +169,43 @@ static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, u8 s_en, sseu->eu_total = compute_eu_total(sseu); } -static void gen12_sseu_info_init(struct intel_gt *gt) +static void xehp_sseu_info_init(struct intel_gt *gt) { struct sseu_dev_info *sseu = >->info.sseu; struct intel_uncore *uncore = gt->uncore; u32 g_dss_en, c_dss_en = 0; u16 eu_en = 0; u8 eu_en_fuse; + int eu; + + /* + * The concept of slice has been removed in Xe_HP. To be compatible + * with prior generations, assume a single slice across the entire + * device. Then calculate out the DSS for each workload type within + * that software slice. + */ + intel_sseu_set_info(sseu, 1, 32, 16); + sseu->has_xehp_dss = 1; + + g_dss_en = intel_uncore_read(uncore, GEN12_GT_GEOMETRY_DSS_ENABLE); + c_dss_en = intel_uncore_read(uncore, GEN12_GT_COMPUTE_DSS_ENABLE); + + eu_en_fuse = intel_uncore_read(uncore, XEHP_EU_ENABLE) & XEHP_EU_ENA_MASK; + + for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++) + if (eu_en_fuse & BIT(eu)) + eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1); + + gen11_compute_sseu_info(sseu, 0x1, g_dss_en, c_dss_en, eu_en); +} + +static void gen12_sseu_info_init(struct intel_gt *gt) +{ + struct sseu_dev_info *sseu = >->info.sseu; + struct intel_uncore *uncore = gt->uncore; + u32 g_dss_en; + u16 eu_en = 0; + u8 eu_en_fuse; u8 s_en; int eu; @@ -183,43 +213,23 @@ static void gen12_sseu_info_init(struct intel_gt *gt) * Gen12 has Dual-Subslices, which behave similarly to 2 gen11 SS. * Instead of splitting these, provide userspace with an array * of DSS to more closely represent the hardware resource. - * - * In addition, the concept of slice has been removed in Xe_HP. - * To be compatible with prior generations, assume a single slice - * across the entire device. Then calculate out the DSS for each - * workload type within that software slice. 
*/ - if (IS_DG2(gt->i915) || IS_XEHPSDV(gt->i915)) - intel_sseu_set_info(sseu, 1, 32, 16); - else - intel_sseu_set_info(sseu, 1, 6, 16); + intel_sseu_set_info(sseu, 1, 6, 16); - /* - * As mentioned above, Xe_HP does not have the concept of a slice. - * Enable one for software backwards compatibility. - */ - if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50)) - s_en = 0x1; - else - s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) & - GEN11_GT_S_ENA_MASK; + s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) & + GEN11_GT_S_ENA_MASK; g_dss_en = intel_uncore_read(uncore, GEN12_GT_GEOMETRY_DSS_ENABLE); - if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50)) - c_dss_en = intel_uncore_read(uncore, GEN12_GT_COMPUTE_DSS_ENABLE); /* one bit per pair of EUs */ - if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50)) - eu_en_fuse = intel_uncore_read(uncore, XEHP_EU_ENABLE) & XEHP_EU_ENA_MASK; - else - eu_en_fuse = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) & - GEN11_EU_DIS_MASK); + eu_en_fuse = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) & + GEN11_EU_DIS_MASK); for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++) if (eu_en_fuse & BIT(eu)) eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1); - gen11_compute_sseu_info(sseu, s_en, g_dss_en, c_dss_en, eu_en); + gen11_compute_sseu_info(sseu, s_en, g_dss_en, 0, eu_en); /* TGL only supports slice-level power gating */ sseu->has_slice_pg = 1; @@ -574,18 +584,20 @@ void intel_sseu_info_init(struct intel_gt *gt) { struct drm_i915_private *i915 = gt->i915; - if (IS_HASWELL(i915)) - hsw_sseu_info_init(gt); - else if (IS_CHERRYVIEW(i915)) - cherryview_sseu_info_init(gt); - else if (IS_BROADWELL(i915)) - bdw_sseu_info_init(gt); - else if (GRAPHICS_VER(i915) == 9) - gen9_sseu_info_init(gt); - else if (GRAPHICS_VER(i915) == 11) - gen11_sseu_info_init(gt); + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) + xehp_sseu_info_init(gt); else if (GRAPHICS_VER(i915) >= 12) gen12_sseu_info_init(gt); + else if (GRAPHICS_VER(i915) >= 11) + gen11_sseu_info_init(gt); + else if (GRAPHICS_VER(i915) >= 9) + gen9_sseu_info_init(gt); + else if (IS_BROADWELL(i915)) + bdw_sseu_info_init(gt); + else if (IS_CHERRYVIEW(i915)) + cherryview_sseu_info_init(gt); + else if (IS_HASWELL(i915)) + hsw_sseu_info_init(gt); } u32 intel_sseu_make_rpcs(struct intel_gt *gt, diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h index 5c078df4729c..4a041f9dc490 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.h +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h @@ -66,6 +66,11 @@ struct sseu_dev_info { u8 has_slice_pg:1; u8 has_subslice_pg:1; u8 has_eu_pg:1; + /* + * For Xe_HP and beyond, the hardware no longer has traditional slices + * so we just report the entire DSS pool under a fake "slice 0." + */ + u8 has_xehp_dss:1; /* Topology fields */ u8 max_slices; -- cgit v1.2.3 From aa2bdc4843f5871e6c68fbec5e10c0dbaf10ff91 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 1 Jun 2022 08:07:21 -0700 Subject: drm/i915/xehp: Drop GETPARAM lookups of I915_PARAM_[SUB]SLICE_MASK Slice/subslice/EU information should be obtained via the topology queries provided by the I915_QUERY interface; let's turn off support for the old GETPARAM lookups on Xe_HP and beyond where we can't return meaningful values. The slice mask lookup is meaningless since Xe_HP doesn't support traditional slices (and we make no attempt to return the various new units like gslices, cslices, mslices, etc.) here. 
The subslice mask lookup is even more problematic; given the distinct masks for geometry vs compute purposes, the combined mask returned here is likely not what userspace would want to act upon anyway. The value is also limited to 32-bits by the nature of the GETPARAM ioctl which is sufficient for the initial Xe_HP platforms, but is unable to convey the larger masks that will be needed on other upcoming platforms. Finally, the value returned here becomes even less meaningful when used on multi-tile platforms where each tile will have its own masks. Signed-off-by: Matt Roper Acked-by: Tvrtko Ursulin Acked-by: Lionel Landwerlin # mesa Reviewed-by: Balasubramani Vivekanandan Link: https://patchwork.freedesktop.org/patch/msgid/20220601150725.521468-3-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/i915_getparam.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c index c12a0adefda5..ac9767c56619 100644 --- a/drivers/gpu/drm/i915/i915_getparam.c +++ b/drivers/gpu/drm/i915/i915_getparam.c @@ -148,11 +148,19 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data, value = intel_engines_has_context_isolation(i915); break; case I915_PARAM_SLICE_MASK: + /* Not supported from Xe_HP onward; use topology queries */ + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) + return -EINVAL; + value = sseu->slice_mask; if (!value) return -ENODEV; break; case I915_PARAM_SUBSLICE_MASK: + /* Not supported from Xe_HP onward; use topology queries */ + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) + return -EINVAL; + /* Only copy bits from the first slice */ memcpy(&value, sseu->subslice_mask, min(sseu->ss_stride, (u8)sizeof(value))); -- cgit v1.2.3 From 4cfd16659641067f618cdd3589eda42eb2943399 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 1 Jun 2022 08:07:22 -0700 Subject: drm/i915/sseu: Simplify gen11+ SSEU handling Although gen11 and gen12 architectures supported the concept of multiple slices, in practice all the platforms that were actually designed only had a single slice (i.e., note the parameters to 'intel_sseu_set_info' that we pass for each platform). We can simplify the code slightly by dropping the multi-slice logic from gen11+ platforms. v2: - Promote drm_dbg to drm_WARN_ON if the slice fuse register reports unexpected fusing. 
(Tvrtko) Cc: Tvrtko Ursulin Signed-off-by: Matt Roper Acked-by: Tvrtko Ursulin Reviewed-by: Balasubramani Vivekanandan Link: https://patchwork.freedesktop.org/patch/msgid/20220601150725.521468-4-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_sseu.c | 76 +++++++++++++++++------------------- 1 file changed, 36 insertions(+), 40 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index b5fd479a7b85..7e5222ad2f98 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -119,52 +119,37 @@ static u16 compute_eu_total(const struct sseu_dev_info *sseu) return total; } -static u32 get_ss_stride_mask(struct sseu_dev_info *sseu, u8 s, u32 ss_en) -{ - u32 ss_mask; - - ss_mask = ss_en >> (s * sseu->max_subslices); - ss_mask &= GENMASK(sseu->max_subslices - 1, 0); - - return ss_mask; -} - -static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, u8 s_en, +static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, u32 g_ss_en, u32 c_ss_en, u16 eu_en) { - int s, ss; + u32 valid_ss_mask = GENMASK(sseu->max_subslices - 1, 0); + int ss; /* g_ss_en/c_ss_en represent entire subslice mask across all slices */ GEM_BUG_ON(sseu->max_slices * sseu->max_subslices > sizeof(g_ss_en) * BITS_PER_BYTE); - for (s = 0; s < sseu->max_slices; s++) { - if ((s_en & BIT(s)) == 0) - continue; + sseu->slice_mask |= BIT(0); - sseu->slice_mask |= BIT(s); - - /* - * XeHP introduces the concept of compute vs geometry DSS. To - * reduce variation between GENs around subslice usage, store a - * mask for both the geometry and compute enabled masks since - * userspace will need to be able to query these masks - * independently. Also compute a total enabled subslice count - * for the purposes of selecting subslices to use in a - * particular GEM context. - */ - intel_sseu_set_subslices(sseu, s, sseu->compute_subslice_mask, - get_ss_stride_mask(sseu, s, c_ss_en)); - intel_sseu_set_subslices(sseu, s, sseu->geometry_subslice_mask, - get_ss_stride_mask(sseu, s, g_ss_en)); - intel_sseu_set_subslices(sseu, s, sseu->subslice_mask, - get_ss_stride_mask(sseu, s, - g_ss_en | c_ss_en)); + /* + * XeHP introduces the concept of compute vs geometry DSS. To reduce + * variation between GENs around subslice usage, store a mask for both + * the geometry and compute enabled masks since userspace will need to + * be able to query these masks independently. Also compute a total + * enabled subslice count for the purposes of selecting subslices to + * use in a particular GEM context. 
+ */ + intel_sseu_set_subslices(sseu, 0, sseu->compute_subslice_mask, + c_ss_en & valid_ss_mask); + intel_sseu_set_subslices(sseu, 0, sseu->geometry_subslice_mask, + g_ss_en & valid_ss_mask); + intel_sseu_set_subslices(sseu, 0, sseu->subslice_mask, + (g_ss_en | c_ss_en) & valid_ss_mask); + + for (ss = 0; ss < sseu->max_subslices; ss++) + if (intel_sseu_has_subslice(sseu, 0, ss)) + sseu_set_eus(sseu, 0, ss, eu_en); - for (ss = 0; ss < sseu->max_subslices; ss++) - if (intel_sseu_has_subslice(sseu, s, ss)) - sseu_set_eus(sseu, s, ss, eu_en); - } sseu->eu_per_subslice = hweight16(eu_en); sseu->eu_total = compute_eu_total(sseu); } @@ -196,7 +181,7 @@ static void xehp_sseu_info_init(struct intel_gt *gt) if (eu_en_fuse & BIT(eu)) eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1); - gen11_compute_sseu_info(sseu, 0x1, g_dss_en, c_dss_en, eu_en); + gen11_compute_sseu_info(sseu, g_dss_en, c_dss_en, eu_en); } static void gen12_sseu_info_init(struct intel_gt *gt) @@ -216,8 +201,13 @@ static void gen12_sseu_info_init(struct intel_gt *gt) */ intel_sseu_set_info(sseu, 1, 6, 16); + /* + * Although gen12 architecture supported multiple slices, TGL, RKL, + * DG1, and ADL only had a single slice. + */ s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) & GEN11_GT_S_ENA_MASK; + drm_WARN_ON(>->i915->drm, s_en != 0x1); g_dss_en = intel_uncore_read(uncore, GEN12_GT_GEOMETRY_DSS_ENABLE); @@ -229,7 +219,7 @@ static void gen12_sseu_info_init(struct intel_gt *gt) if (eu_en_fuse & BIT(eu)) eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1); - gen11_compute_sseu_info(sseu, s_en, g_dss_en, 0, eu_en); + gen11_compute_sseu_info(sseu, g_dss_en, 0, eu_en); /* TGL only supports slice-level power gating */ sseu->has_slice_pg = 1; @@ -248,14 +238,20 @@ static void gen11_sseu_info_init(struct intel_gt *gt) else intel_sseu_set_info(sseu, 1, 8, 8); + /* + * Although gen11 architecture supported multiple slices, ICL and + * EHL/JSL only had a single slice in practice. + */ s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) & GEN11_GT_S_ENA_MASK; + drm_WARN_ON(>->i915->drm, s_en != 0x1); + ss_en = ~intel_uncore_read(uncore, GEN11_GT_SUBSLICE_DISABLE); eu_en = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) & GEN11_EU_DIS_MASK); - gen11_compute_sseu_info(sseu, s_en, ss_en, 0, eu_en); + gen11_compute_sseu_info(sseu, ss_en, 0, eu_en); /* ICL has no power gating restrictions. */ sseu->has_slice_pg = 1; -- cgit v1.2.3 From bc3c5e0809ae9faa039baf75547e8ee46ec124ef Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 1 Jun 2022 08:07:23 -0700 Subject: drm/i915/sseu: Don't try to store EU mask internally in UAPI format Storing the EU mask internally in the same format the I915_QUERY topology queries use makes the final copy_to_user() a bit simpler, but makes the rest of the driver's SSEU more complicated and harder to follow. Let's switch to an internal representation that's more natural: Xe_HP platforms will be a simple array of u16 masks, whereas pre-Xe_HP platforms will be a two-dimensional array, indexed by [slice][subslice]. We'll convert to the uapi format only when the query uapi is called. v2: - Drop has_common_ss_eumask. We waste some space repeating identical EU masks for every single DSS, but the code is simpler without it. (Tvrtko) v3: - Mask down EUs passed to sseu_set_eus at the callsite rather than inside the function. (Tvrtko) - Eliminate sseu->eu_stride and calculate it when needed. 
(Tvrtko) Cc: Tvrtko Ursulin Signed-off-by: Matt Roper Acked-by: Tvrtko Ursulin Reviewed-by: Balasubramani Vivekanandan Link: https://patchwork.freedesktop.org/patch/msgid/20220601150725.521468-5-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_sseu.c | 88 ++++++++++++++++++++++++------------ drivers/gpu/drm/i915/gt/intel_sseu.h | 10 +++- drivers/gpu/drm/i915/i915_query.c | 13 +++--- 3 files changed, 73 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index 7e5222ad2f98..285cfd758bdc 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -19,8 +19,6 @@ void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices, sseu->ss_stride = GEN_SSEU_STRIDE(sseu->max_subslices); GEM_BUG_ON(sseu->ss_stride > GEN_MAX_SUBSLICE_STRIDE); - sseu->eu_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice); - GEM_BUG_ON(sseu->eu_stride > GEN_MAX_EU_STRIDE); } unsigned int @@ -78,47 +76,77 @@ intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice) return hweight32(intel_sseu_get_subslices(sseu, slice)); } -static int sseu_eu_idx(const struct sseu_dev_info *sseu, int slice, - int subslice) -{ - int slice_stride = sseu->max_subslices * sseu->eu_stride; - - return slice * slice_stride + subslice * sseu->eu_stride; -} - static u16 sseu_get_eus(const struct sseu_dev_info *sseu, int slice, int subslice) { - int i, offset = sseu_eu_idx(sseu, slice, subslice); - u16 eu_mask = 0; - - for (i = 0; i < sseu->eu_stride; i++) - eu_mask |= - ((u16)sseu->eu_mask[offset + i]) << (i * BITS_PER_BYTE); - - return eu_mask; + if (sseu->has_xehp_dss) { + WARN_ON(slice > 0); + return sseu->eu_mask.xehp[subslice]; + } else { + return sseu->eu_mask.hsw[slice][subslice]; + } } static void sseu_set_eus(struct sseu_dev_info *sseu, int slice, int subslice, u16 eu_mask) { - int i, offset = sseu_eu_idx(sseu, slice, subslice); - - for (i = 0; i < sseu->eu_stride; i++) - sseu->eu_mask[offset + i] = - (eu_mask >> (BITS_PER_BYTE * i)) & 0xff; + GEM_WARN_ON(eu_mask && __fls(eu_mask) >= sseu->max_eus_per_subslice); + if (sseu->has_xehp_dss) { + GEM_WARN_ON(slice > 0); + sseu->eu_mask.xehp[subslice] = eu_mask; + } else { + sseu->eu_mask.hsw[slice][subslice] = eu_mask; + } } static u16 compute_eu_total(const struct sseu_dev_info *sseu) { - u16 i, total = 0; + int s, ss, total = 0; - for (i = 0; i < ARRAY_SIZE(sseu->eu_mask); i++) - total += hweight8(sseu->eu_mask[i]); + for (s = 0; s < sseu->max_slices; s++) + for (ss = 0; ss < sseu->max_subslices; ss++) + if (sseu->has_xehp_dss) + total += hweight16(sseu->eu_mask.xehp[ss]); + else + total += hweight16(sseu->eu_mask.hsw[s][ss]); return total; } +/** + * intel_sseu_copy_eumask_to_user - Copy EU mask into a userspace buffer + * @to: Pointer to userspace buffer to copy to + * @sseu: SSEU structure containing EU mask to copy + * + * Copies the EU mask to a userspace buffer in the format expected by + * the query ioctl's topology queries. + * + * Returns the result of the copy_to_user() operation. 
+ */ +int intel_sseu_copy_eumask_to_user(void __user *to, + const struct sseu_dev_info *sseu) +{ + u8 eu_mask[GEN_SS_MASK_SIZE * GEN_MAX_EU_STRIDE] = {}; + int eu_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice); + int len = sseu->max_slices * sseu->max_subslices * eu_stride; + int s, ss, i; + + for (s = 0; s < sseu->max_slices; s++) { + for (ss = 0; ss < sseu->max_subslices; ss++) { + int uapi_offset = + s * sseu->max_subslices * eu_stride + + ss * eu_stride; + u16 mask = sseu_get_eus(sseu, s, ss); + + for (i = 0; i < eu_stride; i++) + eu_mask[uapi_offset + i] = + (mask >> (BITS_PER_BYTE * i)) & 0xff; + } + } + + return copy_to_user(to, eu_mask, len); +} + static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, u32 g_ss_en, u32 c_ss_en, u16 eu_en) { @@ -278,7 +306,7 @@ static void cherryview_sseu_info_init(struct intel_gt *gt) CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4); subslice_mask |= BIT(0); - sseu_set_eus(sseu, 0, 0, ~disabled_mask); + sseu_set_eus(sseu, 0, 0, ~disabled_mask & 0xFF); } if (!(fuse & CHV_FGT_DISABLE_SS1)) { @@ -289,7 +317,7 @@ static void cherryview_sseu_info_init(struct intel_gt *gt) CHV_FGT_EU_DIS_SS1_R1_SHIFT) << 4); subslice_mask |= BIT(1); - sseu_set_eus(sseu, 0, 1, ~disabled_mask); + sseu_set_eus(sseu, 0, 1, ~disabled_mask & 0xFF); } intel_sseu_set_subslices(sseu, 0, sseu->subslice_mask, subslice_mask); @@ -362,7 +390,7 @@ static void gen9_sseu_info_init(struct intel_gt *gt) eu_disabled_mask = (eu_disable >> (ss * 8)) & eu_mask; - sseu_set_eus(sseu, s, ss, ~eu_disabled_mask); + sseu_set_eus(sseu, s, ss, ~eu_disabled_mask & eu_mask); eu_per_ss = sseu->max_eus_per_subslice - hweight8(eu_disabled_mask); @@ -475,7 +503,7 @@ static void bdw_sseu_info_init(struct intel_gt *gt) eu_disabled_mask = eu_disable[s] >> (ss * sseu->max_eus_per_subslice); - sseu_set_eus(sseu, s, ss, ~eu_disabled_mask); + sseu_set_eus(sseu, s, ss, ~eu_disabled_mask & 0xFF); n_disabled = hweight8(eu_disabled_mask); diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h index 4a041f9dc490..ffa375e68959 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.h +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h @@ -57,7 +57,11 @@ struct sseu_dev_info { u8 subslice_mask[GEN_SS_MASK_SIZE]; u8 geometry_subslice_mask[GEN_SS_MASK_SIZE]; u8 compute_subslice_mask[GEN_SS_MASK_SIZE]; - u8 eu_mask[GEN_SS_MASK_SIZE * GEN_MAX_EU_STRIDE]; + union { + u16 hsw[GEN_MAX_HSW_SLICES][GEN_MAX_SS_PER_HSW_SLICE]; + u16 xehp[GEN_MAX_DSS]; + } eu_mask; + u16 eu_total; u8 eu_per_subslice; u8 min_eu_in_pool; @@ -78,7 +82,6 @@ struct sseu_dev_info { u8 max_eus_per_subslice; u8 ss_stride; - u8 eu_stride; }; /* @@ -150,4 +153,7 @@ void intel_sseu_print_topology(struct drm_i915_private *i915, u16 intel_slicemask_from_dssmask(u64 dss_mask, int dss_per_slice); +int intel_sseu_copy_eumask_to_user(void __user *to, + const struct sseu_dev_info *sseu); + #endif /* __INTEL_SSEU_H__ */ diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c index 7584cec53d5d..89c475d525b8 100644 --- a/drivers/gpu/drm/i915/i915_query.c +++ b/drivers/gpu/drm/i915/i915_query.c @@ -35,6 +35,7 @@ static int fill_topology_info(const struct sseu_dev_info *sseu, { struct drm_i915_query_topology_info topo; u32 slice_length, subslice_length, eu_length, total_length; + int eu_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice); int ret; BUILD_BUG_ON(sizeof(u8) != sizeof(sseu->slice_mask)); @@ -44,7 +45,7 @@ static int fill_topology_info(const struct sseu_dev_info *sseu, slice_length = sizeof(sseu->slice_mask); 
subslice_length = sseu->max_slices * sseu->ss_stride; - eu_length = sseu->max_slices * sseu->max_subslices * sseu->eu_stride; + eu_length = sseu->max_slices * sseu->max_subslices * eu_stride; total_length = sizeof(topo) + slice_length + subslice_length + eu_length; @@ -61,7 +62,7 @@ static int fill_topology_info(const struct sseu_dev_info *sseu, topo.subslice_offset = slice_length; topo.subslice_stride = sseu->ss_stride; topo.eu_offset = slice_length + subslice_length; - topo.eu_stride = sseu->eu_stride; + topo.eu_stride = eu_stride; if (copy_to_user(u64_to_user_ptr(query_item->data_ptr), &topo, sizeof(topo))) @@ -76,10 +77,10 @@ static int fill_topology_info(const struct sseu_dev_info *sseu, subslice_mask, subslice_length)) return -EFAULT; - if (copy_to_user(u64_to_user_ptr(query_item->data_ptr + - sizeof(topo) + - slice_length + subslice_length), - sseu->eu_mask, eu_length)) + if (intel_sseu_copy_eumask_to_user(u64_to_user_ptr(query_item->data_ptr + + sizeof(topo) + + slice_length + subslice_length), + sseu)) return -EFAULT; return total_length; -- cgit v1.2.3 From b87d39019651c9cae169396cf5ae525393084490 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 1 Jun 2022 08:07:24 -0700 Subject: drm/i915/sseu: Disassociate internal subslice mask representation from uapi As with EU masks, it's easier to store subslice/DSS masks internally in a format that's more natural for the driver to work with, and then only covert into the u8[] uapi form when the query ioctl is invoked. Since the hardware design changed significantly with Xe_HP, we'll use a union to choose between the old "hsw-style" subslice masks or the newer xehp mask. HSW-style masks will be stored in an array of u8's, indexed by slice (there's never more than 6 subslices per slice on older platforms). For Xe_HP and beyond where slices no longer exist, we only need a single bitmask. However we already know that this mask is eventually going to grow too large for a simple u64 to hold, so we'll represent it in a manner that can be operated on by the utilities in linux/bitmap.h. v2: - Fix typo: BIT(s) -> BIT(ss) in gen9_sseu_device_status() v3: - Eliminate sseu->ss_stride and just calculate the stride while specifically handling uapi. (Tvrtko) - Use BITMAP_BITS() macro to refer to size of masks rather than passing I915_MAX_SS_FUSE_BITS directly. (Tvrtko) - Report compute/geometry DSS masks separately when dumping Xe_HP SSEU info. (Tvrtko) - Restore dropped range checks to intel_sseu_has_subslice(). (Tvrtko) v4: - Make the bitmap size macro check the size of the .xehp field rather than the containing union. (Tvrtko) - Don't add GEM_BUG_ON() intel_sseu_has_subslice()'s check for whether slice or subslice ID exceed sseu->max_[sub]slices; various loops in the driver are expected to exceed these, so we should just silently return 'false.' v5: - Move XEHP_BITMAP_BITS() to the header so that we can also replace a usage of I915_MAX_SS_FUSE_BITS in one of the inline functions. (Bala) - Change the local variable in intel_slicemask_from_xehp_dssmask() from u16 to 'unsigned long' to make it a bit more future-proof. 
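For readers less familiar with the linux/bitmap.h helpers, the idea behind the Xe_HP representation is roughly the following self-contained sketch (the names and the DSS count are illustrative assumptions, not the actual i915 structures):

#include <linux/bitmap.h>
#include <linux/bitops.h>

#define EXAMPLE_MAX_DSS 96	/* assumption: more DSS than fit in a u64 */

struct example_dss_mask {
	unsigned long bits[BITS_TO_LONGS(EXAMPLE_MAX_DSS)];
};

static inline bool example_dss_enabled(const struct example_dss_mask *m, int dss)
{
	return test_bit(dss, m->bits);
}

static inline unsigned int example_dss_total(const struct example_dss_mask *m)
{
	return bitmap_weight(m->bits, EXAMPLE_MAX_DSS);
}

static inline void example_dss_union(struct example_dss_mask *dst,
				     const struct example_dss_mask *a,
				     const struct example_dss_mask *b)
{
	bitmap_or(dst->bits, a->bits, b->bits, EXAMPLE_MAX_DSS);
}

The pre-Xe_HP platforms keep their simple per-slice u8 masks, which is why the intel_sseu_ss_mask_t introduced below is a union of the two forms.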
Cc: Tvrtko Ursulin Cc: Balasubramani Vivekanandan Signed-off-by: Matt Roper Acked-by: Tvrtko Ursulin Reviewed-by: Balasubramani Vivekanandan Link: https://patchwork.freedesktop.org/patch/msgid/20220601150725.521468-6-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 5 +- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 4 +- drivers/gpu/drm/i915/gt/intel_gt.c | 12 +- drivers/gpu/drm/i915/gt/intel_sseu.c | 261 ++++++++++++++++----------- drivers/gpu/drm/i915/gt/intel_sseu.h | 79 +++++--- drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c | 30 +-- drivers/gpu/drm/i915/gt/intel_workarounds.c | 24 +-- drivers/gpu/drm/i915/i915_getparam.c | 3 +- drivers/gpu/drm/i915/i915_query.c | 13 +- 9 files changed, 243 insertions(+), 188 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index ab4c5ab28e4d..a3bb73f5d53b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1875,6 +1875,7 @@ i915_gem_user_to_context_sseu(struct intel_gt *gt, { const struct sseu_dev_info *device = >->info.sseu; struct drm_i915_private *i915 = gt->i915; + unsigned int dev_subslice_mask = intel_sseu_get_hsw_subslices(device, 0); /* No zeros in any field. */ if (!user->slice_mask || !user->subslice_mask || @@ -1901,7 +1902,7 @@ i915_gem_user_to_context_sseu(struct intel_gt *gt, if (user->slice_mask & ~device->slice_mask) return -EINVAL; - if (user->subslice_mask & ~device->subslice_mask[0]) + if (user->subslice_mask & ~dev_subslice_mask) return -EINVAL; if (user->max_eus_per_subslice > device->max_eus_per_subslice) @@ -1915,7 +1916,7 @@ i915_gem_user_to_context_sseu(struct intel_gt *gt, /* Part specific restrictions. */ if (GRAPHICS_VER(i915) == 11) { unsigned int hw_s = hweight8(device->slice_mask); - unsigned int hw_ss_per_s = hweight8(device->subslice_mask[0]); + unsigned int hw_ss_per_s = hweight8(dev_subslice_mask); unsigned int req_s = hweight8(context->slice_mask); unsigned int req_ss = hweight8(context->subslice_mask); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 1adbf34c3632..f0acf8518a51 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -674,8 +674,8 @@ static void engine_mask_apply_compute_fuses(struct intel_gt *gt) if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 50)) return; - ccs_mask = intel_slicemask_from_dssmask(intel_sseu_get_compute_subslices(&info->sseu), - ss_per_ccs); + ccs_mask = intel_slicemask_from_xehp_dssmask(info->sseu.compute_subslice_mask, + ss_per_ccs); /* * If all DSS in a quadrant are fused off, the corresponding CCS * engine is not available for use. 
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 0a3931c011c6..ddfb98f70489 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -133,13 +133,6 @@ static const struct intel_mmio_range dg2_lncf_steering_table[] = { {}, }; -static u16 slicemask(struct intel_gt *gt, int count) -{ - u64 dss_mask = intel_sseu_get_subslices(>->info.sseu, 0); - - return intel_slicemask_from_dssmask(dss_mask, count); -} - int intel_gt_init_mmio(struct intel_gt *gt) { struct drm_i915_private *i915 = gt->i915; @@ -155,9 +148,12 @@ int intel_gt_init_mmio(struct intel_gt *gt) */ if (HAS_MSLICES(i915)) { gt->info.mslice_mask = - slicemask(gt, GEN_DSS_PER_MSLICE) | + intel_slicemask_from_xehp_dssmask(gt->info.sseu.subslice_mask, + GEN_DSS_PER_MSLICE); + gt->info.mslice_mask |= (intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3) & GEN12_MEML3_EN_MASK); + if (!gt->info.mslice_mask) /* should be impossible! */ drm_warn(&i915->drm, "mslice mask all zero!\n"); } diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index 285cfd758bdc..826d11f18817 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -16,9 +16,6 @@ void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices, sseu->max_slices = max_slices; sseu->max_subslices = max_subslices; sseu->max_eus_per_subslice = max_eus_per_subslice; - - sseu->ss_stride = GEN_SSEU_STRIDE(sseu->max_subslices); - GEM_BUG_ON(sseu->ss_stride > GEN_MAX_SUBSLICE_STRIDE); } unsigned int @@ -26,54 +23,24 @@ intel_sseu_subslice_total(const struct sseu_dev_info *sseu) { unsigned int i, total = 0; - for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask); i++) - total += hweight8(sseu->subslice_mask[i]); - - return total; -} - -static u32 -sseu_get_subslices(const struct sseu_dev_info *sseu, - const u8 *subslice_mask, u8 slice) -{ - int i, offset = slice * sseu->ss_stride; - u32 mask = 0; - - GEM_BUG_ON(slice >= sseu->max_slices); - - for (i = 0; i < sseu->ss_stride; i++) - mask |= (u32)subslice_mask[offset + i] << i * BITS_PER_BYTE; - - return mask; -} - -u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice) -{ - return sseu_get_subslices(sseu, sseu->subslice_mask, slice); -} + if (sseu->has_xehp_dss) + return bitmap_weight(sseu->subslice_mask.xehp, + XEHP_BITMAP_BITS(sseu->subslice_mask)); -static u32 sseu_get_geometry_subslices(const struct sseu_dev_info *sseu) -{ - return sseu_get_subslices(sseu, sseu->geometry_subslice_mask, 0); -} + for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask.hsw); i++) + total += hweight8(sseu->subslice_mask.hsw[i]); -u32 intel_sseu_get_compute_subslices(const struct sseu_dev_info *sseu) -{ - return sseu_get_subslices(sseu, sseu->compute_subslice_mask, 0); -} - -void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice, - u8 *subslice_mask, u32 ss_mask) -{ - int offset = slice * sseu->ss_stride; - - memcpy(&subslice_mask[offset], &ss_mask, sseu->ss_stride); + return total; } unsigned int -intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice) +intel_sseu_get_hsw_subslices(const struct sseu_dev_info *sseu, u8 slice) { - return hweight32(intel_sseu_get_subslices(sseu, slice)); + WARN_ON(sseu->has_xehp_dss); + if (WARN_ON(slice >= sseu->max_slices)) + return 0; + + return sseu->subslice_mask.hsw[slice]; } static u16 sseu_get_eus(const struct sseu_dev_info *sseu, int slice, @@ -147,32 +114,66 @@ int intel_sseu_copy_eumask_to_user(void __user *to, return copy_to_user(to, 
eu_mask, len); } +/** + * intel_sseu_copy_ssmask_to_user - Copy subslice mask into a userspace buffer + * @to: Pointer to userspace buffer to copy to + * @sseu: SSEU structure containing subslice mask to copy + * + * Copies the subslice mask to a userspace buffer in the format expected by + * the query ioctl's topology queries. + * + * Returns the result of the copy_to_user() operation. + */ +int intel_sseu_copy_ssmask_to_user(void __user *to, + const struct sseu_dev_info *sseu) +{ + u8 ss_mask[GEN_SS_MASK_SIZE] = {}; + int ss_stride = GEN_SSEU_STRIDE(sseu->max_subslices); + int len = sseu->max_slices * ss_stride; + int s, ss, i; + + for (s = 0; s < sseu->max_slices; s++) { + for (ss = 0; ss < sseu->max_subslices; ss++) { + i = s * ss_stride * BITS_PER_BYTE + ss; + + if (!intel_sseu_has_subslice(sseu, s, ss)) + continue; + + ss_mask[i / BITS_PER_BYTE] |= BIT(i % BITS_PER_BYTE); + } + } + + return copy_to_user(to, ss_mask, len); +} + static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, - u32 g_ss_en, u32 c_ss_en, u16 eu_en) + u32 ss_en, u16 eu_en) { u32 valid_ss_mask = GENMASK(sseu->max_subslices - 1, 0); int ss; - /* g_ss_en/c_ss_en represent entire subslice mask across all slices */ - GEM_BUG_ON(sseu->max_slices * sseu->max_subslices > - sizeof(g_ss_en) * BITS_PER_BYTE); + sseu->slice_mask |= BIT(0); + sseu->subslice_mask.hsw[0] = ss_en & valid_ss_mask; + + for (ss = 0; ss < sseu->max_subslices; ss++) + if (intel_sseu_has_subslice(sseu, 0, ss)) + sseu_set_eus(sseu, 0, ss, eu_en); + + sseu->eu_per_subslice = hweight16(eu_en); + sseu->eu_total = compute_eu_total(sseu); +} + +static void xehp_compute_sseu_info(struct sseu_dev_info *sseu, + u16 eu_en) +{ + int ss; sseu->slice_mask |= BIT(0); - /* - * XeHP introduces the concept of compute vs geometry DSS. To reduce - * variation between GENs around subslice usage, store a mask for both - * the geometry and compute enabled masks since userspace will need to - * be able to query these masks independently. Also compute a total - * enabled subslice count for the purposes of selecting subslices to - * use in a particular GEM context. - */ - intel_sseu_set_subslices(sseu, 0, sseu->compute_subslice_mask, - c_ss_en & valid_ss_mask); - intel_sseu_set_subslices(sseu, 0, sseu->geometry_subslice_mask, - g_ss_en & valid_ss_mask); - intel_sseu_set_subslices(sseu, 0, sseu->subslice_mask, - (g_ss_en | c_ss_en) & valid_ss_mask); + bitmap_or(sseu->subslice_mask.xehp, + sseu->compute_subslice_mask.xehp, + sseu->geometry_subslice_mask.xehp, + XEHP_BITMAP_BITS(sseu->subslice_mask)); for (ss = 0; ss < sseu->max_subslices; ss++) if (intel_sseu_has_subslice(sseu, 0, ss)) @@ -182,11 +183,31 @@ static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, sseu->eu_total = compute_eu_total(sseu); } +static void +xehp_load_dss_mask(struct intel_uncore *uncore, + intel_sseu_ss_mask_t *ssmask, + int numregs, + ...) 
+{ + va_list argp; + u32 fuse_val[I915_MAX_SS_FUSE_REGS] = {}; + int i; + + if (WARN_ON(numregs > I915_MAX_SS_FUSE_REGS)) + numregs = I915_MAX_SS_FUSE_REGS; + + va_start(argp, numregs); + for (i = 0; i < numregs; i++) + fuse_val[i] = intel_uncore_read(uncore, va_arg(argp, i915_reg_t)); + va_end(argp); + + bitmap_from_arr32(ssmask->xehp, fuse_val, numregs * 32); +} + static void xehp_sseu_info_init(struct intel_gt *gt) { struct sseu_dev_info *sseu = >->info.sseu; struct intel_uncore *uncore = gt->uncore; - u32 g_dss_en, c_dss_en = 0; u16 eu_en = 0; u8 eu_en_fuse; int eu; @@ -200,8 +221,10 @@ static void xehp_sseu_info_init(struct intel_gt *gt) intel_sseu_set_info(sseu, 1, 32, 16); sseu->has_xehp_dss = 1; - g_dss_en = intel_uncore_read(uncore, GEN12_GT_GEOMETRY_DSS_ENABLE); - c_dss_en = intel_uncore_read(uncore, GEN12_GT_COMPUTE_DSS_ENABLE); + xehp_load_dss_mask(uncore, &sseu->geometry_subslice_mask, 1, + GEN12_GT_GEOMETRY_DSS_ENABLE); + xehp_load_dss_mask(uncore, &sseu->compute_subslice_mask, 1, + GEN12_GT_COMPUTE_DSS_ENABLE); eu_en_fuse = intel_uncore_read(uncore, XEHP_EU_ENABLE) & XEHP_EU_ENA_MASK; @@ -209,7 +232,7 @@ static void xehp_sseu_info_init(struct intel_gt *gt) if (eu_en_fuse & BIT(eu)) eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1); - gen11_compute_sseu_info(sseu, g_dss_en, c_dss_en, eu_en); + xehp_compute_sseu_info(sseu, eu_en); } static void gen12_sseu_info_init(struct intel_gt *gt) @@ -247,7 +270,7 @@ static void gen12_sseu_info_init(struct intel_gt *gt) if (eu_en_fuse & BIT(eu)) eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1); - gen11_compute_sseu_info(sseu, g_dss_en, 0, eu_en); + gen11_compute_sseu_info(sseu, g_dss_en, eu_en); /* TGL only supports slice-level power gating */ sseu->has_slice_pg = 1; @@ -279,7 +302,7 @@ static void gen11_sseu_info_init(struct intel_gt *gt) eu_en = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) & GEN11_EU_DIS_MASK); - gen11_compute_sseu_info(sseu, ss_en, 0, eu_en); + gen11_compute_sseu_info(sseu, ss_en, eu_en); /* ICL has no power gating restrictions. 
*/ sseu->has_slice_pg = 1; @@ -291,7 +314,6 @@ static void cherryview_sseu_info_init(struct intel_gt *gt) { struct sseu_dev_info *sseu = >->info.sseu; u32 fuse; - u8 subslice_mask = 0; fuse = intel_uncore_read(gt->uncore, CHV_FUSE_GT); @@ -305,7 +327,7 @@ static void cherryview_sseu_info_init(struct intel_gt *gt) (((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) >> CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4); - subslice_mask |= BIT(0); + sseu->subslice_mask.hsw[0] |= BIT(0); sseu_set_eus(sseu, 0, 0, ~disabled_mask & 0xFF); } @@ -316,12 +338,10 @@ static void cherryview_sseu_info_init(struct intel_gt *gt) (((fuse & CHV_FGT_EU_DIS_SS1_R1_MASK) >> CHV_FGT_EU_DIS_SS1_R1_SHIFT) << 4); - subslice_mask |= BIT(1); + sseu->subslice_mask.hsw[0] |= BIT(1); sseu_set_eus(sseu, 0, 1, ~disabled_mask & 0xFF); } - intel_sseu_set_subslices(sseu, 0, sseu->subslice_mask, subslice_mask); - sseu->eu_total = compute_eu_total(sseu); /* @@ -376,8 +396,7 @@ static void gen9_sseu_info_init(struct intel_gt *gt) /* skip disabled slice */ continue; - intel_sseu_set_subslices(sseu, s, sseu->subslice_mask, - subslice_mask); + sseu->subslice_mask.hsw[s] = subslice_mask; eu_disable = intel_uncore_read(uncore, GEN9_EU_DISABLE(s)); for (ss = 0; ss < sseu->max_subslices; ss++) { @@ -434,8 +453,8 @@ static void gen9_sseu_info_init(struct intel_gt *gt) sseu->has_eu_pg = sseu->eu_per_subslice > 2; if (IS_GEN9_LP(i915)) { -#define IS_SS_DISABLED(ss) (!(sseu->subslice_mask[0] & BIT(ss))) - info->has_pooled_eu = hweight8(sseu->subslice_mask[0]) == 3; +#define IS_SS_DISABLED(ss) (!(sseu->subslice_mask.hsw[0] & BIT(ss))) + info->has_pooled_eu = hweight8(sseu->subslice_mask.hsw[0]) == 3; sseu->min_eu_in_pool = 0; if (info->has_pooled_eu) { @@ -489,8 +508,7 @@ static void bdw_sseu_info_init(struct intel_gt *gt) /* skip disabled slice */ continue; - intel_sseu_set_subslices(sseu, s, sseu->subslice_mask, - subslice_mask); + sseu->subslice_mask.hsw[s] = subslice_mask; for (ss = 0; ss < sseu->max_subslices; ss++) { u8 eu_disabled_mask; @@ -587,8 +605,7 @@ static void hsw_sseu_info_init(struct intel_gt *gt) sseu->eu_per_subslice); for (s = 0; s < sseu->max_slices; s++) { - intel_sseu_set_subslices(sseu, s, sseu->subslice_mask, - subslice_mask); + sseu->subslice_mask.hsw[s] = subslice_mask; for (ss = 0; ss < sseu->max_subslices; ss++) { sseu_set_eus(sseu, s, ss, @@ -677,7 +694,7 @@ u32 intel_sseu_make_rpcs(struct intel_gt *gt, */ if (GRAPHICS_VER(i915) == 11 && slices == 1 && - subslices > min_t(u8, 4, hweight8(sseu->subslice_mask[0]) / 2)) { + subslices > min_t(u8, 4, hweight8(sseu->subslice_mask.hsw[0]) / 2)) { GEM_BUG_ON(subslices & 1); subslice_pg = false; @@ -743,14 +760,29 @@ void intel_sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p) { int s; - drm_printf(p, "slice total: %u, mask=%04x\n", - hweight8(sseu->slice_mask), sseu->slice_mask); - drm_printf(p, "subslice total: %u\n", intel_sseu_subslice_total(sseu)); - for (s = 0; s < sseu->max_slices; s++) { - drm_printf(p, "slice%d: %u subslices, mask=%08x\n", - s, intel_sseu_subslices_per_slice(sseu, s), - intel_sseu_get_subslices(sseu, s)); + if (sseu->has_xehp_dss) { + drm_printf(p, "subslice total: %u\n", + intel_sseu_subslice_total(sseu)); + drm_printf(p, "geometry dss mask=%*pb\n", + XEHP_BITMAP_BITS(sseu->geometry_subslice_mask), + sseu->geometry_subslice_mask.xehp); + drm_printf(p, "compute dss mask=%*pb\n", + XEHP_BITMAP_BITS(sseu->compute_subslice_mask), + sseu->compute_subslice_mask.xehp); + } else { + drm_printf(p, "slice total: %u, mask=%04x\n", + hweight8(sseu->slice_mask), 
sseu->slice_mask); + drm_printf(p, "subslice total: %u\n", + intel_sseu_subslice_total(sseu)); + + for (s = 0; s < sseu->max_slices; s++) { + u8 ss_mask = sseu->subslice_mask.hsw[s]; + + drm_printf(p, "slice%d: %u subslices, mask=%08x\n", + s, hweight8(ss_mask), ss_mask); + } } + drm_printf(p, "EU total: %u\n", sseu->eu_total); drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice); drm_printf(p, "has slice power gating: %s\n", @@ -767,9 +799,10 @@ static void sseu_print_hsw_topology(const struct sseu_dev_info *sseu, int s, ss; for (s = 0; s < sseu->max_slices; s++) { + u8 ss_mask = sseu->subslice_mask.hsw[s]; + drm_printf(p, "slice%d: %u subslice(s) (0x%08x):\n", - s, intel_sseu_subslices_per_slice(sseu, s), - intel_sseu_get_subslices(sseu, s)); + s, hweight8(ss_mask), ss_mask); for (ss = 0; ss < sseu->max_subslices; ss++) { u16 enabled_eus = sseu_get_eus(sseu, s, ss); @@ -783,16 +816,14 @@ static void sseu_print_hsw_topology(const struct sseu_dev_info *sseu, static void sseu_print_xehp_topology(const struct sseu_dev_info *sseu, struct drm_printer *p) { - u32 g_dss_mask = sseu_get_geometry_subslices(sseu); - u32 c_dss_mask = intel_sseu_get_compute_subslices(sseu); int dss; for (dss = 0; dss < sseu->max_subslices; dss++) { u16 enabled_eus = sseu_get_eus(sseu, 0, dss); drm_printf(p, "DSS_%02d: G:%3s C:%3s, %2u EUs (0x%04hx)\n", dss, - str_yes_no(g_dss_mask & BIT(dss)), - str_yes_no(c_dss_mask & BIT(dss)), + str_yes_no(test_bit(dss, sseu->geometry_subslice_mask.xehp)), + str_yes_no(test_bit(dss, sseu->compute_subslice_mask.xehp)), hweight16(enabled_eus), enabled_eus); } } @@ -810,20 +841,44 @@ void intel_sseu_print_topology(struct drm_i915_private *i915, } } -u16 intel_slicemask_from_dssmask(u64 dss_mask, int dss_per_slice) +void intel_sseu_print_ss_info(const char *type, + const struct sseu_dev_info *sseu, + struct seq_file *m) +{ + int s; + + if (sseu->has_xehp_dss) { + seq_printf(m, " %s Geometry DSS: %u\n", type, + bitmap_weight(sseu->geometry_subslice_mask.xehp, + XEHP_BITMAP_BITS(sseu->geometry_subslice_mask))); + seq_printf(m, " %s Compute DSS: %u\n", type, + bitmap_weight(sseu->compute_subslice_mask.xehp, + XEHP_BITMAP_BITS(sseu->compute_subslice_mask))); + } else { + for (s = 0; s < fls(sseu->slice_mask); s++) + seq_printf(m, " %s Slice%i subslices: %u\n", type, + s, hweight8(sseu->subslice_mask.hsw[s])); + } +} + +u16 intel_slicemask_from_xehp_dssmask(intel_sseu_ss_mask_t dss_mask, + int dss_per_slice) { - u16 slice_mask = 0; + intel_sseu_ss_mask_t per_slice_mask = {}; + unsigned long slice_mask = 0; int i; - WARN_ON(sizeof(dss_mask) * 8 / dss_per_slice > 8 * sizeof(slice_mask)); + WARN_ON(DIV_ROUND_UP(XEHP_BITMAP_BITS(dss_mask), dss_per_slice) > + 8 * sizeof(slice_mask)); - for (i = 0; dss_mask; i++) { - if (dss_mask & GENMASK(dss_per_slice - 1, 0)) + bitmap_fill(per_slice_mask.xehp, dss_per_slice); + for (i = 0; !bitmap_empty(dss_mask.xehp, XEHP_BITMAP_BITS(dss_mask)); i++) { + if (bitmap_intersects(dss_mask.xehp, per_slice_mask.xehp, dss_per_slice)) slice_mask |= BIT(i); - dss_mask >>= dss_per_slice; + bitmap_shift_right(dss_mask.xehp, dss_mask.xehp, dss_per_slice, + XEHP_BITMAP_BITS(dss_mask)); } return slice_mask; } - diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h index ffa375e68959..f0e09b743faa 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.h +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h @@ -25,12 +25,16 @@ struct drm_printer; /* * Maximum number of subslices that can exist within a HSW-style slice. 
This * is only relevant to pre-Xe_HP platforms (Xe_HP and beyond use the - * GEN_MAX_DSS value below). + * I915_MAX_SS_FUSE_BITS value below). */ #define GEN_MAX_SS_PER_HSW_SLICE 6 -/* Maximum number of DSS on newer platforms (Xe_HP and beyond). */ -#define GEN_MAX_DSS 32 +/* + * Maximum number of 32-bit registers used by hardware to express the + * enabled/disabled subslices. + */ +#define I915_MAX_SS_FUSE_REGS 1 +#define I915_MAX_SS_FUSE_BITS (I915_MAX_SS_FUSE_REGS * 32) /* Maximum number of EUs that can exist within a subslice or DSS. */ #define GEN_MAX_EUS_PER_SS 16 @@ -38,7 +42,7 @@ struct drm_printer; #define SSEU_MAX(a, b) ((a) > (b) ? (a) : (b)) /* The maximum number of bits needed to express each subslice/DSS independently */ -#define GEN_SS_MASK_SIZE SSEU_MAX(GEN_MAX_DSS, \ +#define GEN_SS_MASK_SIZE SSEU_MAX(I915_MAX_SS_FUSE_BITS, \ GEN_MAX_HSW_SLICES * GEN_MAX_SS_PER_HSW_SLICE) #define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries, BITS_PER_BYTE) @@ -49,17 +53,26 @@ struct drm_printer; #define GEN_DSS_PER_CSLICE 8 #define GEN_DSS_PER_MSLICE 8 -#define GEN_MAX_GSLICES (GEN_MAX_DSS / GEN_DSS_PER_GSLICE) -#define GEN_MAX_CSLICES (GEN_MAX_DSS / GEN_DSS_PER_CSLICE) +#define GEN_MAX_GSLICES (I915_MAX_SS_FUSE_BITS / GEN_DSS_PER_GSLICE) +#define GEN_MAX_CSLICES (I915_MAX_SS_FUSE_BITS / GEN_DSS_PER_CSLICE) + +typedef union { + u8 hsw[GEN_MAX_HSW_SLICES]; + + /* Bitmap compatible with linux/bitmap.h; may exceed size of u64 */ + unsigned long xehp[BITS_TO_LONGS(I915_MAX_SS_FUSE_BITS)]; +} intel_sseu_ss_mask_t; + +#define XEHP_BITMAP_BITS(mask) ((int)BITS_PER_TYPE(typeof(mask.xehp))) struct sseu_dev_info { u8 slice_mask; - u8 subslice_mask[GEN_SS_MASK_SIZE]; - u8 geometry_subslice_mask[GEN_SS_MASK_SIZE]; - u8 compute_subslice_mask[GEN_SS_MASK_SIZE]; + intel_sseu_ss_mask_t subslice_mask; + intel_sseu_ss_mask_t geometry_subslice_mask; + intel_sseu_ss_mask_t compute_subslice_mask; union { u16 hsw[GEN_MAX_HSW_SLICES][GEN_MAX_SS_PER_HSW_SLICE]; - u16 xehp[GEN_MAX_DSS]; + u16 xehp[I915_MAX_SS_FUSE_BITS]; } eu_mask; u16 eu_total; @@ -80,8 +93,6 @@ struct sseu_dev_info { u8 max_slices; u8 max_subslices; u8 max_eus_per_subslice; - - u8 ss_stride; }; /* @@ -99,7 +110,7 @@ intel_sseu_from_device_info(const struct sseu_dev_info *sseu) { struct intel_sseu value = { .slice_mask = sseu->slice_mask, - .subslice_mask = sseu->subslice_mask[0], + .subslice_mask = sseu->subslice_mask.hsw[0], .min_eus_per_subslice = sseu->max_eus_per_subslice, .max_eus_per_subslice = sseu->max_eus_per_subslice, }; @@ -111,18 +122,28 @@ static inline bool intel_sseu_has_subslice(const struct sseu_dev_info *sseu, int slice, int subslice) { - u8 mask; - int ss_idx = subslice / BITS_PER_BYTE; - if (slice >= sseu->max_slices || subslice >= sseu->max_subslices) return false; - GEM_BUG_ON(ss_idx >= sseu->ss_stride); - - mask = sseu->subslice_mask[slice * sseu->ss_stride + ss_idx]; + if (sseu->has_xehp_dss) + return test_bit(subslice, sseu->subslice_mask.xehp); + else + return sseu->subslice_mask.hsw[slice] & BIT(subslice); +} - return mask & BIT(subslice % BITS_PER_BYTE); +/* + * Used to obtain the index of the first DSS. Can start searching from the + * beginning of a specific dss group (e.g., gslice, cslice, etc.) if + * groupsize and groupnum are non-zero. 
+ */ +static inline unsigned int +intel_sseu_find_first_xehp_dss(const struct sseu_dev_info *sseu, int groupsize, + int groupnum) +{ + return find_next_bit(sseu->subslice_mask.xehp, + XEHP_BITMAP_BITS(sseu->subslice_mask), + groupnum * groupsize); } void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices, @@ -132,14 +153,10 @@ unsigned int intel_sseu_subslice_total(const struct sseu_dev_info *sseu); unsigned int -intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice); +intel_sseu_get_hsw_subslices(const struct sseu_dev_info *sseu, u8 slice); -u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice); - -u32 intel_sseu_get_compute_subslices(const struct sseu_dev_info *sseu); - -void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice, - u8 *subslice_mask, u32 ss_mask); +intel_sseu_ss_mask_t +intel_sseu_get_compute_subslices(const struct sseu_dev_info *sseu); void intel_sseu_info_init(struct intel_gt *gt); @@ -151,9 +168,15 @@ void intel_sseu_print_topology(struct drm_i915_private *i915, const struct sseu_dev_info *sseu, struct drm_printer *p); -u16 intel_slicemask_from_dssmask(u64 dss_mask, int dss_per_slice); +u16 intel_slicemask_from_xehp_dssmask(intel_sseu_ss_mask_t dss_mask, int dss_per_slice); int intel_sseu_copy_eumask_to_user(void __user *to, const struct sseu_dev_info *sseu); +int intel_sseu_copy_ssmask_to_user(void __user *to, + const struct sseu_dev_info *sseu); + +void intel_sseu_print_ss_info(const char *type, + const struct sseu_dev_info *sseu, + struct seq_file *m); #endif /* __INTEL_SSEU_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c b/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c index 2d5d011e01db..c2ee5e1826b5 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c @@ -4,6 +4,7 @@ * Copyright © 2020 Intel Corporation */ +#include #include #include "i915_drv.h" @@ -11,14 +12,6 @@ #include "intel_gt_regs.h" #include "intel_sseu_debugfs.h" -static void sseu_copy_subslices(const struct sseu_dev_info *sseu, - int slice, u8 *to_mask) -{ - int offset = slice * sseu->ss_stride; - - memcpy(&to_mask[offset], &sseu->subslice_mask[offset], sseu->ss_stride); -} - static void cherryview_sseu_device_status(struct intel_gt *gt, struct sseu_dev_info *sseu) { @@ -41,7 +34,7 @@ static void cherryview_sseu_device_status(struct intel_gt *gt, continue; sseu->slice_mask = BIT(0); - sseu->subslice_mask[0] |= BIT(ss); + sseu->subslice_mask.hsw[0] |= BIT(ss); eu_cnt = ((sig1[ss] & CHV_EU08_PG_ENABLE) ? 0 : 2) + ((sig1[ss] & CHV_EU19_PG_ENABLE) ? 0 : 2) + ((sig1[ss] & CHV_EU210_PG_ENABLE) ? 
0 : 2) + @@ -92,7 +85,7 @@ static void gen11_sseu_device_status(struct intel_gt *gt, continue; sseu->slice_mask |= BIT(s); - sseu_copy_subslices(&info->sseu, s, sseu->subslice_mask); + sseu->subslice_mask.hsw[s] = info->sseu.subslice_mask.hsw[s]; for (ss = 0; ss < info->sseu.max_subslices; ss++) { unsigned int eu_cnt; @@ -147,21 +140,17 @@ static void gen9_sseu_device_status(struct intel_gt *gt, sseu->slice_mask |= BIT(s); if (IS_GEN9_BC(gt->i915)) - sseu_copy_subslices(&info->sseu, s, - sseu->subslice_mask); + sseu->subslice_mask.hsw[s] = info->sseu.subslice_mask.hsw[s]; for (ss = 0; ss < info->sseu.max_subslices; ss++) { unsigned int eu_cnt; - u8 ss_idx = s * info->sseu.ss_stride + - ss / BITS_PER_BYTE; if (IS_GEN9_LP(gt->i915)) { if (!(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss)))) /* skip disabled subslice */ continue; - sseu->subslice_mask[ss_idx] |= - BIT(ss % BITS_PER_BYTE); + sseu->subslice_mask.hsw[s] |= BIT(ss); } eu_cnt = eu_reg[2 * s + ss / 2] & eu_mask[ss % 2]; @@ -188,8 +177,7 @@ static void bdw_sseu_device_status(struct intel_gt *gt, if (sseu->slice_mask) { sseu->eu_per_subslice = info->sseu.eu_per_subslice; for (s = 0; s < fls(sseu->slice_mask); s++) - sseu_copy_subslices(&info->sseu, s, - sseu->subslice_mask); + sseu->subslice_mask.hsw[s] = info->sseu.subslice_mask.hsw[s]; sseu->eu_total = sseu->eu_per_subslice * intel_sseu_subslice_total(sseu); @@ -208,7 +196,6 @@ static void i915_print_sseu_info(struct seq_file *m, const struct sseu_dev_info *sseu) { const char *type = is_available_info ? "Available" : "Enabled"; - int s; seq_printf(m, " %s Slice Mask: %04x\n", type, sseu->slice_mask); @@ -216,10 +203,7 @@ static void i915_print_sseu_info(struct seq_file *m, hweight8(sseu->slice_mask)); seq_printf(m, " %s Subslice Total: %u\n", type, intel_sseu_subslice_total(sseu)); - for (s = 0; s < fls(sseu->slice_mask); s++) { - seq_printf(m, " %s Slice%i subslices: %u\n", type, - s, intel_sseu_subslices_per_slice(sseu, s)); - } + intel_sseu_print_ss_info(type, sseu, m); seq_printf(m, " %s EU Total: %u\n", type, sseu->eu_total); seq_printf(m, " %s EU Per Subslice: %u\n", type, diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index a604bc7c0701..6e875d4f5f65 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -950,8 +950,8 @@ gen9_wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal) * on s/ss combo, the read should be done with read_subslice_reg. */ slice = ffs(sseu->slice_mask) - 1; - GEM_BUG_ON(slice >= ARRAY_SIZE(sseu->subslice_mask)); - subslice = ffs(intel_sseu_get_subslices(sseu, slice)); + GEM_BUG_ON(slice >= ARRAY_SIZE(sseu->subslice_mask.hsw)); + subslice = ffs(intel_sseu_get_hsw_subslices(sseu, slice)); GEM_BUG_ON(!subslice); subslice--; @@ -1089,11 +1089,10 @@ static void icl_wa_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal) { const struct sseu_dev_info *sseu = >->info.sseu; - unsigned int slice, subslice; + unsigned int subslice; GEM_BUG_ON(GRAPHICS_VER(gt->i915) < 11); GEM_BUG_ON(hweight8(sseu->slice_mask) > 1); - slice = 0; /* * Although a platform may have subslices, we need to always steer @@ -1104,7 +1103,7 @@ icl_wa_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal) * one of the higher subslices, we run the risk of reading back 0's or * random garbage. 
*/ - subslice = __ffs(intel_sseu_get_subslices(sseu, slice)); + subslice = __ffs(intel_sseu_get_hsw_subslices(sseu, 0)); /* * If the subslice we picked above also steers us to a valid L3 bank, @@ -1114,7 +1113,7 @@ icl_wa_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal) if (gt->info.l3bank_mask & BIT(subslice)) gt->steering_table[L3BANK] = NULL; - __add_mcr_wa(gt, wal, slice, subslice); + __add_mcr_wa(gt, wal, 0, subslice); } static void @@ -1122,7 +1121,6 @@ xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal) { const struct sseu_dev_info *sseu = >->info.sseu; unsigned long slice, subslice = 0, slice_mask = 0; - u64 dss_mask = 0; u32 lncf_mask = 0; int i; @@ -1153,8 +1151,8 @@ xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal) */ /* Find the potential gslice candidates */ - dss_mask = intel_sseu_get_subslices(sseu, 0); - slice_mask = intel_slicemask_from_dssmask(dss_mask, GEN_DSS_PER_GSLICE); + slice_mask = intel_slicemask_from_xehp_dssmask(sseu->subslice_mask, + GEN_DSS_PER_GSLICE); /* * Find the potential LNCF candidates. Either LNCF within a valid @@ -1179,9 +1177,8 @@ xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal) } slice = __ffs(slice_mask); - subslice = __ffs(dss_mask >> (slice * GEN_DSS_PER_GSLICE)); + subslice = intel_sseu_find_first_xehp_dss(sseu, GEN_DSS_PER_GSLICE, slice); WARN_ON(subslice > GEN_DSS_PER_GSLICE); - WARN_ON(dss_mask >> (slice * GEN_DSS_PER_GSLICE) == 0); __add_mcr_wa(gt, wal, slice, subslice); @@ -2062,9 +2059,8 @@ engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) static bool needs_wa_1308578152(struct intel_engine_cs *engine) { - u64 dss_mask = intel_sseu_get_subslices(&engine->gt->info.sseu, 0); - - return (dss_mask & GENMASK(GEN_DSS_PER_GSLICE - 1, 0)) == 0; + return intel_sseu_find_first_xehp_dss(&engine->gt->info.sseu, 0, 0) > + GEN_DSS_PER_GSLICE; } static void diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c index ac9767c56619..6fd15b39570c 100644 --- a/drivers/gpu/drm/i915/i915_getparam.c +++ b/drivers/gpu/drm/i915/i915_getparam.c @@ -162,8 +162,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data, return -EINVAL; /* Only copy bits from the first slice */ - memcpy(&value, sseu->subslice_mask, - min(sseu->ss_stride, (u8)sizeof(value))); + value = intel_sseu_get_hsw_subslices(sseu, 0); if (!value) return -ENODEV; break; diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c index 89c475d525b8..0094f67c63f2 100644 --- a/drivers/gpu/drm/i915/i915_query.c +++ b/drivers/gpu/drm/i915/i915_query.c @@ -31,10 +31,11 @@ static int copy_query_item(void *query_hdr, size_t query_sz, static int fill_topology_info(const struct sseu_dev_info *sseu, struct drm_i915_query_item *query_item, - const u8 *subslice_mask) + intel_sseu_ss_mask_t subslice_mask) { struct drm_i915_query_topology_info topo; u32 slice_length, subslice_length, eu_length, total_length; + int ss_stride = GEN_SSEU_STRIDE(sseu->max_subslices); int eu_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice); int ret; @@ -44,7 +45,7 @@ static int fill_topology_info(const struct sseu_dev_info *sseu, return -ENODEV; slice_length = sizeof(sseu->slice_mask); - subslice_length = sseu->max_slices * sseu->ss_stride; + subslice_length = sseu->max_slices * ss_stride; eu_length = sseu->max_slices * sseu->max_subslices * eu_stride; total_length = sizeof(topo) + slice_length + subslice_length + eu_length; @@ -60,7 +61,7 @@ static int fill_topology_info(const struct sseu_dev_info 
*sseu, topo.max_eus_per_subslice = sseu->max_eus_per_subslice; topo.subslice_offset = slice_length; - topo.subslice_stride = sseu->ss_stride; + topo.subslice_stride = ss_stride; topo.eu_offset = slice_length + subslice_length; topo.eu_stride = eu_stride; @@ -72,9 +73,9 @@ static int fill_topology_info(const struct sseu_dev_info *sseu, &sseu->slice_mask, slice_length)) return -EFAULT; - if (copy_to_user(u64_to_user_ptr(query_item->data_ptr + - sizeof(topo) + slice_length), - subslice_mask, subslice_length)) + if (intel_sseu_copy_ssmask_to_user(u64_to_user_ptr(query_item->data_ptr + + sizeof(topo) + slice_length), + sseu)) return -EFAULT; if (intel_sseu_copy_eumask_to_user(u64_to_user_ptr(query_item->data_ptr + -- cgit v1.2.3 From 5ac342ef84d7dccd1ba43f5fa2dc10a6feda91e2 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 1 Jun 2022 08:07:25 -0700 Subject: drm/i915/pvc: Add SSEU changes PVC splits the mask of enabled DSS over two registers. It also changes the meaning of the EU fuse register such that each bit represents a single EU rather than a pair of EUs. Signed-off-by: Matt Roper Acked-by: Tvrtko Ursulin Reviewed-by: Balasubramani Vivekanandan Link: https://patchwork.freedesktop.org/patch/msgid/20220601150725.521468-7-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 1 + drivers/gpu/drm/i915/gt/intel_sseu.c | 31 ++++++++++++++++++++++++------- drivers/gpu/drm/i915/gt/intel_sseu.h | 2 +- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_pci.c | 3 ++- drivers/gpu/drm/i915/intel_device_info.h | 1 + 6 files changed, 31 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 58e9b464d564..6aa1ceaa8d27 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -561,6 +561,7 @@ #define GEN11_GT_VEBOX_DISABLE_MASK (0x0f << GEN11_GT_VEBOX_DISABLE_SHIFT) #define GEN12_GT_COMPUTE_DSS_ENABLE _MMIO(0x9144) +#define XEHPC_GT_COMPUTE_DSS_ENABLE_EXT _MMIO(0x9148) #define GEN6_UCGCTL1 _MMIO(0x9400) #define GEN6_GAMUNIT_CLOCK_GATE_DISABLE (1 << 22) diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index 826d11f18817..7ef75f0d9c9e 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -210,27 +210,44 @@ static void xehp_sseu_info_init(struct intel_gt *gt) struct intel_uncore *uncore = gt->uncore; u16 eu_en = 0; u8 eu_en_fuse; + int num_compute_regs, num_geometry_regs; int eu; + if (IS_PONTEVECCHIO(gt->i915)) { + num_geometry_regs = 0; + num_compute_regs = 2; + } else { + num_geometry_regs = 1; + num_compute_regs = 1; + } + /* * The concept of slice has been removed in Xe_HP. To be compatible * with prior generations, assume a single slice across the entire * device. Then calculate out the DSS for each workload type within * that software slice. 
*/ - intel_sseu_set_info(sseu, 1, 32, 16); + intel_sseu_set_info(sseu, 1, + 32 * max(num_geometry_regs, num_compute_regs), + 16); sseu->has_xehp_dss = 1; - xehp_load_dss_mask(uncore, &sseu->geometry_subslice_mask, 1, + xehp_load_dss_mask(uncore, &sseu->geometry_subslice_mask, + num_geometry_regs, GEN12_GT_GEOMETRY_DSS_ENABLE); - xehp_load_dss_mask(uncore, &sseu->compute_subslice_mask, 1, - GEN12_GT_COMPUTE_DSS_ENABLE); + xehp_load_dss_mask(uncore, &sseu->compute_subslice_mask, + num_compute_regs, + GEN12_GT_COMPUTE_DSS_ENABLE, + XEHPC_GT_COMPUTE_DSS_ENABLE_EXT); eu_en_fuse = intel_uncore_read(uncore, XEHP_EU_ENABLE) & XEHP_EU_ENA_MASK; - for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++) - if (eu_en_fuse & BIT(eu)) - eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1); + if (HAS_ONE_EU_PER_FUSE_BIT(gt->i915)) + eu_en = eu_en_fuse; + else + for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++) + if (eu_en_fuse & BIT(eu)) + eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1); xehp_compute_sseu_info(sseu, eu_en); } diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h index f0e09b743faa..aa87d3832d60 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.h +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h @@ -33,7 +33,7 @@ struct drm_printer; * Maximum number of 32-bit registers used by hardware to express the * enabled/disabled subslices. */ -#define I915_MAX_SS_FUSE_REGS 1 +#define I915_MAX_SS_FUSE_REGS 2 #define I915_MAX_SS_FUSE_BITS (I915_MAX_SS_FUSE_REGS * 32) /* Maximum number of EUs that can exist within a subslice or DSS. */ diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ec1b3484fdaf..fbea4d1ede7c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1422,6 +1422,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_3D_PIPELINE(i915) (INTEL_INFO(i915)->has_3d_pipeline) +#define HAS_ONE_EU_PER_FUSE_BIT(i915) (INTEL_INFO(i915)->has_one_eu_per_fuse_bit) + /* i915_gem.c */ void i915_gem_init_early(struct drm_i915_private *dev_priv); void i915_gem_cleanup_early(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 269d7c8f2f81..177aefa6511e 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -1090,7 +1090,8 @@ static const struct intel_device_info ats_m_info = { XE_HP_FEATURES, \ .dma_mask_size = 52, \ .has_3d_pipeline = 0, \ - .has_l3_ccs_read = 1 + .has_l3_ccs_read = 1, \ + .has_one_eu_per_fuse_bit = 1 __maybe_unused static const struct intel_device_info pvc_info = { diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 4e1c80966ab5..346f17f2dce8 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -158,6 +158,7 @@ enum intel_ppgtt_type { func(has_logical_ring_elsq); \ func(has_media_ratio_mode); \ func(has_mslices); \ + func(has_one_eu_per_fuse_bit); \ func(has_pooled_eu); \ func(has_pxp); \ func(has_rc6); \ -- cgit v1.2.3 From b729cfee705a56c6204647ac486107a1f814af36 Mon Sep 17 00:00:00 2001 From: Stuart Summers Date: Wed, 1 Jun 2022 14:06:46 -0700 Subject: drm/i915: Add extra registers to GPU error dump MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Our internal teams have identified a few additional engine registers that are worth inspecting in error state dumps during development & debug. Let's capture and print them as part of our error dump. 
For simplicity we'll just dump these registers on gen11 and beyond. Most of these registers have existed since earlier platforms (e.g., gen6 or gen7) but were initially introduced only for a subset of the platforms' engines; gen11 seems to be where they became available on all engines. Signed-off-by: Stuart Summers Signed-off-by: Matt Roper Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20220601210646.615946-1-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_regs.h | 5 +++++ drivers/gpu/drm/i915/i915_gpu_error.c | 19 +++++++++++++++++++ drivers/gpu/drm/i915/i915_gpu_error.h | 7 +++++++ 3 files changed, 31 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_regs.h b/drivers/gpu/drm/i915/gt/intel_engine_regs.h index 44de10cf7837..889f0df3940b 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_regs.h @@ -8,6 +8,7 @@ #include "i915_reg_defs.h" +#define RING_EXCC(base) _MMIO((base) + 0x28) #define RING_TAIL(base) _MMIO((base) + 0x30) #define TAIL_ADDR 0x001FFFF8 #define RING_HEAD(base) _MMIO((base) + 0x34) @@ -133,6 +134,8 @@ (REG_FIELD_PREP(BLIT_CCTL_DST_MOCS_MASK, (dst) << 1) | \ REG_FIELD_PREP(BLIT_CCTL_SRC_MOCS_MASK, (src) << 1)) +#define RING_CSCMDOP(base) _MMIO((base) + 0x20c) + /* * CMD_CCTL read/write fields take a MOCS value and _not_ a table index. * The lsb of each can be considered a separate enabling bit for encryption. @@ -149,6 +152,7 @@ REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, (read) << 1)) #define RING_PREDICATE_RESULT(base) _MMIO((base) + 0x3b8) /* gen12+ */ + #define MI_PREDICATE_RESULT_2(base) _MMIO((base) + 0x3bc) #define LOWER_SLICE_ENABLED (1 << 0) #define LOWER_SLICE_DISABLED (0 << 0) @@ -172,6 +176,7 @@ #define CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT REG_BIT(2) #define CTX_CTRL_INHIBIT_SYN_CTX_SWITCH REG_BIT(3) #define GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE REG_BIT(8) +#define RING_CTX_SR_CTL(base) _MMIO((base) + 0x244) #define RING_SEMA_WAIT_POLL(base) _MMIO((base) + 0x24c) #define GEN8_RING_PDP_UDW(base, n) _MMIO((base) + 0x270 + (n) * 8 + 4) #define GEN8_RING_PDP_LDW(base, n) _MMIO((base) + 0x270 + (n) * 8) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 0512c66fa4f3..bff8a111424a 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -581,6 +581,15 @@ static void error_print_engine(struct drm_i915_error_state_buf *m, err_printf(m, " RC PSMI: 0x%08x\n", ee->rc_psmi); err_printf(m, " FAULT_REG: 0x%08x\n", ee->fault_reg); } + if (GRAPHICS_VER(m->i915) >= 11) { + err_printf(m, " NOPID: 0x%08x\n", ee->nopid); + err_printf(m, " EXCC: 0x%08x\n", ee->excc); + err_printf(m, " CMD_CCTL: 0x%08x\n", ee->cmd_cctl); + err_printf(m, " CSCMDOP: 0x%08x\n", ee->cscmdop); + err_printf(m, " CTX_SR_CTL: 0x%08x\n", ee->ctx_sr_ctl); + err_printf(m, " DMA_FADDR_HI: 0x%08x\n", ee->dma_faddr_hi); + err_printf(m, " DMA_FADDR_LO: 0x%08x\n", ee->dma_faddr_lo); + } if (HAS_PPGTT(m->i915)) { err_printf(m, " GFX_MODE: 0x%08x\n", ee->vm_info.gfx_mode); @@ -1224,6 +1233,16 @@ static void engine_record_registers(struct intel_engine_coredump *ee) ee->ipehr = ENGINE_READ(engine, IPEHR); } + if (GRAPHICS_VER(i915) >= 11) { + ee->cmd_cctl = ENGINE_READ(engine, RING_CMD_CCTL); + ee->cscmdop = ENGINE_READ(engine, RING_CSCMDOP); + ee->ctx_sr_ctl = ENGINE_READ(engine, RING_CTX_SR_CTL); + ee->dma_faddr_hi = ENGINE_READ(engine, RING_DMA_FADD_UDW); + ee->dma_faddr_lo = ENGINE_READ(engine, RING_DMA_FADD); + 
ee->nopid = ENGINE_READ(engine, RING_NOPID); + ee->excc = ENGINE_READ(engine, RING_EXCC); + } + intel_engine_get_instdone(engine, &ee->instdone); ee->instpm = ENGINE_READ(engine, RING_INSTPM); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index a611abacd9c2..55a143b92d10 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -84,6 +84,13 @@ struct intel_engine_coredump { u32 fault_reg; u64 faddr; u32 rc_psmi; /* sleep state */ + u32 nopid; + u32 excc; + u32 cmd_cctl; + u32 cscmdop; + u32 ctx_sr_ctl; + u32 dma_faddr_hi; + u32 dma_faddr_lo; struct intel_instdone instdone; /* GuC matched capture-lists info */ -- cgit v1.2.3 From f7dad0daf2c2368f72828b0729799f01bdcee33b Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 2 Jun 2022 16:30:19 -0700 Subject: drm/i915/pvc: GuC depriv applies to PVC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We missed this setting in the initial device info patch's definition of XE_HPC_FEATURES. Signed-off-by: Matt Roper Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20220602233019.1659283-1-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/i915_pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 177aefa6511e..b8244f72e432 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -1090,6 +1090,7 @@ static const struct intel_device_info ats_m_info = { XE_HP_FEATURES, \ .dma_mask_size = 52, \ .has_3d_pipeline = 0, \ + .has_guc_deprivilege = 1, \ .has_l3_ccs_read = 1, \ .has_one_eu_per_fuse_bit = 1 -- cgit v1.2.3 From 34b68c17e9895ba66fc809224b0122a2eed7aa40 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 7 Jun 2022 12:22:07 +0300 Subject: drm/i915/client: only include what's needed Only the uapi header is required. Signed-off-by: Jani Nikula Acked-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20220607092207.476653-1-jani.nikula@intel.com --- drivers/gpu/drm/i915/i915_drm_client.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drm_client.h b/drivers/gpu/drm/i915/i915_drm_client.h index f796c5e8e060..69496af996d9 100644 --- a/drivers/gpu/drm/i915/i915_drm_client.h +++ b/drivers/gpu/drm/i915/i915_drm_client.h @@ -11,7 +11,7 @@ #include #include -#include "gt/intel_engine_types.h" +#include #define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_COMPUTE -- cgit v1.2.3 From c6e3806705d679edf135dff5d540a278fc406f15 Mon Sep 17 00:00:00 2001 From: Anshuman Gupta Date: Tue, 7 Jun 2022 16:15:42 +0530 Subject: drm/i915/dg2: Add Wa_14015795083 i915 must disable Render DOP clock gating globally. v2: - Addressed cosmetic review comments. 
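As a standalone sketch of what a wa_write_clr() entry amounts to (the register cell and helper below are fabricated; only the bit position follows the new GEN12_DOP_CLOCK_GATE_RENDER_ENABLE definition in the diff):

  #include <stdio.h>

  /* Bit 1 of MISCCPCTL, matching GEN12_DOP_CLOCK_GATE_RENDER_ENABLE in the
   * diff below; the register cell and helper are fabricated for the sketch. */
  #define DOP_CLOCK_GATE_RENDER_ENABLE  (1u << 1)

  static unsigned int misccpctl = 0xffffffffu;   /* pretend MMIO register */

  /* A wa_write_clr() entry boils down to read-modify-write with the given
   * bits cleared, re-applied after every relevant reset. */
  static void wa_write_clr(unsigned int clr)
  {
          misccpctl &= ~clr;
  }

  int main(void)
  {
          wa_write_clr(DOP_CLOCK_GATE_RENDER_ENABLE);
          printf("MISCCPCTL now 0x%08x (render DOP clock gating off)\n",
                 misccpctl);
          return 0;
  }
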
Bspec: 52621 Cc: Matt Roper Cc: Badal Nilawar Signed-off-by: Anshuman Gupta Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20220607104542.8559-1-anshuman.gupta@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 1 + drivers/gpu/drm/i915/gt/intel_workarounds.c | 3 +++ 2 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 6aa1ceaa8d27..c8129a351731 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -631,6 +631,7 @@ #define GEN7_MISCCPCTL _MMIO(0x9424) #define GEN7_DOP_CLOCK_GATE_ENABLE (1 << 0) +#define GEN12_DOP_CLOCK_GATE_RENDER_ENABLE REG_BIT(1) #define GEN8_DOP_CLOCK_GATE_CFCLK_ENABLE (1 << 2) #define GEN8_DOP_CLOCK_GATE_GUC_ENABLE (1 << 4) #define GEN8_DOP_CLOCK_GATE_MEDIA_ENABLE (1 << 6) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 6e875d4f5f65..1e7ca3863f20 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1486,6 +1486,9 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) * performance guide section. */ wa_write_or(wal, GEN12_SQCM, EN_32B_ACCESS); + + /* Wa_14015795083 */ + wa_write_clr(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE); } static void -- cgit v1.2.3 From 81298056a78c5163b216f17d17c43736e7069961 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 7 Jun 2022 08:47:24 -0700 Subject: drm/i915/dg2: Correct DSS check for Wa_1308578152 When converting our DSS masks to bitmaps, we fumbled the condition used to check whether any DSS are present in the first gslice. Since intel_sseu_find_first_xehp_dss() returns a 0-based number, we need a >= condition rather than >. Fixes: b87d39019651 ("drm/i915/sseu: Disassociate internal subslice mask representation from uapi") Reported-by: Balasubramani Vivekanandan Signed-off-by: Matt Roper Acked-by: Balasubramani Vivekanandan Link: https://patchwork.freedesktop.org/patch/msgid/20220607154724.3155521-1-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 1e7ca3863f20..0ac7f5daacc4 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -2062,7 +2062,7 @@ engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) static bool needs_wa_1308578152(struct intel_engine_cs *engine) { - return intel_sseu_find_first_xehp_dss(&engine->gt->info.sseu, 0, 0) > + return intel_sseu_find_first_xehp_dss(&engine->gt->info.sseu, 0, 0) >= GEN_DSS_PER_GSLICE; } -- cgit v1.2.3 From 5821a0bbb4c39960975d29d6b58ae290088db0ed Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 11 May 2022 12:46:19 +0300 Subject: drm/i915/uc: remove accidental static from a local variable The arrays are static const, but the pointer shouldn't be static. 
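A minimal userspace illustration of the distinction (names made up; the actual one-line change is in the diff below):

  #include <stdio.h>

  struct blob { const char *name; };

  /* Made-up stand-in for the firmware-selection helper. */
  static const struct blob *pick(int type)
  {
          /* Fine as static const: a single read-only copy of the tables,
           * shared by every call. */
          static const struct blob blobs_a[] = { { "a0" }, { "a1" } };
          static const struct blob blobs_b[] = { { "b0" } };

          /* Deliberately not static: the pointer is recomputed from the
           * argument on each call, so it belongs on the stack.  Making it
           * static keeps one copy alive across calls for no benefit and
           * invites stale-value bugs if the assignment ever becomes
           * conditional. */
          const struct blob *blobs = (type == 0) ? blobs_a : blobs_b;

          return blobs;
  }

  int main(void)
  {
          printf("%s %s\n", pick(0)->name, pick(1)->name);
          return 0;
  }
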
Fixes: 3d832f370d16 ("drm/i915/uc: Allow platforms to have GuC but not HuC") Cc: John Harrison Cc: Lucas De Marchi Cc: Daniele Ceraolo Spurio Signed-off-by: Jani Nikula Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20220511094619.27889-1-jani.nikula@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 9361532726d6..d2c5c9367cc4 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -156,7 +156,7 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw) [INTEL_UC_FW_TYPE_GUC] = { blobs_guc, ARRAY_SIZE(blobs_guc) }, [INTEL_UC_FW_TYPE_HUC] = { blobs_huc, ARRAY_SIZE(blobs_huc) }, }; - static const struct uc_fw_platform_requirement *fw_blobs; + const struct uc_fw_platform_requirement *fw_blobs; enum intel_platform p = INTEL_INFO(i915)->platform; u32 fw_count; u8 rev = INTEL_REVID(i915); -- cgit v1.2.3 From c5cb0002d14b6f7aabaf7d67d0515fe70aea7167 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 7 Jun 2022 17:51:08 -0700 Subject: drm/i915: More PVC+DG2 workarounds A new PVC+DG2 workaround has appeared recently: - Wa_16015675438 And a couple existing DG2 workarounds have been extended to PVC: - Wa_14015795083 - Wa_18018781329 Note that Wa_16015675438 asks us to program a register that is in the 0x2xxx range typically associated with the RCS engine, even though PVC does not have an RCS. By default the GuC will think we've made a mistake and throw an exception when it sees this register on a CCS engine's save/restore list, so we need to pass an extra GuC control flag to tell it that this is expected and not a problem. 
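A rough standalone sketch of the resulting flag selection (the engine-mask bits and helper are made up for illustration; the real logic is the RCS_MASK() check added in the diff below):

  #include <stdio.h>

  /* Illustrative engine-mask bits; only the idea matters, the real masks
   * and the GUC_WA_RCS_REGS_IN_CCS_REGS_LIST value live in the driver. */
  #define ENGINE_RCS   (1u << 0)
  #define ENGINE_CCS0  (1u << 1)
  #define WA_FLAG_RCS_REGS_ON_CCS_LIST  (1u << 21)

  static unsigned int build_wa_flags(unsigned int engine_mask)
  {
          unsigned int flags = 0;

          /* Only a platform with no render engine ends up with 0x2xxx-range
           * registers on a compute engine's save/restore list, so only then
           * does the firmware need to be told to accept them. */
          if (!(engine_mask & ENGINE_RCS))
                  flags |= WA_FLAG_RCS_REGS_ON_CCS_LIST;

          return flags;
  }

  int main(void)
  {
          printf("compute-only: 0x%08x\n", build_wa_flags(ENGINE_CCS0));
          printf("with render:  0x%08x\n", build_wa_flags(ENGINE_RCS | ENGINE_CCS0));
          return 0;
  }
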
Signed-off-by: Anshuman Gupta Signed-off-by: Badal Nilawar Signed-off-by: Prathap Kumar Valsan Signed-off-by: Matt Roper Reviewed-by: Anshuman Gupta Link: https://patchwork.freedesktop.org/patch/msgid/20220608005108.3717895-1-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 1 + drivers/gpu/drm/i915/gt/intel_workarounds.c | 24 +++++++++++++++++------- drivers/gpu/drm/i915/gt/uc/intel_guc.c | 4 ++++ drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h | 1 + 4 files changed, 23 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index c8129a351731..226557018037 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -140,6 +140,7 @@ #define FF_SLICE_CS_CHICKEN2 _MMIO(0x20e4) #define GEN9_TSG_BARRIER_ACK_DISABLE (1 << 8) #define GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE (1 << 10) +#define GEN12_PERF_FIX_BALANCING_CFE_DISABLE REG_BIT(15) #define GEN9_CS_DEBUG_MODE1 _MMIO(0x20ec) #define FF_DOP_CLOCK_GATE_DISABLE REG_BIT(1) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 0ac7f5daacc4..aeadbb3b72cb 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1491,13 +1491,20 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) wa_write_clr(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE); } +static void +pvc_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) +{ + /* Wa_14015795083 */ + wa_write_clr(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE); +} + static void gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal) { struct drm_i915_private *i915 = gt->i915; if (IS_PONTEVECCHIO(i915)) - ; /* none yet */ + pvc_gt_workarounds_init(gt, wal); else if (IS_DG2(i915)) dg2_gt_workarounds_init(gt, wal); else if (IS_XEHPSDV(i915)) @@ -2082,12 +2089,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) * performance guide section. 
*/ wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS); - - /* Wa_18018781329:dg2 */ - wa_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB); - wa_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB); - wa_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB); - wa_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB); } if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) { @@ -2700,6 +2701,15 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li /* Wa_22014226127:dg2,pvc */ wa_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE); + + /* Wa_16015675438:dg2,pvc */ + wa_masked_en(wal, FF_SLICE_CS_CHICKEN2, GEN12_PERF_FIX_BALANCING_CFE_DISABLE); + + /* Wa_18018781329:dg2,pvc */ + wa_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB); + wa_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB); + wa_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB); + wa_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB); } } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 2c4ad4a65089..35887cb53201 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -327,6 +327,10 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc) IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_FOREVER)) flags |= GUC_WA_CONTEXT_ISOLATION; + /* Wa_16015675438 */ + if (!RCS_MASK(gt)) + flags |= GUC_WA_RCS_REGS_IN_CCS_REGS_LIST; + return flags; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h index 42cb7a9a6199..b3c9a9327f76 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -105,6 +105,7 @@ #define GUC_WA_PRE_PARSER BIT(14) #define GUC_WA_HOLD_CCS_SWITCHOUT BIT(17) #define GUC_WA_POLLCS BIT(18) +#define GUC_WA_RCS_REGS_IN_CCS_REGS_LIST BIT(21) #define GUC_CTL_FEATURE 2 #define GUC_CTL_ENABLE_SLPC BIT(2) -- cgit v1.2.3 From 17f65658c8adce6233f9e53be59d399af6180059 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 7 Jun 2022 10:57:16 -0700 Subject: drm/i915/xehp: Correct steering initialization Another mistake during the conversion to DSS bitmaps: after retrieving the DSS ID intel_sseu_find_first_xehp_dss() we forgot to modulo it down to obtain which ID within the current gslice it is. 
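A small worked example of the missing step (the per-gslice DSS count is illustrative; the driver uses its GEN_DSS_PER_GSLICE constant):

  #include <stdio.h>

  #define DSS_PER_GSLICE 4   /* illustrative; the driver's constant is what counts */

  int main(void)
  {
          /* Suppose the first enabled DSS found in the fuse mask has the
           * global ID 9.  The steering registers want its position within
           * its gslice, which modulo recovers: */
          int dss = 9;
          int gslice   = dss / DSS_PER_GSLICE;   /* 2 */
          int subslice = dss % DSS_PER_GSLICE;   /* 1, not 9 */

          printf("dss %d -> gslice %d, subslice-within-gslice %d\n",
                 dss, gslice, subslice);
          return 0;
  }
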
Fixes: b87d39019651 ("drm/i915/sseu: Disassociate internal subslice mask representation from uapi") Cc: Balasubramani Vivekanandan Signed-off-by: Matt Roper Acked-by: Balasubramani Vivekanandan Link: https://patchwork.freedesktop.org/patch/msgid/20220607175716.3338661-1-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index aeadbb3b72cb..1ee54a83e459 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1177,8 +1177,8 @@ xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal) } slice = __ffs(slice_mask); - subslice = intel_sseu_find_first_xehp_dss(sseu, GEN_DSS_PER_GSLICE, slice); - WARN_ON(subslice > GEN_DSS_PER_GSLICE); + subslice = intel_sseu_find_first_xehp_dss(sseu, GEN_DSS_PER_GSLICE, slice) % + GEN_DSS_PER_GSLICE; __add_mcr_wa(gt, wal, slice, subslice); -- cgit v1.2.3 From e0d7371b46c7b47cdf5391717292033365801437 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 8 Jun 2022 10:07:00 -0700 Subject: drm/i915/pvc: Add register steering Ponte Vecchio no longer has MSLICE or LNCF steering, but the bspec does document several new types of multicast register ranges. Fortunately, most of the different MCR types all provide valid values at instance (0,0) so there's no need to read fuse registers and calculate a non-terminated instance. We'll lump all of those range types (BSLICE, HALFBSLICE, TILEPSMI, CC, and L3BANK) into a single category called "INSTANCE0" to keep things simple. We'll also perform explicit steering for each of these multicast register types, even if the implicit steering setup for COMPUTE/DSS ranges would have worked too; this is based on guidance from our hardware architects who suggested that we move away from implicit steering and start explicitly steer all MCR register accesses on modern platforms (we'll work on transitioning COMPUTE/DSS to explicit steering in the future). Note that there's one additional MCR range type defined in the bspec (SQIDI) that we don't handle here. Those ranges use a different steering control register that we never touch; since instance 0 is also always a valid setting there, we can just ignore those ranges. Finally, we'll rename the HAS_MSLICES() macro to HAS_MSLICE_STEERING(). PVC hardware still has units referred to as mslices, but there's no register steering based on mslice for this platform. v2: - Rebase on other recent changes - Swap two table rows to keep table sorted & easy to read. 
(Harish) Bspec: 67609 Signed-off-by: Matt Roper Reviewed-by: Harish Chegondi Link: https://patchwork.freedesktop.org/patch/msgid/20220608170700.4026648-1-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_gt.c | 50 +++++++++++++++++++++++++---- drivers/gpu/drm/i915/gt/intel_gt_types.h | 7 ++++ drivers/gpu/drm/i915/gt/intel_workarounds.c | 16 +++++++++ drivers/gpu/drm/i915/i915_drv.h | 3 +- drivers/gpu/drm/i915/i915_pci.c | 3 +- drivers/gpu/drm/i915/intel_device_info.h | 2 +- 6 files changed, 71 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index ddfb98f70489..f33290358c51 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -106,6 +106,7 @@ static const char * const intel_steering_types[] = { "L3BANK", "MSLICE", "LNCF", + "INSTANCE 0", }; static const struct intel_mmio_range icl_l3bank_steering_table[] = { @@ -133,6 +134,27 @@ static const struct intel_mmio_range dg2_lncf_steering_table[] = { {}, }; +/* + * We have several types of MCR registers on PVC where steering to (0,0) + * will always provide us with a non-terminated value. We'll stick them + * all in the same table for simplicity. + */ +static const struct intel_mmio_range pvc_instance0_steering_table[] = { + { 0x004000, 0x004AFF }, /* HALF-BSLICE */ + { 0x008800, 0x00887F }, /* CC */ + { 0x008A80, 0x008AFF }, /* TILEPSMI */ + { 0x00B000, 0x00B0FF }, /* HALF-BSLICE */ + { 0x00B100, 0x00B3FF }, /* L3BANK */ + { 0x00C800, 0x00CFFF }, /* HALF-BSLICE */ + { 0x00D800, 0x00D8FF }, /* HALF-BSLICE */ + { 0x00DD00, 0x00DDFF }, /* BSLICE */ + { 0x00E900, 0x00E9FF }, /* HALF-BSLICE */ + { 0x00EC00, 0x00EEFF }, /* HALF-BSLICE */ + { 0x00F000, 0x00FFFF }, /* HALF-BSLICE */ + { 0x024180, 0x0241FF }, /* HALF-BSLICE */ + {}, +}; + int intel_gt_init_mmio(struct intel_gt *gt) { struct drm_i915_private *i915 = gt->i915; @@ -146,7 +168,7 @@ int intel_gt_init_mmio(struct intel_gt *gt) * An mslice is unavailable only if both the meml3 for the slice is * disabled *and* all of the DSS in the slice (quadrant) are disabled. */ - if (HAS_MSLICES(i915)) { + if (HAS_MSLICE_STEERING(i915)) { gt->info.mslice_mask = intel_slicemask_from_xehp_dssmask(gt->info.sseu.subslice_mask, GEN_DSS_PER_MSLICE); @@ -158,7 +180,9 @@ int intel_gt_init_mmio(struct intel_gt *gt) drm_warn(&i915->drm, "mslice mask all zero!\n"); } - if (IS_DG2(i915)) { + if (IS_PONTEVECCHIO(i915)) { + gt->steering_table[INSTANCE0] = pvc_instance0_steering_table; + } else if (IS_DG2(i915)) { gt->steering_table[MSLICE] = xehpsdv_mslice_steering_table; gt->steering_table[LNCF] = dg2_lncf_steering_table; } else if (IS_XEHPSDV(i915)) { @@ -172,7 +196,11 @@ int intel_gt_init_mmio(struct intel_gt *gt) GEN10_L3BANK_MASK; if (!gt->info.l3bank_mask) /* should be impossible! */ drm_warn(&i915->drm, "L3 bank mask is all zero!\n"); - } else if (HAS_MSLICES(i915)) { + } else if (GRAPHICS_VER(i915) >= 11) { + /* + * We expect all modern platforms to have at least some + * type of steering that needs to be initialized. 
+ */ MISSING_CASE(INTEL_INFO(i915)->platform); } @@ -888,7 +916,7 @@ static void intel_gt_get_valid_steering(struct intel_gt *gt, *subsliceid = __ffs(gt->info.l3bank_mask); break; case MSLICE: - GEM_WARN_ON(!HAS_MSLICES(gt->i915)); + GEM_WARN_ON(!HAS_MSLICE_STEERING(gt->i915)); *sliceid = __ffs(gt->info.mslice_mask); *subsliceid = 0; /* unused */ break; @@ -897,10 +925,18 @@ static void intel_gt_get_valid_steering(struct intel_gt *gt, * An LNCF is always present if its mslice is present, so we * can safely just steer to LNCF 0 in all cases. */ - GEM_WARN_ON(!HAS_MSLICES(gt->i915)); + GEM_WARN_ON(!HAS_MSLICE_STEERING(gt->i915)); *sliceid = __ffs(gt->info.mslice_mask) << 1; *subsliceid = 0; /* unused */ break; + case INSTANCE0: + /* + * There are a lot of MCR types for which instance (0, 0) + * will always provide a non-terminated value. + */ + *sliceid = 0; + *subsliceid = 0; + break; default: MISSING_CASE(type); *sliceid = 0; @@ -1020,7 +1056,9 @@ void intel_gt_report_steering(struct drm_printer *p, struct intel_gt *gt, gt->default_steering.groupid, gt->default_steering.instanceid); - if (HAS_MSLICES(gt->i915)) { + if (IS_PONTEVECCHIO(gt->i915)) { + report_steering_type(p, gt, INSTANCE0, dump_table); + } else if (HAS_MSLICE_STEERING(gt->i915)) { report_steering_type(p, gt, MSLICE, dump_table); report_steering_type(p, gt, LNCF, dump_table); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index 993f003dad1d..df708802889d 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -59,6 +59,13 @@ enum intel_steering_type { MSLICE, LNCF, + /* + * On some platforms there are multiple types of MCR registers that + * will always return a non-terminated value at instance (0, 0). We'll + * lump those all into a single category to keep things simple. + */ + INSTANCE0, + NUM_STEERING_TYPES }; diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 1ee54a83e459..1e982ac931dc 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1195,6 +1195,20 @@ xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal) __set_mcr_steering(wal, SF_MCR_SELECTOR, 0, 2); } +static void +pvc_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal) +{ + unsigned int dss; + + /* + * Setup implicit steering for COMPUTE and DSS ranges to the first + * non-fused-off DSS. All other types of MCR registers will be + * explicitly steered. 
+ */ + dss = intel_sseu_find_first_xehp_dss(>->info.sseu, 0, 0); + __add_mcr_wa(gt, wal, dss / GEN_DSS_PER_CSLICE, dss % GEN_DSS_PER_CSLICE); +} + static void icl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { @@ -1494,6 +1508,8 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) static void pvc_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { + pvc_init_mcr(gt, wal); + /* Wa_14015795083 */ wa_write_clr(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE); } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index fbea4d1ede7c..59cd888209fe 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1346,8 +1346,7 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_RUNTIME_PM(dev_priv) (INTEL_INFO(dev_priv)->has_runtime_pm) #define HAS_64BIT_RELOC(dev_priv) (INTEL_INFO(dev_priv)->has_64bit_reloc) -#define HAS_MSLICES(dev_priv) \ - (INTEL_INFO(dev_priv)->has_mslices) +#define HAS_MSLICE_STEERING(dev_priv) (INTEL_INFO(dev_priv)->has_mslice_steering) /* * Set this flag, when platform requires 64K GTT page sizes or larger for diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index b8244f72e432..d6d875b2d379 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -1021,7 +1021,7 @@ static const struct intel_device_info adl_p_info = { .has_llc = 1, \ .has_logical_ring_contexts = 1, \ .has_logical_ring_elsq = 1, \ - .has_mslices = 1, \ + .has_mslice_steering = 1, \ .has_rc6 = 1, \ .has_reset_engine = 1, \ .has_rps = 1, \ @@ -1092,6 +1092,7 @@ static const struct intel_device_info ats_m_info = { .has_3d_pipeline = 0, \ .has_guc_deprivilege = 1, \ .has_l3_ccs_read = 1, \ + .has_mslice_steering = 0, \ .has_one_eu_per_fuse_bit = 1 __maybe_unused diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 346f17f2dce8..08341174ee0a 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -157,7 +157,7 @@ enum intel_ppgtt_type { func(has_logical_ring_contexts); \ func(has_logical_ring_elsq); \ func(has_media_ratio_mode); \ - func(has_mslices); \ + func(has_mslice_steering); \ func(has_one_eu_per_fuse_bit); \ func(has_pooled_eu); \ func(has_pxp); \ -- cgit v1.2.3 From 9affc1b87ecba31458567359b5a28b0b08920a24 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 10 Jun 2022 16:08:01 -0700 Subject: drm/i915/pvc: Adjust EU per SS according to HAS_ONE_EU_PER_FUSE_BIT() If we're treating each bit in the EU fuse register as a single EU instead of a pair of EUs, then that also cuts the number of potential EUs per subslice in half. Fixes: 5ac342ef84d7 ("drm/i915/pvc: Add SSEU changes") Signed-off-by: Matt Roper Reviewed-by: Anusha Srivatsa Link: https://patchwork.freedesktop.org/patch/msgid/20220610230801.459577-1-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_sseu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index 7ef75f0d9c9e..c6d3050604c8 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -229,7 +229,7 @@ static void xehp_sseu_info_init(struct intel_gt *gt) */ intel_sseu_set_info(sseu, 1, 32 * max(num_geometry_regs, num_compute_regs), - 16); + HAS_ONE_EU_PER_FUSE_BIT(gt->i915) ? 
8 : 16); sseu->has_xehp_dss = 1; xehp_load_dss_mask(uncore, &sseu->geometry_subslice_mask, -- cgit v1.2.3 From 1556c3b4c7ed2c8f17f200d53897251fc68b7377 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 13 Jun 2022 09:53:14 -0700 Subject: drm/i915/pvc: Add recommended MMIO setting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As with past platforms, the bspec's performance tuning guide provides recommended MMIO settings. Although not technically "workarounds" we apply these through the workaround framework to ensure that they're re-applied at the proper times (e.g., on engine resets) and that any conflicts with real workarounds are flagged. Bspec: 72161 Signed-off-by: Matt Roper Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20220613165314.862029-1-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 5 +++++ drivers/gpu/drm/i915/gt/intel_workarounds.c | 9 +++++++++ 2 files changed, 14 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 226557018037..07ef111947b8 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -981,6 +981,11 @@ #define XEHP_L3SCQREG7 _MMIO(0xb188) #define BLEND_FILL_CACHING_OPT_DIS REG_BIT(3) +#define XEHPC_L3SCRUB _MMIO(0xb18c) +#define SCRUB_CL_DWNGRADE_SHARED REG_BIT(12) +#define SCRUB_RATE_PER_BANK_MASK REG_GENMASK(2, 0) +#define SCRUB_RATE_4B_PER_CLK REG_FIELD_PREP(SCRUB_RATE_PER_BANK_MASK, 0x6) + #define L3SQCREG1_CCS0 _MMIO(0xb200) #define FLUSHALLNONCOH REG_BIT(5) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 1e982ac931dc..c4af51144216 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -2679,6 +2679,15 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li { struct drm_i915_private *i915 = engine->i915; + if (IS_PONTEVECCHIO(i915)) { + /* + * The following is not actually a "workaround" but rather + * a recommended tuning setting documented in the bspec's + * performance guide section. + */ + wa_write(wal, XEHPC_L3SCRUB, SCRUB_CL_DWNGRADE_SHARED | SCRUB_RATE_4B_PER_CLK); + } + if (IS_XEHPSDV(i915)) { /* Wa_1409954639 */ wa_masked_en(wal, -- cgit v1.2.3 From 45c64ecf97ee370bbdbd8eed7aed9c8ff5d1b0dd Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 27 May 2022 08:24:52 +0100 Subject: drm/i915: Improve user experience and driver robustness under SIGINT or similar We have long standing customer complaints that pressing Ctrl-C (or to the effect of) causes engine resets with otherwise well behaving programs. Not only is logging engine resets during normal operation not desirable since it creates support incidents, but more fundamentally we should avoid going the engine reset path when we can since any engine reset introduces a chance of harming an innocent context. Reason for this undesirable behaviour is that the driver currently does not distinguish between banned contexts and non-persistent contexts which have been closed. To fix this we add the distinction between the two reasons for revoking contexts, which then allows the strict timeout only be applied to banned, while innocent contexts (well behaving) can preempt cleanly and exit without triggering the engine reset path. 
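The resulting behaviour can be summarised with a toy model (the flag names and the non-banned timeout value below are illustrative; the patch reuses the engine's configured preemption timeout for exiting contexts and keeps the 1 ms fast path only for banned ones):

  #include <stdio.h>
  #include <stdbool.h>

  /* Toy model of the two revocation reasons.  Only the 1 ms banned timeout
   * mirrors the patch; the other value is illustrative. */
  enum { TIMEOUT_BANNED_MS = 1, TIMEOUT_ENGINE_DEFAULT_MS = 640 };

  struct ctx {
          bool banned;   /* misbehaved: get it off the hardware fast */
          bool exiting;  /* innocent but closed: let it preempt cleanly */
  };

  static bool schedulable(const struct ctx *c)
  {
          return !c->banned && !c->exiting;
  }

  static int preempt_timeout_ms(const struct ctx *c)
  {
          return c->banned ? TIMEOUT_BANNED_MS : TIMEOUT_ENGINE_DEFAULT_MS;
  }

  int main(void)
  {
          struct ctx banned = { .banned = true };
          struct ctx closed = { .exiting = true };

          printf("banned:  schedulable=%d preempt=%dms\n",
                 schedulable(&banned), preempt_timeout_ms(&banned));
          printf("exiting: schedulable=%d preempt=%dms\n",
                 schedulable(&closed), preempt_timeout_ms(&closed));
          return 0;
  }
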
Note that the added context exiting category applies both to closed non- persistent context, and any exiting context when hangcheck has been disabled by the user. At the same time we rename the backend operation from 'ban' to 'revoke' which more accurately describes the actual semantics. (There is no ban at the backend level since banning is a concept driven by the scheduling frontend. Backends are simply able to revoke a running context so that is the more appropriate name chosen.) Signed-off-by: Tvrtko Ursulin Reviewed-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/20220527072452.2225610-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 23 +++++++++++++------- drivers/gpu/drm/i915/gt/intel_context.c | 24 +++++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_context.h | 25 ++++++++++++++++------ drivers/gpu/drm/i915/gt/intel_context_types.h | 4 +++- .../gpu/drm/i915/gt/intel_execlists_submission.c | 6 +++--- drivers/gpu/drm/i915/gt/intel_ring_submission.c | 7 +++--- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 15 +++++++------ drivers/gpu/drm/i915/i915_request.c | 2 +- 8 files changed, 77 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index a3bb73f5d53b..e116f82fc37c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1367,7 +1367,8 @@ static struct intel_engine_cs *active_engine(struct intel_context *ce) return engine; } -static void kill_engines(struct i915_gem_engines *engines, bool ban) +static void +kill_engines(struct i915_gem_engines *engines, bool exit, bool persistent) { struct i915_gem_engines_iter it; struct intel_context *ce; @@ -1381,9 +1382,15 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban) */ for_each_gem_engine(ce, engines, it) { struct intel_engine_cs *engine; + bool skip = false; - if (ban && intel_context_ban(ce, NULL)) - continue; + if (exit) + skip = intel_context_set_exiting(ce); + else if (!persistent) + skip = intel_context_exit_nonpersistent(ce, NULL); + + if (skip) + continue; /* Already marked. 
*/ /* * Check the current active state of this context; if we @@ -1395,7 +1402,7 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban) engine = active_engine(ce); /* First attempt to gracefully cancel the context */ - if (engine && !__cancel_engine(engine) && ban) + if (engine && !__cancel_engine(engine) && (exit || !persistent)) /* * If we are unable to send a preemptive pulse to bump * the context from the GPU, we have to resort to a full @@ -1407,8 +1414,6 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban) static void kill_context(struct i915_gem_context *ctx) { - bool ban = (!i915_gem_context_is_persistent(ctx) || - !ctx->i915->params.enable_hangcheck); struct i915_gem_engines *pos, *next; spin_lock_irq(&ctx->stale.lock); @@ -1421,7 +1426,8 @@ static void kill_context(struct i915_gem_context *ctx) spin_unlock_irq(&ctx->stale.lock); - kill_engines(pos, ban); + kill_engines(pos, !ctx->i915->params.enable_hangcheck, + i915_gem_context_is_persistent(ctx)); spin_lock_irq(&ctx->stale.lock); GEM_BUG_ON(i915_sw_fence_signaled(&pos->fence)); @@ -1467,7 +1473,8 @@ static void engines_idle_release(struct i915_gem_context *ctx, kill: if (list_empty(&engines->link)) /* raced, already closed */ - kill_engines(engines, true); + kill_engines(engines, true, + i915_gem_context_is_persistent(ctx)); i915_sw_fence_commit(&engines->fence); } diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 4070cb5711d8..654a092ed3d6 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -601,6 +601,30 @@ u64 intel_context_get_avg_runtime_ns(struct intel_context *ce) return avg; } +bool intel_context_ban(struct intel_context *ce, struct i915_request *rq) +{ + bool ret = intel_context_set_banned(ce); + + trace_intel_context_ban(ce); + + if (ce->ops->revoke) + ce->ops->revoke(ce, rq, + INTEL_CONTEXT_BANNED_PREEMPT_TIMEOUT_MS); + + return ret; +} + +bool intel_context_exit_nonpersistent(struct intel_context *ce, + struct i915_request *rq) +{ + bool ret = intel_context_set_exiting(ce); + + if (ce->ops->revoke) + ce->ops->revoke(ce, rq, ce->engine->props.preempt_timeout_ms); + + return ret; +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftest_context.c" #endif diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index b7d3214d2cdd..8e2d70630c49 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -25,6 +25,8 @@ ##__VA_ARGS__); \ } while (0) +#define INTEL_CONTEXT_BANNED_PREEMPT_TIMEOUT_MS (1) + struct i915_gem_ww_ctx; void intel_context_init(struct intel_context *ce, @@ -309,18 +311,27 @@ static inline bool intel_context_set_banned(struct intel_context *ce) return test_and_set_bit(CONTEXT_BANNED, &ce->flags); } -static inline bool intel_context_ban(struct intel_context *ce, - struct i915_request *rq) +bool intel_context_ban(struct intel_context *ce, struct i915_request *rq); + +static inline bool intel_context_is_schedulable(const struct intel_context *ce) { - bool ret = intel_context_set_banned(ce); + return !test_bit(CONTEXT_EXITING, &ce->flags) && + !test_bit(CONTEXT_BANNED, &ce->flags); +} - trace_intel_context_ban(ce); - if (ce->ops->ban) - ce->ops->ban(ce, rq); +static inline bool intel_context_is_exiting(const struct intel_context *ce) +{ + return test_bit(CONTEXT_EXITING, &ce->flags); +} - return ret; +static inline bool intel_context_set_exiting(struct intel_context *ce) +{ + return 
test_and_set_bit(CONTEXT_EXITING, &ce->flags); } +bool intel_context_exit_nonpersistent(struct intel_context *ce, + struct i915_request *rq); + static inline bool intel_context_force_single_submission(const struct intel_context *ce) { diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 09f82545789f..d2d75d9c0c8d 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -40,7 +40,8 @@ struct intel_context_ops { int (*alloc)(struct intel_context *ce); - void (*ban)(struct intel_context *ce, struct i915_request *rq); + void (*revoke)(struct intel_context *ce, struct i915_request *rq, + unsigned int preempt_timeout_ms); int (*pre_pin)(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void **vaddr); int (*pin)(struct intel_context *ce, void *vaddr); @@ -122,6 +123,7 @@ struct intel_context { #define CONTEXT_GUC_INIT 10 #define CONTEXT_PERMA_PIN 11 #define CONTEXT_IS_PARKING 12 +#define CONTEXT_EXITING 13 struct { u64 timeout_us; diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 2b0266cab66b..98f1c7258cc0 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -480,9 +480,9 @@ __execlists_schedule_in(struct i915_request *rq) if (unlikely(intel_context_is_closed(ce) && !intel_engine_has_heartbeat(engine))) - intel_context_set_banned(ce); + intel_context_set_exiting(ce); - if (unlikely(intel_context_is_banned(ce) || bad_request(rq))) + if (unlikely(!intel_context_is_schedulable(ce) || bad_request(rq))) reset_active(rq, engine); if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) @@ -1243,7 +1243,7 @@ static unsigned long active_preempt_timeout(struct intel_engine_cs *engine, /* Force a fast reset for terminated contexts (ignoring sysfs!) 
*/ if (unlikely(intel_context_is_banned(rq->context) || bad_request(rq))) - return 1; + return INTEL_CONTEXT_BANNED_PREEMPT_TIMEOUT_MS; return READ_ONCE(engine->props.preempt_timeout_ms); } diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index f8f279a195c0..d5d6f1fadcae 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -598,8 +598,9 @@ static void ring_context_reset(struct intel_context *ce) clear_bit(CONTEXT_VALID_BIT, &ce->flags); } -static void ring_context_ban(struct intel_context *ce, - struct i915_request *rq) +static void ring_context_revoke(struct intel_context *ce, + struct i915_request *rq, + unsigned int preempt_timeout_ms) { struct intel_engine_cs *engine; @@ -634,7 +635,7 @@ static const struct intel_context_ops ring_context_ops = { .cancel_request = ring_context_cancel_request, - .ban = ring_context_ban, + .revoke = ring_context_revoke, .pre_pin = ring_context_pre_pin, .pin = ring_context_pin, diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 5a1dfacf24ea..e62ea35513ea 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -2790,7 +2790,9 @@ static void __guc_context_set_preemption_timeout(struct intel_guc *guc, __guc_context_set_context_policies(guc, &policy, true); } -static void guc_context_ban(struct intel_context *ce, struct i915_request *rq) +static void +guc_context_revoke(struct intel_context *ce, struct i915_request *rq, + unsigned int preempt_timeout_ms) { struct intel_guc *guc = ce_to_guc(ce); struct intel_runtime_pm *runtime_pm = @@ -2829,7 +2831,8 @@ static void guc_context_ban(struct intel_context *ce, struct i915_request *rq) * gets kicked off the HW ASAP. 
*/ with_intel_runtime_pm(runtime_pm, wakeref) { - __guc_context_set_preemption_timeout(guc, guc_id, 1); + __guc_context_set_preemption_timeout(guc, guc_id, + preempt_timeout_ms); __guc_context_sched_disable(guc, ce, guc_id); } } else { @@ -2837,7 +2840,7 @@ static void guc_context_ban(struct intel_context *ce, struct i915_request *rq) with_intel_runtime_pm(runtime_pm, wakeref) __guc_context_set_preemption_timeout(guc, ce->guc_id.id, - 1); + preempt_timeout_ms); spin_unlock_irqrestore(&ce->guc_state.lock, flags); } } @@ -3190,7 +3193,7 @@ static const struct intel_context_ops guc_context_ops = { .unpin = guc_context_unpin, .post_unpin = guc_context_post_unpin, - .ban = guc_context_ban, + .revoke = guc_context_revoke, .cancel_request = guc_context_cancel_request, @@ -3439,7 +3442,7 @@ static const struct intel_context_ops virtual_guc_context_ops = { .unpin = guc_virtual_context_unpin, .post_unpin = guc_context_post_unpin, - .ban = guc_context_ban, + .revoke = guc_context_revoke, .cancel_request = guc_context_cancel_request, @@ -3528,7 +3531,7 @@ static const struct intel_context_ops virtual_parent_context_ops = { .unpin = guc_parent_context_unpin, .post_unpin = guc_context_post_unpin, - .ban = guc_context_ban, + .revoke = guc_context_revoke, .cancel_request = guc_context_cancel_request, diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 73d5195146b0..c3937640b119 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -611,7 +611,7 @@ bool __i915_request_submit(struct i915_request *request) goto active; } - if (unlikely(intel_context_is_banned(request->context))) + if (unlikely(!intel_context_is_schedulable(request->context))) i915_request_set_error_once(request, -EIO); if (unlikely(fatal_error(request->fence.error))) -- cgit v1.2.3 From 9f1b1d0b2242171b2891a0398def233801601c14 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 16 Jun 2022 15:00:56 +0100 Subject: drm/i915/fdinfo: Don't show engine classes not present Stop displaying engine classes with no engines - it is not a huge problem if they are shown, since the values will correctly be all zeroes, but it does count as misleading. Signed-off-by: Tvrtko Ursulin Fixes: 055634e4b62f ("drm/i915: Expose client engine utilisation via fdinfo") Cc: Umesh Nerlige Ramappa Reviewed-by: Umesh Nerlige Ramappa Link: https://patchwork.freedesktop.org/patch/msgid/20220616140056.559074-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_drm_client.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drm_client.c b/drivers/gpu/drm/i915/i915_drm_client.c index 18d38cb59923..b09d1d386574 100644 --- a/drivers/gpu/drm/i915/i915_drm_client.c +++ b/drivers/gpu/drm/i915/i915_drm_client.c @@ -116,8 +116,9 @@ show_client_class(struct seq_file *m, total += busy_add(ctx, class); rcu_read_unlock(); - seq_printf(m, "drm-engine-%s:\t%llu ns\n", - uabi_class_names[class], total); + if (capacity) + seq_printf(m, "drm-engine-%s:\t%llu ns\n", + uabi_class_names[class], total); if (capacity > 1) seq_printf(m, "drm-engine-capacity-%s:\t%u\n", -- cgit v1.2.3 From e7858254f9af9ad4f1570d781666e3af4c298a88 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 14 Jun 2022 17:10:18 -0700 Subject: drm/i915/gt: Move multicast register handling to a dedicated file Handling of multicast/replicated registers is spread across intel_gt.c and intel_uncore.c today. 
As multicast handling and the related steering logic gets more complicated with the addition of new platforms and new rules it makes sense to centralize it all in one place. For now the existing functions have been moved to the new .c/.h as-is. Function renames and updates to operate in a more consistent manner will be done in subsequent patches. Signed-off-by: Matt Roper Acked-by: Jani Nikula Reviewed-by: Harish Chegondi Link: https://patchwork.freedesktop.org/patch/msgid/20220615001019.1821989-2-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 1 + drivers/gpu/drm/i915/gt/intel_engine_cs.c | 3 +- drivers/gpu/drm/i915/gt/intel_gt.c | 297 +----------------- drivers/gpu/drm/i915/gt/intel_gt.h | 15 - drivers/gpu/drm/i915/gt/intel_gt_debugfs.c | 1 + drivers/gpu/drm/i915/gt/intel_gt_mcr.c | 448 ++++++++++++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_gt_mcr.h | 37 +++ drivers/gpu/drm/i915/gt/intel_region_lmem.c | 1 + drivers/gpu/drm/i915/gt/intel_workarounds.c | 1 + drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 1 + drivers/gpu/drm/i915/i915_drv.h | 2 - drivers/gpu/drm/i915/intel_uncore.c | 112 ------- drivers/gpu/drm/i915/intel_uncore.h | 8 - 14 files changed, 495 insertions(+), 433 deletions(-) create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_mcr.c create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_mcr.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index d2b18f03a33c..08f5d0d6e83a 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -103,6 +103,7 @@ gt-y += \ gt/intel_gt_debugfs.o \ gt/intel_gt_engines_debugfs.o \ gt/intel_gt_irq.o \ + gt/intel_gt_mcr.o \ gt/intel_gt_pm.o \ gt/intel_gt_pm_debugfs.o \ gt/intel_gt_pm_irq.o \ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index 47b5e0e342ab..da30503d3ca2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -13,6 +13,7 @@ #include "gem/i915_gem_lmem.h" #include "gem/i915_gem_region.h" #include "gt/intel_gt.h" +#include "gt/intel_gt_mcr.h" #include "gt/intel_region_lmem.h" #include "i915_drv.h" #include "i915_gem_stolen.h" diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index f0acf8518a51..244af1bdb7db 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -21,8 +21,9 @@ #include "intel_engine_user.h" #include "intel_execlists_submission.h" #include "intel_gt.h" -#include "intel_gt_requests.h" +#include "intel_gt_mcr.h" #include "intel_gt_pm.h" +#include "intel_gt_requests.h" #include "intel_lrc.h" #include "intel_lrc_reg.h" #include "intel_reset.h" diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index f33290358c51..be9877c4b496 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -17,6 +17,7 @@ #include "intel_gt_clock_utils.h" #include "intel_gt_debugfs.h" #include "intel_gt_gmch.h" +#include "intel_gt_mcr.h" #include "intel_gt_pm.h" #include "intel_gt_regs.h" #include "intel_gt_requests.h" @@ -102,107 +103,13 @@ int intel_gt_assign_ggtt(struct intel_gt *gt) return gt->ggtt ? 
0 : -ENOMEM; } -static const char * const intel_steering_types[] = { - "L3BANK", - "MSLICE", - "LNCF", - "INSTANCE 0", -}; - -static const struct intel_mmio_range icl_l3bank_steering_table[] = { - { 0x00B100, 0x00B3FF }, - {}, -}; - -static const struct intel_mmio_range xehpsdv_mslice_steering_table[] = { - { 0x004000, 0x004AFF }, - { 0x00C800, 0x00CFFF }, - { 0x00DD00, 0x00DDFF }, - { 0x00E900, 0x00FFFF }, /* 0xEA00 - OxEFFF is unused */ - {}, -}; - -static const struct intel_mmio_range xehpsdv_lncf_steering_table[] = { - { 0x00B000, 0x00B0FF }, - { 0x00D800, 0x00D8FF }, - {}, -}; - -static const struct intel_mmio_range dg2_lncf_steering_table[] = { - { 0x00B000, 0x00B0FF }, - { 0x00D880, 0x00D8FF }, - {}, -}; - -/* - * We have several types of MCR registers on PVC where steering to (0,0) - * will always provide us with a non-terminated value. We'll stick them - * all in the same table for simplicity. - */ -static const struct intel_mmio_range pvc_instance0_steering_table[] = { - { 0x004000, 0x004AFF }, /* HALF-BSLICE */ - { 0x008800, 0x00887F }, /* CC */ - { 0x008A80, 0x008AFF }, /* TILEPSMI */ - { 0x00B000, 0x00B0FF }, /* HALF-BSLICE */ - { 0x00B100, 0x00B3FF }, /* L3BANK */ - { 0x00C800, 0x00CFFF }, /* HALF-BSLICE */ - { 0x00D800, 0x00D8FF }, /* HALF-BSLICE */ - { 0x00DD00, 0x00DDFF }, /* BSLICE */ - { 0x00E900, 0x00E9FF }, /* HALF-BSLICE */ - { 0x00EC00, 0x00EEFF }, /* HALF-BSLICE */ - { 0x00F000, 0x00FFFF }, /* HALF-BSLICE */ - { 0x024180, 0x0241FF }, /* HALF-BSLICE */ - {}, -}; - int intel_gt_init_mmio(struct intel_gt *gt) { - struct drm_i915_private *i915 = gt->i915; - intel_gt_init_clock_frequency(gt); intel_uc_init_mmio(>->uc); intel_sseu_info_init(gt); - - /* - * An mslice is unavailable only if both the meml3 for the slice is - * disabled *and* all of the DSS in the slice (quadrant) are disabled. - */ - if (HAS_MSLICE_STEERING(i915)) { - gt->info.mslice_mask = - intel_slicemask_from_xehp_dssmask(gt->info.sseu.subslice_mask, - GEN_DSS_PER_MSLICE); - gt->info.mslice_mask |= - (intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3) & - GEN12_MEML3_EN_MASK); - - if (!gt->info.mslice_mask) /* should be impossible! */ - drm_warn(&i915->drm, "mslice mask all zero!\n"); - } - - if (IS_PONTEVECCHIO(i915)) { - gt->steering_table[INSTANCE0] = pvc_instance0_steering_table; - } else if (IS_DG2(i915)) { - gt->steering_table[MSLICE] = xehpsdv_mslice_steering_table; - gt->steering_table[LNCF] = dg2_lncf_steering_table; - } else if (IS_XEHPSDV(i915)) { - gt->steering_table[MSLICE] = xehpsdv_mslice_steering_table; - gt->steering_table[LNCF] = xehpsdv_lncf_steering_table; - } else if (GRAPHICS_VER(i915) >= 11 && - GRAPHICS_VER_FULL(i915) < IP_VER(12, 50)) { - gt->steering_table[L3BANK] = icl_l3bank_steering_table; - gt->info.l3bank_mask = - ~intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3) & - GEN10_L3BANK_MASK; - if (!gt->info.l3bank_mask) /* should be impossible! */ - drm_warn(&i915->drm, "L3 bank mask is all zero!\n"); - } else if (GRAPHICS_VER(i915) >= 11) { - /* - * We expect all modern platforms to have at least some - * type of steering that needs to be initialized. 
- */ - MISSING_CASE(INTEL_INFO(i915)->platform); - } + intel_gt_mcr_init(gt); return intel_engines_init_mmio(gt); } @@ -864,206 +771,6 @@ void intel_gt_driver_late_release_all(struct drm_i915_private *i915) } } -/** - * intel_gt_reg_needs_read_steering - determine whether a register read - * requires explicit steering - * @gt: GT structure - * @reg: the register to check steering requirements for - * @type: type of multicast steering to check - * - * Determines whether @reg needs explicit steering of a specific type for - * reads. - * - * Returns false if @reg does not belong to a register range of the given - * steering type, or if the default (subslice-based) steering IDs are suitable - * for @type steering too. - */ -static bool intel_gt_reg_needs_read_steering(struct intel_gt *gt, - i915_reg_t reg, - enum intel_steering_type type) -{ - const u32 offset = i915_mmio_reg_offset(reg); - const struct intel_mmio_range *entry; - - if (likely(!intel_gt_needs_read_steering(gt, type))) - return false; - - for (entry = gt->steering_table[type]; entry->end; entry++) { - if (offset >= entry->start && offset <= entry->end) - return true; - } - - return false; -} - -/** - * intel_gt_get_valid_steering - determines valid IDs for a class of MCR steering - * @gt: GT structure - * @type: multicast register type - * @sliceid: Slice ID returned - * @subsliceid: Subslice ID returned - * - * Determines sliceid and subsliceid values that will steer reads - * of a specific multicast register class to a valid value. - */ -static void intel_gt_get_valid_steering(struct intel_gt *gt, - enum intel_steering_type type, - u8 *sliceid, u8 *subsliceid) -{ - switch (type) { - case L3BANK: - *sliceid = 0; /* unused */ - *subsliceid = __ffs(gt->info.l3bank_mask); - break; - case MSLICE: - GEM_WARN_ON(!HAS_MSLICE_STEERING(gt->i915)); - *sliceid = __ffs(gt->info.mslice_mask); - *subsliceid = 0; /* unused */ - break; - case LNCF: - /* - * An LNCF is always present if its mslice is present, so we - * can safely just steer to LNCF 0 in all cases. - */ - GEM_WARN_ON(!HAS_MSLICE_STEERING(gt->i915)); - *sliceid = __ffs(gt->info.mslice_mask) << 1; - *subsliceid = 0; /* unused */ - break; - case INSTANCE0: - /* - * There are a lot of MCR types for which instance (0, 0) - * will always provide a non-terminated value. - */ - *sliceid = 0; - *subsliceid = 0; - break; - default: - MISSING_CASE(type); - *sliceid = 0; - *subsliceid = 0; - } -} - -/** - * intel_gt_read_register_fw - reads a GT register with support for multicast - * @gt: GT structure - * @reg: register to read - * - * This function will read a GT register. If the register is a multicast - * register, the read will be steered to a valid instance (i.e., one that - * isn't fused off or powered down by power gating). - * - * Returns the value from a valid instance of @reg. 
- */ -u32 intel_gt_read_register_fw(struct intel_gt *gt, i915_reg_t reg) -{ - int type; - u8 sliceid, subsliceid; - - for (type = 0; type < NUM_STEERING_TYPES; type++) { - if (intel_gt_reg_needs_read_steering(gt, reg, type)) { - intel_gt_get_valid_steering(gt, type, &sliceid, - &subsliceid); - return intel_uncore_read_with_mcr_steering_fw(gt->uncore, - reg, - sliceid, - subsliceid); - } - } - - return intel_uncore_read_fw(gt->uncore, reg); -} - -/** - * intel_gt_get_valid_steering_for_reg - get a valid steering for a register - * @gt: GT structure - * @reg: register for which the steering is required - * @sliceid: return variable for slice steering - * @subsliceid: return variable for subslice steering - * - * This function returns a slice/subslice pair that is guaranteed to work for - * read steering of the given register. Note that a value will be returned even - * if the register is not replicated and therefore does not actually require - * steering. - */ -void intel_gt_get_valid_steering_for_reg(struct intel_gt *gt, i915_reg_t reg, - u8 *sliceid, u8 *subsliceid) -{ - int type; - - for (type = 0; type < NUM_STEERING_TYPES; type++) { - if (intel_gt_reg_needs_read_steering(gt, reg, type)) { - intel_gt_get_valid_steering(gt, type, sliceid, - subsliceid); - return; - } - } - - *sliceid = gt->default_steering.groupid; - *subsliceid = gt->default_steering.instanceid; -} - -u32 intel_gt_read_register(struct intel_gt *gt, i915_reg_t reg) -{ - int type; - u8 sliceid, subsliceid; - - for (type = 0; type < NUM_STEERING_TYPES; type++) { - if (intel_gt_reg_needs_read_steering(gt, reg, type)) { - intel_gt_get_valid_steering(gt, type, &sliceid, - &subsliceid); - return intel_uncore_read_with_mcr_steering(gt->uncore, - reg, - sliceid, - subsliceid); - } - } - - return intel_uncore_read(gt->uncore, reg); -} - -static void report_steering_type(struct drm_printer *p, - struct intel_gt *gt, - enum intel_steering_type type, - bool dump_table) -{ - const struct intel_mmio_range *entry; - u8 slice, subslice; - - BUILD_BUG_ON(ARRAY_SIZE(intel_steering_types) != NUM_STEERING_TYPES); - - if (!gt->steering_table[type]) { - drm_printf(p, "%s steering: uses default steering\n", - intel_steering_types[type]); - return; - } - - intel_gt_get_valid_steering(gt, type, &slice, &subslice); - drm_printf(p, "%s steering: sliceid=0x%x, subsliceid=0x%x\n", - intel_steering_types[type], slice, subslice); - - if (!dump_table) - return; - - for (entry = gt->steering_table[type]; entry->end; entry++) - drm_printf(p, "\t0x%06x - 0x%06x\n", entry->start, entry->end); -} - -void intel_gt_report_steering(struct drm_printer *p, struct intel_gt *gt, - bool dump_table) -{ - drm_printf(p, "Default steering: sliceid=0x%x, subsliceid=0x%x\n", - gt->default_steering.groupid, - gt->default_steering.instanceid); - - if (IS_PONTEVECCHIO(gt->i915)) { - report_steering_type(p, gt, INSTANCE0, dump_table); - } else if (HAS_MSLICE_STEERING(gt->i915)) { - report_steering_type(p, gt, MSLICE, dump_table); - report_steering_type(p, gt, LNCF, dump_table); - } -} - static int intel_gt_tile_setup(struct intel_gt *gt, phys_addr_t phys_addr) { int ret; diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 44c6cb63ccbc..61d30d5c7e90 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -93,21 +93,6 @@ static inline bool intel_gt_is_wedged(const struct intel_gt *gt) return unlikely(test_bit(I915_WEDGED, >->reset.flags)); } -static inline bool intel_gt_needs_read_steering(struct 
intel_gt *gt, - enum intel_steering_type type) -{ - return gt->steering_table[type]; -} - -void intel_gt_get_valid_steering_for_reg(struct intel_gt *gt, i915_reg_t reg, - u8 *sliceid, u8 *subsliceid); - -u32 intel_gt_read_register_fw(struct intel_gt *gt, i915_reg_t reg); -u32 intel_gt_read_register(struct intel_gt *gt, i915_reg_t reg); - -void intel_gt_report_steering(struct drm_printer *p, struct intel_gt *gt, - bool dump_table); - int intel_gt_probe_all(struct drm_i915_private *i915); int intel_gt_tiles_init(struct drm_i915_private *i915); void intel_gt_release_all(struct drm_i915_private *i915); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c index d886fdc2c694..ea07f2bb846f 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c @@ -9,6 +9,7 @@ #include "intel_gt.h" #include "intel_gt_debugfs.h" #include "intel_gt_engines_debugfs.h" +#include "intel_gt_mcr.h" #include "intel_gt_pm_debugfs.h" #include "intel_sseu_debugfs.h" #include "pxp/intel_pxp_debugfs.h" diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c new file mode 100644 index 000000000000..1279a1fe1001 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c @@ -0,0 +1,448 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "i915_drv.h" + +#include "intel_gt_mcr.h" +#include "intel_gt_regs.h" + +/** + * DOC: GT Multicast/Replicated (MCR) Register Support + * + * Some GT registers are designed as "multicast" or "replicated" registers: + * multiple instances of the same register share a single MMIO offset. MCR + * registers are generally used when the hardware needs to potentially track + * independent values of a register per hardware unit (e.g., per-subslice, + * per-L3bank, etc.). The specific types of replication that exist vary + * per-platform. + * + * MMIO accesses to MCR registers are controlled according to the settings + * programmed in the platform's MCR_SELECTOR register(s). MMIO writes to MCR + * registers can be done in either a (i.e., a single write updates all + * instances of the register to the same value) or unicast (a write updates only + * one specific instance). Reads of MCR registers always operate in a unicast + * manner regardless of how the multicast/unicast bit is set in MCR_SELECTOR. + * Selection of a specific MCR instance for unicast operations is referred to + * as "steering." + * + * If MCR register operations are steered toward a hardware unit that is + * fused off or currently powered down due to power gating, the MMIO operation + * is "terminated" by the hardware. Terminated read operations will return a + * value of zero and terminated unicast write operations will be silently + * ignored. 
+ */ + +#define HAS_MSLICE_STEERING(dev_priv) (INTEL_INFO(dev_priv)->has_mslice_steering) + +static const char * const intel_steering_types[] = { + "L3BANK", + "MSLICE", + "LNCF", + "INSTANCE 0", +}; + +static const struct intel_mmio_range icl_l3bank_steering_table[] = { + { 0x00B100, 0x00B3FF }, + {}, +}; + +static const struct intel_mmio_range xehpsdv_mslice_steering_table[] = { + { 0x004000, 0x004AFF }, + { 0x00C800, 0x00CFFF }, + { 0x00DD00, 0x00DDFF }, + { 0x00E900, 0x00FFFF }, /* 0xEA00 - OxEFFF is unused */ + {}, +}; + +static const struct intel_mmio_range xehpsdv_lncf_steering_table[] = { + { 0x00B000, 0x00B0FF }, + { 0x00D800, 0x00D8FF }, + {}, +}; + +static const struct intel_mmio_range dg2_lncf_steering_table[] = { + { 0x00B000, 0x00B0FF }, + { 0x00D880, 0x00D8FF }, + {}, +}; + +/* + * We have several types of MCR registers on PVC where steering to (0,0) + * will always provide us with a non-terminated value. We'll stick them + * all in the same table for simplicity. + */ +static const struct intel_mmio_range pvc_instance0_steering_table[] = { + { 0x004000, 0x004AFF }, /* HALF-BSLICE */ + { 0x008800, 0x00887F }, /* CC */ + { 0x008A80, 0x008AFF }, /* TILEPSMI */ + { 0x00B000, 0x00B0FF }, /* HALF-BSLICE */ + { 0x00B100, 0x00B3FF }, /* L3BANK */ + { 0x00C800, 0x00CFFF }, /* HALF-BSLICE */ + { 0x00D800, 0x00D8FF }, /* HALF-BSLICE */ + { 0x00DD00, 0x00DDFF }, /* BSLICE */ + { 0x00E900, 0x00E9FF }, /* HALF-BSLICE */ + { 0x00EC00, 0x00EEFF }, /* HALF-BSLICE */ + { 0x00F000, 0x00FFFF }, /* HALF-BSLICE */ + { 0x024180, 0x0241FF }, /* HALF-BSLICE */ + {}, +}; + +void intel_gt_mcr_init(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + + /* + * An mslice is unavailable only if both the meml3 for the slice is + * disabled *and* all of the DSS in the slice (quadrant) are disabled. + */ + if (HAS_MSLICE_STEERING(i915)) { + gt->info.mslice_mask = + intel_slicemask_from_xehp_dssmask(gt->info.sseu.subslice_mask, + GEN_DSS_PER_MSLICE); + gt->info.mslice_mask |= + (intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3) & + GEN12_MEML3_EN_MASK); + + if (!gt->info.mslice_mask) /* should be impossible! */ + drm_warn(&i915->drm, "mslice mask all zero!\n"); + } + + if (IS_PONTEVECCHIO(i915)) { + gt->steering_table[INSTANCE0] = pvc_instance0_steering_table; + } else if (IS_DG2(i915)) { + gt->steering_table[MSLICE] = xehpsdv_mslice_steering_table; + gt->steering_table[LNCF] = dg2_lncf_steering_table; + } else if (IS_XEHPSDV(i915)) { + gt->steering_table[MSLICE] = xehpsdv_mslice_steering_table; + gt->steering_table[LNCF] = xehpsdv_lncf_steering_table; + } else if (GRAPHICS_VER(i915) >= 11 && + GRAPHICS_VER_FULL(i915) < IP_VER(12, 50)) { + gt->steering_table[L3BANK] = icl_l3bank_steering_table; + gt->info.l3bank_mask = + ~intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3) & + GEN10_L3BANK_MASK; + if (!gt->info.l3bank_mask) /* should be impossible! */ + drm_warn(&i915->drm, "L3 bank mask is all zero!\n"); + } else if (GRAPHICS_VER(i915) >= 11) { + /* + * We expect all modern platforms to have at least some + * type of steering that needs to be initialized. + */ + MISSING_CASE(INTEL_INFO(i915)->platform); + } +} + +/** + * uncore_rw_with_mcr_steering_fw - Access a register after programming + * the MCR selector register. 
+ * @uncore: pointer to struct intel_uncore + * @reg: register being accessed + * @rw_flag: FW_REG_READ for read access or FW_REG_WRITE for write access + * @slice: slice number (ignored for multi-cast write) + * @subslice: sub-slice number (ignored for multi-cast write) + * @value: register value to be written (ignored for read) + * + * Return: 0 for write access. register value for read access. + * + * Caller needs to make sure the relevant forcewake wells are up. + */ +static u32 uncore_rw_with_mcr_steering_fw(struct intel_uncore *uncore, + i915_reg_t reg, u8 rw_flag, + int slice, int subslice, u32 value) +{ + u32 mcr_mask, mcr_ss, mcr, old_mcr, val = 0; + + lockdep_assert_held(&uncore->lock); + + if (GRAPHICS_VER(uncore->i915) >= 11) { + mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK; + mcr_ss = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice); + + /* + * Wa_22013088509 + * + * The setting of the multicast/unicast bit usually wouldn't + * matter for read operations (which always return the value + * from a single register instance regardless of how that bit + * is set), but some platforms have a workaround requiring us + * to remain in multicast mode for reads. There's no real + * downside to this, so we'll just go ahead and do so on all + * platforms; we'll only clear the multicast bit from the mask + * when exlicitly doing a write operation. + */ + if (rw_flag == FW_REG_WRITE) + mcr_mask |= GEN11_MCR_MULTICAST; + } else { + mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK; + mcr_ss = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice); + } + + old_mcr = mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR); + + mcr &= ~mcr_mask; + mcr |= mcr_ss; + intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr); + + if (rw_flag == FW_REG_READ) + val = intel_uncore_read_fw(uncore, reg); + else + intel_uncore_write_fw(uncore, reg, value); + + mcr &= ~mcr_mask; + mcr |= old_mcr & mcr_mask; + + intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr); + + return val; +} + +static u32 uncore_rw_with_mcr_steering(struct intel_uncore *uncore, + i915_reg_t reg, u8 rw_flag, + int slice, int subslice, + u32 value) +{ + enum forcewake_domains fw_domains; + u32 val; + + fw_domains = intel_uncore_forcewake_for_reg(uncore, reg, + rw_flag); + fw_domains |= intel_uncore_forcewake_for_reg(uncore, + GEN8_MCR_SELECTOR, + FW_REG_READ | FW_REG_WRITE); + + spin_lock_irq(&uncore->lock); + intel_uncore_forcewake_get__locked(uncore, fw_domains); + + val = uncore_rw_with_mcr_steering_fw(uncore, reg, rw_flag, + slice, subslice, value); + + intel_uncore_forcewake_put__locked(uncore, fw_domains); + spin_unlock_irq(&uncore->lock); + + return val; +} + +u32 intel_uncore_read_with_mcr_steering_fw(struct intel_uncore *uncore, + i915_reg_t reg, int slice, int subslice) +{ + return uncore_rw_with_mcr_steering_fw(uncore, reg, FW_REG_READ, + slice, subslice, 0); +} + +u32 intel_uncore_read_with_mcr_steering(struct intel_uncore *uncore, + i915_reg_t reg, int slice, int subslice) +{ + return uncore_rw_with_mcr_steering(uncore, reg, FW_REG_READ, + slice, subslice, 0); +} + +void intel_uncore_write_with_mcr_steering(struct intel_uncore *uncore, + i915_reg_t reg, u32 value, + int slice, int subslice) +{ + uncore_rw_with_mcr_steering(uncore, reg, FW_REG_WRITE, + slice, subslice, value); +} + +/** + * intel_gt_reg_needs_read_steering - determine whether a register read + * requires explicit steering + * @gt: GT structure + * @reg: the register to check steering requirements for + * @type: type of multicast steering to 
check + * + * Determines whether @reg needs explicit steering of a specific type for + * reads. + * + * Returns false if @reg does not belong to a register range of the given + * steering type, or if the default (subslice-based) steering IDs are suitable + * for @type steering too. + */ +static bool intel_gt_reg_needs_read_steering(struct intel_gt *gt, + i915_reg_t reg, + enum intel_steering_type type) +{ + const u32 offset = i915_mmio_reg_offset(reg); + const struct intel_mmio_range *entry; + + if (likely(!intel_gt_needs_read_steering(gt, type))) + return false; + + for (entry = gt->steering_table[type]; entry->end; entry++) { + if (offset >= entry->start && offset <= entry->end) + return true; + } + + return false; +} + +/** + * intel_gt_get_valid_steering - determines valid IDs for a class of MCR steering + * @gt: GT structure + * @type: multicast register type + * @sliceid: Slice ID returned + * @subsliceid: Subslice ID returned + * + * Determines sliceid and subsliceid values that will steer reads + * of a specific multicast register class to a valid value. + */ +static void intel_gt_get_valid_steering(struct intel_gt *gt, + enum intel_steering_type type, + u8 *sliceid, u8 *subsliceid) +{ + switch (type) { + case L3BANK: + *sliceid = 0; /* unused */ + *subsliceid = __ffs(gt->info.l3bank_mask); + break; + case MSLICE: + GEM_WARN_ON(!HAS_MSLICE_STEERING(gt->i915)); + *sliceid = __ffs(gt->info.mslice_mask); + *subsliceid = 0; /* unused */ + break; + case LNCF: + /* + * An LNCF is always present if its mslice is present, so we + * can safely just steer to LNCF 0 in all cases. + */ + GEM_WARN_ON(!HAS_MSLICE_STEERING(gt->i915)); + *sliceid = __ffs(gt->info.mslice_mask) << 1; + *subsliceid = 0; /* unused */ + break; + case INSTANCE0: + /* + * There are a lot of MCR types for which instance (0, 0) + * will always provide a non-terminated value. + */ + *sliceid = 0; + *subsliceid = 0; + break; + default: + MISSING_CASE(type); + *sliceid = 0; + *subsliceid = 0; + } +} + +/** + * intel_gt_get_valid_steering_for_reg - get a valid steering for a register + * @gt: GT structure + * @reg: register for which the steering is required + * @sliceid: return variable for slice steering + * @subsliceid: return variable for subslice steering + * + * This function returns a slice/subslice pair that is guaranteed to work for + * read steering of the given register. Note that a value will be returned even + * if the register is not replicated and therefore does not actually require + * steering. + */ +void intel_gt_get_valid_steering_for_reg(struct intel_gt *gt, i915_reg_t reg, + u8 *sliceid, u8 *subsliceid) +{ + int type; + + for (type = 0; type < NUM_STEERING_TYPES; type++) { + if (intel_gt_reg_needs_read_steering(gt, reg, type)) { + intel_gt_get_valid_steering(gt, type, sliceid, + subsliceid); + return; + } + } + + *sliceid = gt->default_steering.groupid; + *subsliceid = gt->default_steering.instanceid; +} + +/** + * intel_gt_read_register_fw - reads a GT register with support for multicast + * @gt: GT structure + * @reg: register to read + * + * This function will read a GT register. If the register is a multicast + * register, the read will be steered to a valid instance (i.e., one that + * isn't fused off or powered down by power gating). + * + * Returns the value from a valid instance of @reg. 
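The steering-table lookup that backs these steered reads can be sketched in a few lines of standalone C. This is a simplified model rather than the driver implementation; the table contents and names below are made up for illustration.

#include <stdio.h>
#include <stdint.h>

struct mmio_range {
	uint32_t start, end;	/* end == 0 terminates a table */
};

enum steering_type { L3BANK, MSLICE, NUM_TYPES };

/* per-type range tables; a NULL table means default steering is already fine */
static const struct mmio_range l3bank_ranges[] = {
	{ 0x00B100, 0x00B3FF },
	{ 0, 0 },
};

static const struct mmio_range *steering_table[NUM_TYPES] = {
	[L3BANK] = l3bank_ranges,
	[MSLICE] = NULL,
};

static int reg_needs_steering(uint32_t offset, enum steering_type type)
{
	const struct mmio_range *entry = steering_table[type];

	if (!entry)
		return 0;
	for (; entry->end; entry++)
		if (offset >= entry->start && offset <= entry->end)
			return 1;
	return 0;
}

int main(void)
{
	const uint32_t regs[] = { 0x00B200, 0x004000 };

	for (unsigned int i = 0; i < sizeof(regs) / sizeof(regs[0]); i++) {
		int type, steered = 0;

		for (type = 0; type < NUM_TYPES; type++) {
			if (reg_needs_steering(regs[i], type)) {
				printf("0x%06x needs type-%d steering\n",
				       (unsigned int)regs[i], type);
				steered = 1;
				break;
			}
		}
		if (!steered)
			printf("0x%06x can use the default steering\n",
			       (unsigned int)regs[i]);
	}
	return 0;
}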
+ */ +u32 intel_gt_read_register_fw(struct intel_gt *gt, i915_reg_t reg) +{ + int type; + u8 sliceid, subsliceid; + + for (type = 0; type < NUM_STEERING_TYPES; type++) { + if (intel_gt_reg_needs_read_steering(gt, reg, type)) { + intel_gt_get_valid_steering(gt, type, &sliceid, + &subsliceid); + return intel_uncore_read_with_mcr_steering_fw(gt->uncore, + reg, + sliceid, + subsliceid); + } + } + + return intel_uncore_read_fw(gt->uncore, reg); +} + +u32 intel_gt_read_register(struct intel_gt *gt, i915_reg_t reg) +{ + int type; + u8 sliceid, subsliceid; + + for (type = 0; type < NUM_STEERING_TYPES; type++) { + if (intel_gt_reg_needs_read_steering(gt, reg, type)) { + intel_gt_get_valid_steering(gt, type, &sliceid, + &subsliceid); + return intel_uncore_read_with_mcr_steering(gt->uncore, + reg, + sliceid, + subsliceid); + } + } + + return intel_uncore_read(gt->uncore, reg); +} + +static void report_steering_type(struct drm_printer *p, + struct intel_gt *gt, + enum intel_steering_type type, + bool dump_table) +{ + const struct intel_mmio_range *entry; + u8 slice, subslice; + + BUILD_BUG_ON(ARRAY_SIZE(intel_steering_types) != NUM_STEERING_TYPES); + + if (!gt->steering_table[type]) { + drm_printf(p, "%s steering: uses default steering\n", + intel_steering_types[type]); + return; + } + + intel_gt_get_valid_steering(gt, type, &slice, &subslice); + drm_printf(p, "%s steering: sliceid=0x%x, subsliceid=0x%x\n", + intel_steering_types[type], slice, subslice); + + if (!dump_table) + return; + + for (entry = gt->steering_table[type]; entry->end; entry++) + drm_printf(p, "\t0x%06x - 0x%06x\n", entry->start, entry->end); +} + +void intel_gt_report_steering(struct drm_printer *p, struct intel_gt *gt, + bool dump_table) +{ + drm_printf(p, "Default steering: sliceid=0x%x, subsliceid=0x%x\n", + gt->default_steering.groupid, + gt->default_steering.instanceid); + + if (IS_PONTEVECCHIO(gt->i915)) { + report_steering_type(p, gt, INSTANCE0, dump_table); + } else if (HAS_MSLICE_STEERING(gt->i915)) { + report_steering_type(p, gt, MSLICE, dump_table); + report_steering_type(p, gt, LNCF, dump_table); + } +} + diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.h b/drivers/gpu/drm/i915/gt/intel_gt_mcr.h new file mode 100644 index 000000000000..b570c1571243 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef __INTEL_GT_MCR__ +#define __INTEL_GT_MCR__ + +#include "intel_gt_types.h" + +void intel_gt_mcr_init(struct intel_gt *gt); + +u32 intel_uncore_read_with_mcr_steering_fw(struct intel_uncore *uncore, + i915_reg_t reg, + int slice, int subslice); +u32 intel_uncore_read_with_mcr_steering(struct intel_uncore *uncore, + i915_reg_t reg, int slice, int subslice); +void intel_uncore_write_with_mcr_steering(struct intel_uncore *uncore, + i915_reg_t reg, u32 value, + int slice, int subslice); + +u32 intel_gt_read_register_fw(struct intel_gt *gt, i915_reg_t reg); +u32 intel_gt_read_register(struct intel_gt *gt, i915_reg_t reg); + +static inline bool intel_gt_needs_read_steering(struct intel_gt *gt, + enum intel_steering_type type) +{ + return gt->steering_table[type]; +} + +void intel_gt_get_valid_steering_for_reg(struct intel_gt *gt, i915_reg_t reg, + u8 *sliceid, u8 *subsliceid); + +void intel_gt_report_steering(struct drm_printer *p, struct intel_gt *gt, + bool dump_table); + +#endif /* __INTEL_GT_MCR__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c index 
e9c12e0d6f59..1f4e7237a924 100644 --- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c +++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c @@ -12,6 +12,7 @@ #include "gem/i915_gem_region.h" #include "gem/i915_gem_ttm.h" #include "gt/intel_gt.h" +#include "gt/intel_gt_mcr.h" #include "gt/intel_gt_regs.h" static int diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index c4af51144216..e4913aefac97 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -9,6 +9,7 @@ #include "intel_engine_regs.h" #include "intel_gpu_commands.h" #include "intel_gt.h" +#include "intel_gt_mcr.h" #include "intel_gt_regs.h" #include "intel_ring.h" #include "intel_workarounds.h" diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index bb197610fd5b..dea138d78111 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -7,6 +7,7 @@ #include "gt/intel_engine_regs.h" #include "gt/intel_gt.h" +#include "gt/intel_gt_mcr.h" #include "gt/intel_gt_regs.h" #include "gt/intel_lrc.h" #include "gt/shmem_utils.h" diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 59cd888209fe..80d73222b125 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1346,8 +1346,6 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_RUNTIME_PM(dev_priv) (INTEL_INFO(dev_priv)->has_runtime_pm) #define HAS_64BIT_RELOC(dev_priv) (INTEL_INFO(dev_priv)->has_64bit_reloc) -#define HAS_MSLICE_STEERING(dev_priv) (INTEL_INFO(dev_priv)->has_mslice_steering) - /* * Set this flag, when platform requires 64K GTT page sizes or larger for * device local memory access. diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 33304eb987e4..a852c471d1b3 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -2644,118 +2644,6 @@ intel_uncore_forcewake_for_reg(struct intel_uncore *uncore, return fw_domains; } -/** - * uncore_rw_with_mcr_steering_fw - Access a register after programming - * the MCR selector register. - * @uncore: pointer to struct intel_uncore - * @reg: register being accessed - * @rw_flag: FW_REG_READ for read access or FW_REG_WRITE for write access - * @slice: slice number (ignored for multi-cast write) - * @subslice: sub-slice number (ignored for multi-cast write) - * @value: register value to be written (ignored for read) - * - * Return: 0 for write access. register value for read access. - * - * Caller needs to make sure the relevant forcewake wells are up. - */ -static u32 uncore_rw_with_mcr_steering_fw(struct intel_uncore *uncore, - i915_reg_t reg, u8 rw_flag, - int slice, int subslice, u32 value) -{ - u32 mcr_mask, mcr_ss, mcr, old_mcr, val = 0; - - lockdep_assert_held(&uncore->lock); - - if (GRAPHICS_VER(uncore->i915) >= 11) { - mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK; - mcr_ss = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice); - - /* - * Wa_22013088509 - * - * The setting of the multicast/unicast bit usually wouldn't - * matter for read operations (which always return the value - * from a single register instance regardless of how that bit - * is set), but some platforms have a workaround requiring us - * to remain in multicast mode for reads. 
There's no real - * downside to this, so we'll just go ahead and do so on all - * platforms; we'll only clear the multicast bit from the mask - * when exlicitly doing a write operation. - */ - if (rw_flag == FW_REG_WRITE) - mcr_mask |= GEN11_MCR_MULTICAST; - } else { - mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK; - mcr_ss = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice); - } - - old_mcr = mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR); - - mcr &= ~mcr_mask; - mcr |= mcr_ss; - intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr); - - if (rw_flag == FW_REG_READ) - val = intel_uncore_read_fw(uncore, reg); - else - intel_uncore_write_fw(uncore, reg, value); - - mcr &= ~mcr_mask; - mcr |= old_mcr & mcr_mask; - - intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr); - - return val; -} - -static u32 uncore_rw_with_mcr_steering(struct intel_uncore *uncore, - i915_reg_t reg, u8 rw_flag, - int slice, int subslice, - u32 value) -{ - enum forcewake_domains fw_domains; - u32 val; - - fw_domains = intel_uncore_forcewake_for_reg(uncore, reg, - rw_flag); - fw_domains |= intel_uncore_forcewake_for_reg(uncore, - GEN8_MCR_SELECTOR, - FW_REG_READ | FW_REG_WRITE); - - spin_lock_irq(&uncore->lock); - intel_uncore_forcewake_get__locked(uncore, fw_domains); - - val = uncore_rw_with_mcr_steering_fw(uncore, reg, rw_flag, - slice, subslice, value); - - intel_uncore_forcewake_put__locked(uncore, fw_domains); - spin_unlock_irq(&uncore->lock); - - return val; -} - -u32 intel_uncore_read_with_mcr_steering_fw(struct intel_uncore *uncore, - i915_reg_t reg, int slice, int subslice) -{ - return uncore_rw_with_mcr_steering_fw(uncore, reg, FW_REG_READ, - slice, subslice, 0); -} - -u32 intel_uncore_read_with_mcr_steering(struct intel_uncore *uncore, - i915_reg_t reg, int slice, int subslice) -{ - return uncore_rw_with_mcr_steering(uncore, reg, FW_REG_READ, - slice, subslice, 0); -} - -void intel_uncore_write_with_mcr_steering(struct intel_uncore *uncore, - i915_reg_t reg, u32 value, - int slice, int subslice) -{ - uncore_rw_with_mcr_steering(uncore, reg, FW_REG_WRITE, - slice, subslice, value); -} - #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/mock_uncore.c" #include "selftests/intel_uncore.c" diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h index 52fe3d89dd2b..b1fa912a65e7 100644 --- a/drivers/gpu/drm/i915/intel_uncore.h +++ b/drivers/gpu/drm/i915/intel_uncore.h @@ -210,14 +210,6 @@ intel_uncore_has_fifo(const struct intel_uncore *uncore) return uncore->flags & UNCORE_HAS_FIFO; } -u32 intel_uncore_read_with_mcr_steering_fw(struct intel_uncore *uncore, - i915_reg_t reg, - int slice, int subslice); -u32 intel_uncore_read_with_mcr_steering(struct intel_uncore *uncore, - i915_reg_t reg, int slice, int subslice); -void intel_uncore_write_with_mcr_steering(struct intel_uncore *uncore, - i915_reg_t reg, u32 value, - int slice, int subslice); void intel_uncore_mmio_debug_init_early(struct intel_uncore_mmio_debug *mmio_debug); void intel_uncore_init_early(struct intel_uncore *uncore, -- cgit v1.2.3 From 3fe6c7f53eaa62e3700d8ae076e9c42a1d855242 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 14 Jun 2022 17:10:19 -0700 Subject: drm/i915/gt: Cleanup interface for MCR operations Let's replace the assortment of intel_gt_* and intel_uncore_* functions that operate on MCR registers with a cleaner set of interfaces: * intel_gt_mcr_read -- unicast read from specific instance * intel_gt_mcr_read_any[_fw] -- unicast read from any non-terminated instance * 
intel_gt_mcr_unicast_write -- unicast write to specific instance * intel_gt_mcr_multicast_write[_fw] -- multicast write to all instances We'll also replace the historic "slice" and "subslice" terminology with "group" and "instance" to match the documentation for more recent platforms; these days MCR steering applies to more types of replication than just slice/subslice. v2: - Reference the new kerneldoc from i915.rst. (Jani) - Tweak the wording of the documentation for a couple functions to clarify the difference between "_fw" and non-"_fw" forms. v3: - s/read/write/ to fix copy-paste mistake in a couple comments. (Harish) Signed-off-by: Matt Roper Acked-by: Jani Nikula Reviewed-by: Harish Chegondi Link: https://patchwork.freedesktop.org/patch/msgid/20220615001019.1821989-3-matthew.d.roper@intel.com --- Documentation/gpu/i915.rst | 12 ++ drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 2 +- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 33 ++-- drivers/gpu/drm/i915/gt/intel_gt_debugfs.c | 2 +- drivers/gpu/drm/i915/gt/intel_gt_mcr.c | 239 +++++++++++++++++----------- drivers/gpu/drm/i915/gt/intel_gt_mcr.h | 43 +++-- drivers/gpu/drm/i915/gt/intel_region_lmem.c | 4 +- drivers/gpu/drm/i915/gt/intel_workarounds.c | 8 +- drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 2 +- 9 files changed, 200 insertions(+), 145 deletions(-) diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst index 54060cd6c419..4e59db1cfb00 100644 --- a/Documentation/gpu/i915.rst +++ b/Documentation/gpu/i915.rst @@ -246,6 +246,18 @@ Display State Buffer .. kernel-doc:: drivers/gpu/drm/i915/display/intel_dsb.c :internal: +GT Programming +============== + +Multicast/Replicated (MCR) Registers +------------------------------------ + +.. kernel-doc:: drivers/gpu/drm/i915/gt/intel_gt_mcr.c + :doc: GT Multicast/Replicated (MCR) Register Support + +.. 
kernel-doc:: drivers/gpu/drm/i915/gt/intel_gt_mcr.c + :internal: + Memory Management and Command Submission ======================================== diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index da30503d3ca2..fa54823d1219 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -835,7 +835,7 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type, } else { resource_size_t lmem_range; - lmem_range = intel_gt_read_register(&i915->gt0, XEHPSDV_TILE0_ADDR_RANGE) & 0xFFFF; + lmem_range = intel_gt_mcr_read_any(&i915->gt0, XEHPSDV_TILE0_ADDR_RANGE) & 0xFFFF; lmem_size = lmem_range >> XEHPSDV_TILE_LMEM_RANGE_SHIFT; lmem_size *= SZ_1G; } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 244af1bdb7db..136cc44c3deb 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1428,14 +1428,6 @@ void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine) ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING)); } -static u32 -read_subslice_reg(const struct intel_engine_cs *engine, - int slice, int subslice, i915_reg_t reg) -{ - return intel_uncore_read_with_mcr_steering(engine->uncore, reg, - slice, subslice); -} - /* NB: please notice the memset */ void intel_engine_get_instdone(const struct intel_engine_cs *engine, struct intel_instdone *instdone) @@ -1469,28 +1461,33 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine, if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { for_each_instdone_gslice_dss_xehp(i915, sseu, iter, slice, subslice) { instdone->sampler[slice][subslice] = - read_subslice_reg(engine, slice, subslice, - GEN7_SAMPLER_INSTDONE); + intel_gt_mcr_read(engine->gt, + GEN7_SAMPLER_INSTDONE, + slice, subslice); instdone->row[slice][subslice] = - read_subslice_reg(engine, slice, subslice, - GEN7_ROW_INSTDONE); + intel_gt_mcr_read(engine->gt, + GEN7_ROW_INSTDONE, + slice, subslice); } } else { for_each_instdone_slice_subslice(i915, sseu, slice, subslice) { instdone->sampler[slice][subslice] = - read_subslice_reg(engine, slice, subslice, - GEN7_SAMPLER_INSTDONE); + intel_gt_mcr_read(engine->gt, + GEN7_SAMPLER_INSTDONE, + slice, subslice); instdone->row[slice][subslice] = - read_subslice_reg(engine, slice, subslice, - GEN7_ROW_INSTDONE); + intel_gt_mcr_read(engine->gt, + GEN7_ROW_INSTDONE, + slice, subslice); } } if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) { for_each_instdone_gslice_dss_xehp(i915, sseu, iter, slice, subslice) instdone->geom_svg[slice][subslice] = - read_subslice_reg(engine, slice, subslice, - XEHPG_INSTDONE_GEOM_SVG); + intel_gt_mcr_read(engine->gt, + XEHPG_INSTDONE_GEOM_SVG, + slice, subslice); } } else if (GRAPHICS_VER(i915) >= 7) { instdone->instdone = diff --git a/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c index ea07f2bb846f..dd53641f3637 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c @@ -65,7 +65,7 @@ static int steering_show(struct seq_file *m, void *data) struct drm_printer p = drm_seq_file_printer(m); struct intel_gt *gt = m->private; - intel_gt_report_steering(&p, gt, true); + intel_gt_mcr_report_steering(&p, gt, true); return 0; } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c index 1279a1fe1001..777025d5bd66 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c 
+++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c @@ -134,23 +134,22 @@ void intel_gt_mcr_init(struct intel_gt *gt) } } -/** - * uncore_rw_with_mcr_steering_fw - Access a register after programming - * the MCR selector register. +/* + * rw_with_mcr_steering_fw - Access a register with specific MCR steering * @uncore: pointer to struct intel_uncore * @reg: register being accessed * @rw_flag: FW_REG_READ for read access or FW_REG_WRITE for write access - * @slice: slice number (ignored for multi-cast write) - * @subslice: sub-slice number (ignored for multi-cast write) + * @group: group number (documented as "sliceid" on older platforms) + * @instance: instance number (documented as "subsliceid" on older platforms) * @value: register value to be written (ignored for read) * * Return: 0 for write access. register value for read access. * * Caller needs to make sure the relevant forcewake wells are up. */ -static u32 uncore_rw_with_mcr_steering_fw(struct intel_uncore *uncore, - i915_reg_t reg, u8 rw_flag, - int slice, int subslice, u32 value) +static u32 rw_with_mcr_steering_fw(struct intel_uncore *uncore, + i915_reg_t reg, u8 rw_flag, + int group, int instance, u32 value) { u32 mcr_mask, mcr_ss, mcr, old_mcr, val = 0; @@ -158,7 +157,7 @@ static u32 uncore_rw_with_mcr_steering_fw(struct intel_uncore *uncore, if (GRAPHICS_VER(uncore->i915) >= 11) { mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK; - mcr_ss = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice); + mcr_ss = GEN11_MCR_SLICE(group) | GEN11_MCR_SUBSLICE(instance); /* * Wa_22013088509 @@ -176,7 +175,7 @@ static u32 uncore_rw_with_mcr_steering_fw(struct intel_uncore *uncore, mcr_mask |= GEN11_MCR_MULTICAST; } else { mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK; - mcr_ss = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice); + mcr_ss = GEN8_MCR_SLICE(group) | GEN8_MCR_SUBSLICE(instance); } old_mcr = mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR); @@ -198,10 +197,10 @@ static u32 uncore_rw_with_mcr_steering_fw(struct intel_uncore *uncore, return val; } -static u32 uncore_rw_with_mcr_steering(struct intel_uncore *uncore, - i915_reg_t reg, u8 rw_flag, - int slice, int subslice, - u32 value) +static u32 rw_with_mcr_steering(struct intel_uncore *uncore, + i915_reg_t reg, u8 rw_flag, + int group, int instance, + u32 value) { enum forcewake_domains fw_domains; u32 val; @@ -215,8 +214,7 @@ static u32 uncore_rw_with_mcr_steering(struct intel_uncore *uncore, spin_lock_irq(&uncore->lock); intel_uncore_forcewake_get__locked(uncore, fw_domains); - val = uncore_rw_with_mcr_steering_fw(uncore, reg, rw_flag, - slice, subslice, value); + val = rw_with_mcr_steering_fw(uncore, reg, rw_flag, group, instance, value); intel_uncore_forcewake_put__locked(uncore, fw_domains); spin_unlock_irq(&uncore->lock); @@ -224,31 +222,73 @@ static u32 uncore_rw_with_mcr_steering(struct intel_uncore *uncore, return val; } -u32 intel_uncore_read_with_mcr_steering_fw(struct intel_uncore *uncore, - i915_reg_t reg, int slice, int subslice) +/** + * intel_gt_mcr_read - read a specific instance of an MCR register + * @gt: GT structure + * @reg: the MCR register to read + * @group: the MCR group + * @instance: the MCR instance + * + * Returns the value read from an MCR register after steering toward a specific + * group/instance. 
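Underneath, a steered access is a save/program/access/restore sequence on the MCR selector. A rough userspace analogue is shown below; the bit layout and names are invented for the sketch, and the real code additionally handles forcewake and locking.

#include <stdio.h>
#include <stdint.h>

#define MCR_MULTICAST	(1u << 31)
#define MCR_GROUP(g)	(((uint32_t)(g) & 0x3f) << 8)
#define MCR_INSTANCE(i)	((uint32_t)(i) & 0xff)
#define MCR_ID_MASK	(MCR_GROUP(0x3f) | MCR_INSTANCE(0xff))

static uint32_t mcr_selector = MCR_MULTICAST;	/* stands in for the MMIO selector register */
static uint32_t fake_reg[4][4];			/* [group][instance] copies of one MCR register */

static uint32_t steered_read(unsigned int group, unsigned int instance)
{
	uint32_t old = mcr_selector, val;

	/* program the steering target, leaving the multicast bit alone for reads */
	mcr_selector = (mcr_selector & ~MCR_ID_MASK) |
		       MCR_GROUP(group) | MCR_INSTANCE(instance);
	val = fake_reg[group][instance];	/* the actual register access */

	/* restore the previous steering so unrelated accesses are unaffected */
	mcr_selector = (mcr_selector & ~MCR_ID_MASK) | (old & MCR_ID_MASK);
	return val;
}

int main(void)
{
	fake_reg[1][2] = 0x1234;
	printf("group 1 / instance 2: 0x%x\n", (unsigned int)steered_read(1, 2));
	printf("multicast bit still set: %s\n",
	       (mcr_selector & MCR_MULTICAST) ? "yes" : "no");
	return 0;
}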
+ */ +u32 intel_gt_mcr_read(struct intel_gt *gt, + i915_reg_t reg, + int group, int instance) { - return uncore_rw_with_mcr_steering_fw(uncore, reg, FW_REG_READ, - slice, subslice, 0); + return rw_with_mcr_steering(gt->uncore, reg, FW_REG_READ, group, instance, 0); } -u32 intel_uncore_read_with_mcr_steering(struct intel_uncore *uncore, - i915_reg_t reg, int slice, int subslice) +/** + * intel_gt_mcr_unicast_write - write a specific instance of an MCR register + * @gt: GT structure + * @reg: the MCR register to write + * @value: value to write + * @group: the MCR group + * @instance: the MCR instance + * + * Write an MCR register in unicast mode after steering toward a specific + * group/instance. + */ +void intel_gt_mcr_unicast_write(struct intel_gt *gt, i915_reg_t reg, u32 value, + int group, int instance) { - return uncore_rw_with_mcr_steering(uncore, reg, FW_REG_READ, - slice, subslice, 0); + rw_with_mcr_steering(gt->uncore, reg, FW_REG_WRITE, group, instance, value); } -void intel_uncore_write_with_mcr_steering(struct intel_uncore *uncore, - i915_reg_t reg, u32 value, - int slice, int subslice) +/** + * intel_gt_mcr_multicast_write - write a value to all instances of an MCR register + * @gt: GT structure + * @reg: the MCR register to write + * @value: value to write + * + * Write an MCR register in multicast mode to update all instances. + */ +void intel_gt_mcr_multicast_write(struct intel_gt *gt, + i915_reg_t reg, u32 value) { - uncore_rw_with_mcr_steering(uncore, reg, FW_REG_WRITE, - slice, subslice, value); + intel_uncore_write(gt->uncore, reg, value); } /** - * intel_gt_reg_needs_read_steering - determine whether a register read - * requires explicit steering + * intel_gt_mcr_multicast_write_fw - write a value to all instances of an MCR register + * @gt: GT structure + * @reg: the MCR register to write + * @value: value to write + * + * Write an MCR register in multicast mode to update all instances. This + * function assumes the caller is already holding any necessary forcewake + * domains; use intel_gt_mcr_multicast_write() in cases where forcewake should + * be obtained automatically. + */ +void intel_gt_mcr_multicast_write_fw(struct intel_gt *gt, i915_reg_t reg, u32 value) +{ + intel_uncore_write_fw(gt->uncore, reg, value); +} + +/* + * reg_needs_read_steering - determine whether a register read requires + * explicit steering * @gt: GT structure * @reg: the register to check steering requirements for * @type: type of multicast steering to check @@ -260,14 +300,14 @@ void intel_uncore_write_with_mcr_steering(struct intel_uncore *uncore, * steering type, or if the default (subslice-based) steering IDs are suitable * for @type steering too. 
*/ -static bool intel_gt_reg_needs_read_steering(struct intel_gt *gt, - i915_reg_t reg, - enum intel_steering_type type) +static bool reg_needs_read_steering(struct intel_gt *gt, + i915_reg_t reg, + enum intel_steering_type type) { const u32 offset = i915_mmio_reg_offset(reg); const struct intel_mmio_range *entry; - if (likely(!intel_gt_needs_read_steering(gt, type))) + if (likely(!gt->steering_table[type])) return false; for (entry = gt->steering_table[type]; entry->end; entry++) { @@ -278,29 +318,29 @@ static bool intel_gt_reg_needs_read_steering(struct intel_gt *gt, return false; } -/** - * intel_gt_get_valid_steering - determines valid IDs for a class of MCR steering +/* + * get_nonterminated_steering - determines valid IDs for a class of MCR steering * @gt: GT structure * @type: multicast register type - * @sliceid: Slice ID returned - * @subsliceid: Subslice ID returned + * @group: Group ID returned + * @instance: Instance ID returned * - * Determines sliceid and subsliceid values that will steer reads - * of a specific multicast register class to a valid value. + * Determines group and instance values that will steer reads of the specified + * MCR class to a non-terminated instance. */ -static void intel_gt_get_valid_steering(struct intel_gt *gt, - enum intel_steering_type type, - u8 *sliceid, u8 *subsliceid) +static void get_nonterminated_steering(struct intel_gt *gt, + enum intel_steering_type type, + u8 *group, u8 *instance) { switch (type) { case L3BANK: - *sliceid = 0; /* unused */ - *subsliceid = __ffs(gt->info.l3bank_mask); + *group = 0; /* unused */ + *instance = __ffs(gt->info.l3bank_mask); break; case MSLICE: GEM_WARN_ON(!HAS_MSLICE_STEERING(gt->i915)); - *sliceid = __ffs(gt->info.mslice_mask); - *subsliceid = 0; /* unused */ + *group = __ffs(gt->info.mslice_mask); + *instance = 0; /* unused */ break; case LNCF: /* @@ -308,96 +348,105 @@ static void intel_gt_get_valid_steering(struct intel_gt *gt, * can safely just steer to LNCF 0 in all cases. */ GEM_WARN_ON(!HAS_MSLICE_STEERING(gt->i915)); - *sliceid = __ffs(gt->info.mslice_mask) << 1; - *subsliceid = 0; /* unused */ + *group = __ffs(gt->info.mslice_mask) << 1; + *instance = 0; /* unused */ break; case INSTANCE0: /* * There are a lot of MCR types for which instance (0, 0) * will always provide a non-terminated value. */ - *sliceid = 0; - *subsliceid = 0; + *group = 0; + *instance = 0; break; default: MISSING_CASE(type); - *sliceid = 0; - *subsliceid = 0; + *group = 0; + *instance = 0; } } /** - * intel_gt_get_valid_steering_for_reg - get a valid steering for a register + * intel_gt_mcr_get_nonterminated_steering - find group/instance values that + * will steer a register to a non-terminated instance * @gt: GT structure * @reg: register for which the steering is required - * @sliceid: return variable for slice steering - * @subsliceid: return variable for subslice steering + * @group: return variable for group steering + * @instance: return variable for instance steering * - * This function returns a slice/subslice pair that is guaranteed to work for + * This function returns a group/instance pair that is guaranteed to work for * read steering of the given register. Note that a value will be returned even * if the register is not replicated and therefore does not actually require * steering. 
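For the per-type cases above, a non-terminated target is typically derived from a fuse mask with a find-first-set operation. A minimal sketch follows, assuming gcc/clang's __builtin_ctz as a stand-in for the kernel's __ffs() and using made-up mask values.

#include <stdio.h>
#include <stdint.h>

/* first set bit of a non-zero mask; compiler builtin used in place of __ffs() */
static unsigned int first_set(uint32_t mask)
{
	return (unsigned int)__builtin_ctz(mask);
}

int main(void)
{
	uint32_t l3bank_mask = 0x0000fff0;	/* pretend banks 0-3 are fused off */
	uint32_t mslice_mask = 0x0000000c;	/* pretend mslices 0-1 are fused off */

	/* L3BANK: group is unused, instance is the first bank still present */
	printf("L3BANK: group=0 instance=%u\n", first_set(l3bank_mask));

	/* MSLICE: group is the first mslice still present, instance is unused */
	printf("MSLICE: group=%u instance=0\n", first_set(mslice_mask));

	/* LNCF: an LNCF exists whenever its mslice does, two per mslice */
	printf("LNCF:   group=%u instance=0\n", first_set(mslice_mask) << 1);
	return 0;
}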
*/ -void intel_gt_get_valid_steering_for_reg(struct intel_gt *gt, i915_reg_t reg, - u8 *sliceid, u8 *subsliceid) +void intel_gt_mcr_get_nonterminated_steering(struct intel_gt *gt, + i915_reg_t reg, + u8 *group, u8 *instance) { int type; for (type = 0; type < NUM_STEERING_TYPES; type++) { - if (intel_gt_reg_needs_read_steering(gt, reg, type)) { - intel_gt_get_valid_steering(gt, type, sliceid, - subsliceid); + if (reg_needs_read_steering(gt, reg, type)) { + get_nonterminated_steering(gt, type, group, instance); return; } } - *sliceid = gt->default_steering.groupid; - *subsliceid = gt->default_steering.instanceid; + *group = gt->default_steering.groupid; + *instance = gt->default_steering.instanceid; } /** - * intel_gt_read_register_fw - reads a GT register with support for multicast + * intel_gt_mcr_read_any_fw - reads one instance of an MCR register * @gt: GT structure * @reg: register to read * - * This function will read a GT register. If the register is a multicast - * register, the read will be steered to a valid instance (i.e., one that - * isn't fused off or powered down by power gating). + * Reads a GT MCR register. The read will be steered to a non-terminated + * instance (i.e., one that isn't fused off or powered down by power gating). + * This function assumes the caller is already holding any necessary forcewake + * domains; use intel_gt_mcr_read_any() in cases where forcewake should be + * obtained automatically. * - * Returns the value from a valid instance of @reg. + * Returns the value from a non-terminated instance of @reg. */ -u32 intel_gt_read_register_fw(struct intel_gt *gt, i915_reg_t reg) +u32 intel_gt_mcr_read_any_fw(struct intel_gt *gt, i915_reg_t reg) { int type; - u8 sliceid, subsliceid; + u8 group, instance; for (type = 0; type < NUM_STEERING_TYPES; type++) { - if (intel_gt_reg_needs_read_steering(gt, reg, type)) { - intel_gt_get_valid_steering(gt, type, &sliceid, - &subsliceid); - return intel_uncore_read_with_mcr_steering_fw(gt->uncore, - reg, - sliceid, - subsliceid); + if (reg_needs_read_steering(gt, reg, type)) { + get_nonterminated_steering(gt, type, &group, &instance); + return rw_with_mcr_steering_fw(gt->uncore, reg, + FW_REG_READ, + group, instance, 0); } } return intel_uncore_read_fw(gt->uncore, reg); } -u32 intel_gt_read_register(struct intel_gt *gt, i915_reg_t reg) +/** + * intel_gt_mcr_read_any - reads one instance of an MCR register + * @gt: GT structure + * @reg: register to read + * + * Reads a GT MCR register. The read will be steered to a non-terminated + * instance (i.e., one that isn't fused off or powered down by power gating). + * + * Returns the value from a non-terminated instance of @reg. 
+ */ +u32 intel_gt_mcr_read_any(struct intel_gt *gt, i915_reg_t reg) { int type; - u8 sliceid, subsliceid; + u8 group, instance; for (type = 0; type < NUM_STEERING_TYPES; type++) { - if (intel_gt_reg_needs_read_steering(gt, reg, type)) { - intel_gt_get_valid_steering(gt, type, &sliceid, - &subsliceid); - return intel_uncore_read_with_mcr_steering(gt->uncore, - reg, - sliceid, - subsliceid); + if (reg_needs_read_steering(gt, reg, type)) { + get_nonterminated_steering(gt, type, &group, &instance); + return rw_with_mcr_steering(gt->uncore, reg, + FW_REG_READ, + group, instance, 0); } } @@ -410,7 +459,7 @@ static void report_steering_type(struct drm_printer *p, bool dump_table) { const struct intel_mmio_range *entry; - u8 slice, subslice; + u8 group, instance; BUILD_BUG_ON(ARRAY_SIZE(intel_steering_types) != NUM_STEERING_TYPES); @@ -420,9 +469,9 @@ static void report_steering_type(struct drm_printer *p, return; } - intel_gt_get_valid_steering(gt, type, &slice, &subslice); - drm_printf(p, "%s steering: sliceid=0x%x, subsliceid=0x%x\n", - intel_steering_types[type], slice, subslice); + get_nonterminated_steering(gt, type, &group, &instance); + drm_printf(p, "%s steering: group=0x%x, instance=0x%x\n", + intel_steering_types[type], group, instance); if (!dump_table) return; @@ -431,10 +480,10 @@ static void report_steering_type(struct drm_printer *p, drm_printf(p, "\t0x%06x - 0x%06x\n", entry->start, entry->end); } -void intel_gt_report_steering(struct drm_printer *p, struct intel_gt *gt, - bool dump_table) +void intel_gt_mcr_report_steering(struct drm_printer *p, struct intel_gt *gt, + bool dump_table) { - drm_printf(p, "Default steering: sliceid=0x%x, subsliceid=0x%x\n", + drm_printf(p, "Default steering: group=0x%x, instance=0x%x\n", gt->default_steering.groupid, gt->default_steering.instanceid); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.h b/drivers/gpu/drm/i915/gt/intel_gt_mcr.h index b570c1571243..506b0cbc8db3 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.h @@ -10,28 +10,25 @@ void intel_gt_mcr_init(struct intel_gt *gt); -u32 intel_uncore_read_with_mcr_steering_fw(struct intel_uncore *uncore, - i915_reg_t reg, - int slice, int subslice); -u32 intel_uncore_read_with_mcr_steering(struct intel_uncore *uncore, - i915_reg_t reg, int slice, int subslice); -void intel_uncore_write_with_mcr_steering(struct intel_uncore *uncore, - i915_reg_t reg, u32 value, - int slice, int subslice); - -u32 intel_gt_read_register_fw(struct intel_gt *gt, i915_reg_t reg); -u32 intel_gt_read_register(struct intel_gt *gt, i915_reg_t reg); - -static inline bool intel_gt_needs_read_steering(struct intel_gt *gt, - enum intel_steering_type type) -{ - return gt->steering_table[type]; -} - -void intel_gt_get_valid_steering_for_reg(struct intel_gt *gt, i915_reg_t reg, - u8 *sliceid, u8 *subsliceid); - -void intel_gt_report_steering(struct drm_printer *p, struct intel_gt *gt, - bool dump_table); +u32 intel_gt_mcr_read(struct intel_gt *gt, + i915_reg_t reg, + int group, int instance); +u32 intel_gt_mcr_read_any_fw(struct intel_gt *gt, i915_reg_t reg); +u32 intel_gt_mcr_read_any(struct intel_gt *gt, i915_reg_t reg); + +void intel_gt_mcr_unicast_write(struct intel_gt *gt, + i915_reg_t reg, u32 value, + int group, int instance); +void intel_gt_mcr_multicast_write(struct intel_gt *gt, + i915_reg_t reg, u32 value); +void intel_gt_mcr_multicast_write_fw(struct intel_gt *gt, + i915_reg_t reg, u32 value); + +void intel_gt_mcr_get_nonterminated_steering(struct intel_gt *gt, + 
i915_reg_t reg, + u8 *group, u8 *instance); + +void intel_gt_mcr_report_steering(struct drm_printer *p, struct intel_gt *gt, + bool dump_table); #endif /* __INTEL_GT_MCR__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c index 1f4e7237a924..2ff448047020 100644 --- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c +++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c @@ -105,11 +105,11 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt) resource_size_t lmem_range; u64 tile_stolen, flat_ccs_base; - lmem_range = intel_gt_read_register(&i915->gt0, XEHPSDV_TILE0_ADDR_RANGE) & 0xFFFF; + lmem_range = intel_gt_mcr_read_any(&i915->gt0, XEHPSDV_TILE0_ADDR_RANGE) & 0xFFFF; lmem_size = lmem_range >> XEHPSDV_TILE_LMEM_RANGE_SHIFT; lmem_size *= SZ_1G; - flat_ccs_base = intel_gt_read_register(gt, XEHPSDV_FLAT_CCS_BASE_ADDR); + flat_ccs_base = intel_gt_mcr_read_any(gt, XEHPSDV_FLAT_CCS_BASE_ADDR); flat_ccs_base = (flat_ccs_base >> XEHPSDV_CCS_BASE_SHIFT) * SZ_64K; /* FIXME: Remove this when we have small-bar enabled */ diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index e4913aefac97..3213c593a55f 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1083,7 +1083,7 @@ static void __add_mcr_wa(struct intel_gt *gt, struct i915_wa_list *wal, gt->default_steering.instanceid = subslice; if (drm_debug_enabled(DRM_UT_DRIVER)) - intel_gt_report_steering(&p, gt, false); + intel_gt_mcr_report_steering(&p, gt, false); } static void @@ -1624,13 +1624,13 @@ wa_list_apply(struct intel_gt *gt, const struct i915_wa_list *wal) u32 val, old = 0; /* open-coded rmw due to steering */ - old = wa->clr ? intel_gt_read_register_fw(gt, wa->reg) : 0; + old = wa->clr ? intel_gt_mcr_read_any_fw(gt, wa->reg) : 0; val = (old & ~wa->clr) | wa->set; if (val != old || !wa->clr) intel_uncore_write_fw(uncore, wa->reg, val); if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) - wa_verify(wa, intel_gt_read_register_fw(gt, wa->reg), + wa_verify(wa, intel_gt_mcr_read_any_fw(gt, wa->reg), wal->name, "application"); } @@ -1661,7 +1661,7 @@ static bool wa_list_verify(struct intel_gt *gt, for (i = 0, wa = wal->list; i < wal->count; i++, wa++) ok &= wa_verify(wa, - intel_gt_read_register_fw(gt, wa->reg), + intel_gt_mcr_read_any_fw(gt, wa->reg), wal->name, from); intel_uncore_forcewake_put__locked(uncore, fw); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index dea138d78111..ba7541f3ca61 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -314,7 +314,7 @@ static long __must_check guc_mmio_reg_add(struct intel_gt *gt, * tracking, it is easier to just program the default steering for all * regs that don't need a non-default one. */ - intel_gt_get_valid_steering_for_reg(gt, reg, &group, &inst); + intel_gt_mcr_get_nonterminated_steering(gt, reg, &group, &inst); entry.flags |= GUC_REGSET_STEERING(group, inst); slot = __mmio_reg_add(regset, &entry); -- cgit v1.2.3 From 2ef6efa79fecd5e3457b324155d35524d95f2b6b Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Fri, 17 Jun 2022 17:28:55 +0200 Subject: drm/i915: Improve on suspend / resume time with VT-d enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When DMAR / VT-d is enabled, the display engine uses overfetching, presumably to deal with the increased latency. 
To avoid display engine errors and DMAR faults, as a workaround the GGTT is populated with scratch PTEs when VT-d is enabled. However, starting with gen10, write-combined writing of scratch PTEs is no longer possible and as a result, populating the full GGTT with scratch PTEs like on resume becomes very slow as uncached access is needed. Therefore, on integrated GPUs utilize the fact that the PTEs are stored in stolen memory, which retains content across S3 suspend. Don't clear the PTEs on suspend and resume. This improves resume time by around 100 ms. While 100+ ms might appear like a short time, it's 10% to 20% of total resume time and important in some applications. One notable exception is Intel Rapid Start Technology which may cause stolen memory to be lost across what the OS perceives as S3 suspend. If IRST is enabled or if we can't detect whether IRST is enabled, retain the old workaround, clearing and re-instating PTEs. As an additional measure, if we detect that the last ggtt pte was lost during suspend, print a warning and re-populate the GGTT ptes. On discrete GPUs, the display engine scans out from LMEM which isn't subject to DMAR, and presumably the workaround is therefore not needed, but that needs to be verified and disabling the workaround for dGPU, if possible, will be deferred to a follow-up patch. v2: - Rely on retained ptes to also speed up suspend and resume re-binding. - Re-build GGTT ptes if Intel rst is enabled. v3: - Re-build GGTT ptes also if we can't detect whether Intel rst is enabled, and if the guard page PTE and end of GGTT was lost. v4: - Fix some kerneldoc issues (Matthew Auld), rebase. Signed-off-by: Thomas Hellström Acked-by: Daniel Vetter Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20220617152856.249295-1-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_ggtt.c | 56 ++++++++++++++++++++++++++++++++---- drivers/gpu/drm/i915/gt/intel_gtt.h | 24 ++++++++++++++++ drivers/gpu/drm/i915/i915_driver.c | 16 +++++++++++ 3 files changed, 90 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index e6b2eb122ad7..0849a6f66309 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -22,6 +22,13 @@ #include "intel_gtt.h" #include "gen8_ppgtt.h" +static inline bool suspend_retains_ptes(struct i915_address_space *vm) +{ + return GRAPHICS_VER(vm->i915) >= 8 && + !HAS_LMEM(vm->i915) && + vm->is_ggtt; +} + static void i915_ggtt_color_adjust(const struct drm_mm_node *node, unsigned long color, u64 *start, @@ -93,6 +100,23 @@ int i915_ggtt_init_hw(struct drm_i915_private *i915) return 0; } +/* + * Return the value of the last GGTT pte cast to an u64, if + * the system is supposed to retain ptes across resume. 0 otherwise.
+ */ +static u64 read_last_pte(struct i915_address_space *vm) +{ + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + gen8_pte_t __iomem *ptep; + + if (!suspend_retains_ptes(vm)) + return 0; + + GEM_BUG_ON(GRAPHICS_VER(vm->i915) < 8); + ptep = (typeof(ptep))ggtt->gsm + (ggtt_total_entries(ggtt) - 1); + return readq(ptep); +} + /** * i915_ggtt_suspend_vm - Suspend the memory mappings for a GGTT or DPT VM * @vm: The VM to suspend the mappings for @@ -156,7 +180,10 @@ retry: i915_gem_object_unlock(obj); } - vm->clear_range(vm, 0, vm->total); + if (!suspend_retains_ptes(vm)) + vm->clear_range(vm, 0, vm->total); + else + i915_vm_to_ggtt(vm)->probed_pte = read_last_pte(vm); vm->skip_pte_rewrite = save_skip_rewrite; @@ -299,6 +326,8 @@ static int init_ggtt(struct i915_ggtt *ggtt) struct drm_mm_node *entry; int ret; + ggtt->pte_lost = true; + /* * GuC requires all resources that we're sharing with it to be placed in * non-WOPCM memory. If GuC is not present or not in use we still need a @@ -675,11 +704,20 @@ bool i915_ggtt_resume_vm(struct i915_address_space *vm) { struct i915_vma *vma; bool write_domain_objs = false; + bool retained_ptes; drm_WARN_ON(&vm->i915->drm, !vm->is_ggtt && !vm->is_dpt); - /* First fill our portion of the GTT with scratch pages */ - vm->clear_range(vm, 0, vm->total); + /* + * First fill our portion of the GTT with scratch pages if + * they were not retained across suspend. + */ + retained_ptes = suspend_retains_ptes(vm) && + !i915_vm_to_ggtt(vm)->pte_lost && + !GEM_WARN_ON(i915_vm_to_ggtt(vm)->probed_pte != read_last_pte(vm)); + + if (!retained_ptes) + vm->clear_range(vm, 0, vm->total); /* clflush objects bound into the GGTT and rebind them. */ list_for_each_entry(vma, &vm->bound_list, vm_link) { @@ -688,9 +726,10 @@ bool i915_ggtt_resume_vm(struct i915_address_space *vm) atomic_read(&vma->flags) & I915_VMA_BIND_MASK; GEM_BUG_ON(!was_bound); - vma->ops->bind_vma(vm, NULL, vma->resource, - obj ? obj->cache_level : 0, - was_bound); + if (!retained_ptes) + vma->ops->bind_vma(vm, NULL, vma->resource, + obj ? obj->cache_level : 0, + was_bound); if (obj) { /* only used during resume => exclusive access */ write_domain_objs |= fetch_and_zero(&obj->write_domain); obj->read_domains |= I915_GEM_DOMAIN_GTT; @@ -718,3 +757,8 @@ void i915_ggtt_resume(struct i915_ggtt *ggtt) intel_ggtt_restore_fences(ggtt); } + +void i915_ggtt_mark_pte_lost(struct drm_i915_private *i915, bool val) +{ + to_gt(i915)->ggtt->pte_lost = val; +} diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index a40d928b3888..128b31476938 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -345,6 +345,19 @@ struct i915_ggtt { bool do_idle_maps; + /** + * @pte_lost: Are ptes lost on resume? + * + * Whether the system was recently restored from hibernate and + * thus may have lost pte content. + */ + bool pte_lost; + + /** + * @probed_pte: Probed pte value on suspend. Re-checked on resume. + */ + u64 probed_pte; + int mtrr; /** Bit 6 swizzling required for X tiling */ @@ -581,6 +594,17 @@ bool i915_ggtt_resume_vm(struct i915_address_space *vm); void i915_ggtt_suspend(struct i915_ggtt *gtt); void i915_ggtt_resume(struct i915_ggtt *ggtt); +/** + * i915_ggtt_mark_pte_lost - Mark ggtt ptes as lost or clear such a marking + * @i915 The device private. + * @val whether the ptes should be marked as lost. + * + * In some cases pte content is retained across suspend, but typically lost + * across hibernate. 
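The suspend/resume decision this marking enables can be modelled in standalone C as follows. The structure is a deliberate simplification (field and function names invented) of how the probed last PTE and the pte_lost flag interact.

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#define NUM_PTES 8

struct ggtt {
	uint64_t pte[NUM_PTES];	/* stands in for the PTE array living in stolen memory */
	uint64_t probed_pte;	/* value of the last PTE sampled at suspend time */
	bool pte_lost;		/* set when hibernate / IRST may have clobbered stolen */
	bool retains_ptes;	/* integrated gen8+ parts keep stolen contents across S3 */
};

static void ggtt_suspend(struct ggtt *ggtt)
{
	if (ggtt->retains_ptes) {
		ggtt->probed_pte = ggtt->pte[NUM_PTES - 1];	/* remember a sentinel */
	} else {
		for (int i = 0; i < NUM_PTES; i++)
			ggtt->pte[i] = 0;			/* scratch the whole GGTT */
	}
}

static void ggtt_resume(struct ggtt *ggtt)
{
	bool retained = ggtt->retains_ptes && !ggtt->pte_lost &&
			ggtt->probed_pte == ggtt->pte[NUM_PTES - 1];

	if (retained) {
		printf("PTEs retained, skipping the rewrite\n");
		return;
	}

	printf("rewriting all PTEs and rebinding vmas\n");
	for (int i = 0; i < NUM_PTES; i++)
		ggtt->pte[i] = 0;	/* followed by rebinding every bound vma */
}

int main(void)
{
	struct ggtt ggtt = { .retains_ptes = true };

	ggtt.pte[NUM_PTES - 1] = 0xdeadbeef;
	ggtt_suspend(&ggtt);
	ggtt_resume(&ggtt);	/* fast path: nothing was lost */

	ggtt.pte_lost = true;	/* e.g. resumed from hibernation or IRST kicked in */
	ggtt_resume(&ggtt);	/* slow path: rebuild everything */
	return 0;
}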
Typically they should be marked as lost on + * hibernation restore and such marking cleared on suspend. + */ +void i915_ggtt_mark_pte_lost(struct drm_i915_private *i915, bool val); + void fill_page_dma(struct drm_i915_gem_object *p, const u64 val, unsigned int count); diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index d26dcca7e654..0e224761d0ed 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -100,6 +100,9 @@ #include "intel_region_ttm.h" #include "vlv_suspend.h" +/* Intel Rapid Start Technology ACPI device name */ +static const char irst_name[] = "INT3392"; + static const struct drm_driver i915_drm_driver; static int i915_get_bridge_dev(struct drm_i915_private *dev_priv) @@ -1441,6 +1444,8 @@ static int i915_pm_suspend(struct device *kdev) return -ENODEV; } + i915_ggtt_mark_pte_lost(i915, false); + if (i915->drm.switch_power_state == DRM_SWITCH_POWER_OFF) return 0; @@ -1493,6 +1498,14 @@ static int i915_pm_resume(struct device *kdev) if (i915->drm.switch_power_state == DRM_SWITCH_POWER_OFF) return 0; + /* + * If IRST is enabled, or if we can't detect whether it's enabled, + * then we must assume we lost the GGTT page table entries, since + * they are not retained if IRST decided to enter S4. + */ + if (!IS_ENABLED(CONFIG_ACPI) || acpi_dev_present(irst_name, NULL, -1)) + i915_ggtt_mark_pte_lost(i915, true); + return i915_drm_resume(&i915->drm); } @@ -1552,6 +1565,9 @@ static int i915_pm_restore_early(struct device *kdev) static int i915_pm_restore(struct device *kdev) { + struct drm_i915_private *i915 = kdev_to_i915(kdev); + + i915_ggtt_mark_pte_lost(i915, true); return i915_pm_resume(kdev); } -- cgit v1.2.3 From fc98eb494c4be6aed076c1ad21d46d5950415ca0 Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Fri, 17 Jun 2022 14:20:32 -0700 Subject: drm/i915: Add global forcewake request to drpc We have seen multiple RC6 issues where it is useful to know which global forcewake bits are set. Add this to the 'drpc' debugfs output. v2: Review comments (Ashutosh) Reviewed-by: Ashutosh Dixit Signed-off-by: Vinay Belgaumkar Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20220617212032.34577-1-vinay.belgaumkar@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c index 90a440865037..40bdd4cb629f 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c @@ -100,14 +100,16 @@ static int vlv_drpc(struct seq_file *m) { struct intel_gt *gt = m->private; struct intel_uncore *uncore = gt->uncore; - u32 rcctl1, pw_status; + u32 rcctl1, pw_status, mt_fwake_req; + mt_fwake_req = intel_uncore_read_fw(uncore, FORCEWAKE_MT); pw_status = intel_uncore_read(uncore, VLV_GTLC_PW_STATUS); rcctl1 = intel_uncore_read(uncore, GEN6_RC_CONTROL); seq_printf(m, "RC6 Enabled: %s\n", str_yes_no(rcctl1 & (GEN7_RC_CTL_TO_MODE | GEN6_RC_CTL_EI_MODE(1)))); + seq_printf(m, "Multi-threaded Forcewake Request: 0x%x\n", mt_fwake_req); seq_printf(m, "Render Power Well: %s\n", (pw_status & VLV_GTLC_PW_RENDER_STATUS_MASK) ? 
"Up" : "Down"); seq_printf(m, "Media Power Well: %s\n", @@ -124,9 +126,10 @@ static int gen6_drpc(struct seq_file *m) struct intel_gt *gt = m->private; struct drm_i915_private *i915 = gt->i915; struct intel_uncore *uncore = gt->uncore; - u32 gt_core_status, rcctl1, rc6vids = 0; + u32 gt_core_status, mt_fwake_req, rcctl1, rc6vids = 0; u32 gen9_powergate_enable = 0, gen9_powergate_status = 0; + mt_fwake_req = intel_uncore_read_fw(uncore, FORCEWAKE_MT); gt_core_status = intel_uncore_read_fw(uncore, GEN6_GT_CORE_STATUS); rcctl1 = intel_uncore_read(uncore, GEN6_RC_CONTROL); @@ -178,6 +181,7 @@ static int gen6_drpc(struct seq_file *m) seq_printf(m, "Core Power Down: %s\n", str_yes_no(gt_core_status & GEN6_CORE_CPD_STATE_MASK)); + seq_printf(m, "Multi-threaded Forcewake Request: 0x%x\n", mt_fwake_req); if (GRAPHICS_VER(i915) >= 9) { seq_printf(m, "Render Power Well: %s\n", (gen9_powergate_status & -- cgit v1.2.3 From 14d6a086f6b91238873d3363f840a91b3aa062de Mon Sep 17 00:00:00 2001 From: pengfuyuan Date: Thu, 16 Jun 2022 15:08:03 +0800 Subject: drm/i915: Fix spelling typo in comment Fix spelling typo in comment. Reported-by: k2ci Signed-off-by: pengfuyuan Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/tencent_7B226C4A9BC2B5EEB37B70C188B5015D290A@qq.com --- drivers/gpu/drm/i915/gem/i915_gem_tiling.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c index 80ac0db1ae8c..85518b28cd72 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c @@ -114,7 +114,7 @@ u32 i915_gem_fence_alignment(struct drm_i915_private *i915, u32 size, return i915_gem_fence_size(i915, size, tiling, stride); } -/* Check pitch constriants for all chips & tiling formats */ +/* Check pitch constraints for all chips & tiling formats */ static bool i915_tiling_ok(struct drm_i915_gem_object *obj, unsigned int tiling, unsigned int stride) -- cgit v1.2.3 From 7482a65664c16cc88eb84d2b545a1fed887378a1 Mon Sep 17 00:00:00 2001 From: katrinzhou Date: Tue, 21 Jun 2022 13:49:26 +0100 Subject: drm/i915/gem: add missing else Add missing else in set_proto_ctx_param() to fix coverity issue. 
Addresses-Coverity: ("Unused value") Fixes: d4433c7600f7 ("drm/i915/gem: Use the proto-context to handle create parameters (v5)") Suggested-by: Tvrtko Ursulin Signed-off-by: katrinzhou [tursulin: fixup alignment] Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20220621124926.615884-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index e116f82fc37c..dabdfe09f5e5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -933,8 +933,9 @@ static int set_proto_ctx_param(struct drm_i915_file_private *fpriv, case I915_CONTEXT_PARAM_PERSISTENCE: if (args->size) ret = -EINVAL; - ret = proto_context_set_persistence(fpriv->dev_priv, pc, - args->value); + else + ret = proto_context_set_persistence(fpriv->dev_priv, pc, + args->value); break; case I915_CONTEXT_PARAM_PROTECTED_CONTENT: -- cgit v1.2.3 From afd5cb3907eaf43e4ca88c162b92143551f0323e Mon Sep 17 00:00:00 2001 From: Juha-Pekka Heikkila Date: Fri, 10 Jun 2022 15:12:05 +0300 Subject: drm/i915: don't leak lmem mapping in vma_evict Don't leak lmem mapping in vma_evict, move __i915_vma_iounmap outside i915_vma_is_map_and_fenceable. Signed-off-by: Juha-Pekka Heikkila Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20220610121205.29645-3-juhapekka.heikkila@gmail.com --- drivers/gpu/drm/i915/i915_vma.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 0bffb70b3c5f..d333c4416997 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -1907,9 +1907,11 @@ struct dma_fence *__i915_vma_evict(struct i915_vma *vma, bool async) /* release the fence reg _after_ flushing */ i915_vma_revoke_fence(vma); - __i915_vma_iounmap(vma); clear_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(vma)); } + + __i915_vma_iounmap(vma); + GEM_BUG_ON(vma->fence); GEM_BUG_ON(i915_vma_has_userfault(vma)); -- cgit v1.2.3 From d976521a995a817007ae3f471ac22b93b1bd39f7 Mon Sep 17 00:00:00 2001 From: CQ Tang Date: Fri, 10 Jun 2022 15:12:04 +0300 Subject: drm/i915: extend i915_vma_pin_iomap() In the future display might try call this with a normal smem object, which doesn't require PIN_MAPPABLE underneath in order to CPU map the pages (unlike stolen). Extend i915_vma_pin_iomap() to directly use i915_gem_object_pin_map() for such cases. This change was suggested by Chris P Wilson, that we pin the smem with i915_gem_object_pin_map_unlocked(). 
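Because the mapping can now come from two different paths, the unmap side has to remember which one was used, and the patch packs that information into the low bit of the returned pointer. A simplified standalone illustration of that pointer-tagging idea follows; the helper names here are invented stand-ins for the driver's page_pack_bits()/page_unmask_bits()/page_mask_bits() usage.

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

/* pack a small tag into the low bits of a sufficiently aligned pointer */
static void *ptr_pack_bits(void *ptr, uintptr_t bits)
{
	return (void *)((uintptr_t)ptr | bits);
}

static uintptr_t ptr_unmask_bits(void *ptr, uintptr_t mask)
{
	return (uintptr_t)ptr & mask;
}

static void *ptr_mask_bits(void *ptr, uintptr_t mask)
{
	return (void *)((uintptr_t)ptr & ~mask);
}

int main(void)
{
	/* page alignment guarantees the low bit is free to use as a tag */
	void *mapping = aligned_alloc(4096, 4096);
	void *tagged = ptr_pack_bits(mapping, 1);	/* 1 = "came from the pin_map path" */

	if (ptr_unmask_bits(tagged, 1))
		printf("release via the object pin_map path\n");
	else
		printf("release via io_mapping_unmap\n");

	free(ptr_mask_bits(tagged, 1));	/* always strip the tag before dereferencing or freeing */
	return 0;
}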
v2 (jheikkil): Change i915_gem_object_pin_map_unlocked to i915_gem_object_pin_map Signed-off-by: CQ Tang Signed-off-by: Juha-Pekka Heikkila Cc: Chris Wilson Cc: Jari Tahvanainen Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld [mauld: tweak commit message, plus minor checkpatch fix] Link: https://patchwork.freedesktop.org/patch/msgid/20220610121205.29645-2-juhapekka.heikkila@gmail.com --- drivers/gpu/drm/i915/i915_vma.c | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index d333c4416997..5d5828b9a242 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -551,13 +551,6 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) if (WARN_ON_ONCE(vma->obj->flags & I915_BO_ALLOC_GPU_ONLY)) return IOMEM_ERR_PTR(-EINVAL); - if (!i915_gem_object_is_lmem(vma->obj)) { - if (GEM_WARN_ON(!i915_vma_is_map_and_fenceable(vma))) { - err = -ENODEV; - goto err; - } - } - GEM_BUG_ON(!i915_vma_is_ggtt(vma)); GEM_BUG_ON(!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)); GEM_BUG_ON(i915_vma_verify_bind_complete(vma)); @@ -570,20 +563,33 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) * of pages, that way we can also drop the * I915_BO_ALLOC_CONTIGUOUS when allocating the object. */ - if (i915_gem_object_is_lmem(vma->obj)) + if (i915_gem_object_is_lmem(vma->obj)) { ptr = i915_gem_object_lmem_io_map(vma->obj, 0, vma->obj->base.size); - else + } else if (i915_vma_is_map_and_fenceable(vma)) { ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->iomap, vma->node.start, vma->node.size); + } else { + ptr = (void __iomem *) + i915_gem_object_pin_map(vma->obj, I915_MAP_WC); + if (IS_ERR(ptr)) { + err = PTR_ERR(ptr); + goto err; + } + ptr = page_pack_bits(ptr, 1); + } + if (ptr == NULL) { err = -ENOMEM; goto err; } if (unlikely(cmpxchg(&vma->iomap, NULL, ptr))) { - io_mapping_unmap(ptr); + if (page_unmask_bits(ptr)) + __i915_gem_object_release_map(vma->obj); + else + io_mapping_unmap(ptr); ptr = vma->iomap; } } @@ -597,7 +603,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) i915_vma_set_ggtt_write(vma); /* NB Access through the GTT requires the device to be awake. */ - return ptr; + return page_mask_bits(ptr); err_unpin: __i915_vma_unpin(vma); @@ -615,6 +621,8 @@ void i915_vma_unpin_iomap(struct i915_vma *vma) { GEM_BUG_ON(vma->iomap == NULL); + /* XXX We keep the mapping until __i915_vma_unbind()/evict() */ + i915_vma_flush_writes(vma); i915_vma_unpin_fence(vma); @@ -1763,7 +1771,10 @@ static void __i915_vma_iounmap(struct i915_vma *vma) if (vma->iomap == NULL) return; - io_mapping_unmap(vma->iomap); + if (page_unmask_bits(vma->iomap)) + __i915_gem_object_release_map(vma->obj); + else + io_mapping_unmap(vma->iomap); vma->iomap = NULL; } -- cgit v1.2.3 From 0dc987b699ce4266450d407d6d79d41eab88c5d0 Mon Sep 17 00:00:00 2001 From: Juha-Pekka Heikkila Date: Fri, 10 Jun 2022 15:12:03 +0300 Subject: drm/i915/display: Add smem fallback allocation for dpt Add fallback smem allocation for dpt if stolen memory allocation failed. 
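The allocation strategy becomes a cascade: try LMEM first, then stolen memory when a mappable aperture exists, then plain system memory on non-LMEM parts. A rough standalone sketch of that fallback chain, using a toy ERR_PTR/IS_ERR convention and invented allocator names:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* toy stand-ins for the kernel's ERR_PTR()/IS_ERR() convention */
static void *ERR_PTR(long err)		{ return (void *)(intptr_t)err; }
static int IS_ERR(const void *ptr)	{ return (uintptr_t)ptr >= (uintptr_t)-4095; }

static void *alloc_lmem(size_t size)	{ (void)size; return ERR_PTR(-ENODEV); }	/* no local memory */
static void *alloc_stolen(size_t size)	{ (void)size; return ERR_PTR(-ENOMEM); }	/* stolen exhausted */
static void *alloc_internal(size_t size) { return malloc(size); }			/* plain system memory */

static void *alloc_dpt_backing(size_t size, int has_mappable_aperture, int has_lmem)
{
	void *obj = alloc_lmem(size);

	if (IS_ERR(obj) && has_mappable_aperture)
		obj = alloc_stolen(size);
	if (IS_ERR(obj) && !has_lmem)
		obj = alloc_internal(size);	/* last resort: system memory */
	return obj;
}

int main(void)
{
	void *obj = alloc_dpt_backing(4096, 1, 0);

	printf("DPT backing allocation %s\n", IS_ERR(obj) ? "failed" : "fell back to smem");
	if (!IS_ERR(obj))
		free(obj);
	return 0;
}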
Signed-off-by: Juha-Pekka Heikkila Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20220610121205.29645-1-juhapekka.heikkila@gmail.com --- drivers/gpu/drm/i915/display/intel_dpt.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c index fb0e7e79e0cd..ac587647e1f5 100644 --- a/drivers/gpu/drm/i915/display/intel_dpt.c +++ b/drivers/gpu/drm/i915/display/intel_dpt.c @@ -4,6 +4,7 @@ */ #include "gem/i915_gem_domain.h" +#include "gem/i915_gem_internal.h" #include "gt/gen8_ppgtt.h" #include "i915_drv.h" @@ -127,8 +128,12 @@ struct i915_vma *intel_dpt_pin(struct i915_address_space *vm) struct i915_vma *vma; void __iomem *iomem; struct i915_gem_ww_ctx ww; + u64 pin_flags = 0; int err; + if (i915_gem_object_is_stolen(dpt->obj)) + pin_flags |= PIN_MAPPABLE; + wakeref = intel_runtime_pm_get(&i915->runtime_pm); atomic_inc(&i915->gpu_error.pending_fb_pin); @@ -138,7 +143,7 @@ struct i915_vma *intel_dpt_pin(struct i915_address_space *vm) continue; vma = i915_gem_object_ggtt_pin_ww(dpt->obj, &ww, NULL, 0, 4096, - HAS_LMEM(i915) ? 0 : PIN_MAPPABLE); + pin_flags); if (IS_ERR(vma)) { err = PTR_ERR(vma); continue; @@ -248,10 +253,13 @@ intel_dpt_create(struct intel_framebuffer *fb) size = round_up(size * sizeof(gen8_pte_t), I915_GTT_PAGE_SIZE); - if (HAS_LMEM(i915)) - dpt_obj = i915_gem_object_create_lmem(i915, size, I915_BO_ALLOC_CONTIGUOUS); - else + dpt_obj = i915_gem_object_create_lmem(i915, size, I915_BO_ALLOC_CONTIGUOUS); + if (IS_ERR(dpt_obj) && i915_ggtt_has_aperture(to_gt(i915)->ggtt)) dpt_obj = i915_gem_object_create_stolen(i915, size); + if (IS_ERR(dpt_obj) && !HAS_LMEM(i915)) { + drm_dbg_kms(&i915->drm, "Allocating dpt from smem\n"); + dpt_obj = i915_gem_object_create_internal(i915, size); + } if (IS_ERR(dpt_obj)) return ERR_CAST(dpt_obj); -- cgit v1.2.3 From 64e06652e348f0725368853688d3c15784549fd2 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 17 Jun 2022 16:05:58 -0700 Subject: agp/intel: Rename intel-gtt symbols Exporting the symbols like intel_gtt_* creates some confusion inside i915 that has symbols named similarly. In an attempt to isolate platforms needing intel-gtt.ko, commit 7a5c922377b4 ("drm/i915/gt: Split intel-gtt functions by arch") moved way too much inside gt/intel_gt_gmch.c, even the functions that don't callout to this module. Rename the symbols to make the separation clear. 
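In short, the exported symbols are renamed as follows (taken from the diff below): intel_gtt_get -> intel_gmch_gtt_get, intel_enable_gtt -> intel_gmch_enable_gtt, intel_gtt_chipset_flush -> intel_gmch_gtt_flush, intel_gtt_insert_page -> intel_gmch_gtt_insert_page, intel_gtt_insert_sg_entries -> intel_gmch_gtt_insert_sg_entries, intel_gtt_clear_range -> intel_gmch_gtt_clear_range.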
Signed-off-by: Lucas De Marchi Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20220617230559.2109427-1-lucas.demarchi@intel.com --- drivers/char/agp/intel-gtt.c | 58 ++++++++++++++++----------------- drivers/gpu/drm/i915/gt/intel_gt_gmch.c | 16 ++++----- include/drm/intel-gtt.h | 24 +++++++------- 3 files changed, 49 insertions(+), 49 deletions(-) diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c index 79a1b65527c2..fe7e2105e766 100644 --- a/drivers/char/agp/intel-gtt.c +++ b/drivers/char/agp/intel-gtt.c @@ -744,7 +744,7 @@ static void i830_write_entry(dma_addr_t addr, unsigned int entry, writel_relaxed(addr | pte_flags, intel_private.gtt + entry); } -bool intel_enable_gtt(void) +bool intel_gmch_enable_gtt(void) { u8 __iomem *reg; @@ -787,7 +787,7 @@ bool intel_enable_gtt(void) return true; } -EXPORT_SYMBOL(intel_enable_gtt); +EXPORT_SYMBOL(intel_gmch_enable_gtt); static int i830_setup(void) { @@ -821,8 +821,8 @@ static int intel_fake_agp_free_gatt_table(struct agp_bridge_data *bridge) static int intel_fake_agp_configure(void) { - if (!intel_enable_gtt()) - return -EIO; + if (!intel_gmch_enable_gtt()) + return -EIO; intel_private.clear_fake_agp = true; agp_bridge->gart_bus_addr = intel_private.gma_bus_addr; @@ -844,20 +844,20 @@ static bool i830_check_flags(unsigned int flags) return false; } -void intel_gtt_insert_page(dma_addr_t addr, - unsigned int pg, - unsigned int flags) +void intel_gmch_gtt_insert_page(dma_addr_t addr, + unsigned int pg, + unsigned int flags) { intel_private.driver->write_entry(addr, pg, flags); readl(intel_private.gtt + pg); if (intel_private.driver->chipset_flush) intel_private.driver->chipset_flush(); } -EXPORT_SYMBOL(intel_gtt_insert_page); +EXPORT_SYMBOL(intel_gmch_gtt_insert_page); -void intel_gtt_insert_sg_entries(struct sg_table *st, - unsigned int pg_start, - unsigned int flags) +void intel_gmch_gtt_insert_sg_entries(struct sg_table *st, + unsigned int pg_start, + unsigned int flags) { struct scatterlist *sg; unsigned int len, m; @@ -879,13 +879,13 @@ void intel_gtt_insert_sg_entries(struct sg_table *st, if (intel_private.driver->chipset_flush) intel_private.driver->chipset_flush(); } -EXPORT_SYMBOL(intel_gtt_insert_sg_entries); +EXPORT_SYMBOL(intel_gmch_gtt_insert_sg_entries); #if IS_ENABLED(CONFIG_AGP_INTEL) -static void intel_gtt_insert_pages(unsigned int first_entry, - unsigned int num_entries, - struct page **pages, - unsigned int flags) +static void intel_gmch_gtt_insert_pages(unsigned int first_entry, + unsigned int num_entries, + struct page **pages, + unsigned int flags) { int i, j; @@ -905,7 +905,7 @@ static int intel_fake_agp_insert_entries(struct agp_memory *mem, if (intel_private.clear_fake_agp) { int start = intel_private.stolen_size / PAGE_SIZE; int end = intel_private.gtt_mappable_entries; - intel_gtt_clear_range(start, end - start); + intel_gmch_gtt_clear_range(start, end - start); intel_private.clear_fake_agp = false; } @@ -934,12 +934,12 @@ static int intel_fake_agp_insert_entries(struct agp_memory *mem, if (ret != 0) return ret; - intel_gtt_insert_sg_entries(&st, pg_start, type); + intel_gmch_gtt_insert_sg_entries(&st, pg_start, type); mem->sg_list = st.sgl; mem->num_sg = st.nents; } else - intel_gtt_insert_pages(pg_start, mem->page_count, mem->pages, - type); + intel_gmch_gtt_insert_pages(pg_start, mem->page_count, mem->pages, + type); out: ret = 0; @@ -949,7 +949,7 @@ out_err: } #endif -void intel_gtt_clear_range(unsigned int first_entry, unsigned int num_entries) +void 
intel_gmch_gtt_clear_range(unsigned int first_entry, unsigned int num_entries) { unsigned int i; @@ -959,7 +959,7 @@ void intel_gtt_clear_range(unsigned int first_entry, unsigned int num_entries) } wmb(); } -EXPORT_SYMBOL(intel_gtt_clear_range); +EXPORT_SYMBOL(intel_gmch_gtt_clear_range); #if IS_ENABLED(CONFIG_AGP_INTEL) static int intel_fake_agp_remove_entries(struct agp_memory *mem, @@ -968,7 +968,7 @@ static int intel_fake_agp_remove_entries(struct agp_memory *mem, if (mem->page_count == 0) return 0; - intel_gtt_clear_range(pg_start, mem->page_count); + intel_gmch_gtt_clear_range(pg_start, mem->page_count); if (intel_private.needs_dmar) { intel_gtt_unmap_memory(mem->sg_list, mem->num_sg); @@ -1431,22 +1431,22 @@ int intel_gmch_probe(struct pci_dev *bridge_pdev, struct pci_dev *gpu_pdev, } EXPORT_SYMBOL(intel_gmch_probe); -void intel_gtt_get(u64 *gtt_total, - phys_addr_t *mappable_base, - resource_size_t *mappable_end) +void intel_gmch_gtt_get(u64 *gtt_total, + phys_addr_t *mappable_base, + resource_size_t *mappable_end) { *gtt_total = intel_private.gtt_total_entries << PAGE_SHIFT; *mappable_base = intel_private.gma_bus_addr; *mappable_end = intel_private.gtt_mappable_entries << PAGE_SHIFT; } -EXPORT_SYMBOL(intel_gtt_get); +EXPORT_SYMBOL(intel_gmch_gtt_get); -void intel_gtt_chipset_flush(void) +void intel_gmch_gtt_flush(void) { if (intel_private.driver->chipset_flush) intel_private.driver->chipset_flush(); } -EXPORT_SYMBOL(intel_gtt_chipset_flush); +EXPORT_SYMBOL(intel_gmch_gtt_flush); void intel_gmch_remove(void) { diff --git a/drivers/gpu/drm/i915/gt/intel_gt_gmch.c b/drivers/gpu/drm/i915/gt/intel_gt_gmch.c index 18e488672d1b..b1a6ff4c9377 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_gmch.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_gmch.c @@ -134,7 +134,7 @@ static void gen5_ggtt_insert_page(struct i915_address_space *vm, unsigned int flags = (cache_level == I915_CACHE_NONE) ? AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; - intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags); + intel_gmch_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags); } static void gen6_ggtt_insert_page(struct i915_address_space *vm, @@ -175,8 +175,8 @@ static void gen5_ggtt_insert_entries(struct i915_address_space *vm, unsigned int flags = (cache_level == I915_CACHE_NONE) ? 
AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; - intel_gtt_insert_sg_entries(vma_res->bi.pages, vma_res->start >> PAGE_SHIFT, - flags); + intel_gmch_gtt_insert_sg_entries(vma_res->bi.pages, vma_res->start >> PAGE_SHIFT, + flags); } /* @@ -306,18 +306,18 @@ static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm, void intel_gt_gmch_gen5_chipset_flush(struct intel_gt *gt) { - intel_gtt_chipset_flush(); + intel_gmch_gtt_flush(); } static void gmch_ggtt_invalidate(struct i915_ggtt *ggtt) { - intel_gtt_chipset_flush(); + intel_gmch_gtt_flush(); } static void gen5_ggtt_clear_range(struct i915_address_space *vm, u64 start, u64 length) { - intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT); + intel_gmch_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT); } static void gen6_ggtt_clear_range(struct i915_address_space *vm, @@ -494,7 +494,7 @@ int intel_gt_gmch_gen5_probe(struct i915_ggtt *ggtt) return -EIO; } - intel_gtt_get(&ggtt->vm.total, &gmadr_base, &ggtt->mappable_end); + intel_gmch_gtt_get(&ggtt->vm.total, &gmadr_base, &ggtt->mappable_end); ggtt->gmadr = (struct resource)DEFINE_RES_MEM(gmadr_base, ggtt->mappable_end); @@ -647,7 +647,7 @@ int intel_gt_gmch_gen8_probe(struct i915_ggtt *ggtt) int intel_gt_gmch_gen5_enable_hw(struct drm_i915_private *i915) { - if (GRAPHICS_VER(i915) < 6 && !intel_enable_gtt()) + if (GRAPHICS_VER(i915) < 6 && !intel_gmch_enable_gtt()) return -EIO; return 0; diff --git a/include/drm/intel-gtt.h b/include/drm/intel-gtt.h index 67530bfef129..cb0d5b7200c7 100644 --- a/include/drm/intel-gtt.h +++ b/include/drm/intel-gtt.h @@ -10,24 +10,24 @@ struct agp_bridge_data; struct pci_dev; struct sg_table; -void intel_gtt_get(u64 *gtt_total, - phys_addr_t *mappable_base, - resource_size_t *mappable_end); +void intel_gmch_gtt_get(u64 *gtt_total, + phys_addr_t *mappable_base, + resource_size_t *mappable_end); int intel_gmch_probe(struct pci_dev *bridge_pdev, struct pci_dev *gpu_pdev, struct agp_bridge_data *bridge); void intel_gmch_remove(void); -bool intel_enable_gtt(void); +bool intel_gmch_enable_gtt(void); -void intel_gtt_chipset_flush(void); -void intel_gtt_insert_page(dma_addr_t addr, - unsigned int pg, - unsigned int flags); -void intel_gtt_insert_sg_entries(struct sg_table *st, - unsigned int pg_start, - unsigned int flags); -void intel_gtt_clear_range(unsigned int first_entry, unsigned int num_entries); +void intel_gmch_gtt_flush(void); +void intel_gmch_gtt_insert_page(dma_addr_t addr, + unsigned int pg, + unsigned int flags); +void intel_gmch_gtt_insert_sg_entries(struct sg_table *st, + unsigned int pg_start, + unsigned int flags); +void intel_gmch_gtt_clear_range(unsigned int first_entry, unsigned int num_entries); /* Special gtt memory types */ #define AGP_DCACHE_MEMORY 1 -- cgit v1.2.3 From 9ce07d94c9f80e1b33f5f6bc2c5a27f6cc56a6dc Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 17 Jun 2022 16:05:59 -0700 Subject: drm/i915/gt: Re-do the intel-gtt split Re-do what was attempted in commit 7a5c922377b4 ("drm/i915/gt: Split intel-gtt functions by arch"). The goal of that commit was to split the handlers for older hardware that depend on intel-gtt.ko so i915 can be built for non-x86 archs, after some more patches. Other archs do not need intel-gtt.ko. Main issue with the previous approach: it moved all the hooks, including the gen8, which is used by all platforms gen8 and newer. Re-do the split moving only the handlers for gen < 6, which are the only ones calling out to the separate module. 
While at it do some minor cleanups: - Rename the prefix s/gen5_/gmch_/ to be more accurate what platforms are covered by intel_ggtt_gmch.c - Remove dead code for gen12 out of needs_idle_maps() - Remove TODO comment leftover - Re-order if/else ladder in ggtt_probe_hw() to keep newest platforms first v2: Add minor cleanups (Matt Roper) Signed-off-by: Lucas De Marchi Acked-by: Tvrtko Ursulin Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20220617230559.2109427-2-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/Makefile | 2 +- drivers/gpu/drm/i915/gt/intel_ggtt.c | 561 ++++++++++++++++++++++++- drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c | 132 ++++++ drivers/gpu/drm/i915/gt/intel_ggtt_gmch.h | 27 ++ drivers/gpu/drm/i915/gt/intel_gt.c | 5 +- drivers/gpu/drm/i915/gt/intel_gt.h | 9 - drivers/gpu/drm/i915/gt/intel_gt_gmch.c | 654 ------------------------------ drivers/gpu/drm/i915/gt/intel_gt_gmch.h | 46 --- drivers/gpu/drm/i915/gt/intel_gtt.h | 12 +- 9 files changed, 714 insertions(+), 734 deletions(-) create mode 100644 drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c create mode 100644 drivers/gpu/drm/i915/gt/intel_ggtt_gmch.h delete mode 100644 drivers/gpu/drm/i915/gt/intel_gt_gmch.c delete mode 100644 drivers/gpu/drm/i915/gt/intel_gt_gmch.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 08f5d0d6e83a..e776aded71c0 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -130,7 +130,7 @@ gt-y += \ gt/shmem_utils.o \ gt/sysfs_engines.o # x86 intel-gtt module support -gt-$(CONFIG_X86) += gt/intel_gt_gmch.o +gt-$(CONFIG_X86) += gt/intel_ggtt_gmch.o # autogenerated null render state gt-y += \ gt/gen6_renderstate.o \ diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 0849a6f66309..ffff96180313 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -3,16 +3,18 @@ * Copyright © 2020 Intel Corporation */ -#include #include #include +#include +#include #include +#include #include "gem/i915_gem_lmem.h" +#include "intel_ggtt_gmch.h" #include "intel_gt.h" -#include "intel_gt_gmch.h" #include "intel_gt_regs.h" #include "i915_drv.h" #include "i915_scatterlist.h" @@ -208,7 +210,7 @@ void gen6_ggtt_invalidate(struct i915_ggtt *ggtt) spin_unlock_irq(&uncore->lock); } -void gen8_ggtt_invalidate(struct i915_ggtt *ggtt) +static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt) { struct intel_uncore *uncore = ggtt->vm.gt->uncore; @@ -245,11 +247,232 @@ u64 gen8_ggtt_pte_encode(dma_addr_t addr, return pte; } +static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) +{ + writeq(pte, addr); +} + +static void gen8_ggtt_insert_page(struct i915_address_space *vm, + dma_addr_t addr, + u64 offset, + enum i915_cache_level level, + u32 flags) +{ + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + gen8_pte_t __iomem *pte = + (gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE; + + gen8_set_pte(pte, gen8_ggtt_pte_encode(addr, level, flags)); + + ggtt->invalidate(ggtt); +} + +static void gen8_ggtt_insert_entries(struct i915_address_space *vm, + struct i915_vma_resource *vma_res, + enum i915_cache_level level, + u32 flags) +{ + const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level, flags); + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + gen8_pte_t __iomem *gte; + gen8_pte_t __iomem *end; + struct sgt_iter iter; + dma_addr_t addr; + + /* + * Note that we ignore PTE_READ_ONLY here. 
The caller must be careful + * not to allow the user to override access to a read only page. + */ + + gte = (gen8_pte_t __iomem *)ggtt->gsm; + gte += vma_res->start / I915_GTT_PAGE_SIZE; + end = gte + vma_res->node_size / I915_GTT_PAGE_SIZE; + + for_each_sgt_daddr(addr, iter, vma_res->bi.pages) + gen8_set_pte(gte++, pte_encode | addr); + GEM_BUG_ON(gte > end); + + /* Fill the allocated but "unused" space beyond the end of the buffer */ + while (gte < end) + gen8_set_pte(gte++, vm->scratch[0]->encode); + + /* + * We want to flush the TLBs only after we're certain all the PTE + * updates have finished. + */ + ggtt->invalidate(ggtt); +} + +static void gen6_ggtt_insert_page(struct i915_address_space *vm, + dma_addr_t addr, + u64 offset, + enum i915_cache_level level, + u32 flags) +{ + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + gen6_pte_t __iomem *pte = + (gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE; + + iowrite32(vm->pte_encode(addr, level, flags), pte); + + ggtt->invalidate(ggtt); +} + +/* + * Binds an object into the global gtt with the specified cache level. + * The object will be accessible to the GPU via commands whose operands + * reference offsets within the global GTT as well as accessible by the GPU + * through the GMADR mapped BAR (i915->mm.gtt->gtt). + */ +static void gen6_ggtt_insert_entries(struct i915_address_space *vm, + struct i915_vma_resource *vma_res, + enum i915_cache_level level, + u32 flags) +{ + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + gen6_pte_t __iomem *gte; + gen6_pte_t __iomem *end; + struct sgt_iter iter; + dma_addr_t addr; + + gte = (gen6_pte_t __iomem *)ggtt->gsm; + gte += vma_res->start / I915_GTT_PAGE_SIZE; + end = gte + vma_res->node_size / I915_GTT_PAGE_SIZE; + + for_each_sgt_daddr(addr, iter, vma_res->bi.pages) + iowrite32(vm->pte_encode(addr, level, flags), gte++); + GEM_BUG_ON(gte > end); + + /* Fill the allocated but "unused" space beyond the end of the buffer */ + while (gte < end) + iowrite32(vm->scratch[0]->encode, gte++); + + /* + * We want to flush the TLBs only after we're certain all the PTE + * updates have finished. + */ + ggtt->invalidate(ggtt); +} + +static void nop_clear_range(struct i915_address_space *vm, + u64 start, u64 length) +{ +} + +static void gen8_ggtt_clear_range(struct i915_address_space *vm, + u64 start, u64 length) +{ + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + unsigned int first_entry = start / I915_GTT_PAGE_SIZE; + unsigned int num_entries = length / I915_GTT_PAGE_SIZE; + const gen8_pte_t scratch_pte = vm->scratch[0]->encode; + gen8_pte_t __iomem *gtt_base = + (gen8_pte_t __iomem *)ggtt->gsm + first_entry; + const int max_entries = ggtt_total_entries(ggtt) - first_entry; + int i; + + if (WARN(num_entries > max_entries, + "First entry = %d; Num entries = %d (max=%d)\n", + first_entry, num_entries, max_entries)) + num_entries = max_entries; + + for (i = 0; i < num_entries; i++) + gen8_set_pte(>t_base[i], scratch_pte); +} + +static void bxt_vtd_ggtt_wa(struct i915_address_space *vm) +{ + /* + * Make sure the internal GAM fifo has been cleared of all GTT + * writes before exiting stop_machine(). This guarantees that + * any aperture accesses waiting to start in another process + * cannot back up behind the GTT writes causing a hang. + * The register can be any arbitrary GAM register. 
+ */ + intel_uncore_posting_read_fw(vm->gt->uncore, GFX_FLSH_CNTL_GEN6); +} + +struct insert_page { + struct i915_address_space *vm; + dma_addr_t addr; + u64 offset; + enum i915_cache_level level; +}; + +static int bxt_vtd_ggtt_insert_page__cb(void *_arg) +{ + struct insert_page *arg = _arg; + + gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0); + bxt_vtd_ggtt_wa(arg->vm); + + return 0; +} + +static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm, + dma_addr_t addr, + u64 offset, + enum i915_cache_level level, + u32 unused) +{ + struct insert_page arg = { vm, addr, offset, level }; + + stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL); +} + +struct insert_entries { + struct i915_address_space *vm; + struct i915_vma_resource *vma_res; + enum i915_cache_level level; + u32 flags; +}; + +static int bxt_vtd_ggtt_insert_entries__cb(void *_arg) +{ + struct insert_entries *arg = _arg; + + gen8_ggtt_insert_entries(arg->vm, arg->vma_res, arg->level, arg->flags); + bxt_vtd_ggtt_wa(arg->vm); + + return 0; +} + +static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm, + struct i915_vma_resource *vma_res, + enum i915_cache_level level, + u32 flags) +{ + struct insert_entries arg = { vm, vma_res, level, flags }; + + stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL); +} + +static void gen6_ggtt_clear_range(struct i915_address_space *vm, + u64 start, u64 length) +{ + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + unsigned int first_entry = start / I915_GTT_PAGE_SIZE; + unsigned int num_entries = length / I915_GTT_PAGE_SIZE; + gen6_pte_t scratch_pte, __iomem *gtt_base = + (gen6_pte_t __iomem *)ggtt->gsm + first_entry; + const int max_entries = ggtt_total_entries(ggtt) - first_entry; + int i; + + if (WARN(num_entries > max_entries, + "First entry = %d; Num entries = %d (max=%d)\n", + first_entry, num_entries, max_entries)) + num_entries = max_entries; + + scratch_pte = vm->scratch[0]->encode; + for (i = 0; i < num_entries; i++) + iowrite32(scratch_pte, >t_base[i]); +} + void intel_ggtt_bind_vma(struct i915_address_space *vm, - struct i915_vm_pt_stash *stash, - struct i915_vma_resource *vma_res, - enum i915_cache_level cache_level, - u32 flags) + struct i915_vm_pt_stash *stash, + struct i915_vma_resource *vma_res, + enum i915_cache_level cache_level, + u32 flags) { u32 pte_flags; @@ -270,7 +493,7 @@ void intel_ggtt_bind_vma(struct i915_address_space *vm, } void intel_ggtt_unbind_vma(struct i915_address_space *vm, - struct i915_vma_resource *vma_res) + struct i915_vma_resource *vma_res) { vm->clear_range(vm, vma_res->start, vma_res->vma_size); } @@ -589,12 +812,316 @@ void i915_ggtt_driver_late_release(struct drm_i915_private *i915) dma_resv_fini(&ggtt->vm._resv); } -struct resource intel_pci_resource(struct pci_dev *pdev, int bar) +static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl) +{ + snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT; + snb_gmch_ctl &= SNB_GMCH_GGMS_MASK; + return snb_gmch_ctl << 20; +} + +static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl) +{ + bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT; + bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK; + if (bdw_gmch_ctl) + bdw_gmch_ctl = 1 << bdw_gmch_ctl; + +#ifdef CONFIG_X86_32 + /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * I915_GTT_PAGE_SIZE */ + if (bdw_gmch_ctl > 4) + bdw_gmch_ctl = 4; +#endif + + return bdw_gmch_ctl << 20; +} + +static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl) +{ + gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT; + gmch_ctrl &= SNB_GMCH_GGMS_MASK; + + if 
(gmch_ctrl) + return 1 << (20 + gmch_ctrl); + + return 0; +} + +static unsigned int gen6_gttmmadr_size(struct drm_i915_private *i915) +{ + /* + * GEN6: GTTMMADR size is 4MB and GTTADR starts at 2MB offset + * GEN8: GTTMMADR size is 16MB and GTTADR starts at 8MB offset + */ + GEM_BUG_ON(GRAPHICS_VER(i915) < 6); + return (GRAPHICS_VER(i915) < 8) ? SZ_4M : SZ_16M; +} + +static unsigned int gen6_gttadr_offset(struct drm_i915_private *i915) +{ + return gen6_gttmmadr_size(i915) / 2; +} + +static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) +{ + struct drm_i915_private *i915 = ggtt->vm.i915; + struct pci_dev *pdev = to_pci_dev(i915->drm.dev); + phys_addr_t phys_addr; + u32 pte_flags; + int ret; + + GEM_WARN_ON(pci_resource_len(pdev, 0) != gen6_gttmmadr_size(i915)); + phys_addr = pci_resource_start(pdev, 0) + gen6_gttadr_offset(i915); + + /* + * On BXT+/ICL+ writes larger than 64 bit to the GTT pagetable range + * will be dropped. For WC mappings in general we have 64 byte burst + * writes when the WC buffer is flushed, so we can't use it, but have to + * resort to an uncached mapping. The WC issue is easily caught by the + * readback check when writing GTT PTE entries. + */ + if (IS_GEN9_LP(i915) || GRAPHICS_VER(i915) >= 11) + ggtt->gsm = ioremap(phys_addr, size); + else + ggtt->gsm = ioremap_wc(phys_addr, size); + if (!ggtt->gsm) { + drm_err(&i915->drm, "Failed to map the ggtt page table\n"); + return -ENOMEM; + } + + kref_init(&ggtt->vm.resv_ref); + ret = setup_scratch_page(&ggtt->vm); + if (ret) { + drm_err(&i915->drm, "Scratch setup failed\n"); + /* iounmap will also get called at remove, but meh */ + iounmap(ggtt->gsm); + return ret; + } + + pte_flags = 0; + if (i915_gem_object_is_lmem(ggtt->vm.scratch[0])) + pte_flags |= PTE_LM; + + ggtt->vm.scratch[0]->encode = + ggtt->vm.pte_encode(px_dma(ggtt->vm.scratch[0]), + I915_CACHE_NONE, pte_flags); + + return 0; +} + +static void gen6_gmch_remove(struct i915_address_space *vm) +{ + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + + iounmap(ggtt->gsm); + free_scratch(vm); +} + +static struct resource pci_resource(struct pci_dev *pdev, int bar) { return (struct resource)DEFINE_RES_MEM(pci_resource_start(pdev, bar), pci_resource_len(pdev, bar)); } +static int gen8_gmch_probe(struct i915_ggtt *ggtt) +{ + struct drm_i915_private *i915 = ggtt->vm.i915; + struct pci_dev *pdev = to_pci_dev(i915->drm.dev); + unsigned int size; + u16 snb_gmch_ctl; + + if (!HAS_LMEM(i915)) { + ggtt->gmadr = pci_resource(pdev, 2); + ggtt->mappable_end = resource_size(&ggtt->gmadr); + } + + pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); + if (IS_CHERRYVIEW(i915)) + size = chv_get_total_gtt_size(snb_gmch_ctl); + else + size = gen8_get_total_gtt_size(snb_gmch_ctl); + + ggtt->vm.alloc_pt_dma = alloc_pt_dma; + ggtt->vm.alloc_scratch_dma = alloc_pt_dma; + ggtt->vm.lmem_pt_obj_flags = I915_BO_ALLOC_PM_EARLY; + + ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE; + ggtt->vm.cleanup = gen6_gmch_remove; + ggtt->vm.insert_page = gen8_ggtt_insert_page; + ggtt->vm.clear_range = nop_clear_range; + if (intel_scanout_needs_vtd_wa(i915)) + ggtt->vm.clear_range = gen8_ggtt_clear_range; + + ggtt->vm.insert_entries = gen8_ggtt_insert_entries; + + /* + * Serialize GTT updates with aperture access on BXT if VT-d is on, + * and always on CHV. 
+ */ + if (intel_vm_no_concurrent_access_wa(i915)) { + ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL; + ggtt->vm.insert_page = bxt_vtd_ggtt_insert_page__BKL; + ggtt->vm.bind_async_flags = + I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; + } + + ggtt->invalidate = gen8_ggtt_invalidate; + + ggtt->vm.vma_ops.bind_vma = intel_ggtt_bind_vma; + ggtt->vm.vma_ops.unbind_vma = intel_ggtt_unbind_vma; + + ggtt->vm.pte_encode = gen8_ggtt_pte_encode; + + setup_private_pat(ggtt->vm.gt->uncore); + + return ggtt_probe_common(ggtt, size); +} + +static u64 snb_pte_encode(dma_addr_t addr, + enum i915_cache_level level, + u32 flags) +{ + gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID; + + switch (level) { + case I915_CACHE_L3_LLC: + case I915_CACHE_LLC: + pte |= GEN6_PTE_CACHE_LLC; + break; + case I915_CACHE_NONE: + pte |= GEN6_PTE_UNCACHED; + break; + default: + MISSING_CASE(level); + } + + return pte; +} + +static u64 ivb_pte_encode(dma_addr_t addr, + enum i915_cache_level level, + u32 flags) +{ + gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID; + + switch (level) { + case I915_CACHE_L3_LLC: + pte |= GEN7_PTE_CACHE_L3_LLC; + break; + case I915_CACHE_LLC: + pte |= GEN6_PTE_CACHE_LLC; + break; + case I915_CACHE_NONE: + pte |= GEN6_PTE_UNCACHED; + break; + default: + MISSING_CASE(level); + } + + return pte; +} + +static u64 byt_pte_encode(dma_addr_t addr, + enum i915_cache_level level, + u32 flags) +{ + gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID; + + if (!(flags & PTE_READ_ONLY)) + pte |= BYT_PTE_WRITEABLE; + + if (level != I915_CACHE_NONE) + pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES; + + return pte; +} + +static u64 hsw_pte_encode(dma_addr_t addr, + enum i915_cache_level level, + u32 flags) +{ + gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID; + + if (level != I915_CACHE_NONE) + pte |= HSW_WB_LLC_AGE3; + + return pte; +} + +static u64 iris_pte_encode(dma_addr_t addr, + enum i915_cache_level level, + u32 flags) +{ + gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID; + + switch (level) { + case I915_CACHE_NONE: + break; + case I915_CACHE_WT: + pte |= HSW_WT_ELLC_LLC_AGE3; + break; + default: + pte |= HSW_WB_ELLC_LLC_AGE3; + break; + } + + return pte; +} + +static int gen6_gmch_probe(struct i915_ggtt *ggtt) +{ + struct drm_i915_private *i915 = ggtt->vm.i915; + struct pci_dev *pdev = to_pci_dev(i915->drm.dev); + unsigned int size; + u16 snb_gmch_ctl; + + ggtt->gmadr = pci_resource(pdev, 2); + ggtt->mappable_end = resource_size(&ggtt->gmadr); + + /* + * 64/512MB is the current min/max we actually know of, but this is + * just a coarse sanity check. 
+ */ + if (ggtt->mappable_end < (64 << 20) || + ggtt->mappable_end > (512 << 20)) { + drm_err(&i915->drm, "Unknown GMADR size (%pa)\n", + &ggtt->mappable_end); + return -ENXIO; + } + + pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); + + size = gen6_get_total_gtt_size(snb_gmch_ctl); + ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE; + + ggtt->vm.alloc_pt_dma = alloc_pt_dma; + ggtt->vm.alloc_scratch_dma = alloc_pt_dma; + + ggtt->vm.clear_range = nop_clear_range; + if (!HAS_FULL_PPGTT(i915) || intel_scanout_needs_vtd_wa(i915)) + ggtt->vm.clear_range = gen6_ggtt_clear_range; + ggtt->vm.insert_page = gen6_ggtt_insert_page; + ggtt->vm.insert_entries = gen6_ggtt_insert_entries; + ggtt->vm.cleanup = gen6_gmch_remove; + + ggtt->invalidate = gen6_ggtt_invalidate; + + if (HAS_EDRAM(i915)) + ggtt->vm.pte_encode = iris_pte_encode; + else if (IS_HASWELL(i915)) + ggtt->vm.pte_encode = hsw_pte_encode; + else if (IS_VALLEYVIEW(i915)) + ggtt->vm.pte_encode = byt_pte_encode; + else if (GRAPHICS_VER(i915) >= 7) + ggtt->vm.pte_encode = ivb_pte_encode; + else + ggtt->vm.pte_encode = snb_pte_encode; + + ggtt->vm.vma_ops.bind_vma = intel_ggtt_bind_vma; + ggtt->vm.vma_ops.unbind_vma = intel_ggtt_unbind_vma; + + return ggtt_probe_common(ggtt, size); +} + static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt) { struct drm_i915_private *i915 = gt->i915; @@ -605,12 +1132,13 @@ static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt) ggtt->vm.dma = i915->drm.dev; dma_resv_init(&ggtt->vm._resv); - if (GRAPHICS_VER(i915) <= 5) - ret = intel_gt_gmch_gen5_probe(ggtt); - else if (GRAPHICS_VER(i915) < 8) - ret = intel_gt_gmch_gen6_probe(ggtt); + if (GRAPHICS_VER(i915) >= 8) + ret = gen8_gmch_probe(ggtt); + else if (GRAPHICS_VER(i915) >= 6) + ret = gen6_gmch_probe(ggtt); else - ret = intel_gt_gmch_gen8_probe(ggtt); + ret = intel_ggtt_gmch_probe(ggtt); + if (ret) { dma_resv_fini(&ggtt->vm._resv); return ret; @@ -664,7 +1192,10 @@ int i915_ggtt_probe_hw(struct drm_i915_private *i915) int i915_ggtt_enable_hw(struct drm_i915_private *i915) { - return intel_gt_gmch_gen5_enable_hw(i915); + if (GRAPHICS_VER(i915) < 6) + return intel_ggtt_gmch_enable_hw(i915); + + return 0; } void i915_ggtt_enable_guc(struct i915_ggtt *ggtt) diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c b/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c new file mode 100644 index 000000000000..4e2163a1aa46 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c @@ -0,0 +1,132 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "intel_ggtt_gmch.h" + +#include +#include + +#include + +#include "i915_drv.h" +#include "i915_utils.h" +#include "intel_gtt.h" +#include "intel_gt_regs.h" +#include "intel_gt.h" + +static void gmch_ggtt_insert_page(struct i915_address_space *vm, + dma_addr_t addr, + u64 offset, + enum i915_cache_level cache_level, + u32 unused) +{ + unsigned int flags = (cache_level == I915_CACHE_NONE) ? + AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; + + intel_gmch_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags); +} + +static void gmch_ggtt_insert_entries(struct i915_address_space *vm, + struct i915_vma_resource *vma_res, + enum i915_cache_level cache_level, + u32 unused) +{ + unsigned int flags = (cache_level == I915_CACHE_NONE) ? 
+ AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; + + intel_gmch_gtt_insert_sg_entries(vma_res->bi.pages, vma_res->start >> PAGE_SHIFT, + flags); +} + +static void gmch_ggtt_invalidate(struct i915_ggtt *ggtt) +{ + intel_gmch_gtt_flush(); +} + +static void gmch_ggtt_clear_range(struct i915_address_space *vm, + u64 start, u64 length) +{ + intel_gmch_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT); +} + +static void gmch_ggtt_remove(struct i915_address_space *vm) +{ + intel_gmch_remove(); +} + +/* + * Certain Gen5 chipsets require idling the GPU before unmapping anything from + * the GTT when VT-d is enabled. + */ +static bool needs_idle_maps(struct drm_i915_private *i915) +{ + /* + * Query intel_iommu to see if we need the workaround. Presumably that + * was loaded first. + */ + if (!i915_vtd_active(i915)) + return false; + + if (GRAPHICS_VER(i915) == 5 && IS_MOBILE(i915)) + return true; + + return false; +} + +int intel_ggtt_gmch_probe(struct i915_ggtt *ggtt) +{ + struct drm_i915_private *i915 = ggtt->vm.i915; + phys_addr_t gmadr_base; + int ret; + + ret = intel_gmch_probe(i915->bridge_dev, to_pci_dev(i915->drm.dev), NULL); + if (!ret) { + drm_err(&i915->drm, "failed to set up gmch\n"); + return -EIO; + } + + intel_gmch_gtt_get(&ggtt->vm.total, &gmadr_base, &ggtt->mappable_end); + + ggtt->gmadr = + (struct resource)DEFINE_RES_MEM(gmadr_base, ggtt->mappable_end); + + ggtt->vm.alloc_pt_dma = alloc_pt_dma; + ggtt->vm.alloc_scratch_dma = alloc_pt_dma; + + if (needs_idle_maps(i915)) { + drm_notice(&i915->drm, + "Flushing DMA requests before IOMMU unmaps; performance may be degraded\n"); + ggtt->do_idle_maps = true; + } + + ggtt->vm.insert_page = gmch_ggtt_insert_page; + ggtt->vm.insert_entries = gmch_ggtt_insert_entries; + ggtt->vm.clear_range = gmch_ggtt_clear_range; + ggtt->vm.cleanup = gmch_ggtt_remove; + + ggtt->invalidate = gmch_ggtt_invalidate; + + ggtt->vm.vma_ops.bind_vma = intel_ggtt_bind_vma; + ggtt->vm.vma_ops.unbind_vma = intel_ggtt_unbind_vma; + + if (unlikely(ggtt->do_idle_maps)) + drm_notice(&i915->drm, + "Applying Ironlake quirks for intel_iommu\n"); + + return 0; +} + +int intel_ggtt_gmch_enable_hw(struct drm_i915_private *i915) +{ + if (!intel_gmch_enable_gtt()) + return -EIO; + + return 0; +} + +void intel_ggtt_gmch_flush(void) +{ + intel_gmch_gtt_flush(); +} diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.h b/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.h new file mode 100644 index 000000000000..370bf321b4e2 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef __INTEL_GGTT_GMCH_H__ +#define __INTEL_GGTT_GMCH_H__ + +#include "intel_gtt.h" + +/* For x86 platforms */ +#if IS_ENABLED(CONFIG_X86) + +void intel_ggtt_gmch_flush(void); +int intel_ggtt_gmch_enable_hw(struct drm_i915_private *i915); +int intel_ggtt_gmch_probe(struct i915_ggtt *ggtt); + +/* Stubs for non-x86 platforms */ +#else + +static inline void intel_ggtt_gmch_flush(void) { } +static inline int intel_ggtt_gmch_enable_hw(struct drm_i915_private *i915) { return -ENODEV; } +static inline int intel_ggtt_gmch_probe(struct i915_ggtt *ggtt) { return -ENODEV; } + +#endif + +#endif /* __INTEL_GGTT_GMCH_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index be9877c4b496..8da3314bb6bf 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -4,6 +4,7 @@ */ #include +#include #include "gem/i915_gem_internal.h" #include 
"gem/i915_gem_lmem.h" @@ -12,11 +13,11 @@ #include "i915_drv.h" #include "intel_context.h" #include "intel_engine_regs.h" +#include "intel_ggtt_gmch.h" #include "intel_gt.h" #include "intel_gt_buffer_pool.h" #include "intel_gt_clock_utils.h" #include "intel_gt_debugfs.h" -#include "intel_gt_gmch.h" #include "intel_gt_mcr.h" #include "intel_gt_pm.h" #include "intel_gt_regs.h" @@ -387,7 +388,7 @@ void intel_gt_chipset_flush(struct intel_gt *gt) { wmb(); if (GRAPHICS_VER(gt->i915) < 6) - intel_gt_gmch_gen5_chipset_flush(gt); + intel_ggtt_gmch_flush(); } void intel_gt_driver_register(struct intel_gt *gt) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 61d30d5c7e90..82d6f248d876 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -13,13 +13,6 @@ struct drm_i915_private; struct drm_printer; -struct insert_entries { - struct i915_address_space *vm; - struct i915_vma_resource *vma_res; - enum i915_cache_level level; - u32 flags; -}; - #define GT_TRACE(gt, fmt, ...) do { \ const struct intel_gt *gt__ __maybe_unused = (gt); \ GEM_TRACE("%s " fmt, dev_name(gt__->i915->drm.dev), \ @@ -110,6 +103,4 @@ void intel_gt_watchdog_work(struct work_struct *work); void intel_gt_invalidate_tlbs(struct intel_gt *gt); -struct resource intel_pci_resource(struct pci_dev *pdev, int bar); - #endif /* __INTEL_GT_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_gmch.c b/drivers/gpu/drm/i915/gt/intel_gt_gmch.c deleted file mode 100644 index b1a6ff4c9377..000000000000 --- a/drivers/gpu/drm/i915/gt/intel_gt_gmch.c +++ /dev/null @@ -1,654 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2022 Intel Corporation - */ - -#include -#include - -#include -#include - -#include "i915_drv.h" -#include "intel_gt_gmch.h" -#include "intel_gt_regs.h" -#include "intel_gt.h" -#include "i915_utils.h" - -#include "gen8_ppgtt.h" - -struct insert_page { - struct i915_address_space *vm; - dma_addr_t addr; - u64 offset; - enum i915_cache_level level; -}; - -static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) -{ - writeq(pte, addr); -} - -static void nop_clear_range(struct i915_address_space *vm, - u64 start, u64 length) -{ -} - -static u64 snb_pte_encode(dma_addr_t addr, - enum i915_cache_level level, - u32 flags) -{ - gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID; - - switch (level) { - case I915_CACHE_L3_LLC: - case I915_CACHE_LLC: - pte |= GEN6_PTE_CACHE_LLC; - break; - case I915_CACHE_NONE: - pte |= GEN6_PTE_UNCACHED; - break; - default: - MISSING_CASE(level); - } - - return pte; -} - -static u64 ivb_pte_encode(dma_addr_t addr, - enum i915_cache_level level, - u32 flags) -{ - gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID; - - switch (level) { - case I915_CACHE_L3_LLC: - pte |= GEN7_PTE_CACHE_L3_LLC; - break; - case I915_CACHE_LLC: - pte |= GEN6_PTE_CACHE_LLC; - break; - case I915_CACHE_NONE: - pte |= GEN6_PTE_UNCACHED; - break; - default: - MISSING_CASE(level); - } - - return pte; -} - -static u64 byt_pte_encode(dma_addr_t addr, - enum i915_cache_level level, - u32 flags) -{ - gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID; - - if (!(flags & PTE_READ_ONLY)) - pte |= BYT_PTE_WRITEABLE; - - if (level != I915_CACHE_NONE) - pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES; - - return pte; -} - -static u64 hsw_pte_encode(dma_addr_t addr, - enum i915_cache_level level, - u32 flags) -{ - gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID; - - if (level != I915_CACHE_NONE) - pte |= HSW_WB_LLC_AGE3; - - 
return pte; -} - -static u64 iris_pte_encode(dma_addr_t addr, - enum i915_cache_level level, - u32 flags) -{ - gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID; - - switch (level) { - case I915_CACHE_NONE: - break; - case I915_CACHE_WT: - pte |= HSW_WT_ELLC_LLC_AGE3; - break; - default: - pte |= HSW_WB_ELLC_LLC_AGE3; - break; - } - - return pte; -} - -static void gen5_ggtt_insert_page(struct i915_address_space *vm, - dma_addr_t addr, - u64 offset, - enum i915_cache_level cache_level, - u32 unused) -{ - unsigned int flags = (cache_level == I915_CACHE_NONE) ? - AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; - - intel_gmch_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags); -} - -static void gen6_ggtt_insert_page(struct i915_address_space *vm, - dma_addr_t addr, - u64 offset, - enum i915_cache_level level, - u32 flags) -{ - struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); - gen6_pte_t __iomem *pte = - (gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE; - - iowrite32(vm->pte_encode(addr, level, flags), pte); - - ggtt->invalidate(ggtt); -} - -static void gen8_ggtt_insert_page(struct i915_address_space *vm, - dma_addr_t addr, - u64 offset, - enum i915_cache_level level, - u32 flags) -{ - struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); - gen8_pte_t __iomem *pte = - (gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE; - - gen8_set_pte(pte, gen8_ggtt_pte_encode(addr, level, flags)); - - ggtt->invalidate(ggtt); -} - -static void gen5_ggtt_insert_entries(struct i915_address_space *vm, - struct i915_vma_resource *vma_res, - enum i915_cache_level cache_level, - u32 unused) -{ - unsigned int flags = (cache_level == I915_CACHE_NONE) ? - AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; - - intel_gmch_gtt_insert_sg_entries(vma_res->bi.pages, vma_res->start >> PAGE_SHIFT, - flags); -} - -/* - * Binds an object into the global gtt with the specified cache level. - * The object will be accessible to the GPU via commands whose operands - * reference offsets within the global GTT as well as accessible by the GPU - * through the GMADR mapped BAR (i915->mm.gtt->gtt). - */ -static void gen6_ggtt_insert_entries(struct i915_address_space *vm, - struct i915_vma_resource *vma_res, - enum i915_cache_level level, - u32 flags) -{ - struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); - gen6_pte_t __iomem *gte; - gen6_pte_t __iomem *end; - struct sgt_iter iter; - dma_addr_t addr; - - gte = (gen6_pte_t __iomem *)ggtt->gsm; - gte += vma_res->start / I915_GTT_PAGE_SIZE; - end = gte + vma_res->node_size / I915_GTT_PAGE_SIZE; - - for_each_sgt_daddr(addr, iter, vma_res->bi.pages) - iowrite32(vm->pte_encode(addr, level, flags), gte++); - GEM_BUG_ON(gte > end); - - /* Fill the allocated but "unused" space beyond the end of the buffer */ - while (gte < end) - iowrite32(vm->scratch[0]->encode, gte++); - - /* - * We want to flush the TLBs only after we're certain all the PTE - * updates have finished. - */ - ggtt->invalidate(ggtt); -} - -static void gen8_ggtt_insert_entries(struct i915_address_space *vm, - struct i915_vma_resource *vma_res, - enum i915_cache_level level, - u32 flags) -{ - const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level, flags); - struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); - gen8_pte_t __iomem *gte; - gen8_pte_t __iomem *end; - struct sgt_iter iter; - dma_addr_t addr; - - /* - * Note that we ignore PTE_READ_ONLY here. The caller must be careful - * not to allow the user to override access to a read only page. 
- */ - - gte = (gen8_pte_t __iomem *)ggtt->gsm; - gte += vma_res->start / I915_GTT_PAGE_SIZE; - end = gte + vma_res->node_size / I915_GTT_PAGE_SIZE; - - for_each_sgt_daddr(addr, iter, vma_res->bi.pages) - gen8_set_pte(gte++, pte_encode | addr); - GEM_BUG_ON(gte > end); - - /* Fill the allocated but "unused" space beyond the end of the buffer */ - while (gte < end) - gen8_set_pte(gte++, vm->scratch[0]->encode); - - /* - * We want to flush the TLBs only after we're certain all the PTE - * updates have finished. - */ - ggtt->invalidate(ggtt); -} - -static void bxt_vtd_ggtt_wa(struct i915_address_space *vm) -{ - /* - * Make sure the internal GAM fifo has been cleared of all GTT - * writes before exiting stop_machine(). This guarantees that - * any aperture accesses waiting to start in another process - * cannot back up behind the GTT writes causing a hang. - * The register can be any arbitrary GAM register. - */ - intel_uncore_posting_read_fw(vm->gt->uncore, GFX_FLSH_CNTL_GEN6); -} - -static int bxt_vtd_ggtt_insert_page__cb(void *_arg) -{ - struct insert_page *arg = _arg; - - gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0); - bxt_vtd_ggtt_wa(arg->vm); - - return 0; -} - -static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm, - dma_addr_t addr, - u64 offset, - enum i915_cache_level level, - u32 unused) -{ - struct insert_page arg = { vm, addr, offset, level }; - - stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL); -} - -static int bxt_vtd_ggtt_insert_entries__cb(void *_arg) -{ - struct insert_entries *arg = _arg; - - gen8_ggtt_insert_entries(arg->vm, arg->vma_res, arg->level, arg->flags); - bxt_vtd_ggtt_wa(arg->vm); - - return 0; -} - -static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm, - struct i915_vma_resource *vma_res, - enum i915_cache_level level, - u32 flags) -{ - struct insert_entries arg = { vm, vma_res, level, flags }; - - stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL); -} - -void intel_gt_gmch_gen5_chipset_flush(struct intel_gt *gt) -{ - intel_gmch_gtt_flush(); -} - -static void gmch_ggtt_invalidate(struct i915_ggtt *ggtt) -{ - intel_gmch_gtt_flush(); -} - -static void gen5_ggtt_clear_range(struct i915_address_space *vm, - u64 start, u64 length) -{ - intel_gmch_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT); -} - -static void gen6_ggtt_clear_range(struct i915_address_space *vm, - u64 start, u64 length) -{ - struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); - unsigned int first_entry = start / I915_GTT_PAGE_SIZE; - unsigned int num_entries = length / I915_GTT_PAGE_SIZE; - gen6_pte_t scratch_pte, __iomem *gtt_base = - (gen6_pte_t __iomem *)ggtt->gsm + first_entry; - const int max_entries = ggtt_total_entries(ggtt) - first_entry; - int i; - - if (WARN(num_entries > max_entries, - "First entry = %d; Num entries = %d (max=%d)\n", - first_entry, num_entries, max_entries)) - num_entries = max_entries; - - scratch_pte = vm->scratch[0]->encode; - for (i = 0; i < num_entries; i++) - iowrite32(scratch_pte, >t_base[i]); -} - -static void gen8_ggtt_clear_range(struct i915_address_space *vm, - u64 start, u64 length) -{ - struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); - unsigned int first_entry = start / I915_GTT_PAGE_SIZE; - unsigned int num_entries = length / I915_GTT_PAGE_SIZE; - const gen8_pte_t scratch_pte = vm->scratch[0]->encode; - gen8_pte_t __iomem *gtt_base = - (gen8_pte_t __iomem *)ggtt->gsm + first_entry; - const int max_entries = ggtt_total_entries(ggtt) - first_entry; - int i; - - if 
(WARN(num_entries > max_entries, - "First entry = %d; Num entries = %d (max=%d)\n", - first_entry, num_entries, max_entries)) - num_entries = max_entries; - - for (i = 0; i < num_entries; i++) - gen8_set_pte(>t_base[i], scratch_pte); -} - -static void gen5_gmch_remove(struct i915_address_space *vm) -{ - intel_gmch_remove(); -} - -static void gen6_gmch_remove(struct i915_address_space *vm) -{ - struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); - - iounmap(ggtt->gsm); - free_scratch(vm); -} - -/* - * Certain Gen5 chipsets require idling the GPU before - * unmapping anything from the GTT when VT-d is enabled. - */ -static bool needs_idle_maps(struct drm_i915_private *i915) -{ - /* - * Query intel_iommu to see if we need the workaround. Presumably that - * was loaded first. - */ - if (!i915_vtd_active(i915)) - return false; - - if (GRAPHICS_VER(i915) == 5 && IS_MOBILE(i915)) - return true; - - if (GRAPHICS_VER(i915) == 12) - return true; /* XXX DMAR fault reason 7 */ - - return false; -} - -static unsigned int gen6_gttmmadr_size(struct drm_i915_private *i915) -{ - /* - * GEN6: GTTMMADR size is 4MB and GTTADR starts at 2MB offset - * GEN8: GTTMMADR size is 16MB and GTTADR starts at 8MB offset - */ - GEM_BUG_ON(GRAPHICS_VER(i915) < 6); - return (GRAPHICS_VER(i915) < 8) ? SZ_4M : SZ_16M; -} - -static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl) -{ - snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT; - snb_gmch_ctl &= SNB_GMCH_GGMS_MASK; - return snb_gmch_ctl << 20; -} - -static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl) -{ - bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT; - bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK; - if (bdw_gmch_ctl) - bdw_gmch_ctl = 1 << bdw_gmch_ctl; - -#ifdef CONFIG_X86_32 - /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * I915_GTT_PAGE_SIZE */ - if (bdw_gmch_ctl > 4) - bdw_gmch_ctl = 4; -#endif - - return bdw_gmch_ctl << 20; -} - -static unsigned int gen6_gttadr_offset(struct drm_i915_private *i915) -{ - return gen6_gttmmadr_size(i915) / 2; -} - -static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) -{ - struct drm_i915_private *i915 = ggtt->vm.i915; - struct pci_dev *pdev = to_pci_dev(i915->drm.dev); - phys_addr_t phys_addr; - u32 pte_flags; - int ret; - - GEM_WARN_ON(pci_resource_len(pdev, 0) != gen6_gttmmadr_size(i915)); - phys_addr = pci_resource_start(pdev, 0) + gen6_gttadr_offset(i915); - - /* - * On BXT+/ICL+ writes larger than 64 bit to the GTT pagetable range - * will be dropped. For WC mappings in general we have 64 byte burst - * writes when the WC buffer is flushed, so we can't use it, but have to - * resort to an uncached mapping. The WC issue is easily caught by the - * readback check when writing GTT PTE entries. 
- */ - if (IS_GEN9_LP(i915) || GRAPHICS_VER(i915) >= 11) - ggtt->gsm = ioremap(phys_addr, size); - else - ggtt->gsm = ioremap_wc(phys_addr, size); - if (!ggtt->gsm) { - drm_err(&i915->drm, "Failed to map the ggtt page table\n"); - return -ENOMEM; - } - - kref_init(&ggtt->vm.resv_ref); - ret = setup_scratch_page(&ggtt->vm); - if (ret) { - drm_err(&i915->drm, "Scratch setup failed\n"); - /* iounmap will also get called at remove, but meh */ - iounmap(ggtt->gsm); - return ret; - } - - pte_flags = 0; - if (i915_gem_object_is_lmem(ggtt->vm.scratch[0])) - pte_flags |= PTE_LM; - - ggtt->vm.scratch[0]->encode = - ggtt->vm.pte_encode(px_dma(ggtt->vm.scratch[0]), - I915_CACHE_NONE, pte_flags); - - return 0; -} - -int intel_gt_gmch_gen5_probe(struct i915_ggtt *ggtt) -{ - struct drm_i915_private *i915 = ggtt->vm.i915; - phys_addr_t gmadr_base; - int ret; - - ret = intel_gmch_probe(i915->bridge_dev, to_pci_dev(i915->drm.dev), NULL); - if (!ret) { - drm_err(&i915->drm, "failed to set up gmch\n"); - return -EIO; - } - - intel_gmch_gtt_get(&ggtt->vm.total, &gmadr_base, &ggtt->mappable_end); - - ggtt->gmadr = - (struct resource)DEFINE_RES_MEM(gmadr_base, ggtt->mappable_end); - - ggtt->vm.alloc_pt_dma = alloc_pt_dma; - ggtt->vm.alloc_scratch_dma = alloc_pt_dma; - - if (needs_idle_maps(i915)) { - drm_notice(&i915->drm, - "Flushing DMA requests before IOMMU unmaps; performance may be degraded\n"); - ggtt->do_idle_maps = true; - } - - ggtt->vm.insert_page = gen5_ggtt_insert_page; - ggtt->vm.insert_entries = gen5_ggtt_insert_entries; - ggtt->vm.clear_range = gen5_ggtt_clear_range; - ggtt->vm.cleanup = gen5_gmch_remove; - - ggtt->invalidate = gmch_ggtt_invalidate; - - ggtt->vm.vma_ops.bind_vma = intel_ggtt_bind_vma; - ggtt->vm.vma_ops.unbind_vma = intel_ggtt_unbind_vma; - - if (unlikely(ggtt->do_idle_maps)) - drm_notice(&i915->drm, - "Applying Ironlake quirks for intel_iommu\n"); - - return 0; -} - -int intel_gt_gmch_gen6_probe(struct i915_ggtt *ggtt) -{ - struct drm_i915_private *i915 = ggtt->vm.i915; - struct pci_dev *pdev = to_pci_dev(i915->drm.dev); - unsigned int size; - u16 snb_gmch_ctl; - - ggtt->gmadr = intel_pci_resource(pdev, 2); - ggtt->mappable_end = resource_size(&ggtt->gmadr); - - /* - * 64/512MB is the current min/max we actually know of, but this is - * just a coarse sanity check. 
- */ - if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) { - drm_err(&i915->drm, "Unknown GMADR size (%pa)\n", - &ggtt->mappable_end); - return -ENXIO; - } - - pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); - - size = gen6_get_total_gtt_size(snb_gmch_ctl); - ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE; - - ggtt->vm.alloc_pt_dma = alloc_pt_dma; - ggtt->vm.alloc_scratch_dma = alloc_pt_dma; - - ggtt->vm.clear_range = nop_clear_range; - if (!HAS_FULL_PPGTT(i915) || intel_scanout_needs_vtd_wa(i915)) - ggtt->vm.clear_range = gen6_ggtt_clear_range; - ggtt->vm.insert_page = gen6_ggtt_insert_page; - ggtt->vm.insert_entries = gen6_ggtt_insert_entries; - ggtt->vm.cleanup = gen6_gmch_remove; - - ggtt->invalidate = gen6_ggtt_invalidate; - - if (HAS_EDRAM(i915)) - ggtt->vm.pte_encode = iris_pte_encode; - else if (IS_HASWELL(i915)) - ggtt->vm.pte_encode = hsw_pte_encode; - else if (IS_VALLEYVIEW(i915)) - ggtt->vm.pte_encode = byt_pte_encode; - else if (GRAPHICS_VER(i915) >= 7) - ggtt->vm.pte_encode = ivb_pte_encode; - else - ggtt->vm.pte_encode = snb_pte_encode; - - ggtt->vm.vma_ops.bind_vma = intel_ggtt_bind_vma; - ggtt->vm.vma_ops.unbind_vma = intel_ggtt_unbind_vma; - - return ggtt_probe_common(ggtt, size); -} - -static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl) -{ - gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT; - gmch_ctrl &= SNB_GMCH_GGMS_MASK; - - if (gmch_ctrl) - return 1 << (20 + gmch_ctrl); - - return 0; -} - -int intel_gt_gmch_gen8_probe(struct i915_ggtt *ggtt) -{ - struct drm_i915_private *i915 = ggtt->vm.i915; - struct pci_dev *pdev = to_pci_dev(i915->drm.dev); - unsigned int size; - u16 snb_gmch_ctl; - - /* TODO: We're not aware of mappable constraints on gen8 yet */ - if (!HAS_LMEM(i915)) { - ggtt->gmadr = intel_pci_resource(pdev, 2); - ggtt->mappable_end = resource_size(&ggtt->gmadr); - } - - pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); - if (IS_CHERRYVIEW(i915)) - size = chv_get_total_gtt_size(snb_gmch_ctl); - else - size = gen8_get_total_gtt_size(snb_gmch_ctl); - - ggtt->vm.alloc_pt_dma = alloc_pt_dma; - ggtt->vm.alloc_scratch_dma = alloc_pt_dma; - ggtt->vm.lmem_pt_obj_flags = I915_BO_ALLOC_PM_EARLY; - - ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE; - ggtt->vm.cleanup = gen6_gmch_remove; - ggtt->vm.insert_page = gen8_ggtt_insert_page; - ggtt->vm.clear_range = nop_clear_range; - if (intel_scanout_needs_vtd_wa(i915)) - ggtt->vm.clear_range = gen8_ggtt_clear_range; - - ggtt->vm.insert_entries = gen8_ggtt_insert_entries; - - /* - * Serialize GTT updates with aperture access on BXT if VT-d is on, - * and always on CHV. 
- */ - if (intel_vm_no_concurrent_access_wa(i915)) { - ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL; - ggtt->vm.insert_page = bxt_vtd_ggtt_insert_page__BKL; - ggtt->vm.bind_async_flags = - I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; - } - - ggtt->invalidate = gen8_ggtt_invalidate; - - ggtt->vm.vma_ops.bind_vma = intel_ggtt_bind_vma; - ggtt->vm.vma_ops.unbind_vma = intel_ggtt_unbind_vma; - - ggtt->vm.pte_encode = gen8_ggtt_pte_encode; - - setup_private_pat(ggtt->vm.gt->uncore); - - return ggtt_probe_common(ggtt, size); -} - -int intel_gt_gmch_gen5_enable_hw(struct drm_i915_private *i915) -{ - if (GRAPHICS_VER(i915) < 6 && !intel_gmch_enable_gtt()) - return -EIO; - - return 0; -} diff --git a/drivers/gpu/drm/i915/gt/intel_gt_gmch.h b/drivers/gpu/drm/i915/gt/intel_gt_gmch.h deleted file mode 100644 index 75ed55c1f30a..000000000000 --- a/drivers/gpu/drm/i915/gt/intel_gt_gmch.h +++ /dev/null @@ -1,46 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2022 Intel Corporation - */ - -#ifndef __INTEL_GT_GMCH_H__ -#define __INTEL_GT_GMCH_H__ - -#include "intel_gtt.h" - -/* For x86 platforms */ -#if IS_ENABLED(CONFIG_X86) -void intel_gt_gmch_gen5_chipset_flush(struct intel_gt *gt); -int intel_gt_gmch_gen6_probe(struct i915_ggtt *ggtt); -int intel_gt_gmch_gen8_probe(struct i915_ggtt *ggtt); -int intel_gt_gmch_gen5_probe(struct i915_ggtt *ggtt); -int intel_gt_gmch_gen5_enable_hw(struct drm_i915_private *i915); - -/* Stubs for non-x86 platforms */ -#else -static inline void intel_gt_gmch_gen5_chipset_flush(struct intel_gt *gt) -{ -} -static inline int intel_gt_gmch_gen5_probe(struct i915_ggtt *ggtt) -{ - /* No HW should be probed for this case yet, return fail */ - return -ENODEV; -} -static inline int intel_gt_gmch_gen6_probe(struct i915_ggtt *ggtt) -{ - /* No HW should be probed for this case yet, return fail */ - return -ENODEV; -} -static inline int intel_gt_gmch_gen8_probe(struct i915_ggtt *ggtt) -{ - /* No HW should be probed for this case yet, return fail */ - return -ENODEV; -} -static inline int intel_gt_gmch_gen5_enable_hw(struct drm_i915_private *i915) -{ - /* No HW should be enabled for this case yet, return fail */ - return -ENODEV; -} -#endif - -#endif /* __INTEL_GT_GMCH_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index 128b31476938..29fd3a9e8b2e 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -561,14 +561,13 @@ i915_page_dir_dma_addr(const struct i915_ppgtt *ppgtt, const unsigned int n) void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt, unsigned long lmem_pt_obj_flags); - void intel_ggtt_bind_vma(struct i915_address_space *vm, - struct i915_vm_pt_stash *stash, - struct i915_vma_resource *vma_res, - enum i915_cache_level cache_level, - u32 flags); + struct i915_vm_pt_stash *stash, + struct i915_vma_resource *vma_res, + enum i915_cache_level cache_level, + u32 flags); void intel_ggtt_unbind_vma(struct i915_address_space *vm, - struct i915_vma_resource *vma_res); + struct i915_vma_resource *vma_res); int i915_ggtt_probe_hw(struct drm_i915_private *i915); int i915_ggtt_init_hw(struct drm_i915_private *i915); @@ -651,7 +650,6 @@ release_pd_entry(struct i915_page_directory * const pd, struct i915_page_table * const pt, const struct drm_i915_gem_object * const scratch); void gen6_ggtt_invalidate(struct i915_ggtt *ggtt); -void gen8_ggtt_invalidate(struct i915_ggtt *ggtt); void ppgtt_bind_vma(struct i915_address_space *vm, struct i915_vm_pt_stash *stash, -- cgit 
v1.2.3 From 373269ae6f90bbbe945abde4c0811a991a27901a Mon Sep 17 00:00:00 2001 From: Akeem G Abodunrin Date: Wed, 22 Jun 2022 15:11:04 +0100 Subject: drm/i915/selftests: Increase timeout for live_parallel_switch With GuC submission, it takes a little bit longer switching contexts among all available engines simultaneously, when running live_parallel_switch subtest. Increase the timeout. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/5885 Signed-off-by: Akeem G Abodunrin Cc: Matthew Brost Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20220622141104.334432-1-matthew.auld@intel.com --- drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index 93a67422ca3b..c6ad67b90e8a 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -212,7 +212,7 @@ static int __live_parallel_switch1(void *data) i915_request_add(rq); } - if (i915_request_wait(rq, 0, HZ / 5) < 0) + if (i915_request_wait(rq, 0, HZ) < 0) err = -ETIME; i915_request_put(rq); if (err) -- cgit v1.2.3 From 563aaf4a928def2d36d1b3de0a4b515e2477b4da Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 22 Jun 2022 16:59:19 +0100 Subject: drm/i915: tweak the ordering in cpu_write_needs_clflush MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For imported dma-buf objects we leave the object as cache_coherent = 0 across all platforms, which is reasonable given that have no clue what the memory underneath is, and its not like the driver can ever manually clflush the pages anyway (like with i915_gem_clflush_object) for such objects. However on discrete we choose to treat cache_dirty = true as a programmer error, leading to a warning. The simplest fix looks to be to just change the ordering in cpu_write_needs_clflush to prevent ever setting cache_dirty for dma-buf objects on discrete. Fixes: d028a7690d87 ("drm/i915/dmabuf: Fix prime_mmap to work when using LMEM") Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/5266 Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Gwan-gyeong Mun Reviewed-by: Gwan-gyeong Mun Link: https://patchwork.freedesktop.org/patch/msgid/20220622155919.355081-1-matthew.auld@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_domain.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index 3e5d6057b3ef..1674b0c5802b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -35,12 +35,12 @@ bool i915_gem_cpu_write_needs_clflush(struct drm_i915_gem_object *obj) if (obj->cache_dirty) return false; - if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) - return true; - if (IS_DGFX(i915)) return false; + if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) + return true; + /* Currently in use by HW (display engine)? Keep flushed. 
*/ return i915_gem_object_is_framebuffer(obj); } -- cgit v1.2.3 From 8524bb67145d8a2b495fabcd4b5643010ea43353 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 24 Jun 2022 14:03:27 -0700 Subject: drm/i915: Correct duplicated/misplaced GT register definitions XEHPSDV_FLAT_CCS_BASE_ADDR, GEN8_L3_LRA_1_GPGPU, and MMCD_MISC_CTRL were duplicated between i915_reg.h and intel_gt_regs.h. These are all GT registers, so we should drop the copy from i915_reg.h. XEHPSDV_TILE0_ADDR_RANGE was defined in i915_reg.h, but really belongs in intel_gt_regs.h. Move it. Signed-off-by: Matt Roper Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20220624210328.308630-1-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 1 + drivers/gpu/drm/i915/gt/intel_gt_regs.h | 3 +++ drivers/gpu/drm/i915/i915_reg.h | 17 ----------------- 3 files changed, 4 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index fa54823d1219..e63de9c06596 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -14,6 +14,7 @@ #include "gem/i915_gem_region.h" #include "gt/intel_gt.h" #include "gt/intel_gt_mcr.h" +#include "gt/intel_gt_regs.h" #include "gt/intel_region_lmem.h" #include "i915_drv.h" #include "i915_gem_stolen.h" diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 07ef111947b8..61815b6e87de 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -324,6 +324,9 @@ #define GEN12_PAT_INDEX(index) _MMIO(0x4800 + (index) * 4) +#define XEHPSDV_TILE0_ADDR_RANGE _MMIO(0x4900) +#define XEHPSDV_TILE_LMEM_RANGE_SHIFT 8 + #define XEHPSDV_FLAT_CCS_BASE_ADDR _MMIO(0x4910) #define XEHPSDV_CCS_BASE_SHIFT 8 diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 213f02d58fc8..3ad7a2b5f47f 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -8496,23 +8496,6 @@ enum skl_power_gate { #define SGGI_DIS REG_BIT(15) #define SGR_DIS REG_BIT(13) -#define XEHPSDV_TILE0_ADDR_RANGE _MMIO(0x4900) -#define XEHPSDV_TILE_LMEM_RANGE_SHIFT 8 - -#define XEHPSDV_FLAT_CCS_BASE_ADDR _MMIO(0x4910) -#define XEHPSDV_CCS_BASE_SHIFT 8 - -/* gamt regs */ -#define GEN8_L3_LRA_1_GPGPU _MMIO(0x4dd4) -#define GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW 0x67F1427F /* max/min for LRA1/2 */ -#define GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV 0x5FF101FF /* max/min for LRA1/2 */ -#define GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL 0x67F1427F /* " " */ -#define GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT 0x5FF101FF /* " " */ - -#define MMCD_MISC_CTRL _MMIO(0x4ddc) /* skl+ */ -#define MMCD_PCLA (1 << 31) -#define MMCD_HOTSPOT_EN (1 << 27) - #define _ICL_PHY_MISC_A 0x64C00 #define _ICL_PHY_MISC_B 0x64C04 #define _DG2_PHY_MISC_TC1 0x64C14 /* TC1="PHY E" but offset as if "PHY F" */ -- cgit v1.2.3 From 7d8097073caa334ed6187a964645335324231e01 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 24 Jun 2022 14:03:28 -0700 Subject: drm/i915: Prefer "XEHP_" prefix for registers We've been introducing new registers with a mix of "XEHP_" (architecture) and "XEHPSDV_" (platform) prefixes. For consistency, let's settle on "XEHP_" as the preferred form. XEHPSDV_RP_STATE_CAP stays with its current name since that's truly a platform-specific register and not something that applies to the Xe_HP architecture as a whole. 
Signed-off-by: Matt Roper Reviewed-by: Caz Yokoyama Link: https://patchwork.freedesktop.org/patch/msgid/20220624210328.308630-2-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 4 ++-- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 8 ++++---- drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c | 4 ++-- drivers/gpu/drm/i915/gt/intel_region_lmem.c | 8 ++++---- drivers/gpu/drm/i915/i915_reg.h | 6 +++--- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index e63de9c06596..166d0a4b9e8c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -836,8 +836,8 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type, } else { resource_size_t lmem_range; - lmem_range = intel_gt_mcr_read_any(&i915->gt0, XEHPSDV_TILE0_ADDR_RANGE) & 0xFFFF; - lmem_size = lmem_range >> XEHPSDV_TILE_LMEM_RANGE_SHIFT; + lmem_range = intel_gt_mcr_read_any(&i915->gt0, XEHP_TILE0_ADDR_RANGE) & 0xFFFF; + lmem_size = lmem_range >> XEHP_TILE_LMEM_RANGE_SHIFT; lmem_size *= SZ_1G; } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 61815b6e87de..37c1095d8603 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -324,11 +324,11 @@ #define GEN12_PAT_INDEX(index) _MMIO(0x4800 + (index) * 4) -#define XEHPSDV_TILE0_ADDR_RANGE _MMIO(0x4900) -#define XEHPSDV_TILE_LMEM_RANGE_SHIFT 8 +#define XEHP_TILE0_ADDR_RANGE _MMIO(0x4900) +#define XEHP_TILE_LMEM_RANGE_SHIFT 8 -#define XEHPSDV_FLAT_CCS_BASE_ADDR _MMIO(0x4910) -#define XEHPSDV_CCS_BASE_SHIFT 8 +#define XEHP_FLAT_CCS_BASE_ADDR _MMIO(0x4910) +#define XEHP_CCS_BASE_SHIFT 8 #define GAMTARBMODE _MMIO(0x4a08) #define ARB_MODE_BWGTLB_DISABLE (1 << 9) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c index ae8a8f725f01..73a8b46e0234 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c @@ -679,7 +679,7 @@ static ssize_t media_RP0_freq_mhz_show(struct device *dev, u32 val; int err; - err = snb_pcode_read_p(gt->uncore, XEHPSDV_PCODE_FREQUENCY_CONFIG, + err = snb_pcode_read_p(gt->uncore, XEHP_PCODE_FREQUENCY_CONFIG, PCODE_MBOX_FC_SC_READ_FUSED_P0, PCODE_MBOX_DOMAIN_MEDIAFF, &val); @@ -700,7 +700,7 @@ static ssize_t media_RPn_freq_mhz_show(struct device *dev, u32 val; int err; - err = snb_pcode_read_p(gt->uncore, XEHPSDV_PCODE_FREQUENCY_CONFIG, + err = snb_pcode_read_p(gt->uncore, XEHP_PCODE_FREQUENCY_CONFIG, PCODE_MBOX_FC_SC_READ_FUSED_PN, PCODE_MBOX_DOMAIN_MEDIAFF, &val); diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c index 2ff448047020..d09b996a9759 100644 --- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c +++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c @@ -105,12 +105,12 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt) resource_size_t lmem_range; u64 tile_stolen, flat_ccs_base; - lmem_range = intel_gt_mcr_read_any(&i915->gt0, XEHPSDV_TILE0_ADDR_RANGE) & 0xFFFF; - lmem_size = lmem_range >> XEHPSDV_TILE_LMEM_RANGE_SHIFT; + lmem_range = intel_gt_mcr_read_any(&i915->gt0, XEHP_TILE0_ADDR_RANGE) & 0xFFFF; + lmem_size = lmem_range >> XEHP_TILE_LMEM_RANGE_SHIFT; lmem_size *= SZ_1G; - flat_ccs_base = intel_gt_mcr_read_any(gt, XEHPSDV_FLAT_CCS_BASE_ADDR); - flat_ccs_base = (flat_ccs_base >> XEHPSDV_CCS_BASE_SHIFT) * SZ_64K; + flat_ccs_base = 
intel_gt_mcr_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR); + flat_ccs_base = (flat_ccs_base >> XEHP_CCS_BASE_SHIFT) * SZ_64K; /* FIXME: Remove this when we have small-bar enabled */ if (pci_resource_len(pdev, 2) < lmem_size) { diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 3ad7a2b5f47f..63f1f04b21a7 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -6767,12 +6767,12 @@ #define DG1_UNCORE_GET_INIT_STATUS 0x0 #define DG1_UNCORE_INIT_STATUS_COMPLETE 0x1 #define GEN12_PCODE_READ_SAGV_BLOCK_TIME_US 0x23 -#define XEHPSDV_PCODE_FREQUENCY_CONFIG 0x6e /* xehpsdv, pvc */ -/* XEHPSDV_PCODE_FREQUENCY_CONFIG sub-commands (param1) */ +#define XEHP_PCODE_FREQUENCY_CONFIG 0x6e /* xehpsdv, pvc */ +/* XEHP_PCODE_FREQUENCY_CONFIG sub-commands (param1) */ #define PCODE_MBOX_FC_SC_READ_FUSED_P0 0x0 #define PCODE_MBOX_FC_SC_READ_FUSED_PN 0x1 /* PCODE_MBOX_DOMAIN_* - mailbox domain IDs */ -/* XEHPSDV_PCODE_FREQUENCY_CONFIG param2 */ +/* XEHP_PCODE_FREQUENCY_CONFIG param2 */ #define PCODE_MBOX_DOMAIN_NONE 0x0 #define PCODE_MBOX_DOMAIN_MEDIAFF 0x3 #define GEN6_PCODE_DATA _MMIO(0x138128) -- cgit v1.2.3 From 7307e91bfcd0e3f123aab01b30557f93923b6d73 Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Wed, 15 Jun 2022 00:13:46 +0530 Subject: drm/i915: Do not access rq->engine without a reference In i915_fence_get_driver_name(), user may not hold a reference to rq->engine. Hence do not access it. Instead, store required device private pointer in 'rq->i915' and use it. Signed-off-by: Niranjana Vishwanathapura Suggested-by: Matthew Brost Reviewed-by: Matthew Brost Signed-off-by: Ramalingam C Link: https://patchwork.freedesktop.org/patch/msgid/20220614184348.23746-2-ramalingam.c@intel.com --- drivers/gpu/drm/i915/i915_request.c | 3 ++- drivers/gpu/drm/i915/i915_request.h | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index c3937640b119..667dda7668cb 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -60,7 +60,7 @@ static struct kmem_cache *slab_execute_cbs; static const char *i915_fence_get_driver_name(struct dma_fence *fence) { - return dev_name(to_request(fence)->engine->i915->drm.dev); + return dev_name(to_request(fence)->i915->drm.dev); } static const char *i915_fence_get_timeline_name(struct dma_fence *fence) @@ -937,6 +937,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) rq->engine = ce->engine; rq->ring = ce->ring; rq->execution_mask = ce->engine->mask; + rq->i915 = ce->engine->i915; ret = intel_timeline_get_seqno(tl, rq, &seqno); if (ret) diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 28b1f9db5487..47041ec68df8 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -196,6 +196,8 @@ struct i915_request { struct dma_fence fence; spinlock_t lock; + struct drm_i915_private *i915; + /** * Context and ring buffer related to this request * Contexts are refcounted, so when this request is associated with a -- cgit v1.2.3 From bcb9aa45d5a0e11ef91245330c53cde214d15e8d Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Wed, 15 Jun 2022 00:13:47 +0530 Subject: Revert "drm/i915: Hold reference to intel_context over life of i915_request" This reverts commit 1e98d8c52ed5dfbaf273c4423c636525c2ce59e7. 
The problem with this patch is that it makes i915_request to hold a reference to intel_context, which in turn holds a reference on the VM. This strong back referencing can lead to reference loops which leads to resource leak. An example is the upcoming VM_BIND work which requires VM to hold a reference to some shared VM specific BO. But this BO's dma-resv fences holds reference to the i915_request thus leading to reference loop. v2: Do not use reserved requests for virtual engines Signed-off-by: Niranjana Vishwanathapura Signed-off-by: Ramalingam C Suggested-by: Matthew Brost Reviewed-by: Mathew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20220614184348.23746-3-ramalingam.c@intel.com --- drivers/gpu/drm/i915/i915_request.c | 52 +++++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 667dda7668cb..62fad16a55e8 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -134,17 +134,42 @@ static void i915_fence_release(struct dma_fence *fence) i915_sw_fence_fini(&rq->semaphore); /* - * Keep one request on each engine for reserved use under mempressure, + * Keep one request on each engine for reserved use under mempressure * do not use with virtual engines as this really is only needed for * kernel contexts. + * + * We do not hold a reference to the engine here and so have to be + * very careful in what rq->engine we poke. The virtual engine is + * referenced via the rq->context and we released that ref during + * i915_request_retire(), ergo we must not dereference a virtual + * engine here. Not that we would want to, as the only consumer of + * the reserved engine->request_pool is the power management parking, + * which must-not-fail, and that is only run on the physical engines. + * + * Since the request must have been executed to be have completed, + * we know that it will have been processed by the HW and will + * not be unsubmitted again, so rq->engine and rq->execution_mask + * at this point is stable. rq->execution_mask will be a single + * bit if the last and _only_ engine it could execution on was a + * physical engine, if it's multiple bits then it started on and + * could still be on a virtual engine. Thus if the mask is not a + * power-of-two we assume that rq->engine may still be a virtual + * engine and so a dangling invalid pointer that we cannot dereference + * + * For example, consider the flow of a bonded request through a virtual + * engine. The request is created with a wide engine mask (all engines + * that we might execute on). On processing the bond, the request mask + * is reduced to one or more engines. If the request is subsequently + * bound to a single engine, it will then be constrained to only + * execute on that engine and never returned to the virtual engine + * after timeslicing away, see __unwind_incomplete_requests(). Thus we + * know that if the rq->execution_mask is a single bit, rq->engine + * can be a physical engine with the exact corresponding mask. 
*/ if (!intel_engine_is_virtual(rq->engine) && - !cmpxchg(&rq->engine->request_pool, NULL, rq)) { - intel_context_put(rq->context); + is_power_of_2(rq->execution_mask) && + !cmpxchg(&rq->engine->request_pool, NULL, rq)) return; - } - - intel_context_put(rq->context); kmem_cache_free(slab_requests, rq); } @@ -921,19 +946,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) } } - /* - * Hold a reference to the intel_context over life of an i915_request. - * Without this an i915_request can exist after the context has been - * destroyed (e.g. request retired, context closed, but user space holds - * a reference to the request from an out fence). In the case of GuC - * submission + virtual engine, the engine that the request references - * is also destroyed which can trigger bad pointer dref in fence ops - * (e.g. i915_fence_get_driver_name). We could likely change these - * functions to avoid touching the engine but let's just be safe and - * hold the intel_context reference. In execlist mode the request always - * eventually points to a physical engine so this isn't an issue. - */ - rq->context = intel_context_get(ce); + rq->context = ce; rq->engine = ce->engine; rq->ring = ce->ring; rq->execution_mask = ce->engine->mask; @@ -1009,7 +1022,6 @@ err_unwind: GEM_BUG_ON(!list_empty(&rq->sched.waiters_list)); err_free: - intel_context_put(ce); kmem_cache_free(slab_requests, rq); err_unreserve: intel_context_unpin(ce); -- cgit v1.2.3 From 59bcdb564b3bac3e86cc274e5dec05d4647ce47f Mon Sep 17 00:00:00 2001 From: Alan Previn Date: Wed, 22 Jun 2022 19:31:57 -0700 Subject: drm/i915/guc: Don't update engine busyness stats too frequently Using two different types of workloads, it was observed that guc_update_engine_gt_clks was being called too frequently and/or causing a CPU-to-lmem bandwidth hit over PCIE. Details on the workloads and numbers are in the notes below. Background: At the moment, guc_update_engine_gt_clks can be invoked in one of three ways. #1 and #2 are infrequent under normal operating conditions: 1. When a predefined "ping_delay" timer expires so that GuC busyness can sample the GTPM clock counter to ensure it doesn't miss a wrap-around of the 32-bits of the HW counter. (The ping_delay is calculated as 1/8th the time taken for the counter to go from 0x0 to 0xffffffff at the given GT frequency. This comes to about once every 28 seconds at a GT frequency of 19.2 MHz). 2. In preparation for a gt reset. 3. In response to __gt_park events (as the gt power management puts the gt into a lower power state when there is no work being done). Root cause: For both the workloads described further below, it was observed that when user space calls IOCTLs that unpark the gt momentarily and repeats such calls many times in quick succession, it triggers calling guc_update_engine_gt_clks just as many times. However, the primary purpose of guc_update_engine_gt_clks is to ensure we don't miss the wraparound while the counter is ticking. Thus, the solution is to skip that check if gt_park is calling this function earlier than necessary. Solution: Snapshot jiffies when we do actually update the busyness stats. Then get the new jiffies every time intel_guc_busyness_park is called and bail if we are being called too soon. Use half of the ping_delay as a safe threshold. NOTE1: Workload1: IGT's gem_create was modified to create a file handle, allocate memory with sizes that range from a min of 4K to the max supported (in power-of-two step sizes). It maps, modifies and reads back the memory.
Allocations and modification is repeated until total memory allocation reaches the max. Then the file handle is closed. With this workload, guc_update_engine_gt_clks was called over 188 thousand times in the span of 15 seconds while this test ran three times. With this patch, the number of calls reduced to 14. NOTE2: Workload2: 30 transcode sessions are created in quick succession. While these sessions are created, pcm-iio tool was used to measure I/O read operation bandwidth consumption sampled at 100 milisecond intervals over the course of 20 seconds. The total bandwidth consumed over 20 seconds without this patch was measured at average at 311KBps per sample. With this patch, the number went down to about 175Kbps which is about a 43% savings. Signed-off-by: Alan Previn Reviewed-by: Umesh Nerlige Ramappa Acked-by: Tvrtko Ursulin Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20220623023157.211650-2-alan.previn.teres.alexis@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc.h | 8 ++++++++ drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 13 +++++++++++++ 2 files changed, 21 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 966e69a8b1c1..d0d99f178f2d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -230,6 +230,14 @@ struct intel_guc { * @shift: Right shift value for the gpm timestamp */ u32 shift; + + /** + * @last_stat_jiffies: jiffies at last actual stats collection time + * We use this timestamp to ensure we don't oversample the + * stats because runtime power management events can trigger + * stats collection at much higher rates than required. + */ + unsigned long last_stat_jiffies; } timestamp; #ifdef CONFIG_DRM_I915_SELFTEST diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index e62ea35513ea..c9f167b80910 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1314,6 +1314,8 @@ static void __update_guc_busyness_stats(struct intel_guc *guc) unsigned long flags; ktime_t unused; + guc->timestamp.last_stat_jiffies = jiffies; + spin_lock_irqsave(&guc->timestamp.lock, flags); guc_update_pm_timestamp(guc, &unused); @@ -1386,6 +1388,17 @@ void intel_guc_busyness_park(struct intel_gt *gt) return; cancel_delayed_work(&guc->timestamp.work); + + /* + * Before parking, we should sample engine busyness stats if we need to. + * We can skip it if we are less than half a ping from the last time we + * sampled the busyness stats. + */ + if (guc->timestamp.last_stat_jiffies && + !time_after(jiffies, guc->timestamp.last_stat_jiffies + + (guc->timestamp.ping_delay / 2))) + return; + __update_guc_busyness_stats(guc); } -- cgit v1.2.3 From 0667429ce68e0b08f9f1fec8fd0b1f57228f605e Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Tue, 21 Jun 2022 12:21:05 -0700 Subject: drm/i915/reset: Add additional steps for Wa_22011802037 for execlist backend For execlists backend, current implementation of Wa_22011802037 is to stop the CS before doing a reset of the engine. This WA was further extended to wait for any pending MI FORCE WAKEUPs before issuing a reset. Add the extended steps in the execlist path of reset. In addition, extend the WA to gen11. 
v2: (Tvrtko) - Clarify comments, commit message, fix typos - Use IS_GRAPHICS_VER for gen 11/12 checks v3: (Daneile) - Drop changes to intel_ring_submission since WA does not apply to it - Log an error if MSG IDLE is not defined for an engine Signed-off-by: Umesh Nerlige Ramappa Fixes: f6aa0d713c88 ("drm/i915: Add Wa_22011802037 force cs halt") Acked-by: Tvrtko Ursulin Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20220621192105.2100585-1-umesh.nerlige.ramappa@intel.com --- drivers/gpu/drm/i915/gt/intel_engine.h | 2 + drivers/gpu/drm/i915/gt/intel_engine_cs.c | 88 +++++++++++++++++++++- .../gpu/drm/i915/gt/intel_execlists_submission.c | 7 ++ drivers/gpu/drm/i915/gt/uc/intel_guc.c | 4 +- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 81 ++------------------ 5 files changed, 103 insertions(+), 79 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 1431f1e9dbee..04e435bce79b 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -201,6 +201,8 @@ int intel_ring_submission_setup(struct intel_engine_cs *engine); int intel_engine_stop_cs(struct intel_engine_cs *engine); void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine); +void intel_engine_wait_for_pending_mi_fw(struct intel_engine_cs *engine); + void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask); u64 intel_engine_get_active_head(const struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 136cc44c3deb..283870c65991 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1376,10 +1376,10 @@ static int __intel_engine_stop_cs(struct intel_engine_cs *engine, intel_uncore_write_fw(uncore, mode, _MASKED_BIT_ENABLE(STOP_RING)); /* - * Wa_22011802037 : gen12, Prior to doing a reset, ensure CS is + * Wa_22011802037 : gen11, gen12, Prior to doing a reset, ensure CS is * stopped, set ring stop bit and prefetch disable bit to halt CS */ - if (GRAPHICS_VER(engine->i915) == 12) + if (IS_GRAPHICS_VER(engine->i915, 11, 12)) intel_uncore_write_fw(uncore, RING_MODE_GEN7(engine->mmio_base), _MASKED_BIT_ENABLE(GEN12_GFX_PREFETCH_DISABLE)); @@ -1402,6 +1402,18 @@ int intel_engine_stop_cs(struct intel_engine_cs *engine) return -ENODEV; ENGINE_TRACE(engine, "\n"); + /* + * TODO: Find out why occasionally stopping the CS times out. Seen + * especially with gem_eio tests. + * + * Occasionally trying to stop the cs times out, but does not adversely + * affect functionality. The timeout is set as a config parameter that + * defaults to 100ms. In most cases the follow up operation is to wait + * for pending MI_FORCE_WAKES. The assumption is that this timeout is + * sufficient for any pending MI_FORCEWAKEs to complete. Once root + * caused, the caller must check and handle the return from this + * function. 
+ */ if (__intel_engine_stop_cs(engine, 1000, stop_timeout(engine))) { ENGINE_TRACE(engine, "timed out on STOP_RING -> IDLE; HEAD:%04x, TAIL:%04x\n", @@ -1428,6 +1440,78 @@ void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine) ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING)); } +static u32 __cs_pending_mi_force_wakes(struct intel_engine_cs *engine) +{ + static const i915_reg_t _reg[I915_NUM_ENGINES] = { + [RCS0] = MSG_IDLE_CS, + [BCS0] = MSG_IDLE_BCS, + [VCS0] = MSG_IDLE_VCS0, + [VCS1] = MSG_IDLE_VCS1, + [VCS2] = MSG_IDLE_VCS2, + [VCS3] = MSG_IDLE_VCS3, + [VCS4] = MSG_IDLE_VCS4, + [VCS5] = MSG_IDLE_VCS5, + [VCS6] = MSG_IDLE_VCS6, + [VCS7] = MSG_IDLE_VCS7, + [VECS0] = MSG_IDLE_VECS0, + [VECS1] = MSG_IDLE_VECS1, + [VECS2] = MSG_IDLE_VECS2, + [VECS3] = MSG_IDLE_VECS3, + [CCS0] = MSG_IDLE_CS, + [CCS1] = MSG_IDLE_CS, + [CCS2] = MSG_IDLE_CS, + [CCS3] = MSG_IDLE_CS, + }; + u32 val; + + if (!_reg[engine->id].reg) { + drm_err(&engine->i915->drm, + "MSG IDLE undefined for engine id %u\n", engine->id); + return 0; + } + + val = intel_uncore_read(engine->uncore, _reg[engine->id]); + + /* bits[29:25] & bits[13:9] >> shift */ + return (val & (val >> 16) & MSG_IDLE_FW_MASK) >> MSG_IDLE_FW_SHIFT; +} + +static void __gpm_wait_for_fw_complete(struct intel_gt *gt, u32 fw_mask) +{ + int ret; + + /* Ensure GPM receives fw up/down after CS is stopped */ + udelay(1); + + /* Wait for forcewake request to complete in GPM */ + ret = __intel_wait_for_register_fw(gt->uncore, + GEN9_PWRGT_DOMAIN_STATUS, + fw_mask, fw_mask, 5000, 0, NULL); + + /* Ensure CS receives fw ack from GPM */ + udelay(1); + + if (ret) + GT_TRACE(gt, "Failed to complete pending forcewake %d\n", ret); +} + +/* + * Wa_22011802037:gen12: In addition to stopping the cs, we need to wait for any + * pending MI_FORCE_WAKEUP requests that the CS has initiated to complete. The + * pending status is indicated by bits[13:9] (masked by bits[29:25]) in the + * MSG_IDLE register. There's one MSG_IDLE register per reset domain. Since we + * are concerned only with the gt reset here, we use a logical OR of pending + * forcewakeups from all reset domains and then wait for them to complete by + * querying PWRGT_DOMAIN_STATUS. 
+ */ +void intel_engine_wait_for_pending_mi_fw(struct intel_engine_cs *engine) +{ + u32 fw_pending = __cs_pending_mi_force_wakes(engine); + + if (fw_pending) + __gpm_wait_for_fw_complete(engine->gt, fw_pending); +} + /* NB: please notice the memset */ void intel_engine_get_instdone(const struct intel_engine_cs *engine, struct intel_instdone *instdone) diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 98f1c7258cc0..aab240ea1d25 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -2968,6 +2968,13 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine) ring_set_paused(engine, 1); intel_engine_stop_cs(engine); + /* + * Wa_22011802037:gen11/gen12: In addition to stopping the cs, we need + * to wait for any pending mi force wakeups + */ + if (IS_GRAPHICS_VER(engine->i915, 11, 12)) + intel_engine_wait_for_pending_mi_fw(engine); + engine->execlists.reset_ccid = active_ccid(engine); } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 35887cb53201..2706a8c65090 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -310,8 +310,8 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc) if (IS_DG2(gt->i915)) flags |= GUC_WA_DUAL_QUEUE; - /* Wa_22011802037: graphics version 12 */ - if (GRAPHICS_VER(gt->i915) == 12) + /* Wa_22011802037: graphics version 11/12 */ + if (IS_GRAPHICS_VER(gt->i915, 11, 12)) flags |= GUC_WA_PRE_PARSER; /* Wa_16011777198:dg2 */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index c9f167b80910..40f726c61e95 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1540,87 +1540,18 @@ static void guc_reset_state(struct intel_context *ce, u32 head, bool scrub) lrc_update_regs(ce, engine, head); } -static u32 __cs_pending_mi_force_wakes(struct intel_engine_cs *engine) -{ - static const i915_reg_t _reg[I915_NUM_ENGINES] = { - [RCS0] = MSG_IDLE_CS, - [BCS0] = MSG_IDLE_BCS, - [VCS0] = MSG_IDLE_VCS0, - [VCS1] = MSG_IDLE_VCS1, - [VCS2] = MSG_IDLE_VCS2, - [VCS3] = MSG_IDLE_VCS3, - [VCS4] = MSG_IDLE_VCS4, - [VCS5] = MSG_IDLE_VCS5, - [VCS6] = MSG_IDLE_VCS6, - [VCS7] = MSG_IDLE_VCS7, - [VECS0] = MSG_IDLE_VECS0, - [VECS1] = MSG_IDLE_VECS1, - [VECS2] = MSG_IDLE_VECS2, - [VECS3] = MSG_IDLE_VECS3, - [CCS0] = MSG_IDLE_CS, - [CCS1] = MSG_IDLE_CS, - [CCS2] = MSG_IDLE_CS, - [CCS3] = MSG_IDLE_CS, - }; - u32 val; - - if (!_reg[engine->id].reg) - return 0; - - val = intel_uncore_read(engine->uncore, _reg[engine->id]); - - /* bits[29:25] & bits[13:9] >> shift */ - return (val & (val >> 16) & MSG_IDLE_FW_MASK) >> MSG_IDLE_FW_SHIFT; -} - -static void __gpm_wait_for_fw_complete(struct intel_gt *gt, u32 fw_mask) -{ - int ret; - - /* Ensure GPM receives fw up/down after CS is stopped */ - udelay(1); - - /* Wait for forcewake request to complete in GPM */ - ret = __intel_wait_for_register_fw(gt->uncore, - GEN9_PWRGT_DOMAIN_STATUS, - fw_mask, fw_mask, 5000, 0, NULL); - - /* Ensure CS receives fw ack from GPM */ - udelay(1); - - if (ret) - GT_TRACE(gt, "Failed to complete pending forcewake %d\n", ret); -} - -/* - * Wa_22011802037:gen12: In addition to stopping the cs, we need to wait for any - * pending MI_FORCE_WAKEUP requests that the CS has initiated to complete. 
The - * pending status is indicated by bits[13:9] (masked by bits[ 29:25]) in the - * MSG_IDLE register. There's one MSG_IDLE register per reset domain. Since we - * are concerned only with the gt reset here, we use a logical OR of pending - * forcewakeups from all reset domains and then wait for them to complete by - * querying PWRGT_DOMAIN_STATUS. - */ static void guc_engine_reset_prepare(struct intel_engine_cs *engine) { - u32 fw_pending; - - if (GRAPHICS_VER(engine->i915) != 12) + if (!IS_GRAPHICS_VER(engine->i915, 11, 12)) return; - /* - * Wa_22011802037 - * TODO: Occasionally trying to stop the cs times out, but does not - * adversely affect functionality. The timeout is set as a config - * parameter that defaults to 100ms. Assuming that this timeout is - * sufficient for any pending MI_FORCEWAKEs to complete, ignore the - * timeout returned here until it is root caused. - */ intel_engine_stop_cs(engine); - fw_pending = __cs_pending_mi_force_wakes(engine); - if (fw_pending) - __gpm_wait_for_fw_complete(engine->gt, fw_pending); + /* + * Wa_22011802037:gen11/gen12: In addition to stopping the cs, we need + * to wait for any pending mi force wakeups + */ + intel_engine_wait_for_pending_mi_fw(engine); } static void guc_reset_nop(struct intel_engine_cs *engine) -- cgit v1.2.3 From 58eaa6b3fb636072a4f19e6b6c76bbf564e95b95 Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Wed, 22 Jun 2022 17:32:25 -0700 Subject: drm/i915/guc/slpc: Use non-blocking H2G for waitboost SLPC min/max frequency updates require H2G calls. We are seeing timeouts when GuC channel is backed up and it is unable to respond in a timely fashion causing warnings and affecting CI. This is seen when waitboosting happens during a stress test. this patch updates the waitboost path to use a non-blocking H2G call instead, which returns as soon as the message is successfully transmitted. v2: Use drm_notice to report any errors that might occur while sending the waitboost H2G request (Tvrtko) v3: Add drm_notice inside force_min_freq (Ashutosh) Cc: Ashutosh Dixit Signed-off-by: Vinay Belgaumkar Reviewed-by: Ashutosh Dixit Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20220623003225.23301-1-vinay.belgaumkar@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 42 ++++++++++++++++++++++++----- 1 file changed, 35 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index 2df31af70d63..ec9c4ca0f615 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -98,6 +98,30 @@ static u32 slpc_get_state(struct intel_guc_slpc *slpc) return data->header.global_state; } +static int guc_action_slpc_set_param_nb(struct intel_guc *guc, u8 id, u32 value) +{ + u32 request[] = { + GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, + SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2), + id, + value, + }; + int ret; + + ret = intel_guc_send_nb(guc, request, ARRAY_SIZE(request), 0); + + return ret > 0 ? 
-EPROTO : ret; +} + +static int slpc_set_param_nb(struct intel_guc_slpc *slpc, u8 id, u32 value) +{ + struct intel_guc *guc = slpc_to_guc(slpc); + + GEM_BUG_ON(id >= SLPC_MAX_PARAM); + + return guc_action_slpc_set_param_nb(guc, id, value); +} + static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value) { u32 request[] = { @@ -208,12 +232,14 @@ static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq) */ with_intel_runtime_pm(&i915->runtime_pm, wakeref) { - ret = slpc_set_param(slpc, - SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, - freq); + /* Non-blocking request will avoid stalls */ + ret = slpc_set_param_nb(slpc, + SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, + freq); if (ret) - i915_probe_error(i915, "Unable to force min freq to %u: %d", - freq, ret); + drm_notice(&i915->drm, + "Failed to send set_param for min freq(%d): (%d)\n", + freq, ret); } return ret; @@ -222,6 +248,7 @@ static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq) static void slpc_boost_work(struct work_struct *work) { struct intel_guc_slpc *slpc = container_of(work, typeof(*slpc), boost_work); + int err; /* * Raise min freq to boost. It's possible that @@ -231,8 +258,9 @@ static void slpc_boost_work(struct work_struct *work) */ mutex_lock(&slpc->lock); if (atomic_read(&slpc->num_waiters)) { - slpc_force_min_freq(slpc, slpc->boost_freq); - slpc->num_boosts++; + err = slpc_force_min_freq(slpc, slpc->boost_freq); + if (!err) + slpc->num_boosts++; } mutex_unlock(&slpc->lock); } -- cgit v1.2.3 From a06968563775181690125091f470a8655742dcbf Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Fri, 24 Jun 2022 13:08:21 +0200 Subject: drm/i915: Fix a lockdep warning at error capture For some platforms we use the stop_machine() version of gen8_ggtt_insert_page/gen8_ggtt_insert_entries to avoid a concurrent GGTT access bug, but this causes a circular locking dependency warning: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&ggtt->error_mutex); lock(dma_fence_map); lock(&ggtt->error_mutex); lock(cpu_hotplug_lock); Fix this by calling gen8_ggtt_insert_page/gen8_ggtt_insert_entries directly at error capture, which is safe from concurrent GGTT access because the reset path makes sure of that. v2: Fix rebase conflict and add a comment. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/5595 Reviewed-by: Gwan-gyeong Mun Suggested-by: Chris Wilson Signed-off-by: Nirmoy Das Reviewed-by: Andrzej Hajda Signed-off-by: Ramalingam C Link: https://patchwork.freedesktop.org/patch/msgid/20220624110821.29190-1-nirmoy.das@intel.com --- drivers/gpu/drm/i915/gt/intel_ggtt.c | 10 ++++++++++ drivers/gpu/drm/i915/gt/intel_gtt.h | 9 +++++++++ drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 5 ++++- drivers/gpu/drm/i915/i915_gpu_error.c | 8 ++++++-- 4 files changed, 29 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index ffff96180313..15a915bb4088 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -960,6 +960,16 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) if (intel_vm_no_concurrent_access_wa(i915)) { ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL; ggtt->vm.insert_page = bxt_vtd_ggtt_insert_page__BKL; + + /* + * Calling stop_machine() version of GGTT update function + * at error capture/reset path will raise lockdep warning. + * Allow calling gen8_ggtt_insert_* directly at reset path + * which is safe from parallel GGTT updates.
+ */ + ggtt->vm.raw_insert_page = gen8_ggtt_insert_page; + ggtt->vm.raw_insert_entries = gen8_ggtt_insert_entries; + ggtt->vm.bind_async_flags = I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; } diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index 29fd3a9e8b2e..e639434e97fd 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -306,6 +306,15 @@ struct i915_address_space { struct i915_vma_resource *vma_res, enum i915_cache_level cache_level, u32 flags); + void (*raw_insert_page)(struct i915_address_space *vm, + dma_addr_t addr, + u64 offset, + enum i915_cache_level cache_level, + u32 flags); + void (*raw_insert_entries)(struct i915_address_space *vm, + struct i915_vma_resource *vma_res, + enum i915_cache_level cache_level, + u32 flags); void (*cleanup)(struct i915_address_space *vm); void (*foreach)(struct i915_address_space *vm, diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index d2c5c9367cc4..c06e83872c34 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -493,7 +493,10 @@ static void uc_fw_bind_ggtt(struct intel_uc_fw *uc_fw) if (i915_gem_object_is_lmem(obj)) pte_flags |= PTE_LM; - ggtt->vm.insert_entries(&ggtt->vm, dummy, I915_CACHE_NONE, pte_flags); + if (ggtt->vm.raw_insert_entries) + ggtt->vm.raw_insert_entries(&ggtt->vm, dummy, I915_CACHE_NONE, pte_flags); + else + ggtt->vm.insert_entries(&ggtt->vm, dummy, I915_CACHE_NONE, pte_flags); } static void uc_fw_unbind_ggtt(struct intel_uc_fw *uc_fw) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index bff8a111424a..f9b1969ed7ed 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1104,8 +1104,12 @@ i915_vma_coredump_create(const struct intel_gt *gt, for_each_sgt_daddr(dma, iter, vma_res->bi.pages) { mutex_lock(&ggtt->error_mutex); - ggtt->vm.insert_page(&ggtt->vm, dma, slot, - I915_CACHE_NONE, 0); + if (ggtt->vm.raw_insert_page) + ggtt->vm.raw_insert_page(&ggtt->vm, dma, slot, + I915_CACHE_NONE, 0); + else + ggtt->vm.insert_page(&ggtt->vm, dma, slot, + I915_CACHE_NONE, 0); mb(); s = io_mapping_map_wc(&ggtt->iomap, slot, PAGE_SIZE); -- cgit v1.2.3
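For reference, the callback-dispatch pattern that this last patch introduces is easier to see outside of diff context. The following is a minimal, self-contained sketch rather than driver code: the toy_vm type and the function names are simplified stand-ins invented for illustration, loosely mirroring the i915_address_space vfuncs touched above.

/*
 * Illustrative sketch only: an error-capture path prefers a "raw"
 * insert hook (one that does not go through stop_machine()) and falls
 * back to the regular hook when no workaround wrapper is installed.
 */
#include <stdint.h>

struct toy_vm {
	/* Default insert; may be a stop_machine() wrapper on some platforms. */
	void (*insert_page)(struct toy_vm *vm, uint64_t addr, uint64_t offset);
	/* Optional direct insert, usable while a reset excludes other GGTT updates. */
	void (*raw_insert_page)(struct toy_vm *vm, uint64_t addr, uint64_t offset);
};

void toy_error_capture_insert_page(struct toy_vm *vm, uint64_t addr, uint64_t offset)
{
	/* Prefer the raw hook when a workaround wrapped the default one. */
	if (vm->raw_insert_page)
		vm->raw_insert_page(vm, addr, offset);
	else
		vm->insert_page(vm, addr, offset);
}

Leaving the raw hooks NULL on platforms that do not need the VT-d workaround keeps the common path unchanged, which is why the patch above assigns them only inside the intel_vm_no_concurrent_access_wa() branch.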