From 83842357c48ba9270bdf973fd21c8c1a2a4af72b Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Fri, 10 Mar 2023 11:10:24 +0100 Subject: drm/i915/gt: Update engine_init_common documentation Change the function doc to reflect updated name. v2: un-kerneldoc the comment(Matt). :s/engines_init_common/engine_init_common(Andi) Cc: Andi Shyti Cc: Matt Roper Signed-off-by: Nirmoy Das Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20230310101024.4700-1-nirmoy.das@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index ad3413242100..5c6c9a6d469c 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1428,8 +1428,8 @@ create_kernel_context(struct intel_engine_cs *engine) &kernel, "kernel_context"); } -/** - * intel_engines_init_common - initialize cengine state which might require hw access +/* + * engine_init_common - initialize engine state which might require hw access * @engine: Engine to initialize. * * Initializes @engine@ structure members shared between legacy and execlists -- cgit v1.2.3 From 1de178421f1a95de408610c89655ec1d4edb9d29 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Wed, 8 Mar 2023 14:36:24 +0100 Subject: drm/i915/gt: prevent forcewake releases during BAR resize Tests on DG2 machines show that releasing forcewakes during BAR resize results later in forcewake ack timeouts. Since forcewakes can be realeased asynchronously the simplest way to prevent it is to get all forcewakes for duration of BAR resizing. v2: hold rpm as well during resizing (Rodrigo) Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/6530 Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/7853 Reviewed-by: Rodrigo Vivi Acked-by: Nirmoy Das Reviewed-by: Andi Shyti Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/20230308133624.2131582-1-andrzej.hajda@intel.com --- drivers/gpu/drm/i915/gt/intel_region_lmem.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c index f3ad93db0b21..a9a6230982f6 100644 --- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c +++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c @@ -54,6 +54,7 @@ static void i915_resize_lmem_bar(struct drm_i915_private *i915, resource_size_t struct resource *root_res; resource_size_t rebar_size; resource_size_t current_size; + intel_wakeref_t wakeref; u32 pci_cmd; int i; @@ -102,15 +103,25 @@ static void i915_resize_lmem_bar(struct drm_i915_private *i915, resource_size_t return; } - /* First disable PCI memory decoding references */ - pci_read_config_dword(pdev, PCI_COMMAND, &pci_cmd); - pci_write_config_dword(pdev, PCI_COMMAND, - pci_cmd & ~PCI_COMMAND_MEMORY); + /* + * Releasing forcewake during BAR resizing results in later forcewake + * ack timeouts and former can happen any time - it is asynchronous. + * Grabbing all forcewakes prevents it. + */ + with_intel_runtime_pm(i915->uncore.rpm, wakeref) { + intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL); - _resize_bar(i915, GEN12_LMEM_BAR, rebar_size); + /* First disable PCI memory decoding references */ + pci_read_config_dword(pdev, PCI_COMMAND, &pci_cmd); + pci_write_config_dword(pdev, PCI_COMMAND, + pci_cmd & ~PCI_COMMAND_MEMORY); - pci_assign_unassigned_bus_resources(pdev->bus); - pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd); + _resize_bar(i915, GEN12_LMEM_BAR, rebar_size); + + pci_assign_unassigned_bus_resources(pdev->bus); + pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd); + intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL); + } } #else static void i915_resize_lmem_bar(struct drm_i915_private *i915, resource_size_t lmem_size) {} -- cgit v1.2.3 From b288d740f885648680a8f1bcfbb1039d16be3f13 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Fri, 10 Mar 2023 10:23:49 +0100 Subject: drm/i915/gt: introduce vm->scratch_range callback The callback will be responsible for setting scratch page PTEs for specified range. In contrast to clear_range it cannot be optimized to nop. It will be used by code adding guard pages. Reviewed-by: Andi Shyti Reviewed-by: Nirmoy Das Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/20230308-guard_error_capture-v6-1-1b5f31422563@intel.com --- drivers/gpu/drm/i915/gt/intel_ggtt.c | 23 +++++++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c | 1 + drivers/gpu/drm/i915/gt/intel_gtt.h | 2 ++ 3 files changed, 26 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 0c7fe360f873..8a45624203b8 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -290,6 +290,27 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm, ggtt->invalidate(ggtt); } +static void gen8_ggtt_clear_range(struct i915_address_space *vm, + u64 start, u64 length) +{ + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + unsigned int first_entry = start / I915_GTT_PAGE_SIZE; + unsigned int num_entries = length / I915_GTT_PAGE_SIZE; + const gen8_pte_t scratch_pte = vm->scratch[0]->encode; + gen8_pte_t __iomem *gtt_base = + (gen8_pte_t __iomem *)ggtt->gsm + first_entry; + const int max_entries = ggtt_total_entries(ggtt) - first_entry; + int i; + + if (WARN(num_entries > max_entries, + "First entry = %d; Num entries = %d (max=%d)\n", + first_entry, num_entries, max_entries)) + num_entries = max_entries; + + for (i = 0; i < num_entries; i++) + gen8_set_pte(>t_base[i], scratch_pte); +} + static void gen6_ggtt_insert_page(struct i915_address_space *vm, dma_addr_t addr, u64 offset, @@ -918,6 +939,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) ggtt->vm.cleanup = gen6_gmch_remove; ggtt->vm.insert_page = gen8_ggtt_insert_page; ggtt->vm.clear_range = nop_clear_range; + ggtt->vm.scratch_range = gen8_ggtt_clear_range; ggtt->vm.insert_entries = gen8_ggtt_insert_entries; @@ -1081,6 +1103,7 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt) ggtt->vm.clear_range = nop_clear_range; if (!HAS_FULL_PPGTT(i915)) ggtt->vm.clear_range = gen6_ggtt_clear_range; + ggtt->vm.scratch_range = gen6_ggtt_clear_range; ggtt->vm.insert_page = gen6_ggtt_insert_page; ggtt->vm.insert_entries = gen6_ggtt_insert_entries; ggtt->vm.cleanup = gen6_gmch_remove; diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c b/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c index 0e3630103693..e978840e5f49 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c @@ -103,6 +103,7 @@ int intel_ggtt_gmch_probe(struct i915_ggtt *ggtt) ggtt->vm.insert_page = gmch_ggtt_insert_page; ggtt->vm.insert_entries = gmch_ggtt_insert_entries; ggtt->vm.clear_range = gmch_ggtt_clear_range; + ggtt->vm.scratch_range = gmch_ggtt_clear_range; ggtt->vm.cleanup = gmch_ggtt_remove; ggtt->invalidate = gmch_ggtt_invalidate; diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index 5a775310d3fc..69ce55f517f5 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -298,6 +298,8 @@ struct i915_address_space { u64 start, u64 length); void (*clear_range)(struct i915_address_space *vm, u64 start, u64 length); + void (*scratch_range)(struct i915_address_space *vm, + u64 start, u64 length); void (*insert_page)(struct i915_address_space *vm, dma_addr_t addr, u64 offset, -- cgit v1.2.3 From 72f6107d2f2294f76d9fb086acd0b01690ea5021 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Fri, 10 Mar 2023 10:23:50 +0100 Subject: drm/i915: add guard page to ggtt->error_capture Write-combining memory allows speculative reads by CPU. ggtt->error_capture is WC mapped to CPU, so CPU/MMU can try to prefetch memory beyond the error_capture, ie it tries to read memory pointed by next PTE in GGTT. If this PTE points to invalid address DMAR errors will occur. This behaviour was observed on ADL and RPL platforms. To avoid it, guard scratch page should be added after error_capture. The patch fixes the most annoying issue with error capture but since WC reads are used also in other places there is a risk similar problem can affect them as well. v2: - modified commit message (I hope the diagnosis is correct), - added bug checks to ensure scratch is initialized on gen3 platforms. CI produces strange stacktrace for it suggesting scratch[0] is NULL, to be removed after resolving the issue with gen3 platforms. v3: - removed bug checks, replaced with gen check. v4: - change code for scratch page insertion to support all platforms, - add info in commit message there could be more similar issues v5: - check for nop_clear_range instead of gen8 (Tvrtko), - re-insert scratch pages on resume (Tvrtko) v6: - use scratch_range callback to set scratch pages (Chris) Reviewed-by: Andi Shyti Acked-by: Nirmoy Das Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/20230308-guard_error_capture-v6-2-1b5f31422563@intel.com --- drivers/gpu/drm/i915/gt/intel_ggtt.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 8a45624203b8..c1dfb219075a 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -571,8 +571,12 @@ static int init_ggtt(struct i915_ggtt *ggtt) * paths, and we trust that 0 will remain reserved. However, * the only likely reason for failure to insert is a driver * bug, which we expect to cause other failures... + * + * Since CPU can perform speculative reads on error capture + * (write-combining allows it) add scratch page after error + * capture to avoid DMAR errors. */ - ggtt->error_capture.size = I915_GTT_PAGE_SIZE; + ggtt->error_capture.size = 2 * I915_GTT_PAGE_SIZE; ggtt->error_capture.color = I915_COLOR_UNEVICTABLE; if (drm_mm_reserve_node(&ggtt->vm.mm, &ggtt->error_capture)) drm_mm_insert_node_in_range(&ggtt->vm.mm, @@ -582,11 +586,15 @@ static int init_ggtt(struct i915_ggtt *ggtt) 0, ggtt->mappable_end, DRM_MM_INSERT_LOW); } - if (drm_mm_node_allocated(&ggtt->error_capture)) + if (drm_mm_node_allocated(&ggtt->error_capture)) { + u64 start = ggtt->error_capture.start; + u64 size = ggtt->error_capture.size; + + ggtt->vm.scratch_range(&ggtt->vm, start, size); drm_dbg(&ggtt->vm.i915->drm, "Reserved GGTT:[%llx, %llx] for use by error capture\n", - ggtt->error_capture.start, - ggtt->error_capture.start + ggtt->error_capture.size); + start, start + size); + } /* * The upper portion of the GuC address space has a sizeable hole @@ -1279,6 +1287,10 @@ void i915_ggtt_resume(struct i915_ggtt *ggtt) flush = i915_ggtt_resume_vm(&ggtt->vm); + if (drm_mm_node_allocated(&ggtt->error_capture)) + ggtt->vm.scratch_range(&ggtt->vm, ggtt->error_capture.start, + ggtt->error_capture.size); + ggtt->invalidate(ggtt); if (flush) -- cgit v1.2.3 From ae1da08fb306caa8cc134b81ea68c537cfe7a451 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 16 Mar 2023 14:27:28 +0000 Subject: drm/i915: Simplify vcs/bsd engine selection No need to look at the mask of present engines when we already have a count stored ever since e2d0ff3525b9 ("drm/i915: Count engine instances per uabi class"). Signed-off-by: Tvrtko Ursulin Cc: Jonathan Cavitt Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20230316142728.1335239-1-tvrtko.ursulin@linux.intel.com [tursulin: fixup typo in patch title] --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 9dce2957b4e5..3aeede6aee4d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -2449,11 +2449,6 @@ static int eb_submit(struct i915_execbuffer *eb) return err; } -static int num_vcs_engines(struct drm_i915_private *i915) -{ - return hweight_long(VDBOX_MASK(to_gt(i915))); -} - /* * Find one BSD ring to dispatch the corresponding BSD command. * The engine index is returned. @@ -2467,7 +2462,7 @@ gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv, /* Check whether the file_priv has already selected one ring. */ if ((int)file_priv->bsd_engine < 0) file_priv->bsd_engine = - get_random_u32_below(num_vcs_engines(dev_priv)); + get_random_u32_below(dev_priv->engine_uabi_class_count[I915_ENGINE_CLASS_VIDEO]); return file_priv->bsd_engine; } @@ -2655,7 +2650,8 @@ eb_select_legacy_ring(struct i915_execbuffer *eb) return -1; } - if (user_ring_id == I915_EXEC_BSD && num_vcs_engines(i915) > 1) { + if (user_ring_id == I915_EXEC_BSD && + i915->engine_uabi_class_count[I915_ENGINE_CLASS_VIDEO] > 1) { unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK; if (bsd_idx == I915_EXEC_BSD_DEFAULT) { -- cgit v1.2.3 From c4252a11131c7f27a158294241466e2a4e7ff94e Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Tue, 14 Mar 2023 16:19:20 +0100 Subject: drm/i915/gt: perform uc late init after probe error injection Probe pseudo errors should be injected only in places where real errors can be encountered, otherwise unwinding code can be broken. Placing intel_uc_init_late before i915_inject_probe_error violated this rule, resulting in following bug: __intel_gt_disable:655 GEM_BUG_ON(intel_gt_pm_is_awake(gt)) Fixes: 481d458caede ("drm/i915/guc: Add golden context to GuC ADS") Acked-by: Nirmoy Das Reviewed-by: Andi Shyti Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/20230314151920.1065847-1-andrzej.hajda@intel.com --- drivers/gpu/drm/i915/gt/intel_gt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 89ccb95e146c..7d6428a37120 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -737,12 +737,12 @@ int intel_gt_init(struct intel_gt *gt) if (err) goto err_gt; - intel_uc_init_late(>->uc); - err = i915_inject_probe_error(gt->i915, -EIO); if (err) goto err_gt; + intel_uc_init_late(>->uc); + intel_migrate_init(>->migrate, gt); goto out_fw; -- cgit v1.2.3 From 91f4228960adb6583a33af310912163469f49da7 Mon Sep 17 00:00:00 2001 From: Fei Yang Date: Wed, 15 Mar 2023 11:08:00 -0700 Subject: drm/i915/selftests: keep same cache settings as timeline On MTL, objects allocated through i915_gem_object_create_internal() are mapped as uncached in GPU by default because HAS_LLC is false. However in the live_hwsp_read selftest these watcher objects are mapped as WB on CPU side. The conseqence is that the updates done by the GPU are not immediately visible to CPU, thus the selftest is randomly failing due to the stale data in CPU cache. Solution can be either setting WC for CPU + UC for GPU, or WB for CPU + 1-way coherent WB for GPU. To keep the consistency, let's simply inherit the same cache settings from the timeline, which is WB for CPU + 1-way coherent WB for GPU, because this test is supposed to emulate the behavior of the timeline anyway. Signed-off-by: Fei Yang Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20230315180800.2632766-1-fei.yang@intel.com --- drivers/gpu/drm/i915/gt/selftest_timeline.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c index 522d0190509c..9f536c251179 100644 --- a/drivers/gpu/drm/i915/gt/selftest_timeline.c +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c @@ -825,7 +825,8 @@ static bool cmp_gte(u32 a, u32 b) return a >= b; } -static int setup_watcher(struct hwsp_watcher *w, struct intel_gt *gt) +static int setup_watcher(struct hwsp_watcher *w, struct intel_gt *gt, + struct intel_timeline *tl) { struct drm_i915_gem_object *obj; struct i915_vma *vma; @@ -834,7 +835,10 @@ static int setup_watcher(struct hwsp_watcher *w, struct intel_gt *gt) if (IS_ERR(obj)) return PTR_ERR(obj); - w->map = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); + /* keep the same cache settings as timeline */ + i915_gem_object_set_cache_coherency(obj, tl->hwsp_ggtt->obj->cache_level); + w->map = i915_gem_object_pin_map_unlocked(obj, + page_unmask_bits(tl->hwsp_ggtt->obj->mm.mapping)); if (IS_ERR(w->map)) { i915_gem_object_put(obj); return PTR_ERR(w->map); @@ -1004,8 +1008,10 @@ static int live_hwsp_read(void *arg) if (!tl->has_initial_breadcrumb) goto out_free; + selftest_tl_pin(tl); + for (i = 0; i < ARRAY_SIZE(watcher); i++) { - err = setup_watcher(&watcher[i], gt); + err = setup_watcher(&watcher[i], gt, tl); if (err) goto out; } @@ -1160,6 +1166,8 @@ out: for (i = 0; i < ARRAY_SIZE(watcher); i++) cleanup_watcher(&watcher[i]); + intel_timeline_unpin(tl); + if (igt_flush_test(gt->i915)) err = -EIO; -- cgit v1.2.3 From 5e008ba67cb80084e99b40ccd46f9029ae421632 Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Tue, 14 Mar 2023 19:29:06 -0700 Subject: drm/i915: Fix format for perf_limit_reasons Use hex format so that it is easier to decode. Fixes: fe5979665f64 ("drm/i915/debugfs: Add perf_limit_reasons in debugfs") Signed-off-by: Vinay Belgaumkar Reviewed-by: Ashutosh Dixit Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20230315022906.2467408-1-vinay.belgaumkar@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c index 83df4cd5e06c..80dbbef86b1d 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c @@ -580,7 +580,7 @@ static bool perf_limit_reasons_eval(void *data) } DEFINE_SIMPLE_ATTRIBUTE(perf_limit_reasons_fops, perf_limit_reasons_get, - perf_limit_reasons_clear, "%llu\n"); + perf_limit_reasons_clear, "0x%llx\n"); void intel_gt_pm_debugfs_register(struct intel_gt *gt, struct dentry *root) { -- cgit v1.2.3 From 44df42e66139b5fac8db49ee354be279210f9816 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Wed, 15 Mar 2023 17:48:00 -0700 Subject: drm/i915/pmu: Use functions common with sysfs to read actual freq Expose intel_rps_read_actual_frequency_fw to read the actual freq without taking forcewake for use by PMU. The code is refactored to use a common set of functions across sysfs and PMU. Using common functions with sysfs in PMU solves the issues of missing support for MTL and missing support for older generations (prior to Gen6). It also future proofs the PMU where sometimes code has been updated for sysfs and PMU has been missed. v2: Remove runtime_pm_if_in_use from read_actual_frequency_fw (Tvrtko) v3: (Tvrtko) - Remove goto in __read_cagf - Unexport intel_rps_get_cagf and intel_rps_read_punit_req Fixes: 22009b6dad66 ("drm/i915/mtl: Modify CAGF functions for MTL") Link: https://gitlab.freedesktop.org/drm/intel/-/issues/8280 Signed-off-by: Ashutosh Dixit Reviewed-by: Tvrtko Ursulin Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20230316004800.2539753-1-ashutosh.dixit@intel.com --- drivers/gpu/drm/i915/gt/intel_rps.c | 38 ++++++++++++++++++++----------------- drivers/gpu/drm/i915/gt/intel_rps.h | 4 +--- drivers/gpu/drm/i915/i915_pmu.c | 10 ++++------ 3 files changed, 26 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 9ad3bc7201cb..fc73cfe0e39b 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -2074,16 +2074,6 @@ void intel_rps_sanitize(struct intel_rps *rps) rps_disable_interrupts(rps); } -u32 intel_rps_read_rpstat_fw(struct intel_rps *rps) -{ - struct drm_i915_private *i915 = rps_to_i915(rps); - i915_reg_t rpstat; - - rpstat = (GRAPHICS_VER(i915) >= 12) ? GEN12_RPSTAT1 : GEN6_RPSTAT1; - - return intel_uncore_read_fw(rps_to_gt(rps)->uncore, rpstat); -} - u32 intel_rps_read_rpstat(struct intel_rps *rps) { struct drm_i915_private *i915 = rps_to_i915(rps); @@ -2094,7 +2084,7 @@ u32 intel_rps_read_rpstat(struct intel_rps *rps) return intel_uncore_read(rps_to_gt(rps)->uncore, rpstat); } -u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat) +static u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat) { struct drm_i915_private *i915 = rps_to_i915(rps); u32 cagf; @@ -2117,10 +2107,11 @@ u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat) return cagf; } -static u32 read_cagf(struct intel_rps *rps) +static u32 __read_cagf(struct intel_rps *rps, bool take_fw) { struct drm_i915_private *i915 = rps_to_i915(rps); struct intel_uncore *uncore = rps_to_uncore(rps); + i915_reg_t r = INVALID_MMIO_REG; u32 freq; /* @@ -2128,22 +2119,30 @@ static u32 read_cagf(struct intel_rps *rps) * registers will return 0 freq when GT is in RC6 */ if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) { - freq = intel_uncore_read(uncore, MTL_MIRROR_TARGET_WP1); + r = MTL_MIRROR_TARGET_WP1; } else if (GRAPHICS_VER(i915) >= 12) { - freq = intel_uncore_read(uncore, GEN12_RPSTAT1); + r = GEN12_RPSTAT1; } else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { vlv_punit_get(i915); freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); vlv_punit_put(i915); } else if (GRAPHICS_VER(i915) >= 6) { - freq = intel_uncore_read(uncore, GEN6_RPSTAT1); + r = GEN6_RPSTAT1; } else { - freq = intel_uncore_read(uncore, MEMSTAT_ILK); + r = MEMSTAT_ILK; } + if (i915_mmio_reg_valid(r)) + freq = take_fw ? intel_uncore_read(uncore, r) : intel_uncore_read_fw(uncore, r); + return intel_rps_get_cagf(rps, freq); } +static u32 read_cagf(struct intel_rps *rps) +{ + return __read_cagf(rps, true); +} + u32 intel_rps_read_actual_frequency(struct intel_rps *rps) { struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm; @@ -2156,7 +2155,12 @@ u32 intel_rps_read_actual_frequency(struct intel_rps *rps) return freq; } -u32 intel_rps_read_punit_req(struct intel_rps *rps) +u32 intel_rps_read_actual_frequency_fw(struct intel_rps *rps) +{ + return intel_gpu_freq(rps, __read_cagf(rps, false)); +} + +static u32 intel_rps_read_punit_req(struct intel_rps *rps) { struct intel_uncore *uncore = rps_to_uncore(rps); struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm; diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h index 9e1cad9ba0e9..d86ddfee095e 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.h +++ b/drivers/gpu/drm/i915/gt/intel_rps.h @@ -34,8 +34,8 @@ void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive); int intel_gpu_freq(struct intel_rps *rps, int val); int intel_freq_opcode(struct intel_rps *rps, int val); -u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat1); u32 intel_rps_read_actual_frequency(struct intel_rps *rps); +u32 intel_rps_read_actual_frequency_fw(struct intel_rps *rps); u32 intel_rps_get_requested_frequency(struct intel_rps *rps); u32 intel_rps_get_min_frequency(struct intel_rps *rps); u32 intel_rps_get_min_raw_freq(struct intel_rps *rps); @@ -46,10 +46,8 @@ int intel_rps_set_max_frequency(struct intel_rps *rps, u32 val); u32 intel_rps_get_rp0_frequency(struct intel_rps *rps); u32 intel_rps_get_rp1_frequency(struct intel_rps *rps); u32 intel_rps_get_rpn_frequency(struct intel_rps *rps); -u32 intel_rps_read_punit_req(struct intel_rps *rps); u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps); u32 intel_rps_read_rpstat(struct intel_rps *rps); -u32 intel_rps_read_rpstat_fw(struct intel_rps *rps); void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps); void intel_rps_raise_unslice(struct intel_rps *rps); void intel_rps_lower_unslice(struct intel_rps *rps); diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 52531ab28c5f..6d422b056f8a 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -393,14 +393,12 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns) * case we assume the system is running at the intended * frequency. Fortunately, the read should rarely fail! */ - val = intel_rps_read_rpstat_fw(rps); - if (val) - val = intel_rps_get_cagf(rps, val); - else - val = rps->cur_freq; + val = intel_rps_read_actual_frequency_fw(rps); + if (!val) + val = intel_gpu_freq(rps, rps->cur_freq); add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT], - intel_gpu_freq(rps, val), period_ns / 1000); + val, period_ns / 1000); } if (pmu->enable & config_mask(I915_PMU_REQUESTED_FREQUENCY)) { -- cgit v1.2.3 From 02abecdeebfcd3848b26b70778dd7f6eb0db65e1 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 17 Mar 2023 12:18:01 -0600 Subject: drm/i915/uapi: Replace fake flex-array with flexible-array member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Zero-length arrays as fake flexible arrays are deprecated and we are moving towards adopting C99 flexible-array members instead. Address the following warning found with GCC-13 and -fstrict-flex-arrays=3 enabled: drivers/gpu/drm/i915/gem/i915_gem_context.c: In function ‘set_proto_ctx_engines.isra’: drivers/gpu/drm/i915/gem/i915_gem_context.c:769:41: warning: array subscript n is outside array bounds of ‘struct i915_engine_class_instance[0]’ [-Warray-bounds=] 769 | if (copy_from_user(&ci, &user->engines[n], sizeof(ci))) { | ^~~~~~~~~~~~~~~~~ ./include/uapi/drm/i915_drm.h:2494:43: note: while referencing ‘engines’ 2494 | struct i915_engine_class_instance engines[0]; This helps with the ongoing efforts to tighten the FORTIFY_SOURCE routines on memcpy() and help us make progress towards globally enabling -fstrict-flex-arrays=3 [1]. Link: https://github.com/KSPP/linux/issues/21 Link: https://github.com/KSPP/linux/issues/271 Link: https://gcc.gnu.org/pipermail/gcc-patches/2022-October/602902.html [1] Signed-off-by: Gustavo A. R. Silva Reviewed-by: Kees Cook Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/ZBSu2QsUJy31kjSE@work --- include/uapi/drm/i915_drm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 8df261c5ab9b..5e458d6f2895 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -2491,7 +2491,7 @@ struct i915_context_param_engines { #define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0 /* see i915_context_engines_load_balance */ #define I915_CONTEXT_ENGINES_EXT_BOND 1 /* see i915_context_engines_bond */ #define I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT 2 /* see i915_context_engines_parallel_submit */ - struct i915_engine_class_instance engines[0]; + struct i915_engine_class_instance engines[]; } __attribute__((packed)); #define I915_DEFINE_CONTEXT_PARAM_ENGINES(name__, N__) struct { \ -- cgit v1.2.3 From 70b5ffb393f3f1fbb00ac52c5288d233ae6e991e Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Sat, 18 Mar 2023 21:36:14 +0100 Subject: drm/i915/gt: Create per-gt debugfs files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To support multi-GT configurations, we need to generate independent debug files for each GT. To achieve this create a separate directory for each GT under the debugfs directory. For instance, in a system with two GTs, the debugfs structure would look like this: /sys/kernel/debug/dri └── 0    ├── gt0    │   ├── drpc    │   ├── engines    │   ├── forcewake    │   ├── frequency    │   └── rps_boost    └── gt1    :   ├── drpc    :   ├── engines    :   ├── forcewake       ├── frequency       └── rps_boost Signed-off-by: Andi Shyti Cc: Tvrtko Ursulin Reviewed-by: Radhakrishna Sripada Link: https://patchwork.freedesktop.org/patch/msgid/20230318203616.183765-2-andi.shyti@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_gt_debugfs.c | 4 +++- drivers/gpu/drm/i915/gt/uc/intel_guc.h | 2 ++ drivers/gpu/drm/i915/gt/uc/intel_guc_log.c | 5 ++++- drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c | 2 ++ 4 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c index 5fc2df01aa0d..4dc23b8d3aa2 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c @@ -83,11 +83,13 @@ static void gt_debugfs_register(struct intel_gt *gt, struct dentry *root) void intel_gt_debugfs_register(struct intel_gt *gt) { struct dentry *root; + char gtname[4]; if (!gt->i915->drm.primary->debugfs_root) return; - root = debugfs_create_dir("gt", gt->i915->drm.primary->debugfs_root); + snprintf(gtname, sizeof(gtname), "gt%u", gt->info.id); + root = debugfs_create_dir(gtname, gt->i915->drm.primary->debugfs_root); if (IS_ERR(root)) return; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index bb4dfe707a7d..e46aac1a41e6 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -42,6 +42,8 @@ struct intel_guc { /** @capture: the error-state-capture module's data and objects */ struct intel_guc_state_capture *capture; + struct dentry *dbgfs_node; + /** @sched_engine: Global engine used to submit requests to GuC */ struct i915_sched_engine *sched_engine; /** diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c index 195db8c9d420..55bc8b55fbc0 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c @@ -542,8 +542,11 @@ static int guc_log_relay_create(struct intel_guc_log *log) */ n_subbufs = 8; + if (!guc->dbgfs_node) + return -ENOENT; + guc_log_relay_chan = relay_open("guc_log", - i915->drm.primary->debugfs_root, + guc->dbgfs_node, subbuf_size, n_subbufs, &relay_callbacks, i915); if (!guc_log_relay_chan) { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c index 284d6fbc2d08..2f93cc4e408a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c @@ -54,6 +54,8 @@ void intel_uc_debugfs_register(struct intel_uc *uc, struct dentry *gt_root) if (IS_ERR(root)) return; + uc->guc.dbgfs_node = root; + intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), uc); intel_guc_debugfs_register(&uc->guc, root); -- cgit v1.2.3 From 80ac788a8d2fc8904cce97b7873b6d8fd513a46d Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Sat, 18 Mar 2023 21:36:16 +0100 Subject: drm/i915/debugfs: Enable upper layer interfaces to act on all gt's The commit 82a149a62b6b ("drm/i915/gt: move remaining debugfs interfaces into gt") moved gt-related debugfs files in the gtX/ directories to operate on individual gt's. However, the original files were only functioning on the root GT (GT 0) and have been left in the same location to maintain compatibility with userspace users. Add multiplexing functionality to the higher directories' files. This enables the operations to be performed on all the GTs with a single write. In the case of reads, the files provide an or'ed value across all the tiles. Signed-off-by: Andi Shyti Cc: Maciej Patelczyk Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20230318203616.183765-4-andi.shyti@linux.intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index a356ca490159..fa9c0a387c42 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -575,14 +575,34 @@ static int i915_wa_registers(struct seq_file *m, void *unused) static int i915_wedged_get(void *data, u64 *val) { struct drm_i915_private *i915 = data; + struct intel_gt *gt; + unsigned int i; - return intel_gt_debugfs_reset_show(to_gt(i915), val); + *val = 0; + + for_each_gt(gt, i915, i) { + int ret; + + ret = intel_gt_debugfs_reset_show(gt, val); + if (ret) + return ret; + + /* at least one tile should be wedged */ + if (*val) + break; + } + + return 0; } static int i915_wedged_set(void *data, u64 val) { struct drm_i915_private *i915 = data; - intel_gt_debugfs_reset_store(to_gt(i915), val); + struct intel_gt *gt; + unsigned int i; + + for_each_gt(gt, i915, i) + intel_gt_debugfs_reset_store(gt, val); return 0; } @@ -732,7 +752,11 @@ static int i915_sseu_status(struct seq_file *m, void *unused) static int i915_forcewake_open(struct inode *inode, struct file *file) { struct drm_i915_private *i915 = inode->i_private; - intel_gt_pm_debugfs_forcewake_user_open(to_gt(i915)); + struct intel_gt *gt; + unsigned int i; + + for_each_gt(gt, i915, i) + intel_gt_pm_debugfs_forcewake_user_open(gt); return 0; } @@ -740,7 +764,11 @@ static int i915_forcewake_open(struct inode *inode, struct file *file) static int i915_forcewake_release(struct inode *inode, struct file *file) { struct drm_i915_private *i915 = inode->i_private; - intel_gt_pm_debugfs_forcewake_user_release(to_gt(i915)); + struct intel_gt *gt; + unsigned int i; + + for_each_gt(gt, i915, i) + intel_gt_pm_debugfs_forcewake_user_release(gt); return 0; } -- cgit v1.2.3 From badb30270960df505cf245bad8844c227731fb0b Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Wed, 22 Mar 2023 01:16:11 +0100 Subject: drm/i915: Use i915 instead of dev_priv insied the file_priv structure In the process of renaming all instances of 'dev_priv' to 'i915', start using 'i915' within the 'drm_i915_file_private' structure. Signed-off-by: Andi Shyti Reviewed-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/20230322001611.632321-1-andi.shyti@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 22 +++++++++++----------- drivers/gpu/drm/i915/i915_drm_client.c | 2 +- drivers/gpu/drm/i915/i915_file_private.h | 2 +- drivers/gpu/drm/i915/i915_gem.c | 2 +- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 6d639ca24dfb..5402a7bbcb1d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -364,7 +364,7 @@ static int set_proto_ctx_vm(struct drm_i915_file_private *fpriv, struct i915_gem_proto_context *pc, const struct drm_i915_gem_context_param *args) { - struct drm_i915_private *i915 = fpriv->dev_priv; + struct drm_i915_private *i915 = fpriv->i915; struct i915_address_space *vm; if (args->size) @@ -733,7 +733,7 @@ static int set_proto_ctx_engines(struct drm_i915_file_private *fpriv, struct i915_gem_proto_context *pc, const struct drm_i915_gem_context_param *args) { - struct drm_i915_private *i915 = fpriv->dev_priv; + struct drm_i915_private *i915 = fpriv->i915; struct set_proto_ctx_engines set = { .i915 = i915 }; struct i915_context_param_engines __user *user = u64_to_user_ptr(args->value); @@ -813,7 +813,7 @@ static int set_proto_ctx_sseu(struct drm_i915_file_private *fpriv, struct i915_gem_proto_context *pc, struct drm_i915_gem_context_param *args) { - struct drm_i915_private *i915 = fpriv->dev_priv; + struct drm_i915_private *i915 = fpriv->i915; struct drm_i915_gem_context_param_sseu user_sseu; struct intel_sseu *sseu; int ret; @@ -913,7 +913,7 @@ static int set_proto_ctx_param(struct drm_i915_file_private *fpriv, break; case I915_CONTEXT_PARAM_PRIORITY: - ret = validate_priority(fpriv->dev_priv, args); + ret = validate_priority(fpriv->i915, args); if (!ret) pc->sched.priority = args->value; break; @@ -934,12 +934,12 @@ static int set_proto_ctx_param(struct drm_i915_file_private *fpriv, if (args->size) ret = -EINVAL; else - ret = proto_context_set_persistence(fpriv->dev_priv, pc, + ret = proto_context_set_persistence(fpriv->i915, pc, args->value); break; case I915_CONTEXT_PARAM_PROTECTED_CONTENT: - ret = proto_context_set_protected(fpriv->dev_priv, pc, + ret = proto_context_set_protected(fpriv->i915, pc, args->value); break; @@ -1770,7 +1770,7 @@ void i915_gem_context_close(struct drm_file *file) unsigned long idx; xa_for_each(&file_priv->proto_context_xa, idx, pc) - proto_context_close(file_priv->dev_priv, pc); + proto_context_close(file_priv->i915, pc); xa_destroy(&file_priv->proto_context_xa); mutex_destroy(&file_priv->proto_context_lock); @@ -2206,7 +2206,7 @@ finalize_create_context_locked(struct drm_i915_file_private *file_priv, lockdep_assert_held(&file_priv->proto_context_lock); - ctx = i915_gem_create_context(file_priv->dev_priv, pc); + ctx = i915_gem_create_context(file_priv->i915, pc); if (IS_ERR(ctx)) return ctx; @@ -2223,7 +2223,7 @@ finalize_create_context_locked(struct drm_i915_file_private *file_priv, old = xa_erase(&file_priv->proto_context_xa, id); GEM_BUG_ON(old != pc); - proto_context_close(file_priv->dev_priv, pc); + proto_context_close(file_priv->i915, pc); return ctx; } @@ -2352,7 +2352,7 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, GEM_WARN_ON(ctx && pc); if (pc) - proto_context_close(file_priv->dev_priv, pc); + proto_context_close(file_priv->i915, pc); if (ctx) context_close(ctx); @@ -2505,7 +2505,7 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, * GEM_CONTEXT_CREATE starting with graphics * version 13. */ - WARN_ON(GRAPHICS_VER(file_priv->dev_priv) > 12); + WARN_ON(GRAPHICS_VER(file_priv->i915) > 12); ret = set_proto_ctx_param(file_priv, pc, args); } else { ret = -ENOENT; diff --git a/drivers/gpu/drm/i915/i915_drm_client.c b/drivers/gpu/drm/i915/i915_drm_client.c index b09d1d386574..e8fa172ebe5e 100644 --- a/drivers/gpu/drm/i915/i915_drm_client.c +++ b/drivers/gpu/drm/i915/i915_drm_client.c @@ -130,7 +130,7 @@ void i915_drm_client_fdinfo(struct seq_file *m, struct file *f) { struct drm_file *file = f->private_data; struct drm_i915_file_private *file_priv = file->driver_priv; - struct drm_i915_private *i915 = file_priv->dev_priv; + struct drm_i915_private *i915 = file_priv->i915; struct i915_drm_client *client = file_priv->client; struct pci_dev *pdev = to_pci_dev(i915->drm.dev); unsigned int i; diff --git a/drivers/gpu/drm/i915/i915_file_private.h b/drivers/gpu/drm/i915/i915_file_private.h index f42877869692..c9cb8eecacde 100644 --- a/drivers/gpu/drm/i915/i915_file_private.h +++ b/drivers/gpu/drm/i915/i915_file_private.h @@ -15,7 +15,7 @@ struct drm_file; struct i915_drm_client; struct drm_i915_file_private { - struct drm_i915_private *dev_priv; + struct drm_i915_private *i915; union { struct drm_file *file; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 35950fa91406..2ba922fbbd5f 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1313,7 +1313,7 @@ int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file) } file->driver_priv = file_priv; - file_priv->dev_priv = i915; + file_priv->i915 = i915; file_priv->file = file; file_priv->client = client; -- cgit v1.2.3 From e2ee10474ce766686e7a7496585cdfaf79e3a1bf Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 16 Mar 2023 17:59:18 +0100 Subject: drm/i915/gem: Flush lmem contents after construction i915_gem_object_create_lmem_from_data() lacks the flush of the data written to lmem to ensure the object is marked as dirty and the writes flushed to the backing store. Once created, we can immediately release the obj->mm.mapping caching of the vmap. Fixes: 7acbbc7cf485 ("drm/i915/guc: put all guc objects in lmem when available") Cc: Matthew Auld Cc: Daniele Ceraolo Spurio Cc: Andi Shyti Cc: Matthew Brost Cc: John Harrison Signed-off-by: Chris Wilson Cc: # v5.16+ Signed-off-by: Nirmoy Das Reviewed-by: Andi Shyti Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20230316165918.13074-1-nirmoy.das@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_lmem.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c index 8949fb0a944f..3198b64ad7db 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c @@ -127,7 +127,8 @@ i915_gem_object_create_lmem_from_data(struct drm_i915_private *i915, memcpy(map, data, size); - i915_gem_object_unpin_map(obj); + i915_gem_object_flush_map(obj); + __i915_gem_object_release_map(obj); return obj; } -- cgit v1.2.3 From 4d6d94ba8823a2f4e48c56ed33cb77061c1f425d Mon Sep 17 00:00:00 2001 From: Jonathan Cavitt Date: Mon, 20 Mar 2023 20:21:17 +0100 Subject: drm/i915/selftests: Drop igt_cs_tlb The gt_tlb live selftest has the same code coverage as the igt_cs_tlb subtest of gtt, except it is better at detecting TLB bugs. Furthermore, while igt_cs_tlb is hitting some unforeseen issues, these issues are either false positives due to the test being poorly formatted, or are true positives that can be more easily diagnosed with smaller tests. As such, igt_cs_tlb is superceded by and obsoleted by gt_tlb, meaning it can be removed. Signed-off-by: Jonathan Cavitt Reviewed-by: Andrzej Hajda Acked-by: Nirmoy Das Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20230320192117.287374-1-andi.shyti@linux.intel.com --- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 356 -------------------------- 1 file changed, 356 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 01e75160a84a..5361ce70d3f2 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -1940,361 +1940,6 @@ out_put: return err; } -static int context_sync(struct intel_context *ce) -{ - struct i915_request *rq; - long timeout; - - rq = intel_context_create_request(ce); - if (IS_ERR(rq)) - return PTR_ERR(rq); - - i915_request_get(rq); - i915_request_add(rq); - - timeout = i915_request_wait(rq, 0, HZ / 5); - i915_request_put(rq); - - return timeout < 0 ? -EIO : 0; -} - -static struct i915_request * -submit_batch(struct intel_context *ce, u64 addr) -{ - struct i915_request *rq; - int err; - - rq = intel_context_create_request(ce); - if (IS_ERR(rq)) - return rq; - - err = 0; - if (rq->engine->emit_init_breadcrumb) /* detect a hang */ - err = rq->engine->emit_init_breadcrumb(rq); - if (err == 0) - err = rq->engine->emit_bb_start(rq, addr, 0, 0); - - if (err == 0) - i915_request_get(rq); - i915_request_add(rq); - - return err ? ERR_PTR(err) : rq; -} - -static u32 *spinner(u32 *batch, int i) -{ - return batch + i * 64 / sizeof(*batch) + 4; -} - -static void end_spin(u32 *batch, int i) -{ - *spinner(batch, i) = MI_BATCH_BUFFER_END; - wmb(); -} - -static int igt_cs_tlb(void *arg) -{ - const unsigned int count = PAGE_SIZE / 64; - const unsigned int chunk_size = count * PAGE_SIZE; - struct drm_i915_private *i915 = arg; - struct drm_i915_gem_object *bbe, *act, *out; - struct i915_gem_engines_iter it; - struct i915_address_space *vm; - struct i915_gem_context *ctx; - struct intel_context *ce; - struct i915_vma *vma; - I915_RND_STATE(prng); - struct file *file; - unsigned int i; - u32 *result; - u32 *batch; - int err = 0; - - /* - * Our mission here is to fool the hardware to execute something - * from scratch as it has not seen the batch move (due to missing - * the TLB invalidate). - */ - - file = mock_file(i915); - if (IS_ERR(file)) - return PTR_ERR(file); - - ctx = live_context(i915, file); - if (IS_ERR(ctx)) { - err = PTR_ERR(ctx); - goto out_unlock; - } - - vm = i915_gem_context_get_eb_vm(ctx); - if (i915_is_ggtt(vm)) - goto out_vm; - - /* Create two pages; dummy we prefill the TLB, and intended */ - bbe = i915_gem_object_create_internal(i915, PAGE_SIZE); - if (IS_ERR(bbe)) { - err = PTR_ERR(bbe); - goto out_vm; - } - - batch = i915_gem_object_pin_map_unlocked(bbe, I915_MAP_WC); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto out_put_bbe; - } - memset32(batch, MI_BATCH_BUFFER_END, PAGE_SIZE / sizeof(u32)); - i915_gem_object_flush_map(bbe); - i915_gem_object_unpin_map(bbe); - - act = i915_gem_object_create_internal(i915, PAGE_SIZE); - if (IS_ERR(act)) { - err = PTR_ERR(act); - goto out_put_bbe; - } - - /* Track the execution of each request by writing into different slot */ - batch = i915_gem_object_pin_map_unlocked(act, I915_MAP_WC); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto out_put_act; - } - for (i = 0; i < count; i++) { - u32 *cs = batch + i * 64 / sizeof(*cs); - u64 addr = (vm->total - PAGE_SIZE) + i * sizeof(u32); - - GEM_BUG_ON(GRAPHICS_VER(i915) < 6); - cs[0] = MI_STORE_DWORD_IMM_GEN4; - if (GRAPHICS_VER(i915) >= 8) { - cs[1] = lower_32_bits(addr); - cs[2] = upper_32_bits(addr); - cs[3] = i; - cs[4] = MI_NOOP; - cs[5] = MI_BATCH_BUFFER_START_GEN8; - } else { - cs[1] = 0; - cs[2] = lower_32_bits(addr); - cs[3] = i; - cs[4] = MI_NOOP; - cs[5] = MI_BATCH_BUFFER_START; - } - } - - out = i915_gem_object_create_internal(i915, PAGE_SIZE); - if (IS_ERR(out)) { - err = PTR_ERR(out); - goto out_put_batch; - } - i915_gem_object_set_cache_coherency(out, I915_CACHING_CACHED); - - vma = i915_vma_instance(out, vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto out_put_out; - } - - err = i915_vma_pin(vma, 0, 0, - PIN_USER | - PIN_OFFSET_FIXED | - (vm->total - PAGE_SIZE)); - if (err) - goto out_put_out; - GEM_BUG_ON(vma->node.start != vm->total - PAGE_SIZE); - - result = i915_gem_object_pin_map_unlocked(out, I915_MAP_WB); - if (IS_ERR(result)) { - err = PTR_ERR(result); - goto out_put_out; - } - - for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { - IGT_TIMEOUT(end_time); - unsigned long pass = 0; - - if (!intel_engine_can_store_dword(ce->engine)) - continue; - - while (!__igt_timeout(end_time, NULL)) { - struct i915_vm_pt_stash stash = {}; - struct i915_request *rq; - struct i915_gem_ww_ctx ww; - struct i915_vma_resource *vma_res; - u64 offset; - - offset = igt_random_offset(&prng, - 0, vm->total - PAGE_SIZE, - chunk_size, PAGE_SIZE); - - memset32(result, STACK_MAGIC, PAGE_SIZE / sizeof(u32)); - - vma = i915_vma_instance(bbe, vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto end; - } - - i915_gem_object_lock(bbe, NULL); - err = i915_vma_get_pages(vma); - i915_gem_object_unlock(bbe); - if (err) - goto end; - - vma_res = i915_vma_resource_alloc(); - if (IS_ERR(vma_res)) { - i915_vma_put_pages(vma); - err = PTR_ERR(vma_res); - goto end; - } - - i915_gem_ww_ctx_init(&ww, false); -retry: - err = i915_vm_lock_objects(vm, &ww); - if (err) - goto end_ww; - - err = i915_vm_alloc_pt_stash(vm, &stash, chunk_size); - if (err) - goto end_ww; - - err = i915_vm_map_pt_stash(vm, &stash); - if (!err) - vm->allocate_va_range(vm, &stash, offset, chunk_size); - i915_vm_free_pt_stash(vm, &stash); -end_ww: - if (err == -EDEADLK) { - err = i915_gem_ww_ctx_backoff(&ww); - if (!err) - goto retry; - } - i915_gem_ww_ctx_fini(&ww); - if (err) { - kfree(vma_res); - goto end; - } - - i915_vma_resource_init_from_vma(vma_res, vma); - /* Prime the TLB with the dummy pages */ - for (i = 0; i < count; i++) { - vma_res->start = offset + i * PAGE_SIZE; - vm->insert_entries(vm, vma_res, I915_CACHE_NONE, - 0); - - rq = submit_batch(ce, vma_res->start); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - i915_vma_resource_fini(vma_res); - kfree(vma_res); - goto end; - } - i915_request_put(rq); - } - i915_vma_resource_fini(vma_res); - i915_vma_put_pages(vma); - - err = context_sync(ce); - if (err) { - pr_err("%s: dummy setup timed out\n", - ce->engine->name); - kfree(vma_res); - goto end; - } - - vma = i915_vma_instance(act, vm, NULL); - if (IS_ERR(vma)) { - kfree(vma_res); - err = PTR_ERR(vma); - goto end; - } - - i915_gem_object_lock(act, NULL); - err = i915_vma_get_pages(vma); - i915_gem_object_unlock(act); - if (err) { - kfree(vma_res); - goto end; - } - - i915_vma_resource_init_from_vma(vma_res, vma); - /* Replace the TLB with target batches */ - for (i = 0; i < count; i++) { - struct i915_request *rq; - u32 *cs = batch + i * 64 / sizeof(*cs); - u64 addr; - - vma_res->start = offset + i * PAGE_SIZE; - vm->insert_entries(vm, vma_res, I915_CACHE_NONE, 0); - - addr = vma_res->start + i * 64; - cs[4] = MI_NOOP; - cs[6] = lower_32_bits(addr); - cs[7] = upper_32_bits(addr); - wmb(); - - rq = submit_batch(ce, addr); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - i915_vma_resource_fini(vma_res); - kfree(vma_res); - goto end; - } - - /* Wait until the context chain has started */ - if (i == 0) { - while (READ_ONCE(result[i]) && - !i915_request_completed(rq)) - cond_resched(); - } else { - end_spin(batch, i - 1); - } - - i915_request_put(rq); - } - end_spin(batch, count - 1); - - i915_vma_resource_fini(vma_res); - kfree(vma_res); - i915_vma_put_pages(vma); - - err = context_sync(ce); - if (err) { - pr_err("%s: writes timed out\n", - ce->engine->name); - goto end; - } - - for (i = 0; i < count; i++) { - if (result[i] != i) { - pr_err("%s: Write lost on pass %lu, at offset %llx, index %d, found %x, expected %x\n", - ce->engine->name, pass, - offset, i, result[i], i); - err = -EINVAL; - goto end; - } - } - - vm->clear_range(vm, offset, chunk_size); - pass++; - } - } -end: - if (igt_flush_test(i915)) - err = -EIO; - i915_gem_context_unlock_engines(ctx); - i915_gem_object_unpin_map(out); -out_put_out: - i915_gem_object_put(out); -out_put_batch: - i915_gem_object_unpin_map(act); -out_put_act: - i915_gem_object_put(act); -out_put_bbe: - i915_gem_object_put(bbe); -out_vm: - i915_vm_put(vm); -out_unlock: - fput(file); - return err; -} - int i915_gem_gtt_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { @@ -2314,7 +1959,6 @@ int i915_gem_gtt_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_ggtt_fill), SUBTEST(igt_ggtt_page), SUBTEST(igt_ggtt_misaligned_pin), - SUBTEST(igt_cs_tlb), }; GEM_BUG_ON(offset_in_page(to_gt(i915)->ggtt->vm.total)); -- cgit v1.2.3 From 411de2b5ac61a29e1e79db44539f69bb9b35a34d Mon Sep 17 00:00:00 2001 From: John Harrison Date: Thu, 16 Mar 2023 15:06:31 -0700 Subject: drm/i915/guc: Improve GuC load error reporting There are multiple ways in which the GuC load can fail. The driver was reporting the status register as is, but not everyone can read the matrix unfiltered. So add decoding of the common error cases. Also, remove the comment about interrupt based load completion checking being not recommended. The interrupt was removed from the GuC firmware some time ago so it is no longer an option anyway. While at it, also abort the timeout if a known error code is reported. No need to keep waiting if the GuC has already given up the load. v2: Fix mis-matched case and confusing 'success' variable (Daniele). Signed-off-by: John Harrison Reviewed-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20230316220632.3312218-2-John.C.Harrison@Intel.com --- drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h | 17 +++++ drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c | 95 ++++++++++++++++++++----- drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h | 4 +- 3 files changed, 95 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h index 8085fb181274..bcb1129b3610 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h @@ -21,6 +21,9 @@ enum intel_guc_load_status { INTEL_GUC_LOAD_STATUS_ERROR_DEVID_BUILD_MISMATCH = 0x02, INTEL_GUC_LOAD_STATUS_GUC_PREPROD_BUILD_MISMATCH = 0x03, INTEL_GUC_LOAD_STATUS_ERROR_DEVID_INVALID_GUCTYPE = 0x04, + INTEL_GUC_LOAD_STATUS_HWCONFIG_START = 0x05, + INTEL_GUC_LOAD_STATUS_HWCONFIG_DONE = 0x06, + INTEL_GUC_LOAD_STATUS_HWCONFIG_ERROR = 0x07, INTEL_GUC_LOAD_STATUS_GDT_DONE = 0x10, INTEL_GUC_LOAD_STATUS_IDT_DONE = 0x20, INTEL_GUC_LOAD_STATUS_LAPIC_DONE = 0x30, @@ -38,4 +41,18 @@ enum intel_guc_load_status { INTEL_GUC_LOAD_STATUS_READY = 0xF0, }; +enum intel_bootrom_load_status { + INTEL_BOOTROM_STATUS_NO_KEY_FOUND = 0x13, + INTEL_BOOTROM_STATUS_AES_PROD_KEY_FOUND = 0x1A, + INTEL_BOOTROM_STATUS_RSA_FAILED = 0x50, + INTEL_BOOTROM_STATUS_PAVPC_FAILED = 0x73, + INTEL_BOOTROM_STATUS_WOPCM_FAILED = 0x74, + INTEL_BOOTROM_STATUS_LOADLOC_FAILED = 0x75, + INTEL_BOOTROM_STATUS_JUMP_PASSED = 0x76, + INTEL_BOOTROM_STATUS_JUMP_FAILED = 0x77, + INTEL_BOOTROM_STATUS_RC6CTXCONFIG_FAILED = 0x79, + INTEL_BOOTROM_STATUS_MPUMAP_INCORRECT = 0x7A, + INTEL_BOOTROM_STATUS_EXCEPTION = 0x7E, +}; + #endif /* _ABI_GUC_ERRORS_ABI_H */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c index 69133420c78b..0b49d84a8a9c 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c @@ -88,31 +88,64 @@ static int guc_xfer_rsa(struct intel_uc_fw *guc_fw, /* * Read the GuC status register (GUC_STATUS) and store it in the * specified location; then return a boolean indicating whether - * the value matches either of two values representing completion - * of the GuC boot process. + * the value matches either completion or a known failure code. * * This is used for polling the GuC status in a wait_for() * loop below. */ -static inline bool guc_ready(struct intel_uncore *uncore, u32 *status) +static inline bool guc_load_done(struct intel_uncore *uncore, u32 *status, bool *success) { u32 val = intel_uncore_read(uncore, GUC_STATUS); u32 uk_val = REG_FIELD_GET(GS_UKERNEL_MASK, val); + u32 br_val = REG_FIELD_GET(GS_BOOTROM_MASK, val); *status = val; - return uk_val == INTEL_GUC_LOAD_STATUS_READY; + switch (uk_val) { + case INTEL_GUC_LOAD_STATUS_READY: + *success = true; + return true; + + case INTEL_GUC_LOAD_STATUS_ERROR_DEVID_BUILD_MISMATCH: + case INTEL_GUC_LOAD_STATUS_GUC_PREPROD_BUILD_MISMATCH: + case INTEL_GUC_LOAD_STATUS_ERROR_DEVID_INVALID_GUCTYPE: + case INTEL_GUC_LOAD_STATUS_HWCONFIG_ERROR: + case INTEL_GUC_LOAD_STATUS_DPC_ERROR: + case INTEL_GUC_LOAD_STATUS_EXCEPTION: + case INTEL_GUC_LOAD_STATUS_INIT_DATA_INVALID: + case INTEL_GUC_LOAD_STATUS_MPU_DATA_INVALID: + case INTEL_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID: + *success = false; + return true; + } + + switch (br_val) { + case INTEL_BOOTROM_STATUS_NO_KEY_FOUND: + case INTEL_BOOTROM_STATUS_RSA_FAILED: + case INTEL_BOOTROM_STATUS_PAVPC_FAILED: + case INTEL_BOOTROM_STATUS_WOPCM_FAILED: + case INTEL_BOOTROM_STATUS_LOADLOC_FAILED: + case INTEL_BOOTROM_STATUS_JUMP_FAILED: + case INTEL_BOOTROM_STATUS_RC6CTXCONFIG_FAILED: + case INTEL_BOOTROM_STATUS_MPUMAP_INCORRECT: + case INTEL_BOOTROM_STATUS_EXCEPTION: + *success = false; + return true; + } + + return false; } static int guc_wait_ucode(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); struct intel_uncore *uncore = gt->uncore; + bool success; u32 status; int ret; /* * Wait for the GuC to start up. - * NB: Docs recommend not using the interrupt for completion. + * * Measurements indicate this should take no more than 20ms * (assuming the GT clock is at maximum frequency). So, a * timeout here indicates that the GuC has failed and is unusable. @@ -127,28 +160,52 @@ static int guc_wait_ucode(struct intel_guc *guc) * 200ms. Even at slowest clock, this should be sufficient. And * in the working case, a larger timeout makes no difference. */ - ret = wait_for(guc_ready(uncore, &status), 200); - if (ret) { - guc_info(guc, "load failed: status = 0x%08X\n", status); - guc_info(guc, "load failed: status: Reset = %d, " - "BootROM = 0x%02X, UKernel = 0x%02X, " - "MIA = 0x%02X, Auth = 0x%02X\n", - REG_FIELD_GET(GS_MIA_IN_RESET, status), - REG_FIELD_GET(GS_BOOTROM_MASK, status), - REG_FIELD_GET(GS_UKERNEL_MASK, status), - REG_FIELD_GET(GS_MIA_MASK, status), - REG_FIELD_GET(GS_AUTH_STATUS_MASK, status)); - - if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) { + ret = wait_for(guc_load_done(uncore, &status, &success), 200); + if (ret || !success) { + u32 ukernel = REG_FIELD_GET(GS_UKERNEL_MASK, status); + u32 bootrom = REG_FIELD_GET(GS_BOOTROM_MASK, status); + + guc_info(guc, "load failed: status = 0x%08X, ret = %d\n", status, ret); + guc_info(guc, "load failed: status: Reset = %d, BootROM = 0x%02X, UKernel = 0x%02X, MIA = 0x%02X, Auth = 0x%02X\n", + REG_FIELD_GET(GS_MIA_IN_RESET, status), + bootrom, ukernel, + REG_FIELD_GET(GS_MIA_MASK, status), + REG_FIELD_GET(GS_AUTH_STATUS_MASK, status)); + + switch (bootrom) { + case INTEL_BOOTROM_STATUS_NO_KEY_FOUND: + guc_info(guc, "invalid key requested, header = 0x%08X\n", + intel_uncore_read(uncore, GUC_HEADER_INFO)); + ret = -ENOEXEC; + break; + + case INTEL_BOOTROM_STATUS_RSA_FAILED: guc_info(guc, "firmware signature verification failed\n"); ret = -ENOEXEC; + break; } - if (REG_FIELD_GET(GS_UKERNEL_MASK, status) == INTEL_GUC_LOAD_STATUS_EXCEPTION) { + switch (ukernel) { + case INTEL_GUC_LOAD_STATUS_EXCEPTION: guc_info(guc, "firmware exception. EIP: %#x\n", intel_uncore_read(uncore, SOFT_SCRATCH(13))); ret = -ENXIO; + break; + + case INTEL_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID: + guc_info(guc, "illegal register in save/restore workaround list\n"); + ret = -EPERM; + break; + + case INTEL_GUC_LOAD_STATUS_HWCONFIG_START: + guc_info(guc, "still extracting hwconfig table.\n"); + ret = -ETIMEDOUT; + break; } + + /* Uncommon/unexpected error, see earlier status code print for details */ + if (ret == 0) + ret = -ENXIO; } return ret; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h index 9915de32e894..3fd798837502 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h @@ -18,8 +18,6 @@ #define GS_MIA_IN_RESET (0x01 << GS_RESET_SHIFT) #define GS_BOOTROM_SHIFT 1 #define GS_BOOTROM_MASK (0x7F << GS_BOOTROM_SHIFT) -#define GS_BOOTROM_RSA_FAILED (0x50 << GS_BOOTROM_SHIFT) -#define GS_BOOTROM_JUMP_PASSED (0x76 << GS_BOOTROM_SHIFT) #define GS_UKERNEL_SHIFT 8 #define GS_UKERNEL_MASK (0xFF << GS_UKERNEL_SHIFT) #define GS_MIA_SHIFT 16 @@ -32,6 +30,8 @@ #define GS_AUTH_STATUS_BAD (0x01 << GS_AUTH_STATUS_SHIFT) #define GS_AUTH_STATUS_GOOD (0x02 << GS_AUTH_STATUS_SHIFT) +#define GUC_HEADER_INFO _MMIO(0xc014) + #define SOFT_SCRATCH(n) _MMIO(0xc180 + (n) * 4) #define SOFT_SCRATCH_COUNT 16 -- cgit v1.2.3 From 9469d456c6a28494dd9d5cc16e17cf2d4c15c571 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Thu, 16 Mar 2023 15:06:32 -0700 Subject: drm/i915/guc: Allow for very slow GuC loading A failure to load the GuC is occasionally observed where the GuC log actually showed that the GuC had loaded just fine. The implication being that the load just took ever so slightly longer than the 200ms timeout. Given that the actual time should be tens of milliseconds at the slowest, this should never happen. So far the issue has generally been caused by a bad IFWI resulting in low frequencies during boot (depsite the KMD requesting max frequency). However, the issue seems to happen more often than one would like. So a) increase the timeout so that the user still gets a working system even in the case of slow load. And b) report the frequency during the load to see if that is the case of the slow down. v2: Reduce timeout in non-debug builds, add references (Daniele) References: https://gitlab.freedesktop.org/drm/intel/-/issues/7931 References: https://gitlab.freedesktop.org/drm/intel/-/issues/8060 References: https://gitlab.freedesktop.org/drm/intel/-/issues/8083 References: https://gitlab.freedesktop.org/drm/intel/-/issues/8136 References: https://gitlab.freedesktop.org/drm/intel/-/issues/8137 Signed-off-by: John Harrison Tested-by: Ashutosh Dixit Reviewed-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20230316220632.3312218-3-John.C.Harrison@Intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c | 50 +++++++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c index 0b49d84a8a9c..6fda3aec5c66 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c @@ -12,6 +12,7 @@ #include "gt/intel_gt.h" #include "gt/intel_gt_mcr.h" #include "gt/intel_gt_regs.h" +#include "gt/intel_rps.h" #include "intel_guc_fw.h" #include "intel_guc_print.h" #include "i915_drv.h" @@ -135,13 +136,29 @@ static inline bool guc_load_done(struct intel_uncore *uncore, u32 *status, bool return false; } +/* + * Use a longer timeout for debug builds so that problems can be detected + * and analysed. But a shorter timeout for releases so that user's don't + * wait forever to find out there is a problem. Note that the only reason + * an end user should hit the timeout is in case of extreme thermal throttling. + * And a system that is that hot during boot is probably dead anyway! + */ +#if defined(CONFIG_DRM_I915_DEBUG_GEM) +#define GUC_LOAD_RETRY_LIMIT 20 +#else +#define GUC_LOAD_RETRY_LIMIT 3 +#endif + static int guc_wait_ucode(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); struct intel_uncore *uncore = gt->uncore; + ktime_t before, after, delta; bool success; u32 status; - int ret; + int ret, count; + u64 delta_ms; + u32 before_freq; /* * Wait for the GuC to start up. @@ -159,13 +176,32 @@ static int guc_wait_ucode(struct intel_guc *guc) * issues to be resolved. In the meantime bump the timeout to * 200ms. Even at slowest clock, this should be sufficient. And * in the working case, a larger timeout makes no difference. + * + * IFWI updates have also been seen to cause sporadic failures due to + * the requested frequency not being granted and thus the firmware + * load is attempted at minimum frequency. That can lead to load times + * in the seconds range. However, there is a limit on how long an + * individual wait_for() can wait. So wrap it in a loop. */ - ret = wait_for(guc_load_done(uncore, &status, &success), 200); + before_freq = intel_rps_read_actual_frequency(&uncore->gt->rps); + before = ktime_get(); + for (count = 0; count < GUC_LOAD_RETRY_LIMIT; count++) { + ret = wait_for(guc_load_done(uncore, &status, &success), 1000); + if (!ret || !success) + break; + + guc_dbg(guc, "load still in progress, count = %d, freq = %dMHz\n", + count, intel_rps_read_actual_frequency(&uncore->gt->rps)); + } + after = ktime_get(); + delta = ktime_sub(after, before); + delta_ms = ktime_to_ms(delta); if (ret || !success) { u32 ukernel = REG_FIELD_GET(GS_UKERNEL_MASK, status); u32 bootrom = REG_FIELD_GET(GS_BOOTROM_MASK, status); - guc_info(guc, "load failed: status = 0x%08X, ret = %d\n", status, ret); + guc_info(guc, "load failed: status = 0x%08X, time = %lldms, freq = %dMHz, ret = %d\n", + status, delta_ms, intel_rps_read_actual_frequency(&uncore->gt->rps), ret); guc_info(guc, "load failed: status: Reset = %d, BootROM = 0x%02X, UKernel = 0x%02X, MIA = 0x%02X, Auth = 0x%02X\n", REG_FIELD_GET(GS_MIA_IN_RESET, status), bootrom, ukernel, @@ -206,6 +242,14 @@ static int guc_wait_ucode(struct intel_guc *guc) /* Uncommon/unexpected error, see earlier status code print for details */ if (ret == 0) ret = -ENXIO; + } else if (delta_ms > 200) { + guc_warn(guc, "excessive init time: %lldms! [freq = %dMHz, before = %dMHz, status = 0x%08X, count = %d, ret = %d]\n", + delta_ms, intel_rps_read_actual_frequency(&uncore->gt->rps), + before_freq, status, count, ret); + } else { + guc_dbg(guc, "init took %lldms, freq = %dMHz, before = %dMHz, status = 0x%08X, count = %d, ret = %d\n", + delta_ms, intel_rps_read_actual_frequency(&uncore->gt->rps), + before_freq, status, count, ret); } return ret; -- cgit v1.2.3 From 2810ac6c753d17ee2572ffb57fe2382a786a080a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Mar 2023 15:58:51 -0700 Subject: drm/i915/perf: Drop wakeref on GuC RC error If we fail to adjust the GuC run-control on opening the perf stream, make sure we unwind the wakeref just taken. v2: Retain old goto label names (Ashutosh) v3: Drop bitfield boolean Fixes: 01e742746785 ("drm/i915/guc: Support OA when Wa_16011777198 is enabled") Signed-off-by: Chris Wilson Reviewed-by: Ashutosh Dixit Signed-off-by: Umesh Nerlige Ramappa Link: https://patchwork.freedesktop.org/patch/msgid/20230323225901.3743681-2-umesh.nerlige.ramappa@intel.com --- drivers/gpu/drm/i915/i915_perf.c | 14 +++++++++----- drivers/gpu/drm/i915/i915_perf_types.h | 6 ++++++ 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 824a34ec0b83..283a4a3c6862 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1592,9 +1592,7 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream) /* * Wa_16011777198:dg2: Unset the override of GUCRC mode to enable rc6. */ - if (intel_uc_uses_guc_rc(>->uc) && - (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) || - IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0))) + if (stream->override_gucrc) drm_WARN_ON(>->i915->drm, intel_guc_slpc_unset_gucrc_mode(>->uc.guc.slpc)); @@ -3305,8 +3303,10 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, if (ret) { drm_dbg(&stream->perf->i915->drm, "Unable to override gucrc mode\n"); - goto err_config; + goto err_gucrc; } + + stream->override_gucrc = true; } ret = alloc_oa_buffer(stream); @@ -3345,11 +3345,15 @@ err_enable: free_oa_buffer(stream); err_oa_buf_alloc: - free_oa_configs(stream); + if (stream->override_gucrc) + intel_guc_slpc_unset_gucrc_mode(>->uc.guc.slpc); +err_gucrc: intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL); intel_engine_pm_put(stream->engine); + free_oa_configs(stream); + err_config: free_noa_wait(stream); diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h index ca150b7af3f2..4d5d8c365d9e 100644 --- a/drivers/gpu/drm/i915/i915_perf_types.h +++ b/drivers/gpu/drm/i915/i915_perf_types.h @@ -316,6 +316,12 @@ struct i915_perf_stream { * buffer should be checked for available data. */ u64 poll_oa_period; + + /** + * @override_gucrc: GuC RC has been overridden for the perf stream, + * and we need to restore the default configuration on release. + */ + bool override_gucrc; }; /** -- cgit v1.2.3 From 3735040978a43c25a19aa8015ab1a50dffe48f79 Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Thu, 23 Mar 2023 15:58:52 -0700 Subject: drm/i915/mtl: Synchronize i915/BIOS on C6 enabling If BIOS enables/disables C6, i915 should do the same. Also, retain this value across driver reloads. This is needed only for MTL as of now due to an existing bug in OA which needs C6 disabled for it to function. BIOS behavior is also different across platforms in terms of how C6 is enabled. Signed-off-by: Vinay Belgaumkar Reviewed-by: Ashutosh Dixit Signed-off-by: Umesh Nerlige Ramappa Link: https://patchwork.freedesktop.org/patch/msgid/20230323225901.3743681-3-umesh.nerlige.ramappa@intel.com --- drivers/gpu/drm/i915/gt/intel_rc6.c | 26 +++++++++++++++++++++++--- drivers/gpu/drm/i915/gt/intel_rc6.h | 2 ++ drivers/gpu/drm/i915/gt/intel_rc6_types.h | 2 ++ 3 files changed, 27 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index 6184fcc16987..fe788014b6fa 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -420,6 +420,21 @@ static void vlv_rc6_enable(struct intel_rc6 *rc6) GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL; } +bool intel_check_bios_c6_setup(struct intel_rc6 *rc6) +{ + if (!rc6->bios_state_captured) { + struct intel_uncore *uncore = rc6_to_uncore(rc6); + intel_wakeref_t wakeref; + + with_intel_runtime_pm(uncore->rpm, wakeref) + rc6->bios_rc_state = intel_uncore_read(uncore, GEN6_RC_STATE); + + rc6->bios_state_captured = true; + } + + return rc6->bios_rc_state & RC_SW_TARGET_STATE_MASK; +} + static bool bxt_check_bios_rc6_setup(struct intel_rc6 *rc6) { struct intel_uncore *uncore = rc6_to_uncore(rc6); @@ -503,10 +518,10 @@ static bool rc6_supported(struct intel_rc6 *rc6) return false; } - if (IS_MTL_MEDIA_STEP(gt->i915, STEP_A0, STEP_B0) && - gt->type == GT_MEDIA) { + if (IS_METEORLAKE(gt->i915) && + !intel_check_bios_c6_setup(rc6)) { drm_notice(&i915->drm, - "Media RC6 disabled on A step\n"); + "C6 disabled by BIOS\n"); return false; } @@ -707,9 +722,14 @@ void intel_rc6_disable(struct intel_rc6 *rc6) void intel_rc6_fini(struct intel_rc6 *rc6) { struct drm_i915_gem_object *pctx; + struct intel_uncore *uncore = rc6_to_uncore(rc6); intel_rc6_disable(rc6); + /* We want the BIOS C6 state preserved across loads for MTL */ + if (IS_METEORLAKE(rc6_to_i915(rc6)) && rc6->bios_state_captured) + set(uncore, GEN6_RC_STATE, rc6->bios_rc_state); + pctx = fetch_and_zero(&rc6->pctx); if (pctx) i915_gem_object_put(pctx); diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.h b/drivers/gpu/drm/i915/gt/intel_rc6.h index 456fa668a276..e137c2c397c2 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.h +++ b/drivers/gpu/drm/i915/gt/intel_rc6.h @@ -27,4 +27,6 @@ u64 intel_rc6_residency_us(struct intel_rc6 *rc6, enum intel_rc6_res_type id); void intel_rc6_print_residency(struct seq_file *m, const char *title, enum intel_rc6_res_type id); +bool intel_check_bios_c6_setup(struct intel_rc6 *rc6); + #endif /* INTEL_RC6_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_rc6_types.h b/drivers/gpu/drm/i915/gt/intel_rc6_types.h index fa23c4dce00b..cd4587098162 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6_types.h +++ b/drivers/gpu/drm/i915/gt/intel_rc6_types.h @@ -29,6 +29,7 @@ struct intel_rc6 { u64 cur_residency[INTEL_RC6_RES_MAX]; u32 ctl_enable; + u32 bios_rc_state; struct drm_i915_gem_object *pctx; @@ -36,6 +37,7 @@ struct intel_rc6 { bool enabled : 1; bool manual : 1; bool wakeref : 1; + bool bios_state_captured : 1; }; #endif /* INTEL_RC6_TYPES_H */ -- cgit v1.2.3 From 9919d119fbbc913c2459b093eb81fe8197906424 Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Thu, 23 Mar 2023 15:58:53 -0700 Subject: drm/i915/perf: Validate OA sseu config outside switch Once OA supports media engine class:instance, the engine can only be validated outside the switch since class and instance parameters are separate entities. Since OA sseu config depends on engine class:instance, validate OA sseu config outside the switch. v2: (Ashutosh) - Clarify commit message - Use drm_dbg instead of DRM_DEBUG - Reorder stack variables Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20230323225901.3743681-4-umesh.nerlige.ramappa@intel.com --- drivers/gpu/drm/i915/i915_perf.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 283a4a3c6862..ebd76b5c9712 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -3940,7 +3940,9 @@ static int read_properties_unlocked(struct i915_perf *perf, u32 n_props, struct perf_open_properties *props) { + struct drm_i915_gem_context_param_sseu user_sseu; u64 __user *uprop = uprops; + bool config_sseu = false; u32 i; int ret; @@ -4069,8 +4071,6 @@ static int read_properties_unlocked(struct i915_perf *perf, props->hold_preemption = !!value; break; case DRM_I915_PERF_PROP_GLOBAL_SSEU: { - struct drm_i915_gem_context_param_sseu user_sseu; - if (GRAPHICS_VER_FULL(perf->i915) >= IP_VER(12, 50)) { drm_dbg(&perf->i915->drm, "SSEU config not supported on gfx %x\n", @@ -4085,14 +4085,7 @@ static int read_properties_unlocked(struct i915_perf *perf, "Unable to copy global sseu parameter\n"); return -EFAULT; } - - ret = get_sseu_config(&props->sseu, props->engine, &user_sseu); - if (ret) { - drm_dbg(&perf->i915->drm, - "Invalid SSEU configuration\n"); - return ret; - } - props->has_sseu = true; + config_sseu = true; break; } case DRM_I915_PERF_PROP_POLL_OA_PERIOD: @@ -4112,6 +4105,16 @@ static int read_properties_unlocked(struct i915_perf *perf, uprop += 2; } + if (config_sseu) { + ret = get_sseu_config(&props->sseu, props->engine, &user_sseu); + if (ret) { + drm_dbg(&perf->i915->drm, + "Invalid SSEU configuration\n"); + return ret; + } + props->has_sseu = true; + } + return 0; } -- cgit v1.2.3 From 5f284e9c5aab5b12eb48a2cecc7a573c3b4e1cb4 Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Thu, 23 Mar 2023 15:58:54 -0700 Subject: drm/i915/perf: Group engines into respective OA groups Now that we may have multiple OA units in a single GT as well as on separate GTs, create an engine group that maps to a single OA unit. v2: (Jani) - Drop warning on ENOMEM - Reorder patch in the series v3: (Ashutosh) - Remove unused members from perf structs - Update comments - Update engine_supports_oa check - Just return 1 in num_perf_groups_per_gt for now - Set engine->oa_group to NULL to begin with v4: Use engine_supports_oa() check in oa_init_reg_state (Ashutosh) v5: Rebase after dropping engine_supports_oa helper Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20230323225901.3743681-5-umesh.nerlige.ramappa@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_types.h | 10 +++ drivers/gpu/drm/i915/gt/intel_sseu.c | 3 +- drivers/gpu/drm/i915/i915_perf.c | 93 +++++++++++++++++++++++++--- drivers/gpu/drm/i915/i915_perf_types.h | 33 ++++++++-- 4 files changed, 127 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 0a071e5da1a8..960291f88fd6 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -53,6 +53,8 @@ struct intel_gt; struct intel_ring; struct intel_uncore; struct intel_breadcrumbs; +struct intel_engine_cs; +struct i915_perf_group; typedef u32 intel_engine_mask_t; #define ALL_ENGINES ((intel_engine_mask_t)~0ul) @@ -617,6 +619,14 @@ struct intel_engine_cs { } props, defaults; I915_SELFTEST_DECLARE(struct fault_attr reset_timeout); + + /* + * The perf group maps to one OA unit which controls one OA buffer. All + * reports corresponding to this engine will be reported to this OA + * buffer. An engine will map to a single OA unit, but a single OA unit + * can generate reports for multiple engines. + */ + struct i915_perf_group *oa_group; }; static inline bool diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index 6c6198a257ac..1141f875f5bd 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -6,6 +6,7 @@ #include #include "i915_drv.h" +#include "i915_perf_types.h" #include "intel_engine_regs.h" #include "intel_gt_regs.h" #include "intel_sseu.h" @@ -677,7 +678,7 @@ u32 intel_sseu_make_rpcs(struct intel_gt *gt, * If i915/perf is active, we want a stable powergating configuration * on the system. Use the configuration pinned by i915/perf. */ - if (gt->perf.exclusive_stream) + if (gt->perf.group && gt->perf.group[PERF_GROUP_OAG].exclusive_stream) req_sseu = >->perf.sseu; slices = hweight8(req_sseu->slice_mask); diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index ebd76b5c9712..cf4ea5e389a5 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1570,12 +1570,18 @@ free_noa_wait(struct i915_perf_stream *stream) i915_vma_unpin_and_release(&stream->noa_wait, 0); } +static bool engine_supports_oa(const struct intel_engine_cs *engine) +{ + return engine->oa_group; +} + static void i915_oa_stream_destroy(struct i915_perf_stream *stream) { struct i915_perf *perf = stream->perf; struct intel_gt *gt = stream->engine->gt; + struct i915_perf_group *g = stream->engine->oa_group; - if (WARN_ON(stream != gt->perf.exclusive_stream)) + if (WARN_ON(stream != g->exclusive_stream)) return; /* @@ -1584,7 +1590,7 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream) * * See i915_oa_init_reg_state() and lrc_configure_all_contexts() */ - WRITE_ONCE(gt->perf.exclusive_stream, NULL); + WRITE_ONCE(g->exclusive_stream, NULL); perf->ops.disable_metric_set(stream); free_oa_buffer(stream); @@ -3182,6 +3188,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, { struct drm_i915_private *i915 = stream->perf->i915; struct i915_perf *perf = stream->perf; + struct i915_perf_group *g; struct intel_gt *gt; int ret; @@ -3191,6 +3198,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, return -EINVAL; } gt = props->engine->gt; + g = props->engine->oa_group; /* * If the sysfs metrics/ directory wasn't registered for some @@ -3221,7 +3229,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, * counter reports and marshal to the appropriate client * we currently only allow exclusive access */ - if (gt->perf.exclusive_stream) { + if (g->exclusive_stream) { drm_dbg(&stream->perf->i915->drm, "OA unit already in use\n"); return -EBUSY; @@ -3316,7 +3324,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, stream->ops = &i915_oa_stream_ops; stream->engine->gt->perf.sseu = props->sseu; - WRITE_ONCE(gt->perf.exclusive_stream, stream); + WRITE_ONCE(g->exclusive_stream, stream); ret = i915_perf_stream_enable_sync(stream); if (ret) { @@ -3339,7 +3347,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, return 0; err_enable: - WRITE_ONCE(gt->perf.exclusive_stream, NULL); + WRITE_ONCE(g->exclusive_stream, NULL); perf->ops.disable_metric_set(stream); free_oa_buffer(stream); @@ -3373,7 +3381,7 @@ void i915_oa_init_reg_state(const struct intel_context *ce, return; /* perf.exclusive_stream serialised by lrc_configure_all_contexts() */ - stream = READ_ONCE(engine->gt->perf.exclusive_stream); + stream = READ_ONCE(engine->oa_group->exclusive_stream); if (stream && GRAPHICS_VER(stream->perf->i915) < 12) gen8_update_reg_state_unlocked(ce, stream); } @@ -3965,6 +3973,13 @@ static int read_properties_unlocked(struct i915_perf *perf, return -EINVAL; } + if (!engine_supports_oa(props->engine)) { + drm_dbg(&perf->i915->drm, + "Engine not supported by OA %d:%d\n", + I915_ENGINE_CLASS_RENDER, 0); + return -EINVAL; + } + /* Considering that ID = 0 is reserved and assuming that we don't * (currently) expect any configurations to ever specify duplicate * values for a particular property ID then the last _PROP_MAX value is @@ -4743,6 +4758,60 @@ static struct ctl_table oa_table[] = { {} }; +static u32 num_perf_groups_per_gt(struct intel_gt *gt) +{ + return 1; +} + +static u32 __oa_engine_group(struct intel_engine_cs *engine) +{ + if (engine->class == RENDER_CLASS) + return PERF_GROUP_OAG; + else + return PERF_GROUP_INVALID; +} + +static int oa_init_gt(struct intel_gt *gt) +{ + u32 num_groups = num_perf_groups_per_gt(gt); + struct intel_engine_cs *engine; + struct i915_perf_group *g; + intel_engine_mask_t tmp; + + g = kcalloc(num_groups, sizeof(*g), GFP_KERNEL); + if (!g) + return -ENOMEM; + + for_each_engine_masked(engine, gt, ALL_ENGINES, tmp) { + u32 index = __oa_engine_group(engine); + + engine->oa_group = NULL; + if (index < num_groups) { + g[index].num_engines++; + engine->oa_group = &g[index]; + } + } + + gt->perf.num_perf_groups = num_groups; + gt->perf.group = g; + + return 0; +} + +static int oa_init_engine_groups(struct i915_perf *perf) +{ + struct intel_gt *gt; + int i, ret; + + for_each_gt(gt, perf->i915, i) { + ret = oa_init_gt(gt); + if (ret) + return ret; + } + + return 0; +} + static void oa_init_supported_formats(struct i915_perf *perf) { struct drm_i915_private *i915 = perf->i915; @@ -4909,7 +4978,7 @@ void i915_perf_init(struct drm_i915_private *i915) if (perf->ops.enable_metric_set) { struct intel_gt *gt; - int i; + int i, ret; for_each_gt(gt, i915, i) mutex_init(>->perf.lock); @@ -4948,6 +5017,11 @@ void i915_perf_init(struct drm_i915_private *i915) perf->i915 = i915; + ret = oa_init_engine_groups(perf); + if (ret) + drm_err(&i915->drm, + "OA initialization failed %d\n", ret); + oa_init_supported_formats(perf); } } @@ -4976,10 +5050,15 @@ void i915_perf_sysctl_unregister(void) void i915_perf_fini(struct drm_i915_private *i915) { struct i915_perf *perf = &i915->perf; + struct intel_gt *gt; + int i; if (!perf->i915) return; + for_each_gt(gt, perf->i915, i) + kfree(gt->perf.group); + idr_for_each(&perf->metrics_idr, destroy_config, perf); idr_destroy(&perf->metrics_idr); diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h index 4d5d8c365d9e..e7c83dcbec08 100644 --- a/drivers/gpu/drm/i915/i915_perf_types.h +++ b/drivers/gpu/drm/i915/i915_perf_types.h @@ -17,6 +17,7 @@ #include #include +#include "gt/intel_engine_types.h" #include "gt/intel_sseu.h" #include "i915_reg_defs.h" #include "intel_wakeref.h" @@ -30,6 +31,13 @@ struct i915_vma; struct intel_context; struct intel_engine_cs; +enum { + PERF_GROUP_OAG = 0, + + PERF_GROUP_MAX, + PERF_GROUP_INVALID = U32_MAX, +}; + struct i915_oa_format { u32 format; int size; @@ -390,6 +398,20 @@ struct i915_oa_ops { u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream); }; +struct i915_perf_group { + /* + * @exclusive_stream: The stream currently using the OA unit. This is + * sometimes accessed outside a syscall associated to its file + * descriptor. + */ + struct i915_perf_stream *exclusive_stream; + + /* + * @num_engines: The number of engines using this OA unit. + */ + u32 num_engines; +}; + struct i915_perf_gt { /* * Lock associated with anything below within this structure. @@ -402,12 +424,15 @@ struct i915_perf_gt { */ struct intel_sseu sseu; + /** + * @num_perf_groups: number of perf groups per gt. + */ + u32 num_perf_groups; + /* - * @exclusive_stream: The stream currently using the OA unit. This is - * sometimes accessed outside a syscall associated to its file - * descriptor. + * @group: list of OA groups - one for each OA buffer. */ - struct i915_perf_stream *exclusive_stream; + struct i915_perf_group *group; }; struct i915_perf { -- cgit v1.2.3 From 772a5803922a097eaf94cf865c6f4a81416aedb8 Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Thu, 23 Mar 2023 15:58:55 -0700 Subject: drm/i915/perf: Fail modprobe if i915_perf_init fails on OOM i915_perf_init can fail due to OOM. Fail driver init if i915_perf_init fails. v2: (Jani) - Reorder patch in the series Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20230323225901.3743681-6-umesh.nerlige.ramappa@intel.com --- drivers/gpu/drm/i915/i915_driver.c | 4 +++- drivers/gpu/drm/i915/i915_perf.c | 8 ++++++-- drivers/gpu/drm/i915/i915_perf.h | 2 +- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index 19983b77118d..20b88cf226e7 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -610,7 +610,9 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) if (ret) return ret; - i915_perf_init(dev_priv); + ret = i915_perf_init(dev_priv); + if (ret) + return ret; ret = i915_ggtt_probe_hw(dev_priv); if (ret) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index cf4ea5e389a5..584e3e7b9e77 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -4902,7 +4902,7 @@ static void i915_perf_init_info(struct drm_i915_private *i915) * Note: i915-perf initialization is split into an 'init' and 'register' * phase with the i915_perf_register() exposing state to userspace. */ -void i915_perf_init(struct drm_i915_private *i915) +int i915_perf_init(struct drm_i915_private *i915) { struct i915_perf *perf = &i915->perf; @@ -5018,12 +5018,16 @@ void i915_perf_init(struct drm_i915_private *i915) perf->i915 = i915; ret = oa_init_engine_groups(perf); - if (ret) + if (ret) { drm_err(&i915->drm, "OA initialization failed %d\n", ret); + return ret; + } oa_init_supported_formats(perf); } + + return 0; } static int destroy_config(int id, void *p, void *data) diff --git a/drivers/gpu/drm/i915/i915_perf.h b/drivers/gpu/drm/i915/i915_perf.h index f96e09a4af04..253637651d5e 100644 --- a/drivers/gpu/drm/i915/i915_perf.h +++ b/drivers/gpu/drm/i915/i915_perf.h @@ -18,7 +18,7 @@ struct i915_oa_config; struct intel_context; struct intel_engine_cs; -void i915_perf_init(struct drm_i915_private *i915); +int i915_perf_init(struct drm_i915_private *i915); void i915_perf_fini(struct drm_i915_private *i915); void i915_perf_register(struct drm_i915_private *i915); void i915_perf_unregister(struct drm_i915_private *i915); -- cgit v1.2.3 From dbc9a5fb168deb140722c12d8332b25754def017 Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Thu, 23 Mar 2023 15:58:56 -0700 Subject: drm/i915/perf: Parse 64bit report header formats correctly Now that OA formats come in flavor of 64 bit reports, the report header has 64 bit report-id, timestamp, context-id and gpu-ticks fields. When filtering these reports, use the right width for these fields. Note that upper dword of context id is reserved, so squash lower dword only. v2: (Ashutosh) - Drop inline - Update comment with dword definitions - report id and timestamp Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20230323225901.3743681-7-umesh.nerlige.ramappa@intel.com --- drivers/gpu/drm/i915/i915_perf.c | 109 +++++++++++++++++++++++++-------- drivers/gpu/drm/i915/i915_perf_types.h | 6 ++ 2 files changed, 90 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 584e3e7b9e77..d1f7266bec6d 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -441,6 +441,67 @@ static u32 gen7_oa_hw_tail_read(struct i915_perf_stream *stream) return oastatus1 & GEN7_OASTATUS1_TAIL_MASK; } +#define oa_report_header_64bit(__s) \ + ((__s)->oa_buffer.format->header == HDR_64_BIT) + +static u64 oa_report_id(struct i915_perf_stream *stream, void *report) +{ + return oa_report_header_64bit(stream) ? *(u64 *)report : *(u32 *)report; +} + +static u64 oa_report_reason(struct i915_perf_stream *stream, void *report) +{ + return (oa_report_id(stream, report) >> OAREPORT_REASON_SHIFT) & + (GRAPHICS_VER(stream->perf->i915) == 12 ? + OAREPORT_REASON_MASK_EXTENDED : + OAREPORT_REASON_MASK); +} + +static void oa_report_id_clear(struct i915_perf_stream *stream, u32 *report) +{ + if (oa_report_header_64bit(stream)) + *(u64 *)report = 0; + else + *report = 0; +} + +static bool oa_report_ctx_invalid(struct i915_perf_stream *stream, void *report) +{ + return !(oa_report_id(stream, report) & + stream->perf->gen8_valid_ctx_bit) && + GRAPHICS_VER(stream->perf->i915) <= 11; +} + +static u64 oa_timestamp(struct i915_perf_stream *stream, void *report) +{ + return oa_report_header_64bit(stream) ? + *((u64 *)report + 1) : + *((u32 *)report + 1); +} + +static void oa_timestamp_clear(struct i915_perf_stream *stream, u32 *report) +{ + if (oa_report_header_64bit(stream)) + *(u64 *)&report[2] = 0; + else + report[1] = 0; +} + +static u32 oa_context_id(struct i915_perf_stream *stream, u32 *report) +{ + u32 ctx_id = oa_report_header_64bit(stream) ? report[4] : report[2]; + + return ctx_id & stream->specific_ctx_id_mask; +} + +static void oa_context_id_squash(struct i915_perf_stream *stream, u32 *report) +{ + if (oa_report_header_64bit(stream)) + report[4] = INVALID_CTX_ID; + else + report[2] = INVALID_CTX_ID; +} + /** * oa_buffer_check_unlocked - check for data and update tail ptr state * @stream: i915 stream instance @@ -509,21 +570,22 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) hw_tail -= gtt_offset; tail = hw_tail; - /* Walk the stream backward until we find a report with dword 0 - * & 1 not at 0. Since the circular buffer pointers progress by - * increments of 64 bytes and that reports can be up to 256 - * bytes long, we can't tell whether a report has fully landed - * in memory before the first 2 dwords of the following report - * have effectively landed. + /* Walk the stream backward until we find a report with report + * id and timestmap not at 0. Since the circular buffer pointers + * progress by increments of 64 bytes and that reports can be up + * to 256 bytes long, we can't tell whether a report has fully + * landed in memory before the report id and timestamp of the + * following report have effectively landed. * * This is assuming that the writes of the OA unit land in * memory in the order they were written to. * If not : (╯°□°)╯︵ ┻━┻ */ while (OA_TAKEN(tail, aged_tail) >= report_size) { - u32 *report32 = (void *)(stream->oa_buffer.vaddr + tail); + void *report = stream->oa_buffer.vaddr + tail; - if (report32[0] != 0 || report32[1] != 0) + if (oa_report_id(stream, report) || + oa_timestamp(stream, report)) break; tail = (tail - report_size) & (OA_BUFFER_SIZE - 1); @@ -702,7 +764,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, u8 *report = oa_buf_base + head; u32 *report32 = (void *)report; u32 ctx_id; - u32 reason; + u64 reason; /* * All the report sizes factor neatly into the buffer @@ -725,16 +787,12 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, * triggered this specific report (mostly timer * triggered or e.g. due to a context switch). * - * This field is never expected to be zero so we can - * check that the report isn't invalid before copying - * it to userspace... + * In MMIO triggered reports, some platforms do not set the + * reason bit in this field and it is valid to have a reason + * field of zero. */ - reason = ((report32[0] >> OAREPORT_REASON_SHIFT) & - (GRAPHICS_VER(stream->perf->i915) == 12 ? - OAREPORT_REASON_MASK_EXTENDED : - OAREPORT_REASON_MASK)); - - ctx_id = report32[2] & stream->specific_ctx_id_mask; + reason = oa_report_reason(stream, report); + ctx_id = oa_context_id(stream, report32); /* * Squash whatever is in the CTX_ID field if it's marked as @@ -744,9 +802,10 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, * Note: that we don't clear the valid_ctx_bit so userspace can * understand that the ID has been squashed by the kernel. */ - if (!(report32[0] & stream->perf->gen8_valid_ctx_bit) && - GRAPHICS_VER(stream->perf->i915) <= 11) - ctx_id = report32[2] = INVALID_CTX_ID; + if (oa_report_ctx_invalid(stream, report)) { + ctx_id = INVALID_CTX_ID; + oa_context_id_squash(stream, report32); + } /* * NB: For Gen 8 the OA unit no longer supports clock gating @@ -790,7 +849,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, */ if (stream->ctx && stream->specific_ctx_id != ctx_id) { - report32[2] = INVALID_CTX_ID; + oa_context_id_squash(stream, report32); } ret = append_oa_sample(stream, buf, count, offset, @@ -802,11 +861,11 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, } /* - * Clear out the first 2 dword as a mean to detect unlanded + * Clear out the report id and timestamp as a means to detect unlanded * reports. */ - report32[0] = 0; - report32[1] = 0; + oa_report_id_clear(stream, report32); + oa_timestamp_clear(stream, report32); } if (start_offset != *offset) { diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h index e7c83dcbec08..d8b92508a632 100644 --- a/drivers/gpu/drm/i915/i915_perf_types.h +++ b/drivers/gpu/drm/i915/i915_perf_types.h @@ -38,9 +38,15 @@ enum { PERF_GROUP_INVALID = U32_MAX, }; +enum report_header { + HDR_32_BIT = 0, + HDR_64_BIT, +}; + struct i915_oa_format { u32 format; int size; + enum report_header header; }; struct i915_oa_reg { -- cgit v1.2.3 From 3c67ce061b2136e0578734bae82fdf85c2c76217 Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Thu, 23 Mar 2023 15:58:57 -0700 Subject: drm/i915/perf: Handle non-power-of-2 reports Some of the newer OA formats are not powers of 2. For those formats, adjust the hw_tail accordingly when checking for new reports. v2: (Ashutosh) - Switch to OA_TAKEN for diff calculation - Use OA_BUFFER_SIZE instead of the vma size - Update comments Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20230323225901.3743681-8-umesh.nerlige.ramappa@intel.com --- drivers/gpu/drm/i915/i915_perf.c | 51 +++++++++++++++++++++------------------- 1 file changed, 27 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index d1f7266bec6d..8c4bcdce8019 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -534,6 +534,7 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) bool pollin; u32 hw_tail; u64 now; + u32 partial_report_size; /* We have to consider the (unlikely) possibility that read() errors * could result in an OA buffer reset which might reset the head and @@ -543,10 +544,15 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) hw_tail = stream->perf->ops.oa_hw_tail_read(stream); - /* The tail pointer increases in 64 byte increments, - * not in report_size steps... + /* The tail pointer increases in 64 byte increments, not in report_size + * steps. Also the report size may not be a power of 2. Compute + * potentially partially landed report in the OA buffer */ - hw_tail &= ~(report_size - 1); + partial_report_size = OA_TAKEN(hw_tail, stream->oa_buffer.tail); + partial_report_size %= report_size; + + /* Subtract partial amount off the tail */ + hw_tail = gtt_offset + OA_TAKEN(hw_tail, partial_report_size); now = ktime_get_mono_fast_ns(); @@ -669,6 +675,8 @@ static int append_oa_sample(struct i915_perf_stream *stream, { int report_size = stream->oa_buffer.format->size; struct drm_i915_perf_record_header header; + int report_size_partial; + u8 *oa_buf_end; header.type = DRM_I915_PERF_RECORD_SAMPLE; header.pad = 0; @@ -682,8 +690,20 @@ static int append_oa_sample(struct i915_perf_stream *stream, return -EFAULT; buf += sizeof(header); - if (copy_to_user(buf, report, report_size)) + oa_buf_end = stream->oa_buffer.vaddr + OA_BUFFER_SIZE; + report_size_partial = oa_buf_end - report; + + if (report_size_partial < report_size) { + if (copy_to_user(buf, report, report_size_partial)) + return -EFAULT; + buf += report_size_partial; + + if (copy_to_user(buf, stream->oa_buffer.vaddr, + report_size - report_size_partial)) + return -EFAULT; + } else if (copy_to_user(buf, report, report_size)) { return -EFAULT; + } (*offset) += header.size; @@ -747,12 +767,11 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, * An out of bounds or misaligned head or tail pointer implies a driver * bug since we validate + align the tail pointers we read from the * hardware and we are in full control of the head pointer which should - * only be incremented by multiples of the report size (notably also - * all a power of two). + * only be incremented by multiples of the report size. */ if (drm_WARN_ONCE(&uncore->i915->drm, - head > OA_BUFFER_SIZE || head % report_size || - tail > OA_BUFFER_SIZE || tail % report_size, + head > OA_BUFFER_SIZE || + tail > OA_BUFFER_SIZE, "Inconsistent OA buffer pointers: head = %u, tail = %u\n", head, tail)) return -EIO; @@ -766,22 +785,6 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, u32 ctx_id; u64 reason; - /* - * All the report sizes factor neatly into the buffer - * size so we never expect to see a report split - * between the beginning and end of the buffer. - * - * Given the initial alignment check a misalignment - * here would imply a driver bug that would result - * in an overrun. - */ - if (drm_WARN_ON(&uncore->i915->drm, - (OA_BUFFER_SIZE - head) < report_size)) { - drm_err(&uncore->i915->drm, - "Spurious OA head ptr: non-integral report offset\n"); - break; - } - /* * The reason field includes flags identifying what * triggered this specific report (mostly timer -- cgit v1.2.3 From c61d04c9eb4354980839cf938488ca703eba0f83 Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Thu, 23 Mar 2023 15:58:58 -0700 Subject: drm/i915/perf: Add engine class instance parameters to perf One or more engines map to a specific OA unit. All reports from these engines are captured in the OA buffer managed by this OA unit. Current i915 OA implementation supports only the OAG unit. OAG primarily caters to render engine, so i915 OA uses render as the default engine in the OA implementation. Since there are more OA units on newer hardware that map to other engines, allow user to pass engine class and instance to select and program specific OA units. UMD specific changes for GPUvis support: https://patchwork.freedesktop.org/patch/522827/?series=114023 https://patchwork.freedesktop.org/patch/522822/?series=114023 https://patchwork.freedesktop.org/patch/522826/?series=114023 https://patchwork.freedesktop.org/patch/522828/?series=114023 https://patchwork.freedesktop.org/patch/522816/?series=114023 https://patchwork.freedesktop.org/patch/522825/?series=114023 v2: (Ashutosh) - Clarify commit message - Add drm_dbg - Clarify uapi description v3: (Ashutosh) - Remove irrelevant info from the uapi comment v4: Ensure engine class:instance is passed together (Ashutosh) v5: Remove unnecessary quote (Ashutosh) Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20230323225901.3743681-9-umesh.nerlige.ramappa@intel.com --- drivers/gpu/drm/i915/i915_perf.c | 71 +++++++++++++++++++++++++--------------- include/uapi/drm/i915_drm.h | 19 +++++++++++ 2 files changed, 63 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 8c4bcdce8019..081d1dd743e0 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -4012,48 +4012,32 @@ static int read_properties_unlocked(struct i915_perf *perf, { struct drm_i915_gem_context_param_sseu user_sseu; u64 __user *uprop = uprops; + bool config_instance = false; + bool config_class = false; bool config_sseu = false; + u8 class, instance; u32 i; int ret; memset(props, 0, sizeof(struct perf_open_properties)); props->poll_oa_period = DEFAULT_POLL_PERIOD_NS; - if (!n_props) { - drm_dbg(&perf->i915->drm, - "No i915 perf properties given\n"); - return -EINVAL; - } - - /* At the moment we only support using i915-perf on the RCS. */ - props->engine = intel_engine_lookup_user(perf->i915, - I915_ENGINE_CLASS_RENDER, - 0); - if (!props->engine) { - drm_dbg(&perf->i915->drm, - "No RENDER-capable engines\n"); - return -EINVAL; - } - - if (!engine_supports_oa(props->engine)) { - drm_dbg(&perf->i915->drm, - "Engine not supported by OA %d:%d\n", - I915_ENGINE_CLASS_RENDER, 0); - return -EINVAL; - } - /* Considering that ID = 0 is reserved and assuming that we don't * (currently) expect any configurations to ever specify duplicate * values for a particular property ID then the last _PROP_MAX value is * one greater than the maximum number of properties we expect to get * from userspace. */ - if (n_props >= DRM_I915_PERF_PROP_MAX) { + if (!n_props || n_props >= DRM_I915_PERF_PROP_MAX) { drm_dbg(&perf->i915->drm, - "More i915 perf properties specified than exist\n"); + "Invalid number of i915 perf properties given\n"); return -EINVAL; } + /* Defaults when class:instance is not passed */ + class = I915_ENGINE_CLASS_RENDER; + instance = 0; + for (i = 0; i < n_props; i++) { u64 oa_period, oa_freq_hz; u64 id, value; @@ -4174,7 +4158,15 @@ static int read_properties_unlocked(struct i915_perf *perf, } props->poll_oa_period = value; break; - case DRM_I915_PERF_PROP_MAX: + case DRM_I915_PERF_PROP_OA_ENGINE_CLASS: + class = (u8)value; + config_class = true; + break; + case DRM_I915_PERF_PROP_OA_ENGINE_INSTANCE: + instance = (u8)value; + config_instance = true; + break; + default: MISSING_CASE(id); return -EINVAL; } @@ -4182,6 +4174,28 @@ static int read_properties_unlocked(struct i915_perf *perf, uprop += 2; } + if ((config_class && !config_instance) || + (config_instance && !config_class)) { + drm_dbg(&perf->i915->drm, + "OA engine-class and engine-instance parameters must be passed together\n"); + return -EINVAL; + } + + props->engine = intel_engine_lookup_user(perf->i915, class, instance); + if (!props->engine) { + drm_dbg(&perf->i915->drm, + "OA engine class and instance invalid %d:%d\n", + class, instance); + return -EINVAL; + } + + if (!engine_supports_oa(props->engine)) { + drm_dbg(&perf->i915->drm, + "Engine not supported by OA %d:%d\n", + class, instance); + return -EINVAL; + } + if (config_sseu) { ret = get_sseu_config(&props->sseu, props->engine, &user_sseu); if (ret) { @@ -5158,8 +5172,11 @@ int i915_perf_ioctl_version(void) * * 5: Add DRM_I915_PERF_PROP_POLL_OA_PERIOD parameter that controls the * interval for the hrtimer used to check for OA data. + * + * 6: Add DRM_I915_PERF_PROP_OA_ENGINE_CLASS and + * DRM_I915_PERF_PROP_OA_ENGINE_INSTANCE */ - return 5; + return 6; } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 5e458d6f2895..384e74cac539 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -2758,6 +2758,25 @@ enum drm_i915_perf_property_id { */ DRM_I915_PERF_PROP_POLL_OA_PERIOD, + /** + * Multiple engines may be mapped to the same OA unit. The OA unit is + * identified by class:instance of any engine mapped to it. + * + * This parameter specifies the engine class and must be passed along + * with DRM_I915_PERF_PROP_OA_ENGINE_INSTANCE. + * + * This property is available in perf revision 6. + */ + DRM_I915_PERF_PROP_OA_ENGINE_CLASS, + + /** + * This parameter specifies the engine instance and must be passed along + * with DRM_I915_PERF_PROP_OA_ENGINE_CLASS. + * + * This property is available in perf revision 6. + */ + DRM_I915_PERF_PROP_OA_ENGINE_INSTANCE, + DRM_I915_PERF_PROP_MAX /* non-ABI */ }; -- cgit v1.2.3 From 1cc064dce4ed0ff111b6d6cb06b3cccf1cba29f5 Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Thu, 23 Mar 2023 15:58:59 -0700 Subject: drm/i915/perf: Add support for OA media units MTL introduces additional OA units dedicated to media use cases. Add support for programming these OA units by passing the media engine class and instance parameters. UMD specific changes for GPUvis support: https://patchwork.freedesktop.org/patch/522827/?series=114023 https://patchwork.freedesktop.org/patch/522822/?series=114023 https://patchwork.freedesktop.org/patch/522826/?series=114023 https://patchwork.freedesktop.org/patch/522828/?series=114023 https://patchwork.freedesktop.org/patch/522816/?series=114023 https://patchwork.freedesktop.org/patch/522825/?series=114023 v2: (Ashutosh) - check for IP_VER(12, 70) instead of MTL - remove PERF_GROUP_OAG comment in mtl_oa_base - remove oa_buffer.group - use engine->oa_group->type in engine_supports_oa_format - remove fw_domains and use FORCEWAKE_ALL - remove MPES/MPEC comment - s/xehp/mtl/ in b counter validation function name - remove engine_supports_oa in __oa_engine_group - remove warn_ON from __oam_engine_group - refactor oa_init_groups and oa_init_regs - assign g->type correctly - use enum oa_type definition v3: (Ashutosh) - Drop oa_unit_functional as engine_supports_oa is enough v4: - s/DRM_DEBUG/drm_dbg/ Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20230323225901.3743681-10-umesh.nerlige.ramappa@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 2 + drivers/gpu/drm/i915/i915_pci.c | 1 + drivers/gpu/drm/i915/i915_perf.c | 184 +++++++++++++++++++++++++++---- drivers/gpu/drm/i915/i915_perf_oa_regs.h | 78 +++++++++++++ drivers/gpu/drm/i915/i915_perf_types.h | 30 +++++ drivers/gpu/drm/i915/intel_device_info.h | 1 + include/uapi/drm/i915_drm.h | 4 + 7 files changed, 278 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index eaf738698149..d796bdf41af9 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -905,6 +905,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, (INTEL_INFO(dev_priv)->has_oa_bpc_reporting) #define HAS_OA_SLICE_CONTRIB_LIMITS(dev_priv) \ (INTEL_INFO(dev_priv)->has_oa_slice_contrib_limits) +#define HAS_OAM(dev_priv) \ + (INTEL_INFO(dev_priv)->has_oam) /* * Set this flag, when platform requires 64K GTT page sizes or larger for diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 6b4a66734e09..c3231e323b98 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -1027,6 +1027,7 @@ static const struct intel_device_info adl_p_info = { .has_mslice_steering = 1, \ .has_oa_bpc_reporting = 1, \ .has_oa_slice_contrib_limits = 1, \ + .has_oam = 1, \ .has_rc6 = 1, \ .has_reset_engine = 1, \ .has_rps = 1, \ diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 081d1dd743e0..247dad57ab89 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -192,6 +192,7 @@ */ #include +#include #include #include @@ -326,6 +327,12 @@ static const struct i915_oa_format oa_formats[I915_OA_FORMAT_MAX] = { [I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 }, [I915_OAR_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 }, [I915_OA_FORMAT_A24u40_A14u32_B8_C8] = { 5, 256 }, + [I915_OAM_FORMAT_MPEC8u64_B8_C8] = { 1, 192, TYPE_OAM, HDR_64_BIT }, + [I915_OAM_FORMAT_MPEC8u32_B8_C8] = { 2, 128, TYPE_OAM, HDR_64_BIT }, +}; + +static const u32 mtl_oa_base[] = { + [PERF_GROUP_OAM_SAMEDIA_0] = 0x393000, }; #define SAMPLE_OA_REPORT (1<<0) @@ -418,11 +425,17 @@ static void free_oa_config_bo(struct i915_oa_config_bo *oa_bo) kfree(oa_bo); } +static inline const +struct i915_perf_regs *__oa_regs(struct i915_perf_stream *stream) +{ + return &stream->engine->oa_group->regs; +} + static u32 gen12_oa_hw_tail_read(struct i915_perf_stream *stream) { struct intel_uncore *uncore = stream->uncore; - return intel_uncore_read(uncore, GEN12_OAG_OATAILPTR) & + return intel_uncore_read(uncore, __oa_regs(stream)->oa_tail_ptr) & GEN12_OAG_OATAILPTR_MASK; } @@ -875,7 +888,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, i915_reg_t oaheadptr; oaheadptr = GRAPHICS_VER(stream->perf->i915) == 12 ? - GEN12_OAG_OAHEADPTR : GEN8_OAHEADPTR; + __oa_regs(stream)->oa_head_ptr : + GEN8_OAHEADPTR; spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); @@ -928,7 +942,8 @@ static int gen8_oa_read(struct i915_perf_stream *stream, return -EIO; oastatus_reg = GRAPHICS_VER(stream->perf->i915) == 12 ? - GEN12_OAG_OASTATUS : GEN8_OASTATUS; + __oa_regs(stream)->oa_status : + GEN8_OASTATUS; oastatus = intel_uncore_read(uncore, oastatus_reg); @@ -1637,6 +1652,11 @@ static bool engine_supports_oa(const struct intel_engine_cs *engine) return engine->oa_group; } +static bool engine_supports_oa_format(struct intel_engine_cs *engine, int type) +{ + return engine->oa_group && engine->oa_group->type == type; +} + static void i915_oa_stream_destroy(struct i915_perf_stream *stream) { struct i915_perf *perf = stream->perf; @@ -1788,8 +1808,8 @@ static void gen12_init_oa_buffer(struct i915_perf_stream *stream) spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); - intel_uncore_write(uncore, GEN12_OAG_OASTATUS, 0); - intel_uncore_write(uncore, GEN12_OAG_OAHEADPTR, + intel_uncore_write(uncore, __oa_regs(stream)->oa_status, 0); + intel_uncore_write(uncore, __oa_regs(stream)->oa_head_ptr, gtt_offset & GEN12_OAG_OAHEADPTR_MASK); stream->oa_buffer.head = gtt_offset; @@ -1801,9 +1821,9 @@ static void gen12_init_oa_buffer(struct i915_perf_stream *stream) * to enable proper functionality of the overflow * bit." */ - intel_uncore_write(uncore, GEN12_OAG_OABUFFER, gtt_offset | + intel_uncore_write(uncore, __oa_regs(stream)->oa_buffer, gtt_offset | OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT); - intel_uncore_write(uncore, GEN12_OAG_OATAILPTR, + intel_uncore_write(uncore, __oa_regs(stream)->oa_tail_ptr, gtt_offset & GEN12_OAG_OATAILPTR_MASK); /* Mark that we need updated tail pointers to read from... */ @@ -2563,7 +2583,8 @@ err_add_request: return err; } -static int gen8_configure_context(struct i915_gem_context *ctx, +static int gen8_configure_context(struct i915_perf_stream *stream, + struct i915_gem_context *ctx, struct flex *flex, unsigned int count) { struct i915_gem_engines_iter it; @@ -2704,7 +2725,7 @@ oa_configure_all_contexts(struct i915_perf_stream *stream, spin_unlock(&i915->gem.contexts.lock); - err = gen8_configure_context(ctx, regs, num_regs); + err = gen8_configure_context(stream, ctx, regs, num_regs); if (err) { i915_gem_context_put(ctx); return err; @@ -2749,6 +2770,9 @@ gen12_configure_all_contexts(struct i915_perf_stream *stream, }, }; + if (stream->engine->class != RENDER_CLASS) + return 0; + return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs), active); @@ -2878,7 +2902,7 @@ gen12_enable_metric_set(struct i915_perf_stream *stream, _MASKED_BIT_ENABLE(GEN12_DISABLE_DOP_GATING)); } - intel_uncore_write(uncore, GEN12_OAG_OA_DEBUG, + intel_uncore_write(uncore, __oa_regs(stream)->oa_debug, /* Disable clk ratio reports, like previous Gens. */ _MASKED_BIT_ENABLE(GEN12_OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS | GEN12_OAG_OA_DEBUG_INCLUDE_CLK_RATIO) | @@ -2888,7 +2912,7 @@ gen12_enable_metric_set(struct i915_perf_stream *stream, */ oag_report_ctx_switches(stream)); - intel_uncore_write(uncore, GEN12_OAG_OAGLBCTXCTRL, periodic ? + intel_uncore_write(uncore, __oa_regs(stream)->oa_ctx_ctrl, periodic ? (GEN12_OAG_OAGLBCTXCTRL_COUNTER_RESUME | GEN12_OAG_OAGLBCTXCTRL_TIMER_ENABLE | (period_exponent << GEN12_OAG_OAGLBCTXCTRL_TIMER_PERIOD_SHIFT)) @@ -3042,8 +3066,8 @@ static void gen8_oa_enable(struct i915_perf_stream *stream) static void gen12_oa_enable(struct i915_perf_stream *stream) { - struct intel_uncore *uncore = stream->uncore; - u32 report_format = stream->oa_buffer.format->format; + const struct i915_perf_regs *regs; + u32 val; /* * If we don't want OA reports from the OA buffer, then we don't even @@ -3054,9 +3078,11 @@ static void gen12_oa_enable(struct i915_perf_stream *stream) gen12_init_oa_buffer(stream); - intel_uncore_write(uncore, GEN12_OAG_OACONTROL, - (report_format << GEN12_OAG_OACONTROL_OA_COUNTER_FORMAT_SHIFT) | - GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE); + regs = __oa_regs(stream); + val = (stream->oa_buffer.format->format << regs->oa_ctrl_counter_format_shift) | + GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE; + + intel_uncore_write(stream->uncore, regs->oa_ctrl, val); } /** @@ -3108,9 +3134,9 @@ static void gen12_oa_disable(struct i915_perf_stream *stream) { struct intel_uncore *uncore = stream->uncore; - intel_uncore_write(uncore, GEN12_OAG_OACONTROL, 0); + intel_uncore_write(uncore, __oa_regs(stream)->oa_ctrl, 0); if (intel_wait_for_register(uncore, - GEN12_OAG_OACONTROL, + __oa_regs(stream)->oa_ctrl, GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE, 0, 50)) drm_err(&stream->perf->i915->drm, @@ -4011,6 +4037,7 @@ static int read_properties_unlocked(struct i915_perf *perf, struct perf_open_properties *props) { struct drm_i915_gem_context_param_sseu user_sseu; + const struct i915_oa_format *f; u64 __user *uprop = uprops; bool config_instance = false; bool config_class = false; @@ -4196,6 +4223,15 @@ static int read_properties_unlocked(struct i915_perf *perf, return -EINVAL; } + i = array_index_nospec(props->oa_format, I915_OA_FORMAT_MAX); + f = &perf->oa_formats[i]; + if (!engine_supports_oa_format(props->engine, f->type)) { + drm_dbg(&perf->i915->drm, + "Invalid OA format %d for class %d\n", + f->type, props->engine->class); + return -EINVAL; + } + if (config_sseu) { ret = get_sseu_config(&props->sseu, props->engine, &user_sseu); if (ret) { @@ -4376,6 +4412,14 @@ static const struct i915_range gen12_oa_b_counters[] = { {} }; +static const struct i915_range mtl_oam_b_counters[] = { + { .start = 0x393000, .end = 0x39301c }, /* GEN12_OAM_STARTTRIG1[1-8] */ + { .start = 0x393020, .end = 0x39303c }, /* GEN12_OAM_REPORTTRIG1[1-8] */ + { .start = 0x393040, .end = 0x39307c }, /* GEN12_OAM_CEC[0-7][0-1] */ + { .start = 0x393200, .end = 0x39323C }, /* MPES[0-7] */ + {} +}; + static const struct i915_range xehp_oa_b_counters[] = { { .start = 0xdc48, .end = 0xdc48 }, /* OAA_ENABLE_REG */ { .start = 0xdd00, .end = 0xdd48 }, /* OAG_LCE0_0 - OAA_LENABLE_REG */ @@ -4429,6 +4473,8 @@ static const struct i915_range mtl_oa_mux_regs[] = { { .start = 0x0d0c, .end = 0x0d2c }, /* NOA_CONFIG[0-8] */ { .start = 0x9840, .end = 0x9840 }, /* GDT_CHICKEN_BITS */ { .start = 0x9884, .end = 0x9888 }, /* NOA_WRITE */ + { .start = 0x38d100, .end = 0x38d114}, /* VISACTL */ + {} }; static bool gen7_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr) @@ -4466,10 +4512,20 @@ static bool gen12_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr) return reg_in_range_table(addr, gen12_oa_b_counters); } +static bool mtl_is_valid_oam_b_counter_addr(struct i915_perf *perf, u32 addr) +{ + if (HAS_OAM(perf->i915) && + GRAPHICS_VER_FULL(perf->i915) >= IP_VER(12, 70)) + return reg_in_range_table(addr, mtl_oam_b_counters); + + return false; +} + static bool xehp_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr) { return reg_in_range_table(addr, xehp_oa_b_counters) || - reg_in_range_table(addr, gen12_oa_b_counters); + reg_in_range_table(addr, gen12_oa_b_counters) || + mtl_is_valid_oam_b_counter_addr(perf, addr); } static bool gen12_is_valid_mux_addr(struct i915_perf *perf, u32 addr) @@ -4839,12 +4895,86 @@ static u32 num_perf_groups_per_gt(struct intel_gt *gt) return 1; } +static u32 __oam_engine_group(struct intel_engine_cs *engine) +{ + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 70)) { + /* + * There's 1 SAMEDIA gt and 1 OAM per SAMEDIA gt. All media slices + * within the gt use the same OAM. All MTL SKUs list 1 SA MEDIA. + */ + drm_WARN_ON(&engine->i915->drm, + engine->gt->type != GT_MEDIA); + + return PERF_GROUP_OAM_SAMEDIA_0; + } + + return PERF_GROUP_INVALID; +} + static u32 __oa_engine_group(struct intel_engine_cs *engine) { - if (engine->class == RENDER_CLASS) + switch (engine->class) { + case RENDER_CLASS: return PERF_GROUP_OAG; - else + + case VIDEO_DECODE_CLASS: + case VIDEO_ENHANCEMENT_CLASS: + return __oam_engine_group(engine); + + default: return PERF_GROUP_INVALID; + } +} + +static struct i915_perf_regs __oam_regs(u32 base) +{ + return (struct i915_perf_regs) { + base, + GEN12_OAM_HEAD_POINTER(base), + GEN12_OAM_TAIL_POINTER(base), + GEN12_OAM_BUFFER(base), + GEN12_OAM_CONTEXT_CONTROL(base), + GEN12_OAM_CONTROL(base), + GEN12_OAM_DEBUG(base), + GEN12_OAM_STATUS(base), + GEN12_OAM_CONTROL_COUNTER_FORMAT_SHIFT, + }; +} + +static struct i915_perf_regs __oag_regs(void) +{ + return (struct i915_perf_regs) { + 0, + GEN12_OAG_OAHEADPTR, + GEN12_OAG_OATAILPTR, + GEN12_OAG_OABUFFER, + GEN12_OAG_OAGLBCTXCTRL, + GEN12_OAG_OACONTROL, + GEN12_OAG_OA_DEBUG, + GEN12_OAG_OASTATUS, + GEN12_OAG_OACONTROL_OA_COUNTER_FORMAT_SHIFT, + }; +} + +static void oa_init_groups(struct intel_gt *gt) +{ + int i, num_groups = gt->perf.num_perf_groups; + + for (i = 0; i < num_groups; i++) { + struct i915_perf_group *g = >->perf.group[i]; + + /* Fused off engines can result in a group with num_engines == 0 */ + if (g->num_engines == 0) + continue; + + if (i == PERF_GROUP_OAG && gt->type != GT_MEDIA) { + g->regs = __oag_regs(); + g->type = TYPE_OAG; + } else if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) { + g->regs = __oam_regs(mtl_oa_base[i]); + g->type = TYPE_OAM; + } + } } static int oa_init_gt(struct intel_gt *gt) @@ -4871,6 +5001,8 @@ static int oa_init_gt(struct intel_gt *gt) gt->perf.num_perf_groups = num_groups; gt->perf.group = g; + oa_init_groups(gt); + return 0; } @@ -4928,9 +5060,15 @@ static void oa_init_supported_formats(struct i915_perf *perf) break; case INTEL_DG2: + oa_format_add(perf, I915_OAR_FORMAT_A32u40_A4u32_B8_C8); + oa_format_add(perf, I915_OA_FORMAT_A24u40_A14u32_B8_C8); + break; + case INTEL_METEORLAKE: oa_format_add(perf, I915_OAR_FORMAT_A32u40_A4u32_B8_C8); oa_format_add(perf, I915_OA_FORMAT_A24u40_A14u32_B8_C8); + oa_format_add(perf, I915_OAM_FORMAT_MPEC8u64_B8_C8); + oa_format_add(perf, I915_OAM_FORMAT_MPEC8u32_B8_C8); break; default: @@ -5175,8 +5313,10 @@ int i915_perf_ioctl_version(void) * * 6: Add DRM_I915_PERF_PROP_OA_ENGINE_CLASS and * DRM_I915_PERF_PROP_OA_ENGINE_INSTANCE + * + * 7: Add support for video decode and enhancement classes. */ - return 6; + return 7; } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/i915_perf_oa_regs.h b/drivers/gpu/drm/i915/i915_perf_oa_regs.h index 381d94101610..ba103875e19f 100644 --- a/drivers/gpu/drm/i915/i915_perf_oa_regs.h +++ b/drivers/gpu/drm/i915/i915_perf_oa_regs.h @@ -138,4 +138,82 @@ #define GEN12_SQCNT1_PMON_ENABLE REG_BIT(30) #define GEN12_SQCNT1_OABPC REG_BIT(29) +/* Gen12 OAM unit */ +#define GEN12_OAM_HEAD_POINTER_OFFSET (0x1a0) +#define GEN12_OAM_HEAD_POINTER_MASK 0xffffffc0 + +#define GEN12_OAM_TAIL_POINTER_OFFSET (0x1a4) +#define GEN12_OAM_TAIL_POINTER_MASK 0xffffffc0 + +#define GEN12_OAM_BUFFER_OFFSET (0x1a8) +#define GEN12_OAM_BUFFER_SIZE_MASK (0x7) +#define GEN12_OAM_BUFFER_SIZE_SHIFT (3) +#define GEN12_OAM_BUFFER_MEMORY_SELECT REG_BIT(0) /* 0: PPGTT, 1: GGTT */ + +#define GEN12_OAM_CONTEXT_CONTROL_OFFSET (0x1bc) +#define GEN12_OAM_CONTEXT_CONTROL_TIMER_PERIOD_SHIFT 2 +#define GEN12_OAM_CONTEXT_CONTROL_TIMER_ENABLE REG_BIT(1) +#define GEN12_OAM_CONTEXT_CONTROL_COUNTER_RESUME REG_BIT(0) + +#define GEN12_OAM_CONTROL_OFFSET (0x194) +#define GEN12_OAM_CONTROL_COUNTER_FORMAT_SHIFT 1 +#define GEN12_OAM_CONTROL_COUNTER_ENABLE REG_BIT(0) + +#define GEN12_OAM_DEBUG_OFFSET (0x198) +#define GEN12_OAM_DEBUG_BUFFER_SIZE_SELECT REG_BIT(12) +#define GEN12_OAM_DEBUG_INCLUDE_CLK_RATIO REG_BIT(6) +#define GEN12_OAM_DEBUG_DISABLE_CLK_RATIO_REPORTS REG_BIT(5) +#define GEN12_OAM_DEBUG_DISABLE_GO_1_0_REPORTS REG_BIT(2) +#define GEN12_OAM_DEBUG_DISABLE_CTX_SWITCH_REPORTS REG_BIT(1) + +#define GEN12_OAM_STATUS_OFFSET (0x19c) +#define GEN12_OAM_STATUS_COUNTER_OVERFLOW REG_BIT(2) +#define GEN12_OAM_STATUS_BUFFER_OVERFLOW REG_BIT(1) +#define GEN12_OAM_STATUS_REPORT_LOST REG_BIT(0) + +#define GEN12_OAM_MMIO_TRG_OFFSET (0x1d0) + +#define GEN12_OAM_MMIO_TRG(base) \ + _MMIO((base) + GEN12_OAM_MMIO_TRG_OFFSET) + +#define GEN12_OAM_HEAD_POINTER(base) \ + _MMIO((base) + GEN12_OAM_HEAD_POINTER_OFFSET) +#define GEN12_OAM_TAIL_POINTER(base) \ + _MMIO((base) + GEN12_OAM_TAIL_POINTER_OFFSET) +#define GEN12_OAM_BUFFER(base) \ + _MMIO((base) + GEN12_OAM_BUFFER_OFFSET) +#define GEN12_OAM_CONTEXT_CONTROL(base) \ + _MMIO((base) + GEN12_OAM_CONTEXT_CONTROL_OFFSET) +#define GEN12_OAM_CONTROL(base) \ + _MMIO((base) + GEN12_OAM_CONTROL_OFFSET) +#define GEN12_OAM_DEBUG(base) \ + _MMIO((base) + GEN12_OAM_DEBUG_OFFSET) +#define GEN12_OAM_STATUS(base) \ + _MMIO((base) + GEN12_OAM_STATUS_OFFSET) + +#define GEN12_OAM_CEC0_0_OFFSET (0x40) +#define GEN12_OAM_CEC7_1_OFFSET (0x7c) +#define GEN12_OAM_CEC0_0(base) \ + _MMIO((base) + GEN12_OAM_CEC0_0_OFFSET) +#define GEN12_OAM_CEC7_1(base) \ + _MMIO((base) + GEN12_OAM_CEC7_1_OFFSET) + +#define GEN12_OAM_STARTTRIG1_OFFSET (0x00) +#define GEN12_OAM_STARTTRIG8_OFFSET (0x1c) +#define GEN12_OAM_STARTTRIG1(base) \ + _MMIO((base) + GEN12_OAM_STARTTRIG1_OFFSET) +#define GEN12_OAM_STARTTRIG8(base) \ + _MMIO((base) + GEN12_OAM_STARTTRIG8_OFFSET) + +#define GEN12_OAM_REPORTTRIG1_OFFSET (0x20) +#define GEN12_OAM_REPORTTRIG8_OFFSET (0x3c) +#define GEN12_OAM_REPORTTRIG1(base) \ + _MMIO((base) + GEN12_OAM_REPORTTRIG1_OFFSET) +#define GEN12_OAM_REPORTTRIG8(base) \ + _MMIO((base) + GEN12_OAM_REPORTTRIG8_OFFSET) + +#define GEN12_OAM_PERF_COUNTER_B0_OFFSET (0x84) +#define GEN12_OAM_PERF_COUNTER_B(base, idx) \ + _MMIO((base) + GEN12_OAM_PERF_COUNTER_B0_OFFSET + 4 * (idx)) + #endif /* __INTEL_PERF_OA_REGS__ */ diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h index d8b92508a632..66dd5f74de05 100644 --- a/drivers/gpu/drm/i915/i915_perf_types.h +++ b/drivers/gpu/drm/i915/i915_perf_types.h @@ -20,6 +20,7 @@ #include "gt/intel_engine_types.h" #include "gt/intel_sseu.h" #include "i915_reg_defs.h" +#include "intel_uncore.h" #include "intel_wakeref.h" struct drm_i915_private; @@ -33,6 +34,7 @@ struct intel_engine_cs; enum { PERF_GROUP_OAG = 0, + PERF_GROUP_OAM_SAMEDIA_0 = 0, PERF_GROUP_MAX, PERF_GROUP_INVALID = U32_MAX, @@ -43,9 +45,27 @@ enum report_header { HDR_64_BIT, }; +struct i915_perf_regs { + u32 base; + i915_reg_t oa_head_ptr; + i915_reg_t oa_tail_ptr; + i915_reg_t oa_buffer; + i915_reg_t oa_ctx_ctrl; + i915_reg_t oa_ctrl; + i915_reg_t oa_debug; + i915_reg_t oa_status; + u32 oa_ctrl_counter_format_shift; +}; + +enum oa_type { + TYPE_OAG, + TYPE_OAM, +}; + struct i915_oa_format { u32 format; int size; + int type; enum report_header header; }; @@ -416,6 +436,16 @@ struct i915_perf_group { * @num_engines: The number of engines using this OA unit. */ u32 num_engines; + + /* + * @regs: OA buffer register group for programming the OA unit. + */ + struct i915_perf_regs regs; + + /* + * @type: Type of OA unit - OAM, OAG etc. + */ + enum oa_type type; }; struct i915_perf_gt { diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index d588e5fd2eea..8281513fe072 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -166,6 +166,7 @@ enum intel_ppgtt_type { func(has_mslice_steering); \ func(has_oa_bpc_reporting); \ func(has_oa_slice_contrib_limits); \ + func(has_oam); \ func(has_one_eu_per_fuse_bit); \ func(has_pxp); \ func(has_rc6); \ diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 384e74cac539..dba7c5a5b25e 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -2676,6 +2676,10 @@ enum drm_i915_oa_format { I915_OAR_FORMAT_A32u40_A4u32_B8_C8, I915_OA_FORMAT_A24u40_A14u32_B8_C8, + /* MTL OAM */ + I915_OAM_FORMAT_MPEC8u64_B8_C8, + I915_OAM_FORMAT_MPEC8u32_B8_C8, + I915_OA_FORMAT_MAX /* non-ABI */ }; -- cgit v1.2.3 From 94d82e95219a3c581435480ab395eb04f569635f Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Thu, 23 Mar 2023 15:59:00 -0700 Subject: drm/i915/perf: Pass i915 object to perf revision helper In some cases, perf revision may rely on specific steppings of a platform. To determine the platform, pass i915 object to the perf revision helper. Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20230323225901.3743681-11-umesh.nerlige.ramappa@intel.com --- drivers/gpu/drm/i915/i915_getparam.c | 2 +- drivers/gpu/drm/i915/i915_perf.c | 2 +- drivers/gpu/drm/i915/i915_perf.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c index 61ef2d9cfa62..2238e096c957 100644 --- a/drivers/gpu/drm/i915/i915_getparam.c +++ b/drivers/gpu/drm/i915/i915_getparam.c @@ -173,7 +173,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data, value = INTEL_INFO(i915)->has_coherent_ggtt; break; case I915_PARAM_PERF_REVISION: - value = i915_perf_ioctl_version(); + value = i915_perf_ioctl_version(i915); break; case I915_PARAM_OA_TIMESTAMP_FREQUENCY: value = i915_perf_oa_timestamp_frequency(i915); diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 247dad57ab89..18afa76653b7 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -5289,7 +5289,7 @@ void i915_perf_fini(struct drm_i915_private *i915) * * This version number is used by userspace to detect available features. */ -int i915_perf_ioctl_version(void) +int i915_perf_ioctl_version(struct drm_i915_private *i915) { /* * 1: Initial version diff --git a/drivers/gpu/drm/i915/i915_perf.h b/drivers/gpu/drm/i915/i915_perf.h index 253637651d5e..accf626f2b13 100644 --- a/drivers/gpu/drm/i915/i915_perf.h +++ b/drivers/gpu/drm/i915/i915_perf.h @@ -22,7 +22,7 @@ int i915_perf_init(struct drm_i915_private *i915); void i915_perf_fini(struct drm_i915_private *i915); void i915_perf_register(struct drm_i915_private *i915); void i915_perf_unregister(struct drm_i915_private *i915); -int i915_perf_ioctl_version(void); +int i915_perf_ioctl_version(struct drm_i915_private *i915); int i915_perf_sysctl_register(void); void i915_perf_sysctl_unregister(void); -- cgit v1.2.3 From 86e11e30120387cb5c24bdb3a169a2135973a0a8 Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Thu, 23 Mar 2023 15:59:01 -0700 Subject: drm/i915/perf: Wa_14017512683: Disable OAM if media C6 is enabled in BIOS OAM does not work with media C6 enabled on some steppings of MTL. Disable OAM if we detect that media C6 was enabled in bios. v2: (Ashutosh) - Remove drm_notice from the driver load path - Log a drm_err when opening an OAM stream on affected steppings v3: - Initialize the engine group even if mc6 is enabled (Ashutosh) - Checkpatch fix Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20230323225901.3743681-12-umesh.nerlige.ramappa@intel.com --- drivers/gpu/drm/i915/i915_perf.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 18afa76653b7..c035dbb84c9b 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -209,6 +209,7 @@ #include "gt/intel_gt_regs.h" #include "gt/intel_lrc.h" #include "gt/intel_lrc_reg.h" +#include "gt/intel_rc6.h" #include "gt/intel_ring.h" #include "gt/uc/intel_guc_slpc.h" @@ -4223,6 +4224,19 @@ static int read_properties_unlocked(struct i915_perf *perf, return -EINVAL; } + /* + * Wa_14017512683: mtl[a0..c0): Use of OAM must be preceded with Media + * C6 disable in BIOS. Fail if Media C6 is enabled on steppings where OAM + * does not work as expected. + */ + if (IS_MTL_MEDIA_STEP(props->engine->i915, STEP_A0, STEP_C0) && + props->engine->oa_group->type == TYPE_OAM && + intel_check_bios_c6_setup(&props->engine->gt->rc6)) { + drm_dbg(&perf->i915->drm, + "OAM requires media C6 to be disabled in BIOS\n"); + return -EINVAL; + } + i = array_index_nospec(props->oa_format, I915_OA_FORMAT_MAX); f = &perf->oa_formats[i]; if (!engine_supports_oa_format(props->engine, f->type)) { @@ -5316,6 +5330,23 @@ int i915_perf_ioctl_version(struct drm_i915_private *i915) * * 7: Add support for video decode and enhancement classes. */ + + /* + * Wa_14017512683: mtl[a0..c0): Use of OAM must be preceded with Media + * C6 disable in BIOS. If Media C6 is enabled in BIOS, return version 6 + * to indicate that OA media is not supported. + */ + if (IS_MTL_MEDIA_STEP(i915, STEP_A0, STEP_C0)) { + struct intel_gt *gt; + int i; + + for_each_gt(gt, i915, i) { + if (gt->type == GT_MEDIA && + intel_check_bios_c6_setup(>->rc6)) + return 6; + } + } + return 7; } -- cgit v1.2.3 From 5dff5d092ba6c5485aac1467dad938c74ba6ed57 Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Fri, 24 Mar 2023 14:39:18 -0700 Subject: drm/i915/mtl: Disable C6 on MTL A0 for media Earlier merge dropped an if block when applying the patch - "drm/i915/mtl: Synchronize i915/BIOS on C6 enabling". Bring back the if block as the check is required by - "drm/i915/mtl: Disable MC6 for MTL A step" to disable C6 on media for A0 stepping. Fixes: 3735040978a4 ("drm/i915/mtl: Synchronize i915/BIOS on C6 enabling") Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Vinay Belgaumkar Link: https://patchwork.freedesktop.org/patch/msgid/20230324213918.75212-1-umesh.nerlige.ramappa@intel.com --- drivers/gpu/drm/i915/gt/intel_rc6.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index fe788014b6fa..a0a0c57b2d8c 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -525,6 +525,13 @@ static bool rc6_supported(struct intel_rc6 *rc6) return false; } + if (IS_MTL_MEDIA_STEP(gt->i915, STEP_A0, STEP_B0) && + gt->type == GT_MEDIA) { + drm_notice(&i915->drm, + "Media RC6 disabled on A step\n"); + return false; + } + return true; } -- cgit v1.2.3 From de4149730d9d72f50d4e6dfedad0d11b1df05b7e Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 27 Mar 2023 21:55:46 +0200 Subject: drm/i915: Sanitycheck MMIO access early in driver load We occasionally see the PCI device in a non-accessible state at the point the driver is loaded. When this happens, all BAR accesses will read back as 0xFFFFFFFF. Rather than reading registers and misinterpreting their (invalid) values, let's specifically check for 0xFFFFFFFF in a register that cannot have that value to see if the device is accessible. Signed-off-by: Matt Roper Cc: Mika Kuoppala Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20230327195547.356584-2-andi.shyti@linux.intel.com --- drivers/gpu/drm/i915/intel_uncore.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 7894447c2858..67f57dde8672 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -2602,11 +2602,45 @@ static int uncore_forcewake_init(struct intel_uncore *uncore) return 0; } +static int sanity_check_mmio_access(struct intel_uncore *uncore) +{ + struct drm_i915_private *i915 = uncore->i915; + + if (GRAPHICS_VER(i915) < 8) + return 0; + + /* + * Sanitycheck that MMIO access to the device is working properly. If + * the CPU is unable to communcate with a PCI device, BAR reads will + * return 0xFFFFFFFF. Let's make sure the device isn't in this state + * before we start trying to access registers. + * + * We use the primary GT's forcewake register as our guinea pig since + * it's been around since HSW and it's a masked register so the upper + * 16 bits can never read back as 1's if device access is operating + * properly. + * + * If MMIO isn't working, we'll wait up to 2 seconds to see if it + * recovers, then give up. + */ +#define COND (__raw_uncore_read32(uncore, FORCEWAKE_MT) != ~0) + if (wait_for(COND, 2000) == -ETIMEDOUT) { + drm_err(&i915->drm, "Device is non-operational; MMIO access returns 0xFFFFFFFF!\n"); + return -EIO; + } + + return 0; +} + int intel_uncore_init_mmio(struct intel_uncore *uncore) { struct drm_i915_private *i915 = uncore->i915; int ret; + ret = sanity_check_mmio_access(uncore); + if (ret) + return ret; + /* * The boot firmware initializes local memory and assesses its health. * If memory training fails, the punit will have been instructed to -- cgit v1.2.3 From fdd9b7dcf1ad7115b2d997e047e8e978c474736b Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 27 Mar 2023 21:55:47 +0200 Subject: drm/i915: Check for unreliable MMIO during forcewake Although we now sanitycheck MMIO access during driver load to make sure the MMIO BAR isn't returning all 0xFFFFFFFF, there have been a few cases where (temporarily?) unreliable MMIO access has happened after GPU resets or power events. We'll often notice this on our next GT register access since forcewake handling will fail; let's change our handling slightly so that when this happens we print a more meaningful message clarifying that the problem is the MMIO access, not forcewake specifically. Signed-off-by: Matt Roper Cc: Mika Kuoppala Reviewed-by: Andi Shyti Reviewed-by: Andrzej Hajda Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20230327195547.356584-3-andi.shyti@linux.intel.com --- drivers/gpu/drm/i915/intel_uncore.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 67f57dde8672..242a8508f366 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -178,12 +178,19 @@ wait_ack_set(const struct intel_uncore_forcewake_domain *d, static inline void fw_domain_wait_ack_clear(const struct intel_uncore_forcewake_domain *d) { - if (wait_ack_clear(d, FORCEWAKE_KERNEL)) { + if (!wait_ack_clear(d, FORCEWAKE_KERNEL)) + return; + + if (fw_ack(d) == ~0) + drm_err(&d->uncore->i915->drm, + "%s: MMIO unreliable (forcewake register returns 0xFFFFFFFF)!\n", + intel_uncore_forcewake_domain_to_str(d->id)); + else drm_err(&d->uncore->i915->drm, "%s: timed out waiting for forcewake ack to clear.\n", intel_uncore_forcewake_domain_to_str(d->id)); - add_taint_for_CI(d->uncore->i915, TAINT_WARN); /* CI now unreliable */ - } + + add_taint_for_CI(d->uncore->i915, TAINT_WARN); /* CI now unreliable */ } enum ack_type { -- cgit v1.2.3 From cdf7911f7dbcb37228409a63bf75630776c45a15 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Mon, 13 Mar 2023 13:55:56 -0700 Subject: drm/i915/huc: Cancel HuC delayed load timer on reset. In the rare case where we do a full GT reset after starting the HuC load and before it completes (which basically boils down to i915 hanging during init), we need to cancel the delayed load fence, as it will be re-initialized in the post-reset recovery. Fixes: 27536e03271d ("drm/i915/huc: track delayed HuC load with a fence") Signed-off-by: Daniele Ceraolo Spurio Cc: Alan Previn Reviewed-by: Alan Previn Link: https://patchwork.freedesktop.org/patch/msgid/20230313205556.1174503-1-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_huc.c | 7 +++++++ drivers/gpu/drm/i915/gt/uc/intel_huc.h | 7 +------ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c index 72884e21470b..aefdaa62da99 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c @@ -241,6 +241,13 @@ static void delayed_huc_load_fini(struct intel_huc *huc) i915_sw_fence_fini(&huc->delayed_load.fence); } +int intel_huc_sanitize(struct intel_huc *huc) +{ + delayed_huc_load_complete(huc); + intel_uc_fw_sanitize(&huc->fw); + return 0; +} + static bool vcs_supported(struct intel_gt *gt) { intel_engine_mask_t mask = gt->info.engine_mask; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.h b/drivers/gpu/drm/i915/gt/uc/intel_huc.h index 52db03620c60..db555b3c1f56 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.h @@ -41,6 +41,7 @@ struct intel_huc { } delayed_load; }; +int intel_huc_sanitize(struct intel_huc *huc); void intel_huc_init_early(struct intel_huc *huc); int intel_huc_init(struct intel_huc *huc); void intel_huc_fini(struct intel_huc *huc); @@ -54,12 +55,6 @@ bool intel_huc_is_authenticated(struct intel_huc *huc); void intel_huc_register_gsc_notifier(struct intel_huc *huc, struct bus_type *bus); void intel_huc_unregister_gsc_notifier(struct intel_huc *huc, struct bus_type *bus); -static inline int intel_huc_sanitize(struct intel_huc *huc) -{ - intel_uc_fw_sanitize(&huc->fw); - return 0; -} - static inline bool intel_huc_is_supported(struct intel_huc *huc) { return intel_uc_fw_is_supported(&huc->fw); -- cgit v1.2.3 From 625af47255d9b30e22d6c98b7f5e97adc903b98e Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Thu, 23 Mar 2023 16:18:56 -0700 Subject: drm/i915: limit double GT reset to pre-MTL Commit 3db9d590557d ("drm/i915/gt: Reset twice") modified the code to always hit the GDRST register twice when doing a reset, with the reported aim to fix invalid post-reset engine state on some platforms (Jasperlake being the only one actually mentioned). This is a problem on MTL, due to the fact that we have to apply a time consuming WA (coming in the next patch) every time we hit the GDRST register in a way that can include the GSC engine. Even post MTL, the expectation is that we'll have some work to do before and after hitting the GDRST if the GSC is involved. Since the issue requiring the double reset seems to be limited to older platforms, instead of trying to handle the double-reset on MTL and future platforms it is just easier to turn it off. The default on MTL is also for GuC to own engine reset, with i915 only covering full-GT reset. Signed-off-by: Daniele Ceraolo Spurio Cc: Chris Wilson Cc: Andi Shyti Cc: Mika Kuoppala Cc: Gwan-gyeong Mun Cc: John Harrison Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20230323231857.2194435-1-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/i915/gt/intel_reset.c | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 0bb9094fdacd..2c3463f77e5c 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -268,9 +268,27 @@ out: static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask) { struct intel_uncore *uncore = gt->uncore; - int loops = 2; + int loops; int err; + /* + * On some platforms, e.g. Jasperlake, we see that the engine register + * state is not cleared until shortly after GDRST reports completion, + * causing a failure as we try to immediately resume while the internal + * state is still in flux. If we immediately repeat the reset, the + * second reset appears to serialise with the first, and since it is a + * no-op, the registers should retain their reset value. However, there + * is still a concern that upon leaving the second reset, the internal + * engine state is still in flux and not ready for resuming. + * + * Starting on MTL, there are some prep steps that we need to do when + * resetting some engines that need to be applied every time we write to + * GEN6_GDRST. As those are time consuming (tens of ms), we don't want + * to perform that twice, so, since the Jasperlake issue hasn't been + * observed on MTL, we avoid repeating the reset on newer platforms. + */ + loops = GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 70) ? 2 : 1; + /* * GEN6_GDRST is not in the gt power well, no need to check * for fifo space for the write or forcewake the chip for @@ -279,20 +297,7 @@ static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask) do { intel_uncore_write_fw(uncore, GEN6_GDRST, hw_domain_mask); - /* - * Wait for the device to ack the reset requests. - * - * On some platforms, e.g. Jasperlake, we see that the - * engine register state is not cleared until shortly after - * GDRST reports completion, causing a failure as we try - * to immediately resume while the internal state is still - * in flux. If we immediately repeat the reset, the second - * reset appears to serialise with the first, and since - * it is a no-op, the registers should retain their reset - * value. However, there is still a concern that upon - * leaving the second reset, the internal engine state - * is still in flux and not ready for resuming. - */ + /* Wait for the device to ack the reset requests. */ err = __intel_wait_for_register_fw(uncore, GEN6_GDRST, hw_domain_mask, 0, 2000, 0, -- cgit v1.2.3 From b7d70b8b06edf25c4b7526e20f5b3d11175cab81 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Thu, 23 Mar 2023 16:18:57 -0700 Subject: drm/i915/gsc: implement wa 14015076503 The WA states that we need to alert the GSC FW before doing a GSC engine reset and then wait for 200ms. The GuC owns engine reset, so on the i915 side we only need to apply this for full GT reset. Given that we do full GT resets in the resume paths to cleanup the HW state and that a long wait in those scenarios would not be acceptable, a faster path has been introduced where, if the GSC is idle, we try first to individually reset the GuC and all engines except the GSC and only fall back to full reset if that fails. Note: according to the WA specs, if the GSC is idle it should be possible to only wait for the uC wakeup time (~15ms) instead of the whole 200ms. However, the GSC FW team have mentioned that the wakeup time can change based on other things going on in the HW and pcode, so a good security margin would be required. Given that when the GSC is idle we already skip the wait & reset entirely and that this reduced wait would still likely be too long to use in resume paths, it's not worth adding support for this reduced wait. v2: add comment to explain why it is safe to skip the GSC reset (John) Signed-off-by: Daniele Ceraolo Spurio Cc: Matt Roper Cc: John Harrison Cc: Rodrigo Vivi Reviewed-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20230323231857.2194435-2-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/i915/gt/intel_reset.c | 84 +++++++++++++++++++++++++++++-- drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.h | 2 + drivers/gpu/drm/i915/i915_reg.h | 14 +++++- 3 files changed, 93 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 2c3463f77e5c..797ea8340467 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -14,6 +14,8 @@ #include "gt/intel_gt_regs.h" +#include "gt/uc/intel_gsc_fw.h" + #include "i915_drv.h" #include "i915_file_private.h" #include "i915_gpu_error.h" @@ -695,6 +697,74 @@ static reset_func intel_get_gpu_reset(const struct intel_gt *gt) return NULL; } +static int __reset_guc(struct intel_gt *gt) +{ + u32 guc_domain = + GRAPHICS_VER(gt->i915) >= 11 ? GEN11_GRDOM_GUC : GEN9_GRDOM_GUC; + + return gen6_hw_domain_reset(gt, guc_domain); +} + +static bool needs_wa_14015076503(struct intel_gt *gt, intel_engine_mask_t engine_mask) +{ + if (!IS_METEORLAKE(gt->i915) || !HAS_ENGINE(gt, GSC0)) + return false; + + if (!__HAS_ENGINE(engine_mask, GSC0)) + return false; + + return intel_gsc_uc_fw_init_done(>->uc.gsc); +} + +static intel_engine_mask_t +wa_14015076503_start(struct intel_gt *gt, intel_engine_mask_t engine_mask, bool first) +{ + if (!needs_wa_14015076503(gt, engine_mask)) + return engine_mask; + + /* + * wa_14015076503: if the GSC FW is loaded, we need to alert it that + * we're going to do a GSC engine reset and then wait for 200ms for the + * FW to get ready for it. However, if this is the first ALL_ENGINES + * reset attempt and the GSC is not busy, we can try to instead reset + * the GuC and all the other engines individually to avoid the 200ms + * wait. + * Skipping the GSC engine is safe because, differently from other + * engines, the GSCCS only role is to forward the commands to the GSC + * FW, so it doesn't have any HW outside of the CS itself and therefore + * it has no state that we don't explicitly re-init on resume or on + * context switch LRC or power context). The HW for the GSC uC is + * managed by the GSC FW so we don't need to care about that. + */ + if (engine_mask == ALL_ENGINES && first && intel_engine_is_idle(gt->engine[GSC0])) { + __reset_guc(gt); + engine_mask = gt->info.engine_mask & ~BIT(GSC0); + } else { + intel_uncore_rmw(gt->uncore, + HECI_H_GS1(MTL_GSC_HECI2_BASE), + 0, HECI_H_GS1_ER_PREP); + + /* make sure the reset bit is clear when writing the CSR reg */ + intel_uncore_rmw(gt->uncore, + HECI_H_CSR(MTL_GSC_HECI2_BASE), + HECI_H_CSR_RST, HECI_H_CSR_IG); + msleep(200); + } + + return engine_mask; +} + +static void +wa_14015076503_end(struct intel_gt *gt, intel_engine_mask_t engine_mask) +{ + if (!needs_wa_14015076503(gt, engine_mask)) + return; + + intel_uncore_rmw(gt->uncore, + HECI_H_GS1(MTL_GSC_HECI2_BASE), + HECI_H_GS1_ER_PREP, 0); +} + int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask) { const int retries = engine_mask == ALL_ENGINES ? RESET_MAX_RETRIES : 1; @@ -712,10 +782,16 @@ int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask) */ intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); for (retry = 0; ret == -ETIMEDOUT && retry < retries; retry++) { - GT_TRACE(gt, "engine_mask=%x\n", engine_mask); + intel_engine_mask_t reset_mask; + + reset_mask = wa_14015076503_start(gt, engine_mask, !retry); + + GT_TRACE(gt, "engine_mask=%x\n", reset_mask); preempt_disable(); - ret = reset(gt, engine_mask, retry); + ret = reset(gt, reset_mask, retry); preempt_enable(); + + wa_14015076503_end(gt, reset_mask); } intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); @@ -740,14 +816,12 @@ bool intel_has_reset_engine(const struct intel_gt *gt) int intel_reset_guc(struct intel_gt *gt) { - u32 guc_domain = - GRAPHICS_VER(gt->i915) >= 11 ? GEN11_GRDOM_GUC : GEN9_GRDOM_GUC; int ret; GEM_BUG_ON(!HAS_GT_UC(gt->i915)); intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); - ret = gen6_hw_domain_reset(gt, guc_domain); + ret = __reset_guc(gt); intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); return ret; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.h index 4b5dbb44afb4..f4c1106bb2a9 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.h @@ -9,7 +9,9 @@ #include struct intel_gsc_uc; +struct intel_uncore; int intel_gsc_uc_fw_upload(struct intel_gsc_uc *gsc); bool intel_gsc_uc_fw_init_done(struct intel_gsc_uc *gsc); + #endif diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index cf80500fff79..818df429fc60 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -923,8 +923,18 @@ #define DG1_GSC_HECI2_BASE 0x00259000 #define DG2_GSC_HECI1_BASE 0x00373000 #define DG2_GSC_HECI2_BASE 0x00374000 - - +#define MTL_GSC_HECI1_BASE 0x00116000 +#define MTL_GSC_HECI2_BASE 0x00117000 + +#define HECI_H_CSR(base) _MMIO((base) + 0x4) +#define HECI_H_CSR_IE REG_BIT(0) +#define HECI_H_CSR_IS REG_BIT(1) +#define HECI_H_CSR_IG REG_BIT(2) +#define HECI_H_CSR_RDY REG_BIT(3) +#define HECI_H_CSR_RST REG_BIT(4) + +#define HECI_H_GS1(base) _MMIO((base) + 0xc4c) +#define HECI_H_GS1_ER_PREP REG_BIT(0) #define HSW_GTT_CACHE_EN _MMIO(0x4024) #define GTT_CACHE_EN_ALL 0xF0007FFF -- cgit v1.2.3 From 49f6f6483b652108bcb73accd0204a464b922395 Mon Sep 17 00:00:00 2001 From: Min Li Date: Tue, 28 Mar 2023 17:36:27 +0800 Subject: drm/i915: fix race condition UAF in i915_perf_add_config_ioctl Userspace can guess the id value and try to race oa_config object creation with config remove, resulting in a use-after-free if we dereference the object after unlocking the metrics_lock. For that reason, unlocking the metrics_lock must be done after we are done dereferencing the object. Signed-off-by: Min Li Fixes: f89823c21224 ("drm/i915/perf: Implement I915_PERF_ADD/REMOVE_CONFIG interface") Cc: # v4.14+ Reviewed-by: Andi Shyti Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20230328093627.5067-1-lm0963hack@gmail.com [tursulin: Manually added stable tag.] --- drivers/gpu/drm/i915/i915_perf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index c035dbb84c9b..050b8ae7b8e7 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -4802,13 +4802,13 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, err = oa_config->id; goto sysfs_err; } - - mutex_unlock(&perf->metrics_lock); + id = oa_config->id; drm_dbg(&perf->i915->drm, "Added config %s id=%i\n", oa_config->uuid, oa_config->id); + mutex_unlock(&perf->metrics_lock); - return oa_config->id; + return id; sysfs_err: mutex_unlock(&perf->metrics_lock); -- cgit v1.2.3 From 5fba65efa7cfb8cef227a2c555deb10327a5e27b Mon Sep 17 00:00:00 2001 From: Radhakrishna Sripada Date: Wed, 29 Mar 2023 18:23:35 -0300 Subject: drm/i915/mtl: Add workarounds Wa_14017066071 and Wa_14017654203 Both workarounds require the same implementation and apply to MTL P and M from stepping A0 to B0 (exclusive). v2: - Remove unrelated brace removal. (Matt) Signed-off-by: Radhakrishna Sripada Signed-off-by: Gustavo Sousa Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20230329212336.106161-2-gustavo.sousa@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 1 + drivers/gpu/drm/i915/gt/intel_workarounds.c | 9 +++++++++ 2 files changed, 10 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 97a60941eee5..014577971992 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -1146,6 +1146,7 @@ #define ENABLE_SMALLPL REG_BIT(15) #define SC_DISABLE_POWER_OPTIMIZATION_EBB REG_BIT(9) #define GEN11_SAMPLER_ENABLE_HEADLESS_MSG REG_BIT(5) +#define MTL_DISABLE_SAMPLER_SC_OOO REG_BIT(3) #define GEN9_HALF_SLICE_CHICKEN7 MCR_REG(0xe194) #define DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA REG_BIT(15) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 60e9cf273c5e..f22a43d98925 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -3051,6 +3051,15 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li add_render_compute_tuning_settings(i915, wal); + if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || + IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) + /* + * Wa_14017066071 + * Wa_14017654203 + */ + wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE, + MTL_DISABLE_SAMPLER_SC_OOO); + if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) || IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) || -- cgit v1.2.3 From 9079363eda1ea0d9fa2cc5635e65821d8ed4f994 Mon Sep 17 00:00:00 2001 From: Radhakrishna Sripada Date: Wed, 29 Mar 2023 18:23:36 -0300 Subject: drm/i915/mtl: Add Wa_22015279794 Wa_22015279794 applies to MTL P from stepping A0 to B0 (exclusive). Signed-off-by: Radhakrishna Sripada Signed-off-by: Gustavo Sousa Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20230329212336.106161-3-gustavo.sousa@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 6 ++++++ drivers/gpu/drm/i915/gt/intel_workarounds.c | 5 +++++ 2 files changed, 11 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 014577971992..39593467e1a5 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -1158,7 +1158,13 @@ #define ENABLE_EU_COUNT_FOR_TDL_FLUSH REG_BIT(10) #define DISABLE_ECC REG_BIT(5) #define FLOAT_BLEND_OPTIMIZATION_ENABLE REG_BIT(4) +/* + * We have both ENABLE and DISABLE defines below using the same bit because the + * meaning depends on the target platform. There are no platform prefix for them + * because different steppings of DG2 pick one or the other semantics. + */ #define ENABLE_PREFETCH_INTO_IC REG_BIT(3) +#define DISABLE_PREFETCH_INTO_IC REG_BIT(3) #define EU_PERF_CNTL0 PERF_REG(0xe458) #define EU_PERF_CNTL4 PERF_REG(0xe45c) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index f22a43d98925..642e57e6e3b4 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -3060,6 +3060,11 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE, MTL_DISABLE_SAMPLER_SC_OOO); + if (IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) + /* Wa_22015279794 */ + wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS, + DISABLE_PREFETCH_INTO_IC); + if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) || IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) || -- cgit v1.2.3 From b3e70051879c665acdd3a1ab50d0ed58d6a8001f Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 20 Mar 2023 15:14:23 +0000 Subject: drm/i915: Fix context runtime accounting When considering whether to mark one context as stopped and another as started we need to look at whether the previous and new _contexts_ are different and not just requests. Otherwise the software tracked context start time was incorrectly updated to the most recent lite-restore time- stamp, which was in some cases resulting in active time going backward, until the context switch (typically the heartbeat pulse) would synchronise with the hardware tracked context runtime. Easiest use case to observe this behaviour was with a full screen clients with close to 100% engine load. Signed-off-by: Tvrtko Ursulin Fixes: bb6287cb1886 ("drm/i915: Track context current active time") Cc: # v5.19+ Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20230320151423.1708436-1-tvrtko.ursulin@linux.intel.com [tursulin: Fix spelling in commit msg.] --- drivers/gpu/drm/i915/gt/intel_execlists_submission.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 3c573d41d404..eefd327c4278 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -2018,6 +2018,8 @@ process_csb(struct intel_engine_cs *engine, struct i915_request **inactive) * inspecting the queue to see if we need to resumbit. */ if (*prev != *execlists->active) { /* elide lite-restores */ + struct intel_context *prev_ce = NULL, *active_ce = NULL; + /* * Note the inherent discrepancy between the HW runtime, * recorded as part of the context switch, and the CPU @@ -2029,9 +2031,15 @@ process_csb(struct intel_engine_cs *engine, struct i915_request **inactive) * and correct overselves later when updating from HW. */ if (*prev) - lrc_runtime_stop((*prev)->context); + prev_ce = (*prev)->context; if (*execlists->active) - lrc_runtime_start((*execlists->active)->context); + active_ce = (*execlists->active)->context; + if (prev_ce != active_ce) { + if (prev_ce) + lrc_runtime_stop(prev_ce); + if (active_ce) + lrc_runtime_start(active_ce); + } new_timeslice(execlists); } -- cgit v1.2.3 From e6a1e701edd0ef8fd51fd50e11bf29bbb2f37313 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Fri, 31 Mar 2023 10:25:49 +0100 Subject: drm/i915/i915_scatterlist: Fix kerneldoc formatting issue - missing '@' Fixes the following W=1 kernel build warning(s): drivers/gpu/drm/i915/i915_scatterlist.c:62: warning: Function parameter or member 'size' not described in 'i915_refct_sgt_init' Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: Tvrtko Ursulin Cc: David Airlie Cc: Daniel Vetter Cc: intel-gfx@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Signed-off-by: Lee Jones Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20230331092607.700644-2-lee@kernel.org --- drivers/gpu/drm/i915/i915_scatterlist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_scatterlist.c b/drivers/gpu/drm/i915/i915_scatterlist.c index 756289e43dff..e71089d6741a 100644 --- a/drivers/gpu/drm/i915/i915_scatterlist.c +++ b/drivers/gpu/drm/i915/i915_scatterlist.c @@ -56,7 +56,7 @@ static const struct i915_refct_sgt_ops rsgt_ops = { /** * i915_refct_sgt_init - Initialize a struct i915_refct_sgt with default ops * @rsgt: The struct i915_refct_sgt to initialize. - * size: The size of the underlying memory buffer. + * @size: The size of the underlying memory buffer. */ void i915_refct_sgt_init(struct i915_refct_sgt *rsgt, size_t size) { -- cgit v1.2.3 From 5c908cd57eeb857f107732773a653c89ad08e9ce Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Fri, 31 Mar 2023 10:25:50 +0100 Subject: drm/i915/intel_region_ttm: Provide missing description for 'offset' param Fixes the following W=1 kernel build warning(s): drivers/gpu/drm/i915/intel_region_ttm.c:201: warning: Function parameter or member 'offset' not described in 'intel_region_ttm_resource_alloc' Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: Tvrtko Ursulin Cc: David Airlie Cc: Daniel Vetter Cc: intel-gfx@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Signed-off-by: Lee Jones Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20230331092607.700644-3-lee@kernel.org --- drivers/gpu/drm/i915/intel_region_ttm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c index b7fbd5abb42a..bf6097e7433d 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.c +++ b/drivers/gpu/drm/i915/intel_region_ttm.c @@ -181,6 +181,7 @@ intel_region_ttm_resource_to_rsgt(struct intel_memory_region *mem, /** * intel_region_ttm_resource_alloc - Allocate memory resources from a region * @mem: The memory region, + * @offset: BO offset * @size: The requested size in bytes * @flags: Allocation flags * -- cgit v1.2.3 From 445a1b818e20fbfff5905bb9070da703101a5c00 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Fri, 31 Mar 2023 10:25:54 +0100 Subject: drm/i915/gt/intel_rps: Demote a kerneldoc abuse for ips_ping_for_i915_load() Fixes the following W=1 kernel build warning(s): drivers/gpu/drm/i915/gt/intel_rps.c:2646: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: Tvrtko Ursulin Cc: David Airlie Cc: Daniel Vetter Cc: intel-gfx@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Signed-off-by: Lee Jones Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20230331092607.700644-7-lee@kernel.org --- drivers/gpu/drm/i915/gt/intel_rps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index fc73cfe0e39b..2adf221ab11b 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -2674,7 +2674,7 @@ bool rps_read_mask_mmio(struct intel_rps *rps, static struct drm_i915_private __rcu *ips_mchdev; -/** +/* * Tells the intel_ips driver that the i915 driver is now loaded, if * IPS got loaded first. * -- cgit v1.2.3 From b29b32a2ae5a6753cdbe13cf2e64c752743f1923 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Fri, 31 Mar 2023 10:25:55 +0100 Subject: drm/i915/gem/i915_gem_create: Provide the function names for proper kerneldoc headers Fixes the following W=1 kernel build warning(s): drivers/gpu/drm/i915/gem/i915_gem_create.c:147: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst drivers/gpu/drm/i915/gem/i915_gem_create.c:218: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst drivers/gpu/drm/i915/gem/i915_gem_create.c:402: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: Tvrtko Ursulin Cc: David Airlie Cc: Daniel Vetter Cc: Matthew Auld Cc: intel-gfx@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Signed-off-by: Lee Jones Signed-off-by: Jani Nikula [Jani: fixed i915_gem_create_ext_ioctl while applying] Link: https://patchwork.freedesktop.org/patch/msgid/20230331092607.700644-8-lee@kernel.org --- drivers/gpu/drm/i915/gem/i915_gem_create.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c index 005a7f842784..f55dcc01d97f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_create.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c @@ -143,7 +143,8 @@ object_free: } /** - * Creates a new object using the same path as DRM_I915_GEM_CREATE_EXT + * __i915_gem_object_create_user - Creates a new object using the same path as + * DRM_I915_GEM_CREATE_EXT * @i915: i915 private * @size: size of the buffer, in bytes * @placements: possible placement regions, in priority order @@ -214,7 +215,7 @@ i915_gem_dumb_create(struct drm_file *file, } /** - * Creates a new mm object and returns a handle to it. + * i915_gem_create_ioctl - Creates a new mm object and returns a handle to it. * @dev: drm device pointer * @data: ioctl data blob * @file: drm file pointer @@ -398,7 +399,7 @@ static const i915_user_extension_fn create_extensions[] = { }; /** - * Creates a new mm object and returns a handle to it. + * i915_gem_create_ext_ioctl - Creates a new mm object and returns a handle to it. * @dev: drm device pointer * @data: ioctl data blob * @file: drm file pointer -- cgit v1.2.3 From 0b81afa5b6d245809d3cc0613adfe6098695253d Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Fri, 31 Mar 2023 10:25:56 +0100 Subject: drm/i915/gem/i915_gem_domain: Provide function names to complete proper kerneldoc Fixes the following W=1 kernel build warning(s): drivers/gpu/drm/i915/gem/i915_gem_domain.c:119: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst drivers/gpu/drm/i915/gem/i915_gem_domain.c:180: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst drivers/gpu/drm/i915/gem/i915_gem_domain.c:265: warning: expecting prototype for Changes the cache(). Prototype was for i915_gem_object_set_cache_level() instead drivers/gpu/drm/i915/gem/i915_gem_domain.c:470: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst drivers/gpu/drm/i915/gem/i915_gem_domain.c:514: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: Tvrtko Ursulin Cc: David Airlie Cc: Daniel Vetter Cc: intel-gfx@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Signed-off-by: Lee Jones Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20230331092607.700644-9-lee@kernel.org --- drivers/gpu/drm/i915/gem/i915_gem_domain.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index 9969e687ad85..b0b6cdb0b07f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -115,7 +115,8 @@ void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj) } /** - * Moves a single object to the WC read, and possibly write domain. + * i915_gem_object_set_to_wc_domain - Moves a single object to the WC read, and + * possibly write domain. * @obj: object to act on * @write: ask for write access or read only * @@ -176,7 +177,8 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write) } /** - * Moves a single object to the GTT read, and possibly write domain. + * i915_gem_object_set_to_gtt_domain - Moves a single object to the GTT read, + * and possibly write domain. * @obj: object to act on * @write: ask for write access or read only * @@ -245,7 +247,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) } /** - * Changes the cache-level of an object across all VMA. + * i915_gem_object_set_cache_level - Changes the cache-level of an object across all VMA. * @obj: object to act on * @cache_level: new cache level to set for the object * @@ -466,7 +468,8 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, } /** - * Moves a single object to the CPU read, and possibly write domain. + * i915_gem_object_set_to_cpu_domain - Moves a single object to the CPU read, + * and possibly write domain. * @obj: object to act on * @write: requesting write or read-only access * @@ -510,7 +513,8 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) } /** - * Called when user space prepares to use an object with the CPU, either + * i915_gem_set_domain_ioctl - Called when user space prepares to use an + * object with the CPU, either * through the mmap ioctl's mapping or a GTT mapping. * @dev: drm device * @data: ioctl data blob -- cgit v1.2.3 From 71d93eac585a5f94433d7d4a7340a0d7081d925c Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Fri, 31 Mar 2023 10:25:57 +0100 Subject: drm/i915/gem/i915_gem_ttm_pm: Provide a couple of missing descriptions for 'flags' and remove some superfluous ones Fixes the following W=1 kernel build warning(s): drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c:156: warning: Function parameter or member 'flags' not described in 'i915_ttm_backup_region' drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c:156: warning: Excess function parameter 'allow_gpu' description in 'i915_ttm_backup_region' drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c:156: warning: Excess function parameter 'backup_pinned' description in 'i915_ttm_backup_region' drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c:223: warning: Function parameter or member 'flags' not described in 'i915_ttm_restore_region' drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c:223: warning: Excess function parameter 'allow_gpu' description in 'i915_ttm_restore_region' Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: Tvrtko Ursulin Cc: David Airlie Cc: Daniel Vetter Cc: Matthew Auld Cc: Nirmoy Das Cc: intel-gfx@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Signed-off-by: Lee Jones Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20230331092607.700644-10-lee@kernel.org --- drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c index 7e67742bc65e..be644763d3bb 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c @@ -144,8 +144,7 @@ void i915_ttm_recover_region(struct intel_memory_region *mr) /** * i915_ttm_backup_region - Back up all objects of a region to smem. * @mr: The memory region - * @allow_gpu: Whether to allow the gpu blitter for this backup. - * @backup_pinned: Backup also pinned objects. + * @flags: TTM backup flags * * Loops over all objects of a region and either evicts them if they are * evictable or backs them up using a backup object if they are pinned. @@ -209,7 +208,7 @@ static int i915_ttm_restore(struct i915_gem_apply_to_region *apply, /** * i915_ttm_restore_region - Restore backed-up objects of a region from smem. * @mr: The memory region - * @allow_gpu: Whether to allow the gpu blitter to recover. + * @flags: TTM backup flags * * Loops over all objects of a region and if they are backed-up, restores * them from smem. -- cgit v1.2.3 From 98a1dacc246dad474c9f7ddf4f0c7e92d49a15c3 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Fri, 31 Mar 2023 10:25:58 +0100 Subject: drm/i915/gem/i915_gem_ttm: Demote half-filled kerneldoc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hopefully someone knowledgable will follow-up to complete it. Fixes the following W=1 kernel build warning(s): drivers/gpu/drm/i915/gem/i915_gem_ttm.c:1292: warning: Function parameter or member 'offset' not described in '__i915_gem_ttm_object_init' drivers/gpu/drm/i915/gem/i915_gem_ttm.c:1292: warning: Function parameter or member 'page_size' not described in '__i915_gem_ttm_object_init' Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: Tvrtko Ursulin Cc: David Airlie Cc: Daniel Vetter Cc: Sumit Semwal Cc: "Christian König" Cc: Matthew Auld Cc: intel-gfx@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Cc: linux-media@vger.kernel.org Cc: linaro-mm-sig@lists.linaro.org Signed-off-by: Lee Jones Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20230331092607.700644-11-lee@kernel.org --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 8cfed1bef629..e28c34d9dba9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -1250,7 +1250,7 @@ void i915_ttm_bo_destroy(struct ttm_buffer_object *bo) } } -/** +/* * __i915_gem_ttm_object_init - Initialize a ttm-backed i915 gem object * @mem: The initial memory region for the object. * @obj: The gem object. -- cgit v1.2.3 From 6adba2903fa16c0c55b1f1e3f6506c407a26ff88 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Fri, 31 Mar 2023 10:25:59 +0100 Subject: drm/i915/gem/i915_gem_ttm_move: Provide a couple of missing descriptions for 'num_pages' and 'ctx' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the following W=1 kernel build warning(s): drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c:272: warning: Function parameter or member 'num_pages' not described in 'i915_ttm_memcpy_arg' drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c:569: warning: Function parameter or member 'ctx' not described in 'i915_ttm_move' Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: Tvrtko Ursulin Cc: David Airlie Cc: Daniel Vetter Cc: Sumit Semwal Cc: "Christian König" Cc: intel-gfx@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Cc: linux-media@vger.kernel.org Cc: linaro-mm-sig@lists.linaro.org Signed-off-by: Lee Jones Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20230331092607.700644-12-lee@kernel.org --- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index 2ebaaf4d663c..6e3de0f33add 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -237,6 +237,7 @@ static struct dma_fence *i915_ttm_accel_move(struct ttm_buffer_object *bo, * @_src_iter: Storage space for the source kmap iterator. * @dst_iter: Pointer to the destination kmap iterator. * @src_iter: Pointer to the source kmap iterator. + * @num_pages: Number of pages * @clear: Whether to clear instead of copy. * @src_rsgt: Refcounted scatter-gather list of source memory. * @dst_rsgt: Refcounted scatter-gather list of destination memory. @@ -541,6 +542,8 @@ out: * i915_ttm_move - The TTM move callback used by i915. * @bo: The buffer object. * @evict: Whether this is an eviction. + * @ctx: Pointer to a struct ttm_operation_ctx indicating how the waits should be + * performed if waiting * @dst_mem: The destination ttm resource. * @hop: If we need multihop, what temporary memory type to move to. * -- cgit v1.2.3 From 0f923778f47bfc8c47fabb6a93721ae71a6e6702 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Fri, 31 Mar 2023 10:26:00 +0100 Subject: drm/i915/gem/i915_gem_wait: Provide function name to validate the kerneldoc header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the following W=1 kernel build warning(s): drivers/gpu/drm/i915/gem/i915_gem_wait.c:164: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: Tvrtko Ursulin Cc: David Airlie Cc: Daniel Vetter Cc: Sumit Semwal Cc: "Christian König" Cc: Chris Wilson Cc: intel-gfx@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Cc: linux-media@vger.kernel.org Cc: linaro-mm-sig@lists.linaro.org Signed-off-by: Lee Jones Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20230331092607.700644-13-lee@kernel.org --- drivers/gpu/drm/i915/gem/i915_gem_wait.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c index e6e01c2a74a6..4a33ad2d122b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c @@ -161,7 +161,7 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, } /** - * Waits for rendering to the object to be completed + * i915_gem_object_wait - Waits for rendering to the object to be completed * @obj: i915 gem object * @flags: how to wait (under a lock, for all rendering or just for writes etc) * @timeout: how long to wait -- cgit v1.2.3 From 81d4baaf4b876589a72a500d45f2c67bbe82bcc2 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Fri, 31 Mar 2023 10:26:01 +0100 Subject: drm/i915/gem/i915_gem_object: Demote non-kerneldoc header with no param descriptions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the following W=1 kernel build warning(s): drivers/gpu/drm/i915/gem/i915_gem_object.c:887: warning: Function parameter or member 'obj' not described in 'i915_gem_object_has_unknown_state' Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: Tvrtko Ursulin Cc: David Airlie Cc: Daniel Vetter Cc: Sumit Semwal Cc: "Christian König" Cc: Matthew Auld Cc: Nirmoy Das Cc: intel-gfx@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Cc: linux-media@vger.kernel.org Cc: linaro-mm-sig@lists.linaro.org Signed-off-by: Lee Jones Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20230331092607.700644-14-lee@kernel.org --- drivers/gpu/drm/i915/gem/i915_gem_object.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index e6d4efde4fc5..4666bb82f312 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -875,7 +875,7 @@ int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj, return ret < 0 ? ret : 0; } -/** +/* * i915_gem_object_has_unknown_state - Return true if the object backing pages are * in an unknown_state. This means that userspace must NEVER be allowed to touch * the pages, with either the GPU or CPU. -- cgit v1.2.3 From 5d9543162fd6686e83f86a448fe2ba2e7a5ebbb5 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Fri, 31 Mar 2023 10:26:02 +0100 Subject: drm/i915/i915_gem: Provide function names to complete the expected kerneldoc format Fixes the following W=1 kernel build warning(s): drivers/gpu/drm/i915/i915_gem.c:447: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst drivers/gpu/drm/i915/i915_gem.c:536: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst drivers/gpu/drm/i915/i915_gem.c:726: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst drivers/gpu/drm/i915/i915_gem.c:811: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: Tvrtko Ursulin Cc: David Airlie Cc: Daniel Vetter Cc: Eric Anholt Cc: intel-gfx@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Signed-off-by: Lee Jones Signed-off-by: Jani Nikula [Jani: fix i915_gem_sw_finish_ioctl while applying] Link: https://patchwork.freedesktop.org/patch/msgid/20230331092607.700644-15-lee@kernel.org --- drivers/gpu/drm/i915/i915_gem.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 2ba922fbbd5f..ecb47f834217 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -444,7 +444,7 @@ out_rpm: } /** - * Reads data from the object referenced by handle. + * i915_gem_pread_ioctl - Reads data from the object referenced by handle. * @dev: drm device pointer * @data: ioctl data blob * @file: drm file pointer @@ -533,7 +533,7 @@ ggtt_write(struct io_mapping *mapping, } /** - * This is the fast pwrite path, where we copy the data directly from the + * i915_gem_gtt_pwrite_fast - This is the fast pwrite path, where we copy the data directly from the * user into the GTT, uncached. * @obj: i915 GEM object * @args: pwrite arguments structure @@ -723,7 +723,7 @@ err_unlock: } /** - * Writes data to the object referenced by handle. + * i915_gem_pwrite_ioctl - Writes data to the object referenced by handle. * @dev: drm device * @data: ioctl data blob * @file: drm file @@ -808,7 +808,7 @@ err: } /** - * Called when user space has done writes to this buffer + * i915_gem_sw_finish_ioctl - Called when user space has done writes to this buffer * @dev: drm device * @data: ioctl data blob * @file: drm file -- cgit v1.2.3 From 2447c731fe55a36accdd7aff96670d69c06c2372 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Fri, 31 Mar 2023 10:26:03 +0100 Subject: drm/i915/gt/uc/intel_guc_hwconfig: Demote a few non-conforming kerneldoc headers Fixes the following W=1 kernel build warning(s): drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c:112: warning: Function parameter or member 'gt' not described in 'guc_hwconfig_init' drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c:112: warning: expecting prototype for intel_guc_hwconfig_init(). Prototype was for guc_hwconfig_init() instead drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c:145: warning: Function parameter or member 'gt' not described in 'intel_gt_init_hwconfig' drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c:158: warning: Function parameter or member 'gt' not described in 'intel_gt_fini_hwconfig' Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: Tvrtko Ursulin Cc: David Airlie Cc: Daniel Vetter Cc: John Harrison Cc: Matt Roper Cc: intel-gfx@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Signed-off-by: Lee Jones Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20230331092607.700644-16-lee@kernel.org --- drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c index 4781fccc2687..852bea0208ce 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c @@ -102,7 +102,7 @@ static bool has_table(struct drm_i915_private *i915) return false; } -/** +/* * intel_guc_hwconfig_init - Initialize the HWConfig * * Retrieve the HWConfig table from the GuC and save it locally. @@ -136,7 +136,7 @@ static int guc_hwconfig_init(struct intel_gt *gt) return 0; } -/** +/* * intel_gt_init_hwconfig - Initialize the HWConfig if available * * Retrieve the HWConfig table if available on the current platform. @@ -149,7 +149,7 @@ int intel_gt_init_hwconfig(struct intel_gt *gt) return guc_hwconfig_init(gt); } -/** +/* * intel_gt_fini_hwconfig - Finalize the HWConfig * * Free up the memory allocation holding the table. -- cgit v1.2.3 From a915450e0e44e9ed2a87fc5b3208d5ce01554a8a Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Fri, 31 Mar 2023 10:26:04 +0100 Subject: drm/i915/i915_vma: Provide one missing param and demote another non-kerneldoc header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the following W=1 kernel build warning(s): drivers/gpu/drm/i915/i915_vma.c:756: warning: Function parameter or member 'ww' not described in 'i915_vma_insert' drivers/gpu/drm/i915/i915_vma.c:1744: warning: Function parameter or member 'vma' not described in 'i915_vma_destroy_locked' Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: Tvrtko Ursulin Cc: David Airlie Cc: Daniel Vetter Cc: Sumit Semwal Cc: "Christian König" Cc: intel-gfx@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Cc: linux-media@vger.kernel.org Cc: linaro-mm-sig@lists.linaro.org Signed-off-by: Lee Jones Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20230331092607.700644-17-lee@kernel.org --- drivers/gpu/drm/i915/i915_vma.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 5272e2be990e..b9455f9441c4 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -738,6 +738,7 @@ bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long color) /** * i915_vma_insert - finds a slot for the vma in its address space * @vma: the vma + * @ww: An optional struct i915_gem_ww_ctx * @size: requested size in bytes (can be larger than the VMA) * @alignment: required alignment * @flags: mask of PIN_* flags to use @@ -1713,7 +1714,7 @@ static void release_references(struct i915_vma *vma, struct intel_gt *gt, i915_vma_free(vma); } -/** +/* * i915_vma_destroy_locked - Remove all weak reference to the vma and put * the initial reference. * -- cgit v1.2.3 From aa7b93eb94ad6d883016bffda670e028fe168051 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 31 Mar 2023 16:16:36 +0200 Subject: drm/i915/gt: Hold a wakeref for the active VM There may be a disconnect between the GT used by the engine and the GT used for the VM, requiring us to hold a wakeref on both while the GPU is active with this request. v2: added explanation to __queue_and_release_pm Signed-off-by: Chris Wilson [ahajda: removed not-yet-upstremed wakeref tracking bits] Signed-off-by: Andrzej Hajda Reviewed-by: Tvrtko Ursulin Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20230330-hold_wakeref_for_active_vm-v2-1-724d201499c2@intel.com --- drivers/gpu/drm/i915/gt/intel_context.h | 15 +++++++++++---- drivers/gpu/drm/i915/gt/intel_engine_pm.c | 9 +++++++++ 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index 0a8d553da3f4..48f888c3da08 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -14,6 +14,7 @@ #include "i915_drv.h" #include "intel_context_types.h" #include "intel_engine_types.h" +#include "intel_gt_pm.h" #include "intel_ring_types.h" #include "intel_timeline_types.h" #include "i915_trace.h" @@ -207,8 +208,11 @@ void intel_context_exit_engine(struct intel_context *ce); static inline void intel_context_enter(struct intel_context *ce) { lockdep_assert_held(&ce->timeline->mutex); - if (!ce->active_count++) - ce->ops->enter(ce); + if (ce->active_count++) + return; + + ce->ops->enter(ce); + intel_gt_pm_get(ce->vm->gt); } static inline void intel_context_mark_active(struct intel_context *ce) @@ -222,8 +226,11 @@ static inline void intel_context_exit(struct intel_context *ce) { lockdep_assert_held(&ce->timeline->mutex); GEM_BUG_ON(!ce->active_count); - if (!--ce->active_count) - ce->ops->exit(ce); + if (--ce->active_count) + return; + + intel_gt_pm_put_async(ce->vm->gt); + ce->ops->exit(ce); } static inline struct intel_context *intel_context_get(struct intel_context *ce) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index e971b153fda9..ee531a5c142c 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -114,6 +114,15 @@ __queue_and_release_pm(struct i915_request *rq, ENGINE_TRACE(engine, "parking\n"); + /* + * Open coded one half of intel_context_enter, which we have to omit + * here (see the large comment below) and because the other part must + * not be called due constructing directly with __i915_request_create + * which increments active count via intel_context_mark_active. + */ + GEM_BUG_ON(rq->context->active_count != 1); + __intel_gt_pm_get(engine->gt); + /* * We have to serialise all potential retirement paths with our * submission, as we don't want to underflow either the -- cgit v1.2.3 From 4b51210f98c2b89ce37aede5b8dc5105be0572c6 Mon Sep 17 00:00:00 2001 From: Haridhar Kalvala Date: Tue, 4 Apr 2023 23:02:20 +0530 Subject: drm/i915/mtl: Add Wa_14017856879 Wa_14017856879 implementation for mtl. Bspec: 46046 Signed-off-by: Haridhar Kalvala Reviewed-by: Gustavo Sousa Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20230404173220.3175577-1-haridhar.kalvala@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 2 ++ drivers/gpu/drm/i915/gt/intel_workarounds.c | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 39593467e1a5..6115cc1805de 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -1179,7 +1179,9 @@ #define THREAD_EX_ARB_MODE_RR_AFTER_DEP REG_FIELD_PREP(THREAD_EX_ARB_MODE, 0x2) #define HSW_ROW_CHICKEN3 _MMIO(0xe49c) +#define GEN9_ROW_CHICKEN3 MCR_REG(0xe49c) #define HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE (1 << 6) +#define MTL_DISABLE_FIX_FOR_EOT_FLUSH REG_BIT(9) #define GEN8_ROW_CHICKEN MCR_REG(0xe4f0) #define FLOW_CONTROL_ENABLE REG_BIT(15) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 642e57e6e3b4..c0de5276b3d3 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -3051,6 +3051,11 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li add_render_compute_tuning_settings(i915, wal); + if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_B0, STEP_FOREVER) || + IS_MTL_GRAPHICS_STEP(i915, P, STEP_B0, STEP_FOREVER)) + /* Wa_14017856879 */ + wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN3, MTL_DISABLE_FIX_FOR_EOT_FLUSH); + if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) /* -- cgit v1.2.3