From 8ad0152afb1bb3878bba282308f037d73a87ace5 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Wed, 27 Jul 2022 19:20:27 -0700 Subject: drm/i915/guc: Make GuC log sizes runtime configurable The GuC log buffer sizes had to be configured statically at compile time. This can be quite troublesome when needing to get larger logs out of a released driver. So re-organise the code to allow a boot time module parameter override. Signed-off-by: John Harrison Reviewed-by: Alan Previn Link: https://patchwork.freedesktop.org/patch/msgid/20220728022028.2190627-7-John.C.Harrison@Intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc.c | 53 ++------ drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c | 14 +- drivers/gpu/drm/i915/gt/uc/intel_guc_log.c | 176 +++++++++++++++++++++++-- drivers/gpu/drm/i915/gt/uc/intel_guc_log.h | 42 +++--- drivers/gpu/drm/i915/i915_params.c | 12 ++ drivers/gpu/drm/i915/i915_params.h | 3 + 6 files changed, 226 insertions(+), 74 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index ab4aacc516aa..01f2705cb94a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -224,53 +224,22 @@ static u32 guc_ctl_feature_flags(struct intel_guc *guc) static u32 guc_ctl_log_params_flags(struct intel_guc *guc) { - u32 offset = intel_guc_ggtt_offset(guc, guc->log.vma) >> PAGE_SHIFT; - u32 flags; - - #if (((CRASH_BUFFER_SIZE) % SZ_1M) == 0) - #define LOG_UNIT SZ_1M - #define LOG_FLAG GUC_LOG_LOG_ALLOC_UNITS - #else - #define LOG_UNIT SZ_4K - #define LOG_FLAG 0 - #endif - - #if (((CAPTURE_BUFFER_SIZE) % SZ_1M) == 0) - #define CAPTURE_UNIT SZ_1M - #define CAPTURE_FLAG GUC_LOG_CAPTURE_ALLOC_UNITS - #else - #define CAPTURE_UNIT SZ_4K - #define CAPTURE_FLAG 0 - #endif - - BUILD_BUG_ON(!CRASH_BUFFER_SIZE); - BUILD_BUG_ON(!IS_ALIGNED(CRASH_BUFFER_SIZE, LOG_UNIT)); - BUILD_BUG_ON(!DEBUG_BUFFER_SIZE); - BUILD_BUG_ON(!IS_ALIGNED(DEBUG_BUFFER_SIZE, LOG_UNIT)); - BUILD_BUG_ON(!CAPTURE_BUFFER_SIZE); - BUILD_BUG_ON(!IS_ALIGNED(CAPTURE_BUFFER_SIZE, CAPTURE_UNIT)); - - BUILD_BUG_ON((CRASH_BUFFER_SIZE / LOG_UNIT - 1) > - (GUC_LOG_CRASH_MASK >> GUC_LOG_CRASH_SHIFT)); - BUILD_BUG_ON((DEBUG_BUFFER_SIZE / LOG_UNIT - 1) > - (GUC_LOG_DEBUG_MASK >> GUC_LOG_DEBUG_SHIFT)); - BUILD_BUG_ON((CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) > - (GUC_LOG_CAPTURE_MASK >> GUC_LOG_CAPTURE_SHIFT)); + struct intel_guc_log *log = &guc->log; + u32 offset, flags; + + GEM_BUG_ON(!log->sizes_initialised); + + offset = intel_guc_ggtt_offset(guc, log->vma) >> PAGE_SHIFT; flags = GUC_LOG_VALID | GUC_LOG_NOTIFY_ON_HALF_FULL | - CAPTURE_FLAG | - LOG_FLAG | - ((CRASH_BUFFER_SIZE / LOG_UNIT - 1) << GUC_LOG_CRASH_SHIFT) | - ((DEBUG_BUFFER_SIZE / LOG_UNIT - 1) << GUC_LOG_DEBUG_SHIFT) | - ((CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) << GUC_LOG_CAPTURE_SHIFT) | + log->sizes[GUC_LOG_SECTIONS_DEBUG].flag | + log->sizes[GUC_LOG_SECTIONS_CAPTURE].flag | + (log->sizes[GUC_LOG_SECTIONS_CRASH].count << GUC_LOG_CRASH_SHIFT) | + (log->sizes[GUC_LOG_SECTIONS_DEBUG].count << GUC_LOG_DEBUG_SHIFT) | + (log->sizes[GUC_LOG_SECTIONS_CAPTURE].count << GUC_LOG_CAPTURE_SHIFT) | (offset << GUC_LOG_BUF_ADDR_SHIFT); - #undef LOG_UNIT - #undef LOG_FLAG - #undef CAPTURE_UNIT - #undef CAPTURE_FLAG - return flags; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c index b54b7883320b..d2ac53d4f3b6 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c @@ -656,16 +656,17 @@ static void check_guc_capture_size(struct intel_guc *guc) struct drm_i915_private *i915 = guc_to_gt(guc)->i915; int min_size = guc_capture_output_min_size_est(guc); int spare_size = min_size * GUC_CAPTURE_OVERBUFFER_MULTIPLIER; + u32 buffer_size = intel_guc_log_section_size_capture(&guc->log); if (min_size < 0) drm_warn(&i915->drm, "Failed to calculate GuC error state capture buffer minimum size: %d!\n", min_size); - else if (min_size > CAPTURE_BUFFER_SIZE) + else if (min_size > buffer_size) drm_warn(&i915->drm, "GuC error state capture buffer is too small: %d < %d\n", - CAPTURE_BUFFER_SIZE, min_size); - else if (spare_size > CAPTURE_BUFFER_SIZE) + buffer_size, min_size); + else if (spare_size > buffer_size) drm_notice(&i915->drm, "GuC error state capture buffer maybe too small: %d < %d (min = %d)\n", - CAPTURE_BUFFER_SIZE, spare_size, min_size); + buffer_size, spare_size, min_size); } /* @@ -1294,7 +1295,8 @@ static void __guc_capture_process_output(struct intel_guc *guc) log_buf_state = guc->log.buf_addr + (sizeof(struct guc_log_buffer_state) * GUC_CAPTURE_LOG_BUFFER); - src_data = guc->log.buf_addr + intel_guc_get_log_buffer_offset(GUC_CAPTURE_LOG_BUFFER); + src_data = guc->log.buf_addr + + intel_guc_get_log_buffer_offset(&guc->log, GUC_CAPTURE_LOG_BUFFER); /* * Make a copy of the state structure, inside GuC log buffer @@ -1302,7 +1304,7 @@ static void __guc_capture_process_output(struct intel_guc *guc) * from it multiple times. */ memcpy(&log_buf_state_local, log_buf_state, sizeof(struct guc_log_buffer_state)); - buffer_size = intel_guc_get_log_buffer_size(GUC_CAPTURE_LOG_BUFFER); + buffer_size = intel_guc_get_log_buffer_size(&guc->log, GUC_CAPTURE_LOG_BUFFER); read_offset = log_buf_state_local.read_ptr; write_offset = log_buf_state_local.sampled_write_ptr; full_count = log_buf_state_local.buffer_full_cnt; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c index 44e6e6853b38..3a2243b4ac9f 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c @@ -13,8 +13,158 @@ #include "intel_guc_capture.h" #include "intel_guc_log.h" +#if defined(CONFIG_DRM_I915_DEBUG_GUC) +#define GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE SZ_2M +#define GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE SZ_16M +#define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE SZ_4M +#elif defined(CONFIG_DRM_I915_DEBUG_GEM) +#define GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE SZ_1M +#define GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE SZ_2M +#define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE SZ_4M +#else +#define GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE SZ_8K +#define GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE SZ_64K +#define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE SZ_2M +#endif + static void guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log); +struct guc_log_section { + u32 max; + u32 flag; + u32 default_val; + const char *name; +}; + +static s32 scale_log_param(struct intel_guc_log *log, const struct guc_log_section *section, + s32 param) +{ + /* -1 means default */ + if (param < 0) + return section->default_val; + + /* Check for 32-bit overflow */ + if (param >= SZ_4K) { + drm_err(&guc_to_gt(log_to_guc(log))->i915->drm, "Size too large for GuC %s log: %dMB!", + section->name, param); + return section->default_val; + } + + /* Param units are 1MB */ + return param * SZ_1M; +} + +static void _guc_log_init_sizes(struct intel_guc_log *log) +{ + struct intel_guc *guc = log_to_guc(log); + struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + static const struct guc_log_section sections[GUC_LOG_SECTIONS_LIMIT] = { + { + GUC_LOG_CRASH_MASK >> GUC_LOG_CRASH_SHIFT, + GUC_LOG_LOG_ALLOC_UNITS, + GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE, + "crash dump" + }, + { + GUC_LOG_DEBUG_MASK >> GUC_LOG_DEBUG_SHIFT, + GUC_LOG_LOG_ALLOC_UNITS, + GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE, + "debug", + }, + { + GUC_LOG_CAPTURE_MASK >> GUC_LOG_CAPTURE_SHIFT, + GUC_LOG_CAPTURE_ALLOC_UNITS, + GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE, + "capture", + } + }; + s32 params[GUC_LOG_SECTIONS_LIMIT] = { + i915->params.guc_log_size_crash, + i915->params.guc_log_size_debug, + i915->params.guc_log_size_capture, + }; + int i; + + for (i = 0; i < GUC_LOG_SECTIONS_LIMIT; i++) + log->sizes[i].bytes = scale_log_param(log, sections + i, params[i]); + + /* If debug size > 1MB then bump default crash size to keep the same units */ + if (log->sizes[GUC_LOG_SECTIONS_DEBUG].bytes >= SZ_1M && + (i915->params.guc_log_size_crash == -1) && + GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE < SZ_1M) + log->sizes[GUC_LOG_SECTIONS_CRASH].bytes = SZ_1M; + + /* Prepare the GuC API structure fields: */ + for (i = 0; i < GUC_LOG_SECTIONS_LIMIT; i++) { + /* Convert to correct units */ + if ((log->sizes[i].bytes % SZ_1M) == 0) { + log->sizes[i].units = SZ_1M; + log->sizes[i].flag = sections[i].flag; + } else { + log->sizes[i].units = SZ_4K; + log->sizes[i].flag = 0; + } + + if (!IS_ALIGNED(log->sizes[i].bytes, log->sizes[i].units)) + drm_err(&i915->drm, "Mis-aligned GuC log %s size: 0x%X vs 0x%X!", + sections[i].name, log->sizes[i].bytes, log->sizes[i].units); + log->sizes[i].count = log->sizes[i].bytes / log->sizes[i].units; + + if (!log->sizes[i].count) { + drm_err(&i915->drm, "Zero GuC log %s size!", sections[i].name); + } else { + /* Size is +1 unit */ + log->sizes[i].count--; + } + + /* Clip to field size */ + if (log->sizes[i].count > sections[i].max) { + drm_err(&i915->drm, "GuC log %s size too large: %d vs %d!", + sections[i].name, log->sizes[i].count + 1, sections[i].max + 1); + log->sizes[i].count = sections[i].max; + } + } + + if (log->sizes[GUC_LOG_SECTIONS_CRASH].units != log->sizes[GUC_LOG_SECTIONS_DEBUG].units) { + drm_err(&i915->drm, "Unit mis-match for GuC log crash and debug sections: %d vs %d!", + log->sizes[GUC_LOG_SECTIONS_CRASH].units, + log->sizes[GUC_LOG_SECTIONS_DEBUG].units); + log->sizes[GUC_LOG_SECTIONS_CRASH].units = log->sizes[GUC_LOG_SECTIONS_DEBUG].units; + log->sizes[GUC_LOG_SECTIONS_CRASH].count = 0; + } + + log->sizes_initialised = true; +} + +static void guc_log_init_sizes(struct intel_guc_log *log) +{ + if (log->sizes_initialised) + return; + + _guc_log_init_sizes(log); +} + +static u32 intel_guc_log_section_size_crash(struct intel_guc_log *log) +{ + guc_log_init_sizes(log); + + return log->sizes[GUC_LOG_SECTIONS_CRASH].bytes; +} + +static u32 intel_guc_log_section_size_debug(struct intel_guc_log *log) +{ + guc_log_init_sizes(log); + + return log->sizes[GUC_LOG_SECTIONS_DEBUG].bytes; +} + +u32 intel_guc_log_section_size_capture(struct intel_guc_log *log) +{ + guc_log_init_sizes(log); + + return log->sizes[GUC_LOG_SECTIONS_CAPTURE].bytes; +} + static u32 intel_guc_log_size(struct intel_guc_log *log) { /* @@ -38,7 +188,10 @@ static u32 intel_guc_log_size(struct intel_guc_log *log) * | Capture logs | * +===============================+ + CAPTURE_SIZE */ - return PAGE_SIZE + CRASH_BUFFER_SIZE + DEBUG_BUFFER_SIZE + CAPTURE_BUFFER_SIZE; + return PAGE_SIZE + + intel_guc_log_section_size_crash(log) + + intel_guc_log_section_size_debug(log) + + intel_guc_log_section_size_capture(log); } /** @@ -165,7 +318,8 @@ static void guc_move_to_next_buf(struct intel_guc_log *log) smp_wmb(); /* All data has been written, so now move the offset of sub buffer. */ - relay_reserve(log->relay.channel, log->vma->obj->base.size - CAPTURE_BUFFER_SIZE); + relay_reserve(log->relay.channel, log->vma->obj->base.size - + intel_guc_log_section_size_capture(log)); /* Switch to the next sub buffer */ relay_flush(log->relay.channel); @@ -210,15 +364,16 @@ bool intel_guc_check_log_buf_overflow(struct intel_guc_log *log, return overflow; } -unsigned int intel_guc_get_log_buffer_size(enum guc_log_buffer_type type) +unsigned int intel_guc_get_log_buffer_size(struct intel_guc_log *log, + enum guc_log_buffer_type type) { switch (type) { case GUC_DEBUG_LOG_BUFFER: - return DEBUG_BUFFER_SIZE; + return intel_guc_log_section_size_debug(log); case GUC_CRASH_DUMP_LOG_BUFFER: - return CRASH_BUFFER_SIZE; + return intel_guc_log_section_size_crash(log); case GUC_CAPTURE_LOG_BUFFER: - return CAPTURE_BUFFER_SIZE; + return intel_guc_log_section_size_capture(log); default: MISSING_CASE(type); } @@ -226,7 +381,8 @@ unsigned int intel_guc_get_log_buffer_size(enum guc_log_buffer_type type) return 0; } -size_t intel_guc_get_log_buffer_offset(enum guc_log_buffer_type type) +size_t intel_guc_get_log_buffer_offset(struct intel_guc_log *log, + enum guc_log_buffer_type type) { enum guc_log_buffer_type i; size_t offset = PAGE_SIZE;/* for the log_buffer_states */ @@ -234,7 +390,7 @@ size_t intel_guc_get_log_buffer_offset(enum guc_log_buffer_type type) for (i = GUC_DEBUG_LOG_BUFFER; i < GUC_MAX_LOG_BUFFER; ++i) { if (i == type) break; - offset += intel_guc_get_log_buffer_size(i); + offset += intel_guc_get_log_buffer_size(log, i); } return offset; @@ -285,7 +441,7 @@ static void _guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log) */ memcpy(&log_buf_state_local, log_buf_state, sizeof(struct guc_log_buffer_state)); - buffer_size = intel_guc_get_log_buffer_size(type); + buffer_size = intel_guc_get_log_buffer_size(log, type); read_offset = log_buf_state_local.read_ptr; write_offset = log_buf_state_local.sampled_write_ptr; full_cnt = log_buf_state_local.buffer_full_cnt; @@ -400,7 +556,7 @@ static int guc_log_relay_create(struct intel_guc_log *log) * Keep the size of sub buffers same as shared log buffer * but GuC log-events excludes the error-state-capture logs */ - subbuf_size = log->vma->size - CAPTURE_BUFFER_SIZE; + subbuf_size = log->vma->size - intel_guc_log_section_size_capture(log); /* * Store up to 8 snapshots, which is large enough to buffer sufficient diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h index dc9715411d62..02127703be80 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h @@ -15,20 +15,6 @@ struct intel_guc; -#if defined(CONFIG_DRM_I915_DEBUG_GUC) -#define CRASH_BUFFER_SIZE SZ_2M -#define DEBUG_BUFFER_SIZE SZ_16M -#define CAPTURE_BUFFER_SIZE SZ_4M -#elif defined(CONFIG_DRM_I915_DEBUG_GEM) -#define CRASH_BUFFER_SIZE SZ_1M -#define DEBUG_BUFFER_SIZE SZ_2M -#define CAPTURE_BUFFER_SIZE SZ_4M -#else -#define CRASH_BUFFER_SIZE SZ_8K -#define DEBUG_BUFFER_SIZE SZ_64K -#define CAPTURE_BUFFER_SIZE SZ_2M -#endif - /* * While we're using plain log level in i915, GuC controls are much more... * "elaborate"? We have a couple of bits for verbosity, separate bit for actual @@ -46,10 +32,30 @@ struct intel_guc; #define GUC_VERBOSITY_TO_LOG_LEVEL(x) ((x) + 2) #define GUC_LOG_LEVEL_MAX GUC_VERBOSITY_TO_LOG_LEVEL(GUC_LOG_VERBOSITY_MAX) +enum { + GUC_LOG_SECTIONS_CRASH, + GUC_LOG_SECTIONS_DEBUG, + GUC_LOG_SECTIONS_CAPTURE, + GUC_LOG_SECTIONS_LIMIT +}; + struct intel_guc_log { u32 level; + + /* Allocation settings */ + struct { + s32 bytes; /* Size in bytes */ + s32 units; /* GuC API units - 1MB or 4KB */ + s32 count; /* Number of API units */ + u32 flag; /* GuC API units flag */ + } sizes[GUC_LOG_SECTIONS_LIMIT]; + bool sizes_initialised; + + /* Combined buffer allocation */ struct i915_vma *vma; void *buf_addr; + + /* RelayFS support */ struct { bool buf_in_use; bool started; @@ -58,6 +64,7 @@ struct intel_guc_log { struct mutex lock; u32 full_count; } relay; + /* logging related stats */ struct { u32 sampled_overflow; @@ -69,8 +76,9 @@ struct intel_guc_log { void intel_guc_log_init_early(struct intel_guc_log *log); bool intel_guc_check_log_buf_overflow(struct intel_guc_log *log, enum guc_log_buffer_type type, unsigned int full_cnt); -unsigned int intel_guc_get_log_buffer_size(enum guc_log_buffer_type type); -size_t intel_guc_get_log_buffer_offset(enum guc_log_buffer_type type); +unsigned int intel_guc_get_log_buffer_size(struct intel_guc_log *log, + enum guc_log_buffer_type type); +size_t intel_guc_get_log_buffer_offset(struct intel_guc_log *log, enum guc_log_buffer_type type); int intel_guc_log_create(struct intel_guc_log *log); void intel_guc_log_destroy(struct intel_guc_log *log); @@ -92,4 +100,6 @@ void intel_guc_log_info(struct intel_guc_log *log, struct drm_printer *p); int intel_guc_log_dump(struct intel_guc_log *log, struct drm_printer *p, bool dump_load_err); +u32 intel_guc_log_section_size_capture(struct intel_guc_log *log); + #endif diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 6fc475a5db61..06ca5b822111 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -171,6 +171,18 @@ i915_param_named(guc_log_level, int, 0400, "GuC firmware logging level. Requires GuC to be loaded. " "(-1=auto [default], 0=disable, 1..4=enable with verbosity min..max)"); +i915_param_named(guc_log_size_crash, int, 0400, + "GuC firmware logging buffer size for crash dumps (in MB)" + "(-1=auto [default], NB: max = 4, other restrictions apply)"); + +i915_param_named(guc_log_size_debug, int, 0400, + "GuC firmware logging buffer size for debug logs (in MB)" + "(-1=auto [default], NB: max = 16, other restrictions apply)"); + +i915_param_named(guc_log_size_capture, int, 0400, + "GuC error capture register dump buffer size (in MB)" + "(-1=auto [default], NB: max = 4, other restrictions apply)"); + i915_param_named_unsafe(guc_firmware_path, charp, 0400, "GuC firmware path to use instead of the default one"); diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index 2733cb6cfe09..f684d1ab8707 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -61,6 +61,9 @@ struct drm_printer; param(int, invert_brightness, 0, 0600) \ param(int, enable_guc, -1, 0400) \ param(int, guc_log_level, -1, 0400) \ + param(int, guc_log_size_crash, -1, 0400) \ + param(int, guc_log_size_debug, -1, 0400) \ + param(int, guc_log_size_capture, -1, 0400) \ param(char *, guc_firmware_path, NULL, 0400) \ param(char *, huc_firmware_path, NULL, 0400) \ param(char *, dmc_firmware_path, NULL, 0400) \ -- cgit v1.2.3 From b092e4a9d3e3335fdc5aa23a9444eeebfa81da34 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Wed, 27 Jul 2022 19:20:28 -0700 Subject: drm/i915/guc: Reduce spam from error capture Some debug code got left in when the GuC based register save for error capture was added. Remove that. Signed-off-by: John Harrison Reviewed-by: Alan Previn Link: https://patchwork.freedesktop.org/patch/msgid/20220728022028.2190627-8-John.C.Harrison@Intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c | 67 +++++++++++--------------- 1 file changed, 28 insertions(+), 39 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c index d2ac53d4f3b6..8f1165146013 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c @@ -1383,33 +1383,22 @@ guc_capture_reg_to_str(const struct intel_guc *guc, u32 owner, u32 type, return NULL; } -#ifdef CONFIG_DRM_I915_DEBUG_GUC -#define __out(a, ...) \ - do { \ - drm_warn((&(a)->i915->drm), __VA_ARGS__); \ - i915_error_printf((a), __VA_ARGS__); \ - } while (0) -#else -#define __out(a, ...) \ - i915_error_printf(a, __VA_ARGS__) -#endif - #define GCAP_PRINT_INTEL_ENG_INFO(ebuf, eng) \ do { \ - __out(ebuf, " i915-Eng-Name: %s command stream\n", \ - (eng)->name); \ - __out(ebuf, " i915-Eng-Inst-Class: 0x%02x\n", (eng)->class); \ - __out(ebuf, " i915-Eng-Inst-Id: 0x%02x\n", (eng)->instance); \ - __out(ebuf, " i915-Eng-LogicalMask: 0x%08x\n", \ - (eng)->logical_mask); \ + i915_error_printf(ebuf, " i915-Eng-Name: %s command stream\n", \ + (eng)->name); \ + i915_error_printf(ebuf, " i915-Eng-Inst-Class: 0x%02x\n", (eng)->class); \ + i915_error_printf(ebuf, " i915-Eng-Inst-Id: 0x%02x\n", (eng)->instance); \ + i915_error_printf(ebuf, " i915-Eng-LogicalMask: 0x%08x\n", \ + (eng)->logical_mask); \ } while (0) #define GCAP_PRINT_GUC_INST_INFO(ebuf, node) \ do { \ - __out(ebuf, " GuC-Engine-Inst-Id: 0x%08x\n", \ - (node)->eng_inst); \ - __out(ebuf, " GuC-Context-Id: 0x%08x\n", (node)->guc_id); \ - __out(ebuf, " LRCA: 0x%08x\n", (node)->lrca); \ + i915_error_printf(ebuf, " GuC-Engine-Inst-Id: 0x%08x\n", \ + (node)->eng_inst); \ + i915_error_printf(ebuf, " GuC-Context-Id: 0x%08x\n", (node)->guc_id); \ + i915_error_printf(ebuf, " LRCA: 0x%08x\n", (node)->lrca); \ } while (0) int intel_guc_capture_print_engine_node(struct drm_i915_error_state_buf *ebuf, @@ -1441,57 +1430,57 @@ int intel_guc_capture_print_engine_node(struct drm_i915_error_state_buf *ebuf, guc = &ee->engine->gt->uc.guc; - __out(ebuf, "global --- GuC Error Capture on %s command stream:\n", - ee->engine->name); + i915_error_printf(ebuf, "global --- GuC Error Capture on %s command stream:\n", + ee->engine->name); node = ee->guc_capture_node; if (!node) { - __out(ebuf, " No matching ee-node\n"); + i915_error_printf(ebuf, " No matching ee-node\n"); return 0; } - __out(ebuf, "Coverage: %s\n", grptype[node->is_partial]); + i915_error_printf(ebuf, "Coverage: %s\n", grptype[node->is_partial]); for (i = GUC_CAPTURE_LIST_TYPE_GLOBAL; i < GUC_CAPTURE_LIST_TYPE_MAX; ++i) { - __out(ebuf, " RegListType: %s\n", - datatype[i % GUC_CAPTURE_LIST_TYPE_MAX]); - __out(ebuf, " Owner-Id: %d\n", node->reginfo[i].vfid); + i915_error_printf(ebuf, " RegListType: %s\n", + datatype[i % GUC_CAPTURE_LIST_TYPE_MAX]); + i915_error_printf(ebuf, " Owner-Id: %d\n", node->reginfo[i].vfid); switch (i) { case GUC_CAPTURE_LIST_TYPE_GLOBAL: default: break; case GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS: - __out(ebuf, " GuC-Eng-Class: %d\n", node->eng_class); - __out(ebuf, " i915-Eng-Class: %d\n", - guc_class_to_engine_class(node->eng_class)); + i915_error_printf(ebuf, " GuC-Eng-Class: %d\n", node->eng_class); + i915_error_printf(ebuf, " i915-Eng-Class: %d\n", + guc_class_to_engine_class(node->eng_class)); break; case GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE: eng = intel_guc_lookup_engine(guc, node->eng_class, node->eng_inst); if (eng) GCAP_PRINT_INTEL_ENG_INFO(ebuf, eng); else - __out(ebuf, " i915-Eng-Lookup Fail!\n"); + i915_error_printf(ebuf, " i915-Eng-Lookup Fail!\n"); GCAP_PRINT_GUC_INST_INFO(ebuf, node); break; } numregs = node->reginfo[i].num_regs; - __out(ebuf, " NumRegs: %d\n", numregs); + i915_error_printf(ebuf, " NumRegs: %d\n", numregs); j = 0; while (numregs--) { regs = node->reginfo[i].regs; str = guc_capture_reg_to_str(guc, GUC_CAPTURE_LIST_INDEX_PF, i, node->eng_class, 0, regs[j].offset, &is_ext); if (!str) - __out(ebuf, " REG-0x%08x", regs[j].offset); + i915_error_printf(ebuf, " REG-0x%08x", regs[j].offset); else - __out(ebuf, " %s", str); + i915_error_printf(ebuf, " %s", str); if (is_ext) - __out(ebuf, "[%ld][%ld]", - FIELD_GET(GUC_REGSET_STEERING_GROUP, regs[j].flags), - FIELD_GET(GUC_REGSET_STEERING_INSTANCE, regs[j].flags)); - __out(ebuf, ": 0x%08x\n", regs[j].value); + i915_error_printf(ebuf, "[%ld][%ld]", + FIELD_GET(GUC_REGSET_STEERING_GROUP, regs[j].flags), + FIELD_GET(GUC_REGSET_STEERING_INSTANCE, regs[j].flags)); + i915_error_printf(ebuf, ": 0x%08x\n", regs[j].value); ++j; } } -- cgit v1.2.3 From b0f2eb942b8a449432267571d045613e35ada2de Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Fri, 8 Jul 2022 15:41:58 -0700 Subject: drm/i915/guc: skip scrub_ctbs selftest if reset is disabled The test needs GT reset to trigger the scrubbing logic, so we can only run it when reset is enabled. Signed-off-by: Daniele Ceraolo Spurio Cc: John Harrison Cc: Matthew Brost Reviewed-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20220708224158.929327-1-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/i915/gt/uc/selftest_guc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c index 20e0c39259fb..e28518fe8b90 100644 --- a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c @@ -54,6 +54,9 @@ static int intel_guc_scrub_ctbs(void *arg) struct intel_engine_cs *engine; struct intel_context *ce; + if (!intel_has_gpu_reset(gt)) + return 0; + wakeref = intel_runtime_pm_get(gt->uncore->rpm); engine = intel_selftest_find_any_engine(gt); -- cgit v1.2.3 From f922fbb0f2ad1fd3e3186f39c46673419e6d9281 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Thu, 11 Aug 2022 14:08:12 -0700 Subject: drm/i915/guc: clear stalled request after a reset If the GuC CTs are full and we need to stall the request submission while waiting for space, we save the stalled request and where the stall occurred; when the CTs have space again we pick up the request submission from where we left off. If a full GT reset occurs, the state of all contexts is cleared and all non-guilty requests are unsubmitted, therefore we need to restart the stalled request submission from scratch. To make sure that we do so, clear the saved request after a reset. Fixes note: the patch that introduced the bug is in 5.15, but no officially supported platform had GuC submission enabled by default in that kernel, so the backport to that particular version (and only that one) can potentially be skipped. Fixes: 925dc1cf58ed ("drm/i915/guc: Implement GuC submission tasklet") Signed-off-by: Daniele Ceraolo Spurio Cc: Matthew Brost Cc: John Harrison Cc: # v5.15+ Reviewed-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20220811210812.3239621-1-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 0d17da77e787..0d56b615bf78 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -4002,6 +4002,13 @@ static inline void guc_init_lrc_mapping(struct intel_guc *guc) /* make sure all descriptors are clean... */ xa_destroy(&guc->context_lookup); + /* + * A reset might have occurred while we had a pending stalled request, + * so make sure we clean that up. + */ + guc->stalled_request = NULL; + guc->submission_stall_reason = STALL_NONE; + /* * Some contexts might have been pinned before we enabled GuC * submission, so we need to add them to the GuC bookeeping. -- cgit v1.2.3 From 61faec5fa66cbd1afcd5074f168f09529f8119bf Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 16 Aug 2022 19:05:10 -0700 Subject: drm/i915/selftests: Use correct selfest calls for live tests This will help in an upcoming patch where the live selftest wrappers are extended to do more. Signed-off-by: Matthew Brost Signed-off-by: Alan Previn Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20220817020511.2180747-2-alan.previn.teres.alexis@intel.com --- drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c | 2 +- drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c | 2 +- drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 2 +- drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c | 2 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 2 +- drivers/gpu/drm/i915/selftests/i915_perf.c | 2 +- drivers/gpu/drm/i915/selftests/i915_request.c | 2 +- drivers/gpu/drm/i915/selftests/i915_vma.c | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c index 13b088cc787e..a666d7e610f5 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c @@ -434,5 +434,5 @@ int i915_gem_coherency_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_gem_coherency), }; - return i915_subtests(tests, i915); + return i915_live_subtests(tests, i915); } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c index 62c61af77a42..51ed824b020c 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c @@ -476,5 +476,5 @@ int i915_gem_dmabuf_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_dmabuf_import_same_driver_lmem_smem), }; - return i915_subtests(tests, i915); + return i915_live_subtests(tests, i915); } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 3ced9948a331..3cff08f04f6c 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -1844,5 +1844,5 @@ int i915_gem_mman_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_mmap_gpu), }; - return i915_subtests(tests, i915); + return i915_live_subtests(tests, i915); } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c index fe0a890775e2..bdf5bb40ccf1 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c @@ -95,5 +95,5 @@ int i915_gem_object_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_gem_huge), }; - return i915_subtests(tests, i915); + return i915_live_subtests(tests, i915); } diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index ab9f17fc85bc..fb5e61963479 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -2324,5 +2324,5 @@ int i915_gem_gtt_live_selftests(struct drm_i915_private *i915) GEM_BUG_ON(offset_in_page(to_gt(i915)->ggtt->vm.total)); - return i915_subtests(tests, i915); + return i915_live_subtests(tests, i915); } diff --git a/drivers/gpu/drm/i915/selftests/i915_perf.c b/drivers/gpu/drm/i915/selftests/i915_perf.c index 88db2e3d81d0..429c6d73b159 100644 --- a/drivers/gpu/drm/i915/selftests/i915_perf.c +++ b/drivers/gpu/drm/i915/selftests/i915_perf.c @@ -431,7 +431,7 @@ int i915_perf_live_selftests(struct drm_i915_private *i915) if (err) return err; - err = i915_subtests(tests, i915); + err = i915_live_subtests(tests, i915); destroy_empty_config(&i915->perf); diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index ec05f578a698..818a4909c1f3 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -1821,7 +1821,7 @@ int i915_request_live_selftests(struct drm_i915_private *i915) if (intel_gt_is_wedged(to_gt(i915))) return 0; - return i915_subtests(tests, i915); + return i915_live_subtests(tests, i915); } static int switch_to_kernel_sync(struct intel_context *ce, int err) diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index 6921ba128015..e3821398a5b0 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -1103,5 +1103,5 @@ int i915_vma_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_vma_remapped_gtt), }; - return i915_subtests(tests, i915); + return i915_live_subtests(tests, i915); } -- cgit v1.2.3 From 6a079903847cce1dd06345127d2a32f26d2cd9c6 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 16 Aug 2022 19:05:11 -0700 Subject: drm/i915/guc: Add delay to disable scheduling after pin count goes to zero Add a delay, configurable via debugfs (default 34ms), to disable scheduling of a context after the pin count goes to zero. Disable scheduling is a costly operation as it requires synchronizing with the GuC. So the idea is that a delay allows the user to resubmit something before doing this operation. This delay is only done if the context isn't closed and less than a given threshold (default is 3/4) of the guc_ids are in use. As temporary WA disable this feature for the selftests. Selftests are very timing sensitive and any change in timing can cause failure. A follow up patch will fixup the selftests to understand this delay. Alan Previn: Matt Brost first introduced this series back in Oct 2021. However no real world workload with measured performance impact was available to prove the intended results. Today, this series is being republished in response to a real world workload that benefited greatly from it along with measured performance improvement. Workload description: 36 containers were created on a DG2 device where each container was performing a combination of 720p 3d game rendering and 30fps video encoding. The workload density was configured in a way that guaranteed each container to ALWAYS be able to render and encode no less than 30fps with a predefined maximum render + encode latency time. That means the totality of all 36 containers and their workloads were not saturating the engines to their max (in order to maintain just enough headrooom to meet the min fps and max latencies of incoming container submissions). Problem statement: It was observed that the CPU core processing the i915 soft IRQ work was experiencing severe load. Using tracelogs and an instrumentation patch to count specific i915 IRQ events, it was confirmed that the majority of the CPU cycles were caused by the gen11_other_irq_handler() -> guc_irq_handler() code path. The vast majority of the cycles was determined to be processing a specific G2H IRQ: i.e. INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_DONE. These IRQs are sent by GuC in response to i915 KMD sending H2G requests: INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET. Those H2G requests are sent whenever a context goes idle so that we can unpin the context from GuC. The high CPU utilization % symptom was limiting density scaling. Root Cause Analysis: Because the incoming execution buffers were spread across 36 different containers (each with multiple contexts) but the system in totality was NOT saturated to the max, it was assumed that each context was constantly idling between submissions. This was causing a thrashing of unpinning contexts from GuC at one moment, followed quickly by repinning them due to incoming workload the very next moment. These event-pairs were being triggered across multiple contexts per container, across all containers at the rate of > 30 times per sec per context. Metrics: When running this workload without this patch, we measured an average of ~69K INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_DONE events every 10 seconds or ~10 million times over ~25+ mins. With this patch, the count reduced to ~480 every 10 seconds or about ~28K over ~10 mins. The improvement observed is ~99% for the average counts per 10 seconds. Signed-off-by: Matthew Brost Signed-off-by: Alan Previn Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20220817020511.2180747-3-alan.previn.teres.alexis@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 2 +- drivers/gpu/drm/i915/gt/intel_context.h | 8 ++ drivers/gpu/drm/i915/gt/intel_context_types.h | 7 + drivers/gpu/drm/i915/gt/uc/intel_guc.h | 17 ++- drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c | 60 +++++++++ drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 154 ++++++++++++++++++---- drivers/gpu/drm/i915/i915_selftest.h | 2 + 7 files changed, 223 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index dabdfe09f5e5..df7fd1b019ec 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1454,7 +1454,7 @@ static void engines_idle_release(struct i915_gem_context *ctx, int err; /* serialises with execbuf */ - set_bit(CONTEXT_CLOSED_BIT, &ce->flags); + intel_context_close(ce); if (!intel_context_pin_if_active(ce)) continue; diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index 8e2d70630c49..f96420f0b5bb 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -276,6 +276,14 @@ static inline bool intel_context_is_barrier(const struct intel_context *ce) return test_bit(CONTEXT_BARRIER_BIT, &ce->flags); } +static inline void intel_context_close(struct intel_context *ce) +{ + set_bit(CONTEXT_CLOSED_BIT, &ce->flags); + + if (ce->ops->close) + ce->ops->close(ce); +} + static inline bool intel_context_is_closed(const struct intel_context *ce) { return test_bit(CONTEXT_CLOSED_BIT, &ce->flags); diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 04eacae1aca5..86ac84e2edb9 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -43,6 +43,8 @@ struct intel_context_ops { void (*revoke)(struct intel_context *ce, struct i915_request *rq, unsigned int preempt_timeout_ms); + void (*close)(struct intel_context *ce); + int (*pre_pin)(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void **vaddr); int (*pin)(struct intel_context *ce, void *vaddr); void (*unpin)(struct intel_context *ce); @@ -208,6 +210,11 @@ struct intel_context { * each priority bucket */ u32 prio_count[GUC_CLIENT_PRIORITY_NUM]; + /** + * @sched_disable_delay: worker to disable scheduling on this + * context + */ + struct delayed_work sched_disable_delay; } guc_state; struct { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 804133df1ac9..944b549b8797 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -112,6 +112,10 @@ struct intel_guc { * refs */ struct list_head guc_id_list; + /** + * @guc_ids_in_use: Number single-lrc guc_ids in use + */ + u16 guc_ids_in_use; /** * @destroyed_contexts: list of contexts waiting to be destroyed * (deregistered with the GuC) @@ -132,6 +136,16 @@ struct intel_guc { * @reset_fail_mask: mask of engines that failed to reset */ intel_engine_mask_t reset_fail_mask; + /** + * @sched_disable_delay_ms: schedule disable delay, in ms, for + * contexts + */ + u64 sched_disable_delay_ms; + /** + * @sched_disable_gucid_threshold: threshold of min remaining available + * guc_ids before we start bypassing the schedule disable delay + */ + int sched_disable_gucid_threshold; } submission_state; /** @@ -461,9 +475,10 @@ void intel_guc_submission_reset_finish(struct intel_guc *guc); void intel_guc_submission_cancel_requests(struct intel_guc *guc); void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p); +void intel_guc_dump_time_info(struct intel_guc *guc, struct drm_printer *p); void intel_guc_write_barrier(struct intel_guc *guc); -void intel_guc_dump_time_info(struct intel_guc *guc, struct drm_printer *p); +int intel_guc_sched_disable_gucid_threshold_max(struct intel_guc *guc); #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c index 25f09a420561..c91b150bb7ac 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c @@ -71,12 +71,72 @@ static bool intel_eval_slpc_support(void *data) return intel_guc_slpc_is_used(guc); } +static int guc_sched_disable_delay_ms_get(void *data, u64 *val) +{ + struct intel_guc *guc = data; + + if (!intel_guc_submission_is_used(guc)) + return -ENODEV; + + *val = guc->submission_state.sched_disable_delay_ms; + + return 0; +} + +static int guc_sched_disable_delay_ms_set(void *data, u64 val) +{ + struct intel_guc *guc = data; + + if (!intel_guc_submission_is_used(guc)) + return -ENODEV; + + guc->submission_state.sched_disable_delay_ms = val; + + return 0; +} +DEFINE_SIMPLE_ATTRIBUTE(guc_sched_disable_delay_ms_fops, + guc_sched_disable_delay_ms_get, + guc_sched_disable_delay_ms_set, "%lld\n"); + +static int guc_sched_disable_gucid_threshold_get(void *data, u64 *val) +{ + struct intel_guc *guc = data; + + if (!intel_guc_submission_is_used(guc)) + return -ENODEV; + + *val = guc->submission_state.sched_disable_gucid_threshold; + return 0; +} + +static int guc_sched_disable_gucid_threshold_set(void *data, u64 val) +{ + struct intel_guc *guc = data; + + if (!intel_guc_submission_is_used(guc)) + return -ENODEV; + + if (val > intel_guc_sched_disable_gucid_threshold_max(guc)) + guc->submission_state.sched_disable_gucid_threshold = + intel_guc_sched_disable_gucid_threshold_max(guc); + else + guc->submission_state.sched_disable_gucid_threshold = val; + + return 0; +} +DEFINE_SIMPLE_ATTRIBUTE(guc_sched_disable_gucid_threshold_fops, + guc_sched_disable_gucid_threshold_get, + guc_sched_disable_gucid_threshold_set, "%lld\n"); + void intel_guc_debugfs_register(struct intel_guc *guc, struct dentry *root) { static const struct intel_gt_debugfs_file files[] = { { "guc_info", &guc_info_fops, NULL }, { "guc_registered_contexts", &guc_registered_contexts_fops, NULL }, { "guc_slpc_info", &guc_slpc_info_fops, &intel_eval_slpc_support}, + { "guc_sched_disable_delay_ms", &guc_sched_disable_delay_ms_fops, NULL }, + { "guc_sched_disable_gucid_threshold", &guc_sched_disable_gucid_threshold_fops, + NULL }, }; if (!intel_guc_is_supported(guc)) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 0d56b615bf78..a0cebb4590e9 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -65,7 +65,13 @@ * corresponding G2H returns indicating the scheduling disable operation has * completed it is safe to unpin the context. While a disable is in flight it * isn't safe to resubmit the context so a fence is used to stall all future - * requests of that context until the G2H is returned. + * requests of that context until the G2H is returned. Because this interaction + * with the GuC takes a non-zero amount of time we delay the disabling of + * scheduling after the pin count goes to zero by a configurable period of time + * (see SCHED_DISABLE_DELAY_MS). The thought is this gives the user a window of + * time to resubmit something on the context before doing this costly operation. + * This delay is only done if the context isn't closed and the guc_id usage is + * less than a threshold (see NUM_SCHED_DISABLE_GUC_IDS_THRESHOLD). * * Context deregistration: * Before a context can be destroyed or if we steal its guc_id we must @@ -1989,6 +1995,9 @@ static int new_guc_id(struct intel_guc *guc, struct intel_context *ce) if (unlikely(ret < 0)) return ret; + if (!intel_context_is_parent(ce)) + ++guc->submission_state.guc_ids_in_use; + ce->guc_id.id = ret; return 0; } @@ -1998,14 +2007,16 @@ static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce) GEM_BUG_ON(intel_context_is_child(ce)); if (!context_guc_id_invalid(ce)) { - if (intel_context_is_parent(ce)) + if (intel_context_is_parent(ce)) { bitmap_release_region(guc->submission_state.guc_ids_bitmap, ce->guc_id.id, order_base_2(ce->parallel.number_children + 1)); - else + } else { + --guc->submission_state.guc_ids_in_use; ida_simple_remove(&guc->submission_state.guc_ids, ce->guc_id.id); + } clr_ctx_id_mapping(guc, ce->guc_id.id); set_context_guc_id_invalid(ce); } @@ -2993,41 +3004,98 @@ guc_context_revoke(struct intel_context *ce, struct i915_request *rq, } } -static void guc_context_sched_disable(struct intel_context *ce) +static void guc_context_sched_disable(struct intel_context *ce); + +static void do_sched_disable(struct intel_guc *guc, struct intel_context *ce, + unsigned long flags) + __releases(ce->guc_state.lock) { - struct intel_guc *guc = ce_to_guc(ce); - unsigned long flags; struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm; intel_wakeref_t wakeref; - u16 guc_id; + lockdep_assert_held(&ce->guc_state.lock); + + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + + with_intel_runtime_pm(runtime_pm, wakeref) + guc_context_sched_disable(ce); +} + +static bool bypass_sched_disable(struct intel_guc *guc, + struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); GEM_BUG_ON(intel_context_is_child(ce)); + if (submission_disabled(guc) || context_guc_id_invalid(ce) || + !ctx_id_mapped(guc, ce->guc_id.id)) { + clr_context_enabled(ce); + return true; + } + + return !context_enabled(ce); +} + +static void __delay_sched_disable(struct work_struct *wrk) +{ + struct intel_context *ce = + container_of(wrk, typeof(*ce), guc_state.sched_disable_delay.work); + struct intel_guc *guc = ce_to_guc(ce); + unsigned long flags; + spin_lock_irqsave(&ce->guc_state.lock, flags); - /* - * We have to check if the context has been disabled by another thread, - * check if submssion has been disabled to seal a race with reset and - * finally check if any more requests have been committed to the - * context ensursing that a request doesn't slip through the - * 'context_pending_disable' fence. - */ - if (unlikely(!context_enabled(ce) || submission_disabled(guc) || - context_has_committed_requests(ce))) { - clr_context_enabled(ce); + if (bypass_sched_disable(guc, ce)) { spin_unlock_irqrestore(&ce->guc_state.lock, flags); - goto unpin; + intel_context_sched_disable_unpin(ce); + } else { + do_sched_disable(guc, ce, flags); } - guc_id = prep_context_pending_disable(ce); +} - spin_unlock_irqrestore(&ce->guc_state.lock, flags); +static bool guc_id_pressure(struct intel_guc *guc, struct intel_context *ce) +{ + /* + * parent contexts are perma-pinned, if we are unpinning do schedule + * disable immediately. + */ + if (intel_context_is_parent(ce)) + return true; - with_intel_runtime_pm(runtime_pm, wakeref) - __guc_context_sched_disable(guc, ce, guc_id); + /* + * If we are beyond the threshold for avail guc_ids, do schedule disable immediately. + */ + return guc->submission_state.guc_ids_in_use > + guc->submission_state.sched_disable_gucid_threshold; +} + +static void guc_context_sched_disable(struct intel_context *ce) +{ + struct intel_guc *guc = ce_to_guc(ce); + u64 delay = guc->submission_state.sched_disable_delay_ms; + unsigned long flags; + + spin_lock_irqsave(&ce->guc_state.lock, flags); + + if (bypass_sched_disable(guc, ce)) { + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + intel_context_sched_disable_unpin(ce); + } else if (!intel_context_is_closed(ce) && !guc_id_pressure(guc, ce) && + delay) { + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + mod_delayed_work(system_unbound_wq, + &ce->guc_state.sched_disable_delay, + msecs_to_jiffies(delay)); + } else { + do_sched_disable(guc, ce, flags); + } +} - return; -unpin: - intel_context_sched_disable_unpin(ce); +static void guc_context_close(struct intel_context *ce) +{ + if (test_bit(CONTEXT_GUC_INIT, &ce->flags) && + cancel_delayed_work(&ce->guc_state.sched_disable_delay)) + __delay_sched_disable(&ce->guc_state.sched_disable_delay.work); } static inline void guc_lrc_desc_unpin(struct intel_context *ce) @@ -3346,6 +3414,8 @@ static void remove_from_context(struct i915_request *rq) static const struct intel_context_ops guc_context_ops = { .alloc = guc_context_alloc, + .close = guc_context_close, + .pre_pin = guc_context_pre_pin, .pin = guc_context_pin, .unpin = guc_context_unpin, @@ -3428,6 +3498,10 @@ static void guc_context_init(struct intel_context *ce) rcu_read_unlock(); ce->guc_state.prio = map_i915_prio_to_guc_prio(prio); + + INIT_DELAYED_WORK(&ce->guc_state.sched_disable_delay, + __delay_sched_disable); + set_bit(CONTEXT_GUC_INIT, &ce->flags); } @@ -3465,6 +3539,9 @@ static int guc_request_alloc(struct i915_request *rq) if (unlikely(!test_bit(CONTEXT_GUC_INIT, &ce->flags))) guc_context_init(ce); + if (cancel_delayed_work(&ce->guc_state.sched_disable_delay)) + intel_context_sched_disable_unpin(ce); + /* * Call pin_guc_id here rather than in the pinning step as with * dma_resv, contexts can be repeatedly pinned / unpinned trashing the @@ -3595,6 +3672,8 @@ static int guc_virtual_context_alloc(struct intel_context *ce) static const struct intel_context_ops virtual_guc_context_ops = { .alloc = guc_virtual_context_alloc, + .close = guc_context_close, + .pre_pin = guc_virtual_context_pre_pin, .pin = guc_virtual_context_pin, .unpin = guc_virtual_context_unpin, @@ -3684,6 +3763,8 @@ static void guc_child_context_destroy(struct kref *kref) static const struct intel_context_ops virtual_parent_context_ops = { .alloc = guc_virtual_context_alloc, + .close = guc_context_close, + .pre_pin = guc_context_pre_pin, .pin = guc_parent_context_pin, .unpin = guc_parent_context_unpin, @@ -4214,6 +4295,26 @@ static bool __guc_submission_selected(struct intel_guc *guc) return i915->params.enable_guc & ENABLE_GUC_SUBMISSION; } +int intel_guc_sched_disable_gucid_threshold_max(struct intel_guc *guc) +{ + return guc->submission_state.num_guc_ids - NUMBER_MULTI_LRC_GUC_ID(guc); +} + +/* + * This default value of 33 milisecs (+1 milisec round up) ensures 30fps or higher + * workloads are able to enjoy the latency reduction when delaying the schedule-disable + * operation. This matches the 30fps game-render + encode (real world) workload this + * knob was tested against. + */ +#define SCHED_DISABLE_DELAY_MS 34 + +/* + * A threshold of 75% is a reasonable starting point considering that real world apps + * generally don't get anywhere near this. + */ +#define NUM_SCHED_DISABLE_GUCIDS_DEFAULT_THRESHOLD(__guc) \ + (((intel_guc_sched_disable_gucid_threshold_max(guc)) * 3) / 4) + void intel_guc_submission_init_early(struct intel_guc *guc) { xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ); @@ -4230,7 +4331,10 @@ void intel_guc_submission_init_early(struct intel_guc *guc) spin_lock_init(&guc->timestamp.lock); INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping); + guc->submission_state.sched_disable_delay_ms = SCHED_DISABLE_DELAY_MS; guc->submission_state.num_guc_ids = GUC_MAX_CONTEXT_ID; + guc->submission_state.sched_disable_gucid_threshold = + NUM_SCHED_DISABLE_GUCIDS_DEFAULT_THRESHOLD(guc); guc->submission_supported = __guc_submission_supported(guc); guc->submission_selected = __guc_submission_selected(guc); } diff --git a/drivers/gpu/drm/i915/i915_selftest.h b/drivers/gpu/drm/i915/i915_selftest.h index f54de0499be7..bdf3e22c0a34 100644 --- a/drivers/gpu/drm/i915/i915_selftest.h +++ b/drivers/gpu/drm/i915/i915_selftest.h @@ -92,12 +92,14 @@ int __i915_subtests(const char *caller, T, ARRAY_SIZE(T), data) #define i915_live_subtests(T, data) ({ \ typecheck(struct drm_i915_private *, data); \ + (data)->gt[0]->uc.guc.submission_state.sched_disable_delay_ms = 0; \ __i915_subtests(__func__, \ __i915_live_setup, __i915_live_teardown, \ T, ARRAY_SIZE(T), data); \ }) #define intel_gt_live_subtests(T, data) ({ \ typecheck(struct intel_gt *, data); \ + (data)->uc.guc.submission_state.sched_disable_delay_ms = 0; \ __i915_subtests(__func__, \ __intel_gt_live_setup, __intel_gt_live_teardown, \ T, ARRAY_SIZE(T), data); \ -- cgit v1.2.3 From 54c204c522fd2a887b52c7672b9238903ba59a8b Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 19 Aug 2022 13:39:04 +0100 Subject: Revert "drm/i915/guc: Add delay to disable scheduling after pin count goes to zero" This reverts commit 6a079903847cce1dd06345127d2a32f26d2cd9c6. Everything in CI using GuC is now timing out[1], and killing the machine with this change (perhaps a deadlock?). CI was recently on fire due to some changes coming in from -rc1, so likely the pre-merge CI results for this series were invalid? For now just revert, unless GuC experts already have a fix in mind. [1] https://intel-gfx-ci.01.org/tree/drm-tip/index.html? Signed-off-by: Matthew Auld Cc: Matthew Brost Cc: Alan Previn Cc: John Harrison Reviewed-by: John Harrison Signed-off-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20220819123904.913750-1-matthew.auld@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 2 +- drivers/gpu/drm/i915/gt/intel_context.h | 8 -- drivers/gpu/drm/i915/gt/intel_context_types.h | 7 - drivers/gpu/drm/i915/gt/uc/intel_guc.h | 17 +-- drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c | 60 --------- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 154 ++++------------------ drivers/gpu/drm/i915/i915_selftest.h | 2 - 7 files changed, 27 insertions(+), 223 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index df7fd1b019ec..dabdfe09f5e5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1454,7 +1454,7 @@ static void engines_idle_release(struct i915_gem_context *ctx, int err; /* serialises with execbuf */ - intel_context_close(ce); + set_bit(CONTEXT_CLOSED_BIT, &ce->flags); if (!intel_context_pin_if_active(ce)) continue; diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index f96420f0b5bb..8e2d70630c49 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -276,14 +276,6 @@ static inline bool intel_context_is_barrier(const struct intel_context *ce) return test_bit(CONTEXT_BARRIER_BIT, &ce->flags); } -static inline void intel_context_close(struct intel_context *ce) -{ - set_bit(CONTEXT_CLOSED_BIT, &ce->flags); - - if (ce->ops->close) - ce->ops->close(ce); -} - static inline bool intel_context_is_closed(const struct intel_context *ce) { return test_bit(CONTEXT_CLOSED_BIT, &ce->flags); diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 86ac84e2edb9..04eacae1aca5 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -43,8 +43,6 @@ struct intel_context_ops { void (*revoke)(struct intel_context *ce, struct i915_request *rq, unsigned int preempt_timeout_ms); - void (*close)(struct intel_context *ce); - int (*pre_pin)(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void **vaddr); int (*pin)(struct intel_context *ce, void *vaddr); void (*unpin)(struct intel_context *ce); @@ -210,11 +208,6 @@ struct intel_context { * each priority bucket */ u32 prio_count[GUC_CLIENT_PRIORITY_NUM]; - /** - * @sched_disable_delay: worker to disable scheduling on this - * context - */ - struct delayed_work sched_disable_delay; } guc_state; struct { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 944b549b8797..804133df1ac9 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -112,10 +112,6 @@ struct intel_guc { * refs */ struct list_head guc_id_list; - /** - * @guc_ids_in_use: Number single-lrc guc_ids in use - */ - u16 guc_ids_in_use; /** * @destroyed_contexts: list of contexts waiting to be destroyed * (deregistered with the GuC) @@ -136,16 +132,6 @@ struct intel_guc { * @reset_fail_mask: mask of engines that failed to reset */ intel_engine_mask_t reset_fail_mask; - /** - * @sched_disable_delay_ms: schedule disable delay, in ms, for - * contexts - */ - u64 sched_disable_delay_ms; - /** - * @sched_disable_gucid_threshold: threshold of min remaining available - * guc_ids before we start bypassing the schedule disable delay - */ - int sched_disable_gucid_threshold; } submission_state; /** @@ -475,10 +461,9 @@ void intel_guc_submission_reset_finish(struct intel_guc *guc); void intel_guc_submission_cancel_requests(struct intel_guc *guc); void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p); -void intel_guc_dump_time_info(struct intel_guc *guc, struct drm_printer *p); void intel_guc_write_barrier(struct intel_guc *guc); -int intel_guc_sched_disable_gucid_threshold_max(struct intel_guc *guc); +void intel_guc_dump_time_info(struct intel_guc *guc, struct drm_printer *p); #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c index c91b150bb7ac..25f09a420561 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c @@ -71,72 +71,12 @@ static bool intel_eval_slpc_support(void *data) return intel_guc_slpc_is_used(guc); } -static int guc_sched_disable_delay_ms_get(void *data, u64 *val) -{ - struct intel_guc *guc = data; - - if (!intel_guc_submission_is_used(guc)) - return -ENODEV; - - *val = guc->submission_state.sched_disable_delay_ms; - - return 0; -} - -static int guc_sched_disable_delay_ms_set(void *data, u64 val) -{ - struct intel_guc *guc = data; - - if (!intel_guc_submission_is_used(guc)) - return -ENODEV; - - guc->submission_state.sched_disable_delay_ms = val; - - return 0; -} -DEFINE_SIMPLE_ATTRIBUTE(guc_sched_disable_delay_ms_fops, - guc_sched_disable_delay_ms_get, - guc_sched_disable_delay_ms_set, "%lld\n"); - -static int guc_sched_disable_gucid_threshold_get(void *data, u64 *val) -{ - struct intel_guc *guc = data; - - if (!intel_guc_submission_is_used(guc)) - return -ENODEV; - - *val = guc->submission_state.sched_disable_gucid_threshold; - return 0; -} - -static int guc_sched_disable_gucid_threshold_set(void *data, u64 val) -{ - struct intel_guc *guc = data; - - if (!intel_guc_submission_is_used(guc)) - return -ENODEV; - - if (val > intel_guc_sched_disable_gucid_threshold_max(guc)) - guc->submission_state.sched_disable_gucid_threshold = - intel_guc_sched_disable_gucid_threshold_max(guc); - else - guc->submission_state.sched_disable_gucid_threshold = val; - - return 0; -} -DEFINE_SIMPLE_ATTRIBUTE(guc_sched_disable_gucid_threshold_fops, - guc_sched_disable_gucid_threshold_get, - guc_sched_disable_gucid_threshold_set, "%lld\n"); - void intel_guc_debugfs_register(struct intel_guc *guc, struct dentry *root) { static const struct intel_gt_debugfs_file files[] = { { "guc_info", &guc_info_fops, NULL }, { "guc_registered_contexts", &guc_registered_contexts_fops, NULL }, { "guc_slpc_info", &guc_slpc_info_fops, &intel_eval_slpc_support}, - { "guc_sched_disable_delay_ms", &guc_sched_disable_delay_ms_fops, NULL }, - { "guc_sched_disable_gucid_threshold", &guc_sched_disable_gucid_threshold_fops, - NULL }, }; if (!intel_guc_is_supported(guc)) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index a0cebb4590e9..0d56b615bf78 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -65,13 +65,7 @@ * corresponding G2H returns indicating the scheduling disable operation has * completed it is safe to unpin the context. While a disable is in flight it * isn't safe to resubmit the context so a fence is used to stall all future - * requests of that context until the G2H is returned. Because this interaction - * with the GuC takes a non-zero amount of time we delay the disabling of - * scheduling after the pin count goes to zero by a configurable period of time - * (see SCHED_DISABLE_DELAY_MS). The thought is this gives the user a window of - * time to resubmit something on the context before doing this costly operation. - * This delay is only done if the context isn't closed and the guc_id usage is - * less than a threshold (see NUM_SCHED_DISABLE_GUC_IDS_THRESHOLD). + * requests of that context until the G2H is returned. * * Context deregistration: * Before a context can be destroyed or if we steal its guc_id we must @@ -1995,9 +1989,6 @@ static int new_guc_id(struct intel_guc *guc, struct intel_context *ce) if (unlikely(ret < 0)) return ret; - if (!intel_context_is_parent(ce)) - ++guc->submission_state.guc_ids_in_use; - ce->guc_id.id = ret; return 0; } @@ -2007,16 +1998,14 @@ static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce) GEM_BUG_ON(intel_context_is_child(ce)); if (!context_guc_id_invalid(ce)) { - if (intel_context_is_parent(ce)) { + if (intel_context_is_parent(ce)) bitmap_release_region(guc->submission_state.guc_ids_bitmap, ce->guc_id.id, order_base_2(ce->parallel.number_children + 1)); - } else { - --guc->submission_state.guc_ids_in_use; + else ida_simple_remove(&guc->submission_state.guc_ids, ce->guc_id.id); - } clr_ctx_id_mapping(guc, ce->guc_id.id); set_context_guc_id_invalid(ce); } @@ -3004,98 +2993,41 @@ guc_context_revoke(struct intel_context *ce, struct i915_request *rq, } } -static void guc_context_sched_disable(struct intel_context *ce); - -static void do_sched_disable(struct intel_guc *guc, struct intel_context *ce, - unsigned long flags) - __releases(ce->guc_state.lock) +static void guc_context_sched_disable(struct intel_context *ce) { + struct intel_guc *guc = ce_to_guc(ce); + unsigned long flags; struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm; intel_wakeref_t wakeref; + u16 guc_id; - lockdep_assert_held(&ce->guc_state.lock); - - spin_unlock_irqrestore(&ce->guc_state.lock, flags); - - with_intel_runtime_pm(runtime_pm, wakeref) - guc_context_sched_disable(ce); -} - -static bool bypass_sched_disable(struct intel_guc *guc, - struct intel_context *ce) -{ - lockdep_assert_held(&ce->guc_state.lock); GEM_BUG_ON(intel_context_is_child(ce)); - if (submission_disabled(guc) || context_guc_id_invalid(ce) || - !ctx_id_mapped(guc, ce->guc_id.id)) { - clr_context_enabled(ce); - return true; - } - - return !context_enabled(ce); -} - -static void __delay_sched_disable(struct work_struct *wrk) -{ - struct intel_context *ce = - container_of(wrk, typeof(*ce), guc_state.sched_disable_delay.work); - struct intel_guc *guc = ce_to_guc(ce); - unsigned long flags; - spin_lock_irqsave(&ce->guc_state.lock, flags); - if (bypass_sched_disable(guc, ce)) { - spin_unlock_irqrestore(&ce->guc_state.lock, flags); - intel_context_sched_disable_unpin(ce); - } else { - do_sched_disable(guc, ce, flags); - } -} - -static bool guc_id_pressure(struct intel_guc *guc, struct intel_context *ce) -{ - /* - * parent contexts are perma-pinned, if we are unpinning do schedule - * disable immediately. - */ - if (intel_context_is_parent(ce)) - return true; - /* - * If we are beyond the threshold for avail guc_ids, do schedule disable immediately. + * We have to check if the context has been disabled by another thread, + * check if submssion has been disabled to seal a race with reset and + * finally check if any more requests have been committed to the + * context ensursing that a request doesn't slip through the + * 'context_pending_disable' fence. */ - return guc->submission_state.guc_ids_in_use > - guc->submission_state.sched_disable_gucid_threshold; -} - -static void guc_context_sched_disable(struct intel_context *ce) -{ - struct intel_guc *guc = ce_to_guc(ce); - u64 delay = guc->submission_state.sched_disable_delay_ms; - unsigned long flags; - - spin_lock_irqsave(&ce->guc_state.lock, flags); - - if (bypass_sched_disable(guc, ce)) { - spin_unlock_irqrestore(&ce->guc_state.lock, flags); - intel_context_sched_disable_unpin(ce); - } else if (!intel_context_is_closed(ce) && !guc_id_pressure(guc, ce) && - delay) { + if (unlikely(!context_enabled(ce) || submission_disabled(guc) || + context_has_committed_requests(ce))) { + clr_context_enabled(ce); spin_unlock_irqrestore(&ce->guc_state.lock, flags); - mod_delayed_work(system_unbound_wq, - &ce->guc_state.sched_disable_delay, - msecs_to_jiffies(delay)); - } else { - do_sched_disable(guc, ce, flags); + goto unpin; } -} + guc_id = prep_context_pending_disable(ce); -static void guc_context_close(struct intel_context *ce) -{ - if (test_bit(CONTEXT_GUC_INIT, &ce->flags) && - cancel_delayed_work(&ce->guc_state.sched_disable_delay)) - __delay_sched_disable(&ce->guc_state.sched_disable_delay.work); + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + + with_intel_runtime_pm(runtime_pm, wakeref) + __guc_context_sched_disable(guc, ce, guc_id); + + return; +unpin: + intel_context_sched_disable_unpin(ce); } static inline void guc_lrc_desc_unpin(struct intel_context *ce) @@ -3414,8 +3346,6 @@ static void remove_from_context(struct i915_request *rq) static const struct intel_context_ops guc_context_ops = { .alloc = guc_context_alloc, - .close = guc_context_close, - .pre_pin = guc_context_pre_pin, .pin = guc_context_pin, .unpin = guc_context_unpin, @@ -3498,10 +3428,6 @@ static void guc_context_init(struct intel_context *ce) rcu_read_unlock(); ce->guc_state.prio = map_i915_prio_to_guc_prio(prio); - - INIT_DELAYED_WORK(&ce->guc_state.sched_disable_delay, - __delay_sched_disable); - set_bit(CONTEXT_GUC_INIT, &ce->flags); } @@ -3539,9 +3465,6 @@ static int guc_request_alloc(struct i915_request *rq) if (unlikely(!test_bit(CONTEXT_GUC_INIT, &ce->flags))) guc_context_init(ce); - if (cancel_delayed_work(&ce->guc_state.sched_disable_delay)) - intel_context_sched_disable_unpin(ce); - /* * Call pin_guc_id here rather than in the pinning step as with * dma_resv, contexts can be repeatedly pinned / unpinned trashing the @@ -3672,8 +3595,6 @@ static int guc_virtual_context_alloc(struct intel_context *ce) static const struct intel_context_ops virtual_guc_context_ops = { .alloc = guc_virtual_context_alloc, - .close = guc_context_close, - .pre_pin = guc_virtual_context_pre_pin, .pin = guc_virtual_context_pin, .unpin = guc_virtual_context_unpin, @@ -3763,8 +3684,6 @@ static void guc_child_context_destroy(struct kref *kref) static const struct intel_context_ops virtual_parent_context_ops = { .alloc = guc_virtual_context_alloc, - .close = guc_context_close, - .pre_pin = guc_context_pre_pin, .pin = guc_parent_context_pin, .unpin = guc_parent_context_unpin, @@ -4295,26 +4214,6 @@ static bool __guc_submission_selected(struct intel_guc *guc) return i915->params.enable_guc & ENABLE_GUC_SUBMISSION; } -int intel_guc_sched_disable_gucid_threshold_max(struct intel_guc *guc) -{ - return guc->submission_state.num_guc_ids - NUMBER_MULTI_LRC_GUC_ID(guc); -} - -/* - * This default value of 33 milisecs (+1 milisec round up) ensures 30fps or higher - * workloads are able to enjoy the latency reduction when delaying the schedule-disable - * operation. This matches the 30fps game-render + encode (real world) workload this - * knob was tested against. - */ -#define SCHED_DISABLE_DELAY_MS 34 - -/* - * A threshold of 75% is a reasonable starting point considering that real world apps - * generally don't get anywhere near this. - */ -#define NUM_SCHED_DISABLE_GUCIDS_DEFAULT_THRESHOLD(__guc) \ - (((intel_guc_sched_disable_gucid_threshold_max(guc)) * 3) / 4) - void intel_guc_submission_init_early(struct intel_guc *guc) { xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ); @@ -4331,10 +4230,7 @@ void intel_guc_submission_init_early(struct intel_guc *guc) spin_lock_init(&guc->timestamp.lock); INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping); - guc->submission_state.sched_disable_delay_ms = SCHED_DISABLE_DELAY_MS; guc->submission_state.num_guc_ids = GUC_MAX_CONTEXT_ID; - guc->submission_state.sched_disable_gucid_threshold = - NUM_SCHED_DISABLE_GUCIDS_DEFAULT_THRESHOLD(guc); guc->submission_supported = __guc_submission_supported(guc); guc->submission_selected = __guc_submission_selected(guc); } diff --git a/drivers/gpu/drm/i915/i915_selftest.h b/drivers/gpu/drm/i915/i915_selftest.h index bdf3e22c0a34..f54de0499be7 100644 --- a/drivers/gpu/drm/i915/i915_selftest.h +++ b/drivers/gpu/drm/i915/i915_selftest.h @@ -92,14 +92,12 @@ int __i915_subtests(const char *caller, T, ARRAY_SIZE(T), data) #define i915_live_subtests(T, data) ({ \ typecheck(struct drm_i915_private *, data); \ - (data)->gt[0]->uc.guc.submission_state.sched_disable_delay_ms = 0; \ __i915_subtests(__func__, \ __i915_live_setup, __i915_live_teardown, \ T, ARRAY_SIZE(T), data); \ }) #define intel_gt_live_subtests(T, data) ({ \ typecheck(struct intel_gt *, data); \ - (data)->uc.guc.submission_state.sched_disable_delay_ms = 0; \ __i915_subtests(__func__, \ __intel_gt_live_setup, __intel_gt_live_teardown, \ T, ARRAY_SIZE(T), data); \ -- cgit v1.2.3 From f0c70d41e4e8341651db7b75374bbff0b14dd310 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 19 Aug 2022 15:02:34 +0300 Subject: drm/i915/guc: remove runtime info printing from time stamp logging Commit 368d179adbac ("drm/i915/guc: Add GuC <-> kernel time stamp translation information") added intel_device_info_print_runtime() in the time info dump for no obvious reason or explanation in the commit message. It only logs the rawclk freq. Remove it. Cc: John Harrison Cc: Alan Previn Signed-off-by: Jani Nikula Reviewed-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/b395ac4c909042f5daabf29959d8733993545aa2.1660910433.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 01f2705cb94a..24451d000a6a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -365,8 +365,6 @@ void intel_guc_dump_time_info(struct intel_guc *guc, struct drm_printer *p) u32 stamp = 0; u64 ktime; - intel_device_info_print_runtime(RUNTIME_INFO(gt->i915), p); - with_intel_runtime_pm(>->i915->runtime_pm, wakeref) stamp = intel_uncore_read(gt->uncore, GUCPMTIMESTAMP); ktime = ktime_get_boottime_ns(); -- cgit v1.2.3 From 95ccf312a1e4f5a1150dd1a0a2d81c1043e33fb6 Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Fri, 19 Aug 2022 18:08:32 -0700 Subject: drm/i915/guc/slpc: Allow SLPC to use efficient frequency Host Turbo operates at efficient frequency when GT is not idle unless the user or workload has forced it to a higher level. Replicate the same behavior in SLPC by allowing the algorithm to use efficient frequency. We had disabled it during boot due to concerns that it might break kernel ABI for min frequency. However, this is not the case since SLPC will still abide by the (min,max) range limits. With this change, min freq will be at efficient frequency level at init instead of fused min (RPn). If user chooses to reduce min freq below the efficient freq, we will turn off usage of efficient frequency and honor the user request. When a higher value is written, it will get toggled back again. The patch also corrects the register which needs to be read for obtaining the correct efficient frequency for Gen9+. We see much better perf numbers with benchmarks like glmark2 with efficient frequency usage enabled as expected. v2: Address review comments (Rodrigo) v3: with efficient frequency being dynamic, it is possible that the req frequency may go beyond max freq. This will cause SLPC selftests to fail. Add a FIXME there to start the test with [RPn, RP0] instead and restore it afterwards. BugLink: https://gitlab.freedesktop.org/drm/intel/-/issues/5468 Cc: Rodrigo Vivi Signed-off-by: Vinay Belgaumkar Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20220820010832.15350-1-vinay.belgaumkar@intel.com --- drivers/gpu/drm/i915/gt/intel_rps.c | 7 ++- drivers/gpu/drm/i915/gt/selftest_slpc.c | 9 ++++ drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 66 ++++++----------------------- drivers/gpu/drm/i915/intel_mchbar_regs.h | 3 ++ 4 files changed, 31 insertions(+), 54 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index c7d381ad90cf..8c289a032103 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -1107,7 +1107,12 @@ void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *c caps->min_freq = (rp_state_cap >> 0) & 0xff; } else { caps->rp0_freq = (rp_state_cap >> 0) & 0xff; - caps->rp1_freq = (rp_state_cap >> 8) & 0xff; + if (GRAPHICS_VER(i915) >= 10) + caps->rp1_freq = REG_FIELD_GET(RPE_MASK, + intel_uncore_read(to_gt(i915)->uncore, + GEN10_FREQ_INFO_REC)); + else + caps->rp1_freq = (rp_state_cap >> 8) & 0xff; caps->min_freq = (rp_state_cap >> 16) & 0xff; } diff --git a/drivers/gpu/drm/i915/gt/selftest_slpc.c b/drivers/gpu/drm/i915/gt/selftest_slpc.c index ac29691e0b1a..f8a1d27df272 100644 --- a/drivers/gpu/drm/i915/gt/selftest_slpc.c +++ b/drivers/gpu/drm/i915/gt/selftest_slpc.c @@ -166,6 +166,15 @@ static int run_test(struct intel_gt *gt, int test_type) return -EIO; } + /* + * FIXME: With efficient frequency enabled, GuC can request + * frequencies higher than the SLPC max. While this is fixed + * in GuC, we level set these tests with RPn as min. + */ + err = slpc_set_min_freq(slpc, slpc->min_freq); + if (err) + return err; + if (slpc->min_freq == slpc->rp0_freq) { pr_err("Min/Max are fused to the same value\n"); return -EINVAL; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index e1fa1f32f29e..9d49ccef03bb 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -137,17 +137,6 @@ static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value) return ret > 0 ? -EPROTO : ret; } -static int guc_action_slpc_unset_param(struct intel_guc *guc, u8 id) -{ - u32 request[] = { - GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, - SLPC_EVENT(SLPC_EVENT_PARAMETER_UNSET, 1), - id, - }; - - return intel_guc_send(guc, request, ARRAY_SIZE(request)); -} - static bool slpc_is_running(struct intel_guc_slpc *slpc) { return slpc_get_state(slpc) == SLPC_GLOBAL_STATE_RUNNING; @@ -201,16 +190,6 @@ static int slpc_set_param(struct intel_guc_slpc *slpc, u8 id, u32 value) return ret; } -static int slpc_unset_param(struct intel_guc_slpc *slpc, - u8 id) -{ - struct intel_guc *guc = slpc_to_guc(slpc); - - GEM_BUG_ON(id >= SLPC_MAX_PARAM); - - return guc_action_slpc_unset_param(guc, id); -} - static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq) { struct drm_i915_private *i915 = slpc_to_i915(slpc); @@ -491,6 +470,16 @@ int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val) with_intel_runtime_pm(&i915->runtime_pm, wakeref) { + /* Ignore efficient freq if lower min freq is requested */ + ret = slpc_set_param(slpc, + SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY, + val < slpc->rp1_freq); + if (unlikely(ret)) { + i915_probe_error(i915, "Failed to toggle efficient freq (%pe)\n", + ERR_PTR(ret)); + return ret; + } + ret = slpc_set_param(slpc, SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, val); @@ -587,7 +576,9 @@ static int slpc_set_softlimits(struct intel_guc_slpc *slpc) return ret; if (!slpc->min_freq_softlimit) { - slpc->min_freq_softlimit = slpc->min_freq; + ret = intel_guc_slpc_get_min_freq(slpc, &slpc->min_freq_softlimit); + if (unlikely(ret)) + return ret; slpc_to_gt(slpc)->defaults.min_freq = slpc->min_freq_softlimit; } else if (slpc->min_freq_softlimit != slpc->min_freq) { return intel_guc_slpc_set_min_freq(slpc, @@ -597,29 +588,6 @@ static int slpc_set_softlimits(struct intel_guc_slpc *slpc) return 0; } -static int slpc_ignore_eff_freq(struct intel_guc_slpc *slpc, bool ignore) -{ - int ret = 0; - - if (ignore) { - ret = slpc_set_param(slpc, - SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY, - ignore); - if (!ret) - return slpc_set_param(slpc, - SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, - slpc->min_freq); - } else { - ret = slpc_unset_param(slpc, - SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY); - if (!ret) - return slpc_unset_param(slpc, - SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ); - } - - return ret; -} - static int slpc_use_fused_rp0(struct intel_guc_slpc *slpc) { /* Force SLPC to used platform rp0 */ @@ -679,14 +647,6 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc) slpc_get_rp_values(slpc); - /* Ignore efficient freq and set min to platform min */ - ret = slpc_ignore_eff_freq(slpc, true); - if (unlikely(ret)) { - i915_probe_error(i915, "Failed to set SLPC min to RPn (%pe)\n", - ERR_PTR(ret)); - return ret; - } - /* Set SLPC max limit to RP0 */ ret = slpc_use_fused_rp0(slpc); if (unlikely(ret)) { diff --git a/drivers/gpu/drm/i915/intel_mchbar_regs.h b/drivers/gpu/drm/i915/intel_mchbar_regs.h index 2aad2f0cc8db..ffc702b79579 100644 --- a/drivers/gpu/drm/i915/intel_mchbar_regs.h +++ b/drivers/gpu/drm/i915/intel_mchbar_regs.h @@ -196,6 +196,9 @@ #define RP1_CAP_MASK REG_GENMASK(15, 8) #define RPN_CAP_MASK REG_GENMASK(23, 16) +#define GEN10_FREQ_INFO_REC _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5ef0) +#define RPE_MASK REG_GENMASK(15, 8) + /* snb MCH registers for priority tuning */ #define MCH_SSKPD _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5d10) #define SSKPD_NEW_WM0_MASK_HSW REG_GENMASK64(63, 56) -- cgit v1.2.3 From 6509dd1111928a351204af1fc8e6aa61e0c59002 Mon Sep 17 00:00:00 2001 From: Radhakrishna Sripada Date: Wed, 17 Aug 2022 15:43:04 -0700 Subject: drm/i915: Skip Bit12 fw domain reset for gen12+ Bit12 of the Forcewake request register should not be cleared post gen12. Do not touch this bit while clearing during fw domain reset. v2: Tweak the comment to drop older platforms(MattR) Bspec: 52542 Signed-off-by: Sushma Venkatesh Reddy Signed-off-by: Radhakrishna Sripada Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20220817224304.255767-1-radhakrishna.sripada@intel.com --- drivers/gpu/drm/i915/intel_uncore.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index a852c471d1b3..2a21c6515eaf 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -112,8 +112,11 @@ fw_domain_reset(const struct intel_uncore_forcewake_domain *d) * trying to reset here does exist at this point (engines could be fused * off in ICL+), so no waiting for acks */ - /* WaRsClearFWBitsAtReset:bdw,skl */ - fw_clear(d, 0xffff); + /* WaRsClearFWBitsAtReset */ + if (GRAPHICS_VER(d->uncore->i915) >= 12) + fw_clear(d, 0xefff); + else + fw_clear(d, 0xffff); } static inline void -- cgit v1.2.3 From da30390b93c377545fdf5ecec34aee018f90485b Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 18 Aug 2022 16:41:44 -0700 Subject: drm/i915/mtl: MMIO range is now 4MB Previously only dgfx platforms had a 4MB MMIO range, but starting with MTL we now use the larger range for all platforms. Bspec: 63834, 63830 Signed-off-by: Matt Roper Signed-off-by: Radhakrishna Sripada Reviewed-by: Balasubramani Vivekanandan Link: https://patchwork.freedesktop.org/patch/msgid/20220818234202.451742-4-radhakrishna.sripada@intel.com --- drivers/gpu/drm/i915/intel_uncore.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 2a21c6515eaf..c7ef5f2ff2e1 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -2235,14 +2235,15 @@ int intel_uncore_setup_mmio(struct intel_uncore *uncore, phys_addr_t phys_addr) * clobbering the GTT which we want ioremap_wc instead. Fortunately, * the register BAR remains the same size for all the earlier * generations up to Ironlake. - * For dgfx chips register range is expanded to 4MB. + * For dgfx chips register range is expanded to 4MB, and this larger + * range is also used for integrated gpus beginning with Meteor Lake. */ - if (GRAPHICS_VER(i915) < 5) - mmio_size = 512 * 1024; - else if (IS_DGFX(i915)) + if (IS_DGFX(i915) || GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) mmio_size = 4 * 1024 * 1024; - else + else if (GRAPHICS_VER(i915) >= 5) mmio_size = 2 * 1024 * 1024; + else + mmio_size = 512 * 1024; uncore->regs = ioremap(phys_addr, mmio_size); if (uncore->regs == NULL) { -- cgit v1.2.3 From 068a0f5c8260dcc4ccbaefd2dbf21ea84162ac17 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 18 Aug 2022 16:41:45 -0700 Subject: drm/i915/mtl: Don't mask off CCS according to DSS fusing Unlike the Xe_HP platforms, MTL only has a single CCS engine; the quad-based engine masking logic does not apply to this platform (or presumably any future platforms that only have 0 or 1 CCS). Signed-off-by: Matt Roper Signed-off-by: Radhakrishna Sripada Reviewed-by: Balasubramani Vivekanandan Link: https://patchwork.freedesktop.org/patch/msgid/20220818234202.451742-5-radhakrishna.sripada@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 37fa813af766..17e7f20bbb48 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -672,7 +672,7 @@ static void engine_mask_apply_compute_fuses(struct intel_gt *gt) unsigned long ccs_mask; unsigned int i; - if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 50)) + if (hweight32(CCS_MASK(gt)) <= 1) return; ccs_mask = intel_slicemask_from_xehp_dssmask(info->sseu.compute_subslice_mask, -- cgit v1.2.3 From 6127b3bcd33299cdebb79ffcc9c9ca135eaf763e Mon Sep 17 00:00:00 2001 From: Juston Li Date: Thu, 18 Aug 2022 17:42:05 +0000 Subject: drm/i915/pxp: don't start pxp without mei_pxp bind pxp will not start correctly until after mei_pxp bind completes and intel_pxp_init_hw() is called. Wait for the bind to complete before proceeding with startup. This fixes a race condition during bootup where we observed a small window for pxp commands to be sent, starting pxp before mei_pxp bind completed. Changes since v2: - wait for pxp_component to bind instead of returning -EAGAIN (Daniele) Changes since v1: - check pxp_component instead of pxp_component_added (Daniele) - pxp_component needs tee_mutex (Daniele) - return -EAGAIN so caller knows to retry (Daniele) Signed-off-by: Juston Li Reviewed-by: Andrzej Hajda Signed-off-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20220818174205.2412730-1-justonli@chromium.org --- drivers/gpu/drm/i915/pxp/intel_pxp.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp.c b/drivers/gpu/drm/i915/pxp/intel_pxp.c index 15311eaed848..17109c513259 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp.c @@ -176,6 +176,18 @@ static void pxp_queue_termination(struct intel_pxp *pxp) spin_unlock_irq(>->irq_lock); } +static bool pxp_component_bound(struct intel_pxp *pxp) +{ + bool bound = false; + + mutex_lock(&pxp->tee_mutex); + if (pxp->pxp_component) + bound = true; + mutex_unlock(&pxp->tee_mutex); + + return bound; +} + /* * the arb session is restarted from the irq work when we receive the * termination completion interrupt @@ -187,6 +199,9 @@ int intel_pxp_start(struct intel_pxp *pxp) if (!intel_pxp_is_enabled(pxp)) return -ENODEV; + if (wait_for(pxp_component_bound(pxp), 250)) + return -ENXIO; + mutex_lock(&pxp->arb_mutex); if (pxp->arb_is_valid) -- cgit v1.2.3 From 25bcc828d237cda65d34c736d70e4467fffb80b9 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 23 Aug 2022 13:24:49 -0700 Subject: drm/i915/dg2: Incorporate Wa_16014892111 into DRAW_WATERMARK tuning Although register tuning settings are generally implemented via the workaround infrastructure, it turns out that the DRAW_WATERMARK register is not properly saved/restored by hardware around power events (i.e., RC6 entry) so updates to the value cannot be applied in the usual manner. New workaround Wa_16014892111 informs us that any tuning updates to this register must instead be applied via an INDIRECT_CTX batch buffer. This will ensure that the necessary value is re-applied when a context begins running, even if an RC6 entry had wiped the register back to hardware defaults since the last context ran. Fixes: 6dc85721df74 ("drm/i915/dg2: Add additional tuning settings") Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/6642 Signed-off-by: Matt Roper Reviewed-by: Balasubramani Vivekanandan Link: https://patchwork.freedesktop.org/patch/msgid/20220823202449.83727-1-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_lrc.c | 21 +++++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_workarounds.c | 2 -- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index eec73c66406c..070cec4ff8a4 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1242,6 +1242,23 @@ dg2_emit_rcs_hang_wabb(const struct intel_context *ce, u32 *cs) return cs; } +/* + * The bspec's tuning guide asks us to program a vertical watermark value of + * 0x3FF. However this register is not saved/restored properly by the + * hardware, so we're required to apply the desired value via INDIRECT_CTX + * batch buffer to ensure the value takes effect properly. All other bits + * in this register should remain at 0 (the hardware default). + */ +static u32 * +dg2_emit_draw_watermark_setting(u32 *cs) +{ + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(DRAW_WATERMARK); + *cs++ = REG_FIELD_PREP(VERT_WM_VAL, 0x3FF); + + return cs; +} + static u32 * gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs) { @@ -1263,6 +1280,10 @@ gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs) if (!HAS_FLAT_CCS(ce->engine->i915)) cs = gen12_emit_aux_table_inv(cs, GEN12_GFX_CCS_AUX_NV); + /* Wa_16014892111 */ + if (IS_DG2(ce->engine->i915)) + cs = dg2_emit_draw_watermark_setting(cs); + return cs; } diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 31e129329fb0..3cdb8294e13f 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -2685,8 +2685,6 @@ add_render_compute_tuning_settings(struct drm_i915_private *i915, if (IS_DG2(i915)) { wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS); wa_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512); - wa_write_clr_set(wal, DRAW_WATERMARK, VERT_WM_VAL, - REG_FIELD_PREP(VERT_WM_VAL, 0x3FF)); /* * This is also listed as Wa_22012654132 for certain DG2 -- cgit v1.2.3 From f54e515c91806288126f64b37da0c78baa2d8c1f Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Fri, 26 Aug 2022 12:23:43 +0300 Subject: drm/i915/guc: Remove log size module parameters Remove the module parameters for configuring GuC log size. We should instead rely on tuning the defaults to be usable for reporting bugs. v2: - Use correct 1M unit Fixes: 8ad0152afb1b ("drm/i915/guc: Make GuC log sizes runtime configurable") Signed-off-by: Joonas Lahtinen Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Tvrtko Ursulin Cc: John Harrison Cc: Alan Previn Reviewed-by: Jani Nikula Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20220826092343.184568-1-joonas.lahtinen@linux.intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_log.c | 7 +++---- drivers/gpu/drm/i915/i915_params.c | 12 ------------ drivers/gpu/drm/i915/i915_params.h | 3 --- 3 files changed, 3 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c index 3a2243b4ac9f..55d4b8f8e33e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c @@ -79,9 +79,9 @@ static void _guc_log_init_sizes(struct intel_guc_log *log) } }; s32 params[GUC_LOG_SECTIONS_LIMIT] = { - i915->params.guc_log_size_crash, - i915->params.guc_log_size_debug, - i915->params.guc_log_size_capture, + GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE / SZ_1M, + GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE / SZ_1M, + GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE / SZ_1M, }; int i; @@ -90,7 +90,6 @@ static void _guc_log_init_sizes(struct intel_guc_log *log) /* If debug size > 1MB then bump default crash size to keep the same units */ if (log->sizes[GUC_LOG_SECTIONS_DEBUG].bytes >= SZ_1M && - (i915->params.guc_log_size_crash == -1) && GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE < SZ_1M) log->sizes[GUC_LOG_SECTIONS_CRASH].bytes = SZ_1M; diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 06ca5b822111..6fc475a5db61 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -171,18 +171,6 @@ i915_param_named(guc_log_level, int, 0400, "GuC firmware logging level. Requires GuC to be loaded. " "(-1=auto [default], 0=disable, 1..4=enable with verbosity min..max)"); -i915_param_named(guc_log_size_crash, int, 0400, - "GuC firmware logging buffer size for crash dumps (in MB)" - "(-1=auto [default], NB: max = 4, other restrictions apply)"); - -i915_param_named(guc_log_size_debug, int, 0400, - "GuC firmware logging buffer size for debug logs (in MB)" - "(-1=auto [default], NB: max = 16, other restrictions apply)"); - -i915_param_named(guc_log_size_capture, int, 0400, - "GuC error capture register dump buffer size (in MB)" - "(-1=auto [default], NB: max = 4, other restrictions apply)"); - i915_param_named_unsafe(guc_firmware_path, charp, 0400, "GuC firmware path to use instead of the default one"); diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index f684d1ab8707..2733cb6cfe09 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -61,9 +61,6 @@ struct drm_printer; param(int, invert_brightness, 0, 0600) \ param(int, enable_guc, -1, 0400) \ param(int, guc_log_level, -1, 0400) \ - param(int, guc_log_size_crash, -1, 0400) \ - param(int, guc_log_size_debug, -1, 0400) \ - param(int, guc_log_size_capture, -1, 0400) \ param(char *, guc_firmware_path, NULL, 0400) \ param(char *, huc_firmware_path, NULL, 0400) \ param(char *, dmc_firmware_path, NULL, 0400) \ -- cgit v1.2.3 From 13cc5123e9530c5895799b4185fb7a1a2e1b7f88 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Thu, 25 Aug 2022 17:42:39 +0200 Subject: drm/i915/selftests: allow misaligned_pin test work with unmappable memory In case of Small BAR configurations stolen local memory can be unmappable. Since the test does not touch the memory, passing I915_BO_ALLOC_GPU_ONLY flag to i915_gem_object_create_region, will prevent -ENOSPC error from _i915_gem_object_stolen_init. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/6565 Signed-off-by: Andrzej Hajda Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20220825154239.52343-1-andrzej.hajda@intel.com --- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index fb5e61963479..e050a2de5fd1 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -1080,7 +1080,7 @@ static int misaligned_case(struct i915_address_space *vm, struct intel_memory_re bool is_stolen = mr->type == INTEL_MEMORY_STOLEN_SYSTEM || mr->type == INTEL_MEMORY_STOLEN_LOCAL; - obj = i915_gem_object_create_region(mr, size, 0, 0); + obj = i915_gem_object_create_region(mr, size, 0, I915_BO_ALLOC_GPU_ONLY); if (IS_ERR(obj)) { /* if iGVT-g or DMAR is active, stolen mem will be uninitialized */ if (PTR_ERR(obj) == -ENODEV && is_stolen) -- cgit v1.2.3 From d9927abb4594ba940b3ed1dd86fa0447faf13a7b Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 26 Aug 2022 14:02:33 -0700 Subject: Revert "drm/i915/dg2: Add preemption changes for Wa_14015141709" This reverts commit ca6920811aa5428270dd78af0a7a36b10119065a. The intent of Wa_14015141709 was to inform us that userspace can no longer control object-level preemption as it has on past platforms (i.e., by twiddling register bit CS_CHICKEN1[0]). The description of the workaround in the spec wasn't terribly well-written, and when we requested clarification from the hardware teams we were told that on the kernel side we should also probably stop setting FF_SLICE_CS_CHICKEN1[14], which is the register bit that directs the hardware to honor the settings in per-context register CS_CHICKEN1. It turns out that this guidance about FF_SLICE_CS_CHICKEN1[14] was a mistake; even though CS_CHICKEN1[0] is non-operational and useless to userspace, there are other bits in the register that do still work and might need to be adjusted by userspace in the future (e.g., to implement other workarounds that show up). If we don't set FF_SLICE_CS_CHICKEN1[14] in i915, then those future workarounds would not take effect. This miscommunication came to light because another workaround (Wa_16013994831) has now shown up that requires userspace to adjust the value of CS_CHICKEN[10] in certain circumstances. To ensure userspace's updates to this chicken bit are handled properly by the hardware, we need to make sure that FF_SLICE_CS_CHICKEN1[14] is once again set by the kernel. Signed-off-by: Matt Roper Reviewed-by: Matt Atwood Link: https://patchwork.freedesktop.org/patch/msgid/20220826210233.406482-1-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 2 +- drivers/gpu/drm/i915/i915_drv.h | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 3cdb8294e13f..69a0c6a74474 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -2389,7 +2389,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) FF_DOP_CLOCK_GATE_DISABLE); } - if (HAS_PERCTX_PREEMPT_CTRL(i915)) { + if (IS_GRAPHICS_VER(i915, 9, 12)) { /* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl,tgl */ wa_masked_en(wal, GEN7_FF_SLICE_CS_CHICKEN1, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 8feb4b2ace27..66871be58a47 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1414,9 +1414,6 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_GUC_DEPRIVILEGE(dev_priv) \ (INTEL_INFO(dev_priv)->has_guc_deprivilege) -#define HAS_PERCTX_PREEMPT_CTRL(i915) \ - ((GRAPHICS_VER(i915) >= 9) && GRAPHICS_VER_FULL(i915) < IP_VER(12, 55)) - #define HAS_D12_PLANE_MINIMIZATION(dev_priv) (IS_ROCKETLAKE(dev_priv) || \ IS_ALDERLAKE_S(dev_priv)) -- cgit v1.2.3 From 73c7a8a871dc9aa6b7876c1a30bdbe0f899eb4f6 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 26 Aug 2022 14:27:18 -0700 Subject: drm/i915/ats-m: Add thread execution tuning setting On client DG2 platforms, optimal performance is achieved with the hardware's default "age based" thread execution setting. However on ATS-M, switching this to "round robin after dependencies" provides better performance. We'll add a new "tuning" feature flag to the ATS-M device info to enable/disable this setting. Bspec: 68331 Cc: Lucas De Marchi Signed-off-by: Matt Roper Reviewed-by: Matt Atwood Link: https://patchwork.freedesktop.org/patch/msgid/20220826212718.409948-1-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 2 ++ drivers/gpu/drm/i915/gt/intel_workarounds.c | 9 +++++++++ drivers/gpu/drm/i915/i915_pci.c | 1 + drivers/gpu/drm/i915/intel_device_info.h | 1 + 4 files changed, 13 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 94f9ddcfb3a5..d414785003cc 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -1110,6 +1110,8 @@ #define GEN12_DISABLE_TDL_PUSH REG_BIT(9) #define GEN11_DIS_PICK_2ND_EU REG_BIT(7) #define GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX REG_BIT(4) +#define THREAD_EX_ARB_MODE REG_GENMASK(3, 2) +#define THREAD_EX_ARB_MODE_RR_AFTER_DEP REG_FIELD_PREP(THREAD_EX_ARB_MODE, 0x2) #define HSW_ROW_CHICKEN3 _MMIO(0xe49c) #define HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE (1 << 6) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 69a0c6a74474..6d2003d598e6 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -2700,6 +2700,15 @@ add_render_compute_tuning_settings(struct drm_i915_private *i915, 0 /* write-only, so skip validation */, true); } + + /* + * This tuning setting proves beneficial only on ATS-M designs; the + * default "age based" setting is optimal on regular DG2 and other + * platforms. + */ + if (INTEL_INFO(i915)->tuning_thread_rr_after_dep) + wa_masked_field_set(wal, GEN9_ROW_CHICKEN4, THREAD_EX_ARB_MODE, + THREAD_EX_ARB_MODE_RR_AFTER_DEP); } /* diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 3e3e95c7a63f..7b0384373e99 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -1083,6 +1083,7 @@ static const struct intel_device_info ats_m_info = { DG2_FEATURES, .display = { 0 }, .require_force_probe = 1, + .tuning_thread_rr_after_dep = 1, }; #define XE_HPC_FEATURES \ diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 677fb68f1726..9cd912c2703c 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -172,6 +172,7 @@ enum intel_ppgtt_type { func(has_runtime_pm); \ func(has_snoop); \ func(has_coherent_ggtt); \ + func(tuning_thread_rr_after_dep); \ func(unfenced_needs_alignment); \ func(hws_needs_physical); -- cgit v1.2.3 From ff4e0cafe845110c9b7fe26eb8a6b49d60a1288c Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Tue, 30 Aug 2022 15:35:37 -0400 Subject: drm/i915/slpc: Fix inconsistent locked return Fix for intel_guc_slpc_set_min_freq() warn: inconsistent returns '&slpc->lock'. v2: Avoid with_intel_runtime_pm with the internal goto/return. (Ashutosh) Also standardize the 'ret' if this came from the efficient setup. And avoid the 'unlikely'. Fixes: 95ccf312a1e4 ("drm/i915/guc/slpc: Allow SLPC to use efficient frequency") Reported-by: kernel test robot Reported-by: Dan Carpenter Cc: Ashutosh Dixit Signed-off-by: Rodrigo Vivi Reviewed-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20220830193537.52201-1-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 40 ++++++++++++++--------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index 9d49ccef03bb..fdd895f73f9f 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -467,33 +467,33 @@ int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val) /* Need a lock now since waitboost can be modifying min as well */ mutex_lock(&slpc->lock); - - with_intel_runtime_pm(&i915->runtime_pm, wakeref) { - - /* Ignore efficient freq if lower min freq is requested */ - ret = slpc_set_param(slpc, - SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY, - val < slpc->rp1_freq); - if (unlikely(ret)) { - i915_probe_error(i915, "Failed to toggle efficient freq (%pe)\n", - ERR_PTR(ret)); - return ret; - } - - ret = slpc_set_param(slpc, - SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, - val); - - /* Return standardized err code for sysfs calls */ - if (ret) - ret = -EIO; + wakeref = intel_runtime_pm_get(&i915->runtime_pm); + + /* Ignore efficient freq if lower min freq is requested */ + ret = slpc_set_param(slpc, + SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY, + val < slpc->rp1_freq); + if (ret) { + i915_probe_error(i915, "Failed to toggle efficient freq (%pe)\n", + ERR_PTR(ret)); + goto out; } + ret = slpc_set_param(slpc, + SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, + val); + if (!ret) slpc->min_freq_softlimit = val; +out: + intel_runtime_pm_put(&i915->runtime_pm, wakeref); mutex_unlock(&slpc->lock); + /* Return standardized err code for sysfs calls */ + if (ret) + ret = -EIO; + return ret; } -- cgit v1.2.3 From 018a7bdbb090b9155a6509a0d1a684db4afaa5b1 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 31 Aug 2022 17:45:38 -0400 Subject: drm/i915/slpc: Let's fix the PCODE min freq table setup for SLPC We need to inform PCODE of a desired ring frequencies so PCODE update the memory frequencies to us. rps->min_freq and rps->max_freq are the frequencies used in that request. However they were unset when SLPC was enabled and PCODE never updated the memory freq. v2 (as Suggested by Ashutosh): if SLPC is in use, let's pick the right frequencies from the get_ia_constants instead of the fake init of rps' min and max. v3: don't forget the max <= min return v4: Move all the freq conversion to intel_rps.c. And the max <= min check to where it belongs. v5: (Ashutosh) Fix old comment s/50 HZ/50 MHz and add a doc explaining the "raw format" Fixes: 7ba79a671568 ("drm/i915/guc/slpc: Gate Host RPS when SLPC is enabled") Cc: # v5.15+ Cc: Ashutosh Dixit Tested-by: Sushma Venkatesh Reddy Signed-off-by: Rodrigo Vivi Reviewed-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20220831214538.143950-1-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/gt/intel_llc.c | 19 +++++++------- drivers/gpu/drm/i915/gt/intel_rps.c | 50 +++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_rps.h | 2 ++ 3 files changed, 61 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_llc.c b/drivers/gpu/drm/i915/gt/intel_llc.c index 14fe65812e42..1d19c073ba2e 100644 --- a/drivers/gpu/drm/i915/gt/intel_llc.c +++ b/drivers/gpu/drm/i915/gt/intel_llc.c @@ -12,6 +12,7 @@ #include "intel_llc.h" #include "intel_mchbar_regs.h" #include "intel_pcode.h" +#include "intel_rps.h" struct ia_constants { unsigned int min_gpu_freq; @@ -55,9 +56,6 @@ static bool get_ia_constants(struct intel_llc *llc, if (!HAS_LLC(i915) || IS_DGFX(i915)) return false; - if (rps->max_freq <= rps->min_freq) - return false; - consts->max_ia_freq = cpu_max_MHz(); consts->min_ring_freq = @@ -65,13 +63,8 @@ static bool get_ia_constants(struct intel_llc *llc, /* convert DDR frequency from units of 266.6MHz to bandwidth */ consts->min_ring_freq = mult_frac(consts->min_ring_freq, 8, 3); - consts->min_gpu_freq = rps->min_freq; - consts->max_gpu_freq = rps->max_freq; - if (GRAPHICS_VER(i915) >= 9) { - /* Convert GT frequency to 50 HZ units */ - consts->min_gpu_freq /= GEN9_FREQ_SCALER; - consts->max_gpu_freq /= GEN9_FREQ_SCALER; - } + consts->min_gpu_freq = intel_rps_get_min_raw_freq(rps); + consts->max_gpu_freq = intel_rps_get_max_raw_freq(rps); return true; } @@ -130,6 +123,12 @@ static void gen6_update_ring_freq(struct intel_llc *llc) if (!get_ia_constants(llc, &consts)) return; + /* + * Although this is unlikely on any platform during initialization, + * let's ensure we don't get accidentally into infinite loop + */ + if (consts.max_gpu_freq <= consts.min_gpu_freq) + return; /* * For each potential GPU frequency, load a ring frequency we'd like * to use for memory access. We do this by specifying the IA frequency diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 8c289a032103..579ae9ac089c 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -2133,6 +2133,31 @@ u32 intel_rps_get_max_frequency(struct intel_rps *rps) return intel_gpu_freq(rps, rps->max_freq_softlimit); } +/** + * intel_rps_get_max_raw_freq - returns the max frequency in some raw format. + * @rps: the intel_rps structure + * + * Returns the max frequency in a raw format. In newer platforms raw is in + * units of 50 MHz. + */ +u32 intel_rps_get_max_raw_freq(struct intel_rps *rps) +{ + struct intel_guc_slpc *slpc = rps_to_slpc(rps); + u32 freq; + + if (rps_uses_slpc(rps)) { + return DIV_ROUND_CLOSEST(slpc->rp0_freq, + GT_FREQUENCY_MULTIPLIER); + } else { + freq = rps->max_freq; + if (GRAPHICS_VER(rps_to_i915(rps)) >= 9) { + /* Convert GT frequency to 50 MHz units */ + freq /= GEN9_FREQ_SCALER; + } + return freq; + } +} + u32 intel_rps_get_rp0_frequency(struct intel_rps *rps) { struct intel_guc_slpc *slpc = rps_to_slpc(rps); @@ -2221,6 +2246,31 @@ u32 intel_rps_get_min_frequency(struct intel_rps *rps) return intel_gpu_freq(rps, rps->min_freq_softlimit); } +/** + * intel_rps_get_min_raw_freq - returns the min frequency in some raw format. + * @rps: the intel_rps structure + * + * Returns the min frequency in a raw format. In newer platforms raw is in + * units of 50 MHz. + */ +u32 intel_rps_get_min_raw_freq(struct intel_rps *rps) +{ + struct intel_guc_slpc *slpc = rps_to_slpc(rps); + u32 freq; + + if (rps_uses_slpc(rps)) { + return DIV_ROUND_CLOSEST(slpc->min_freq, + GT_FREQUENCY_MULTIPLIER); + } else { + freq = rps->min_freq; + if (GRAPHICS_VER(rps_to_i915(rps)) >= 9) { + /* Convert GT frequency to 50 MHz units */ + freq /= GEN9_FREQ_SCALER; + } + return freq; + } +} + static int set_min_freq(struct intel_rps *rps, u32 val) { int ret = 0; diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h index 1e8d56491308..4509dfdc52e0 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.h +++ b/drivers/gpu/drm/i915/gt/intel_rps.h @@ -37,8 +37,10 @@ u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat1); u32 intel_rps_read_actual_frequency(struct intel_rps *rps); u32 intel_rps_get_requested_frequency(struct intel_rps *rps); u32 intel_rps_get_min_frequency(struct intel_rps *rps); +u32 intel_rps_get_min_raw_freq(struct intel_rps *rps); int intel_rps_set_min_frequency(struct intel_rps *rps, u32 val); u32 intel_rps_get_max_frequency(struct intel_rps *rps); +u32 intel_rps_get_max_raw_freq(struct intel_rps *rps); int intel_rps_set_max_frequency(struct intel_rps *rps, u32 val); u32 intel_rps_get_rp0_frequency(struct intel_rps *rps); u32 intel_rps_get_rp1_frequency(struct intel_rps *rps); -- cgit v1.2.3 From c2a6502f36248e9e17806d1342e4617d895960b0 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Thu, 1 Sep 2022 19:22:17 +0200 Subject: drm/i915/ttm: Abort suspend on i915_ttm_backup failure On system suspend when system memory is low then i915_gem_obj_copy_ttm() could fail trying to backup a lmem obj. GEM_WARN_ON() is not enough, suspend shouldn't continue if i915_ttm_backup() throws an error. v2: Keep the fdo issue till we have a igt test(Matt). v3: Use %pe(Andrzej) References: https://gitlab.freedesktop.org/drm/intel/-/issues/6529 Reviewed-by: Matthew Auld Reviewed-by: Andrzej Hajda Suggested-by: Chris P Wilson Signed-off-by: Nirmoy Das Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20220901172217.18392-1-nirmoy.das@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c index 9aad84059d56..07e49f22f2de 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c @@ -79,7 +79,12 @@ static int i915_ttm_backup(struct i915_gem_apply_to_region *apply, goto out_no_populate; err = i915_gem_obj_copy_ttm(backup, obj, pm_apply->allow_gpu, false); - GEM_WARN_ON(err); + if (err) { + drm_err(&i915->drm, + "Unable to copy from device to system memory, err:%pe\n", + ERR_PTR(err)); + goto out_no_populate; + } ttm_bo_wait_ctx(backup_bo, &ctx); obj->ttm.backup = backup; -- cgit v1.2.3 From 873fef8833ea794526b7f4179088e565078fe0e8 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 5 Sep 2022 11:53:29 +0100 Subject: drm/i915: consider HAS_FLAT_CCS() in needs_ccs_pages Just move the HAS_FLAT_CCS() check into needs_ccs_pages. This also then fixes i915_ttm_memcpy_allowed() which was incorrectly reporting true on DG1, even though it doesn't have small-BAR or flat-CCS. References: https://gitlab.freedesktop.org/drm/intel/-/issues/6605 Fixes: efeb3caf4341 ("drm/i915/ttm: disallow CPU fallback mode for ccs pages") Signed-off-by: Matthew Auld Cc: Nirmoy Das Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20220905105329.41455-1-matthew.auld@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_object.c | 3 +++ drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 389e9f157ca5..85482a04d158 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -723,6 +723,9 @@ bool i915_gem_object_needs_ccs_pages(struct drm_i915_gem_object *obj) bool lmem_placement = false; int i; + if (!HAS_FLAT_CCS(to_i915(obj->base.dev))) + return false; + for (i = 0; i < obj->mm.n_placements; i++) { /* Compression is not allowed for the objects with smem placement */ if (obj->mm.placements[i]->type == INTEL_MEMORY_SYSTEM) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index f131dc065f47..6f3ab7ade41a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -297,7 +297,7 @@ static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo, i915_tt->is_shmem = true; } - if (HAS_FLAT_CCS(i915) && i915_gem_object_needs_ccs_pages(obj)) + if (i915_gem_object_needs_ccs_pages(obj)) ccs_pages = DIV_ROUND_UP(DIV_ROUND_UP(bo->base.size, NUM_BYTES_PER_CCS_BYTE), PAGE_SIZE); -- cgit v1.2.3 From 68eb42b3f3b30df1a335b3139b21c32187c0efaa Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 2 Sep 2022 05:51:25 -0400 Subject: drm/i915: Don't try to disable host RPS when this was never enabled. Specially in GT reset case this could be triggered and try to disable things that had never been enabled. Let's add some protection here. Cc: Ashutosh Dixit Signed-off-by: Rodrigo Vivi Reviewed-by: Ashutosh Dixit Acked-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20220902095126.373036-1-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/gt/intel_rps.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 579ae9ac089c..6fadde4ee7bf 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -1551,6 +1551,9 @@ void intel_rps_disable(struct intel_rps *rps) { struct drm_i915_private *i915 = rps_to_i915(rps); + if (!intel_rps_is_enabled(rps)) + return; + intel_rps_clear_enabled(rps); intel_rps_clear_interrupts(rps); intel_rps_clear_timer(rps); -- cgit v1.2.3 From 665ae9c9ca79bdfc83def0981e015e181ea463b7 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Tue, 6 Sep 2022 16:01:46 -0700 Subject: drm/i915/uc: Support for version reduced and multiple firmware files There was a misunderstanding in how firmware file compatibility should be managed within i915. This has been clarified as: i915 must support all existing firmware releases forever new minor firmware releases should replace prior versions only backwards compatibility breaking releases should be a new file This patch cleans up the single fallback file support that was added as a quick fix emergency effort. That is now removed in preference to supporting arbitrary numbers of firmware files per platform. The patch also adds support for having GuC firmware files that are named by major version only (because the major version indicates backwards breaking changes that affect the KMD) and for having HuC firmware files with no version number at all (because the KMD has no interface requirements with the HuC). For GuC, the driver will report via dmesg if the found file is older than expected. For HuC, the KMD will no longer require updating for any new HuC release so will not be able to report what the latest expected version is. Signed-off-by: John Harrison Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20220906230147.479945-1-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 10 +- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 4 +- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 442 ++++++++++++++-------- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h | 33 +- drivers/gpu/drm/i915/i915_gpu_error.c | 16 +- 5 files changed, 319 insertions(+), 186 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 0d56b615bf78..04393932623c 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1868,7 +1868,7 @@ int intel_guc_submission_init(struct intel_guc *guc) if (guc->submission_initialized) return 0; - if (guc->fw.major_ver_found < 70) { + if (guc->fw.file_selected.major_ver < 70) { ret = guc_lrc_desc_pool_create_v69(guc); if (ret) return ret; @@ -2303,7 +2303,7 @@ static int register_context(struct intel_context *ce, bool loop) GEM_BUG_ON(intel_context_is_child(ce)); trace_intel_context_register(ce); - if (guc->fw.major_ver_found >= 70) + if (guc->fw.file_selected.major_ver >= 70) ret = register_context_v70(guc, ce, loop); else ret = register_context_v69(guc, ce, loop); @@ -2315,7 +2315,7 @@ static int register_context(struct intel_context *ce, bool loop) set_context_registered(ce); spin_unlock_irqrestore(&ce->guc_state.lock, flags); - if (guc->fw.major_ver_found >= 70) + if (guc->fw.file_selected.major_ver >= 70) guc_context_policy_init_v70(ce, loop); } @@ -2921,7 +2921,7 @@ static void __guc_context_set_preemption_timeout(struct intel_guc *guc, u16 guc_id, u32 preemption_timeout) { - if (guc->fw.major_ver_found >= 70) { + if (guc->fw.file_selected.major_ver >= 70) { struct context_policy policy; __guc_context_policy_start_klv(&policy, guc_id); @@ -3186,7 +3186,7 @@ static int guc_context_alloc(struct intel_context *ce) static void __guc_context_set_prio(struct intel_guc *guc, struct intel_context *ce) { - if (guc->fw.major_ver_found >= 70) { + if (guc->fw.file_selected.major_ver >= 70) { struct context_policy policy; __guc_context_policy_start_klv(&policy, ce->guc_id.id); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index f2e7c82985ef..d965ac4832d6 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -436,8 +436,8 @@ static void print_fw_ver(struct intel_uc *uc, struct intel_uc_fw *fw) struct drm_i915_private *i915 = uc_to_gt(uc)->i915; drm_info(&i915->drm, "%s firmware %s version %u.%u\n", - intel_uc_fw_type_repr(fw->type), fw->path, - fw->major_ver_found, fw->minor_ver_found); + intel_uc_fw_type_repr(fw->type), fw->file_selected.path, + fw->file_selected.major_ver, fw->file_selected.minor_ver); } static int __uc_init_hw(struct intel_uc *uc) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 58547292efa0..610f36f1b989 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -41,7 +41,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, "%s firmware -> %s\n", intel_uc_fw_type_repr(uc_fw->type), status == INTEL_UC_FIRMWARE_SELECTED ? - uc_fw->path : intel_uc_fw_status_repr(status)); + uc_fw->file_selected.path : intel_uc_fw_status_repr(status)); } #endif @@ -51,84 +51,149 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, * * Note that RKL and ADL-S have the same GuC/HuC device ID's and use the same * firmware as TGL. + * + * Version numbers: + * Originally, the driver required an exact match major/minor/patch furmware + * file and only supported that one version for any given platform. However, + * the new direction from upstream is to be backwards compatible with all + * prior releases and to be as flexible as possible as to what firmware is + * loaded. + * + * For GuC, the major version number signifies a backwards breaking API change. + * So, new format GuC firmware files are labelled by their major version only. + * For HuC, there is no KMD interaction, hence no version matching requirement. + * So, new format HuC firmware files have no version number at all. + * + * All of which means that the table below must keep all old format files with + * full three point version number. But newer files have reduced requirements. + * Having said that, the driver still needs to track the minor version number + * for GuC at least. As it is useful to report to the user that they are not + * running with a recent enough version for all KMD supported features, + * security fixes, etc. to be enabled. */ -#define INTEL_GUC_FIRMWARE_DEFS(fw_def, guc_def) \ - fw_def(DG2, 0, guc_def(dg2, 70, 4, 1)) \ - fw_def(ALDERLAKE_P, 0, guc_def(adlp, 70, 1, 1)) \ - fw_def(ALDERLAKE_S, 0, guc_def(tgl, 70, 1, 1)) \ - fw_def(DG1, 0, guc_def(dg1, 70, 1, 1)) \ - fw_def(ROCKETLAKE, 0, guc_def(tgl, 70, 1, 1)) \ - fw_def(TIGERLAKE, 0, guc_def(tgl, 70, 1, 1)) \ - fw_def(JASPERLAKE, 0, guc_def(ehl, 70, 1, 1)) \ - fw_def(ELKHARTLAKE, 0, guc_def(ehl, 70, 1, 1)) \ - fw_def(ICELAKE, 0, guc_def(icl, 70, 1, 1)) \ - fw_def(COMETLAKE, 5, guc_def(cml, 70, 1, 1)) \ - fw_def(COMETLAKE, 0, guc_def(kbl, 70, 1, 1)) \ - fw_def(COFFEELAKE, 0, guc_def(kbl, 70, 1, 1)) \ - fw_def(GEMINILAKE, 0, guc_def(glk, 70, 1, 1)) \ - fw_def(KABYLAKE, 0, guc_def(kbl, 70, 1, 1)) \ - fw_def(BROXTON, 0, guc_def(bxt, 70, 1, 1)) \ - fw_def(SKYLAKE, 0, guc_def(skl, 70, 1, 1)) - -#define INTEL_GUC_FIRMWARE_DEFS_FALLBACK(fw_def, guc_def) \ - fw_def(ALDERLAKE_P, 0, guc_def(adlp, 69, 0, 3)) \ - fw_def(ALDERLAKE_S, 0, guc_def(tgl, 69, 0, 3)) - -#define INTEL_HUC_FIRMWARE_DEFS(fw_def, huc_def) \ - fw_def(ALDERLAKE_P, 0, huc_def(tgl, 7, 9, 3)) \ - fw_def(ALDERLAKE_S, 0, huc_def(tgl, 7, 9, 3)) \ - fw_def(DG1, 0, huc_def(dg1, 7, 9, 3)) \ - fw_def(ROCKETLAKE, 0, huc_def(tgl, 7, 9, 3)) \ - fw_def(TIGERLAKE, 0, huc_def(tgl, 7, 9, 3)) \ - fw_def(JASPERLAKE, 0, huc_def(ehl, 9, 0, 0)) \ - fw_def(ELKHARTLAKE, 0, huc_def(ehl, 9, 0, 0)) \ - fw_def(ICELAKE, 0, huc_def(icl, 9, 0, 0)) \ - fw_def(COMETLAKE, 5, huc_def(cml, 4, 0, 0)) \ - fw_def(COMETLAKE, 0, huc_def(kbl, 4, 0, 0)) \ - fw_def(COFFEELAKE, 0, huc_def(kbl, 4, 0, 0)) \ - fw_def(GEMINILAKE, 0, huc_def(glk, 4, 0, 0)) \ - fw_def(KABYLAKE, 0, huc_def(kbl, 4, 0, 0)) \ - fw_def(BROXTON, 0, huc_def(bxt, 2, 0, 0)) \ - fw_def(SKYLAKE, 0, huc_def(skl, 2, 0, 0)) - -#define __MAKE_UC_FW_PATH(prefix_, name_, major_, minor_, patch_) \ +#define INTEL_GUC_FIRMWARE_DEFS(fw_def, guc_maj, guc_mmp) \ + fw_def(DG2, 0, guc_mmp(dg2, 70, 4, 1)) \ + fw_def(ALDERLAKE_P, 0, guc_mmp(adlp, 70, 1, 1)) \ + fw_def(ALDERLAKE_P, 0, guc_mmp(adlp, 69, 0, 3)) \ + fw_def(ALDERLAKE_S, 0, guc_mmp(tgl, 70, 1, 1)) \ + fw_def(ALDERLAKE_S, 0, guc_mmp(tgl, 69, 0, 3)) \ + fw_def(DG1, 0, guc_mmp(dg1, 70, 1, 1)) \ + fw_def(ROCKETLAKE, 0, guc_mmp(tgl, 70, 1, 1)) \ + fw_def(TIGERLAKE, 0, guc_mmp(tgl, 70, 1, 1)) \ + fw_def(JASPERLAKE, 0, guc_mmp(ehl, 70, 1, 1)) \ + fw_def(ELKHARTLAKE, 0, guc_mmp(ehl, 70, 1, 1)) \ + fw_def(ICELAKE, 0, guc_mmp(icl, 70, 1, 1)) \ + fw_def(COMETLAKE, 5, guc_mmp(cml, 70, 1, 1)) \ + fw_def(COMETLAKE, 0, guc_mmp(kbl, 70, 1, 1)) \ + fw_def(COFFEELAKE, 0, guc_mmp(kbl, 70, 1, 1)) \ + fw_def(GEMINILAKE, 0, guc_mmp(glk, 70, 1, 1)) \ + fw_def(KABYLAKE, 0, guc_mmp(kbl, 70, 1, 1)) \ + fw_def(BROXTON, 0, guc_mmp(bxt, 70, 1, 1)) \ + fw_def(SKYLAKE, 0, guc_mmp(skl, 70, 1, 1)) + +#define INTEL_HUC_FIRMWARE_DEFS(fw_def, huc_raw, huc_mmp) \ + fw_def(ALDERLAKE_P, 0, huc_mmp(tgl, 7, 9, 3)) \ + fw_def(ALDERLAKE_S, 0, huc_mmp(tgl, 7, 9, 3)) \ + fw_def(DG1, 0, huc_mmp(dg1, 7, 9, 3)) \ + fw_def(ROCKETLAKE, 0, huc_mmp(tgl, 7, 9, 3)) \ + fw_def(TIGERLAKE, 0, huc_mmp(tgl, 7, 9, 3)) \ + fw_def(JASPERLAKE, 0, huc_mmp(ehl, 9, 0, 0)) \ + fw_def(ELKHARTLAKE, 0, huc_mmp(ehl, 9, 0, 0)) \ + fw_def(ICELAKE, 0, huc_mmp(icl, 9, 0, 0)) \ + fw_def(COMETLAKE, 5, huc_mmp(cml, 4, 0, 0)) \ + fw_def(COMETLAKE, 0, huc_mmp(kbl, 4, 0, 0)) \ + fw_def(COFFEELAKE, 0, huc_mmp(kbl, 4, 0, 0)) \ + fw_def(GEMINILAKE, 0, huc_mmp(glk, 4, 0, 0)) \ + fw_def(KABYLAKE, 0, huc_mmp(kbl, 4, 0, 0)) \ + fw_def(BROXTON, 0, huc_mmp(bxt, 2, 0, 0)) \ + fw_def(SKYLAKE, 0, huc_mmp(skl, 2, 0, 0)) + +/* + * Set of macros for producing a list of filenames from the above table. + */ +#define __MAKE_UC_FW_PATH_BLANK(prefix_, name_) \ + "i915/" \ + __stringify(prefix_) name_ ".bin" + +#define __MAKE_UC_FW_PATH_MAJOR(prefix_, name_, major_) \ + "i915/" \ + __stringify(prefix_) name_ \ + __stringify(major_) ".bin" + +#define __MAKE_UC_FW_PATH_MMP(prefix_, name_, major_, minor_, patch_) \ "i915/" \ __stringify(prefix_) name_ \ __stringify(major_) "." \ __stringify(minor_) "." \ __stringify(patch_) ".bin" -#define MAKE_GUC_FW_PATH(prefix_, major_, minor_, patch_) \ - __MAKE_UC_FW_PATH(prefix_, "_guc_", major_, minor_, patch_) +/* Minor for internal driver use, not part of file name */ +#define MAKE_GUC_FW_PATH_MAJOR(prefix_, major_, minor_) \ + __MAKE_UC_FW_PATH_MAJOR(prefix_, "_guc_", major_) -#define MAKE_HUC_FW_PATH(prefix_, major_, minor_, bld_num_) \ - __MAKE_UC_FW_PATH(prefix_, "_huc_", major_, minor_, bld_num_) +#define MAKE_GUC_FW_PATH_MMP(prefix_, major_, minor_, patch_) \ + __MAKE_UC_FW_PATH_MMP(prefix_, "_guc_", major_, minor_, patch_) -/* All blobs need to be declared via MODULE_FIRMWARE() */ +#define MAKE_HUC_FW_PATH_BLANK(prefix_) \ + __MAKE_UC_FW_PATH_BLANK(prefix_, "_huc") + +#define MAKE_HUC_FW_PATH_MMP(prefix_, major_, minor_, patch_) \ + __MAKE_UC_FW_PATH_MMP(prefix_, "_huc_", major_, minor_, patch_) + +/* + * All blobs need to be declared via MODULE_FIRMWARE(). + * This first expansion of the table macros is solely to provide + * that declaration. + */ #define INTEL_UC_MODULE_FW(platform_, revid_, uc_) \ MODULE_FIRMWARE(uc_); -INTEL_GUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH) -INTEL_GUC_FIRMWARE_DEFS_FALLBACK(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH) -INTEL_HUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_HUC_FW_PATH) +INTEL_GUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH_MAJOR, MAKE_GUC_FW_PATH_MMP) +INTEL_HUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_HUC_FW_PATH_BLANK, MAKE_HUC_FW_PATH_MMP) -/* The below structs and macros are used to iterate across the list of blobs */ +/* + * The next expansion of the table macros (in __uc_fw_auto_select below) provides + * actual data structures with both the filename and the version information. + * These structure arrays are then iterated over to the list of suitable files + * for the current platform and to then attempt to load those files, in the order + * listed, until one is successfully found. + */ struct __packed uc_fw_blob { + const char *path; + bool legacy; u8 major; u8 minor; - const char *path; + u8 patch; }; -#define UC_FW_BLOB(major_, minor_, path_) \ - { .major = major_, .minor = minor_, .path = path_ } +#define UC_FW_BLOB_BASE(major_, minor_, patch_, path_) \ + .major = major_, \ + .minor = minor_, \ + .patch = patch_, \ + .path = path_, + +#define UC_FW_BLOB_NEW(major_, minor_, patch_, path_) \ + { UC_FW_BLOB_BASE(major_, minor_, patch_, path_) \ + .legacy = false } + +#define UC_FW_BLOB_OLD(major_, minor_, patch_, path_) \ + { UC_FW_BLOB_BASE(major_, minor_, patch_, path_) \ + .legacy = true } -#define GUC_FW_BLOB(prefix_, major_, minor_, patch_) \ - UC_FW_BLOB(major_, minor_, \ - MAKE_GUC_FW_PATH(prefix_, major_, minor_, patch_)) +#define GUC_FW_BLOB(prefix_, major_, minor_) \ + UC_FW_BLOB_NEW(major_, minor_, 0, \ + MAKE_GUC_FW_PATH_MAJOR(prefix_, major_, minor_)) -#define HUC_FW_BLOB(prefix_, major_, minor_, bld_num_) \ - UC_FW_BLOB(major_, minor_, \ - MAKE_HUC_FW_PATH(prefix_, major_, minor_, bld_num_)) +#define GUC_FW_BLOB_MMP(prefix_, major_, minor_, patch_) \ + UC_FW_BLOB_OLD(major_, minor_, patch_, \ + MAKE_GUC_FW_PATH_MMP(prefix_, major_, minor_, patch_)) + +#define HUC_FW_BLOB(prefix_) \ + UC_FW_BLOB_NEW(0, 0, 0, MAKE_HUC_FW_PATH_BLANK(prefix_)) + +#define HUC_FW_BLOB_MMP(prefix_, major_, minor_, patch_) \ + UC_FW_BLOB_OLD(major_, minor_, patch_, \ + MAKE_HUC_FW_PATH_MMP(prefix_, major_, minor_, patch_)) struct __packed uc_fw_platform_requirement { enum intel_platform p; @@ -152,18 +217,16 @@ static void __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw) { static const struct uc_fw_platform_requirement blobs_guc[] = { - INTEL_GUC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB) - }; - static const struct uc_fw_platform_requirement blobs_guc_fallback[] = { - INTEL_GUC_FIRMWARE_DEFS_FALLBACK(MAKE_FW_LIST, GUC_FW_BLOB) + INTEL_GUC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB, GUC_FW_BLOB_MMP) }; static const struct uc_fw_platform_requirement blobs_huc[] = { - INTEL_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB) + INTEL_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB, HUC_FW_BLOB_MMP) }; static const struct fw_blobs_by_type blobs_all[INTEL_UC_FW_NUM_TYPES] = { [INTEL_UC_FW_TYPE_GUC] = { blobs_guc, ARRAY_SIZE(blobs_guc) }, [INTEL_UC_FW_TYPE_HUC] = { blobs_huc, ARRAY_SIZE(blobs_huc) }, }; + static bool verified; const struct uc_fw_platform_requirement *fw_blobs; enum intel_platform p = INTEL_INFO(i915)->platform; u32 fw_count; @@ -184,49 +247,94 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw) fw_count = blobs_all[uc_fw->type].count; for (i = 0; i < fw_count && p <= fw_blobs[i].p; i++) { - if (p == fw_blobs[i].p && rev >= fw_blobs[i].rev) { - const struct uc_fw_blob *blob = &fw_blobs[i].blob; - uc_fw->path = blob->path; - uc_fw->wanted_path = blob->path; - uc_fw->major_ver_wanted = blob->major; - uc_fw->minor_ver_wanted = blob->minor; - break; - } - } + const struct uc_fw_blob *blob = &fw_blobs[i].blob; - if (uc_fw->type == INTEL_UC_FW_TYPE_GUC) { - const struct uc_fw_platform_requirement *blobs = blobs_guc_fallback; - u32 count = ARRAY_SIZE(blobs_guc_fallback); + if (p != fw_blobs[i].p) + continue; - for (i = 0; i < count && p <= blobs[i].p; i++) { - if (p == blobs[i].p && rev >= blobs[i].rev) { - const struct uc_fw_blob *blob = &blobs[i].blob; + if (rev < fw_blobs[i].rev) + continue; - uc_fw->fallback.path = blob->path; - uc_fw->fallback.major_ver = blob->major; - uc_fw->fallback.minor_ver = blob->minor; - break; - } + if (uc_fw->file_selected.path) { + if (uc_fw->file_selected.path == blob->path) + uc_fw->file_selected.path = NULL; + + continue; } + + uc_fw->file_selected.path = blob->path; + uc_fw->file_wanted.path = blob->path; + uc_fw->file_wanted.major_ver = blob->major; + uc_fw->file_wanted.minor_ver = blob->minor; + break; } /* make sure the list is ordered as expected */ - if (IS_ENABLED(CONFIG_DRM_I915_SELFTEST)) { + if (IS_ENABLED(CONFIG_DRM_I915_SELFTEST) && !verified) { + verified = true; + for (i = 1; i < fw_count; i++) { + /* Next platform is good: */ if (fw_blobs[i].p < fw_blobs[i - 1].p) continue; + /* Next platform revision is good: */ if (fw_blobs[i].p == fw_blobs[i - 1].p && fw_blobs[i].rev < fw_blobs[i - 1].rev) continue; - drm_err(&i915->drm, "Invalid FW blob order: %s r%u comes before %s r%u\n", - intel_platform_name(fw_blobs[i - 1].p), - fw_blobs[i - 1].rev, - intel_platform_name(fw_blobs[i].p), - fw_blobs[i].rev); + /* Platform/revision must be in order: */ + if (fw_blobs[i].p != fw_blobs[i - 1].p || + fw_blobs[i].rev != fw_blobs[i - 1].rev) + goto bad; + + /* Next major version is good: */ + if (fw_blobs[i].blob.major < fw_blobs[i - 1].blob.major) + continue; + + /* New must be before legacy: */ + if (!fw_blobs[i].blob.legacy && fw_blobs[i - 1].blob.legacy) + goto bad; + + /* New to legacy also means 0.0 to X.Y (HuC), or X.0 to X.Y (GuC) */ + if (fw_blobs[i].blob.legacy && !fw_blobs[i - 1].blob.legacy) { + if (!fw_blobs[i - 1].blob.major) + continue; + + if (fw_blobs[i].blob.major == fw_blobs[i - 1].blob.major) + continue; + } + + /* Major versions must be in order: */ + if (fw_blobs[i].blob.major != fw_blobs[i - 1].blob.major) + goto bad; + + /* Next minor version is good: */ + if (fw_blobs[i].blob.minor < fw_blobs[i - 1].blob.minor) + continue; - uc_fw->path = NULL; + /* Minor versions must be in order: */ + if (fw_blobs[i].blob.minor != fw_blobs[i - 1].blob.minor) + goto bad; + + /* Patch versions must be in order: */ + if (fw_blobs[i].blob.patch <= fw_blobs[i - 1].blob.patch) + continue; + +bad: + drm_err(&i915->drm, "\x1B[35;1mInvalid FW blob order: %s r%u %s%d.%d.%d comes before %s r%u %s%d.%d.%d\n", + intel_platform_name(fw_blobs[i - 1].p), fw_blobs[i - 1].rev, + fw_blobs[i - 1].blob.legacy ? "L" : "v", + fw_blobs[i - 1].blob.major, + fw_blobs[i - 1].blob.minor, + fw_blobs[i - 1].blob.patch, + intel_platform_name(fw_blobs[i].p), fw_blobs[i].rev, + fw_blobs[i].blob.legacy ? "L" : "v", + fw_blobs[i].blob.major, + fw_blobs[i].blob.minor, + fw_blobs[i].blob.patch); + + uc_fw->file_selected.path = NULL; } } } @@ -259,7 +367,7 @@ static void __uc_fw_user_override(struct drm_i915_private *i915, struct intel_uc } if (unlikely(path)) { - uc_fw->path = path; + uc_fw->file_selected.path = path; uc_fw->user_overridden = true; } } @@ -283,7 +391,7 @@ void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw, */ BUILD_BUG_ON(INTEL_UC_FIRMWARE_UNINITIALIZED); GEM_BUG_ON(uc_fw->status); - GEM_BUG_ON(uc_fw->path); + GEM_BUG_ON(uc_fw->file_selected.path); uc_fw->type = type; @@ -292,7 +400,7 @@ void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw, __uc_fw_user_override(i915, uc_fw); } - intel_uc_fw_change_status(uc_fw, uc_fw->path ? *uc_fw->path ? + intel_uc_fw_change_status(uc_fw, uc_fw->file_selected.path ? *uc_fw->file_selected.path ? INTEL_UC_FIRMWARE_SELECTED : INTEL_UC_FIRMWARE_DISABLED : INTEL_UC_FIRMWARE_NOT_SUPPORTED); @@ -305,32 +413,32 @@ static void __force_fw_fetch_failures(struct intel_uc_fw *uc_fw, int e) if (i915_inject_probe_error(i915, e)) { /* non-existing blob */ - uc_fw->path = ""; + uc_fw->file_selected.path = ""; uc_fw->user_overridden = user; } else if (i915_inject_probe_error(i915, e)) { /* require next major version */ - uc_fw->major_ver_wanted += 1; - uc_fw->minor_ver_wanted = 0; + uc_fw->file_wanted.major_ver += 1; + uc_fw->file_wanted.minor_ver = 0; uc_fw->user_overridden = user; } else if (i915_inject_probe_error(i915, e)) { /* require next minor version */ - uc_fw->minor_ver_wanted += 1; + uc_fw->file_wanted.minor_ver += 1; uc_fw->user_overridden = user; - } else if (uc_fw->major_ver_wanted && + } else if (uc_fw->file_wanted.major_ver && i915_inject_probe_error(i915, e)) { /* require prev major version */ - uc_fw->major_ver_wanted -= 1; - uc_fw->minor_ver_wanted = 0; + uc_fw->file_wanted.major_ver -= 1; + uc_fw->file_wanted.minor_ver = 0; uc_fw->user_overridden = user; - } else if (uc_fw->minor_ver_wanted && + } else if (uc_fw->file_wanted.minor_ver && i915_inject_probe_error(i915, e)) { /* require prev minor version - hey, this should work! */ - uc_fw->minor_ver_wanted -= 1; + uc_fw->file_wanted.minor_ver -= 1; uc_fw->user_overridden = user; } else if (user && i915_inject_probe_error(i915, e)) { /* officially unsupported platform */ - uc_fw->major_ver_wanted = 0; - uc_fw->minor_ver_wanted = 0; + uc_fw->file_wanted.major_ver = 0; + uc_fw->file_wanted.minor_ver = 0; uc_fw->user_overridden = true; } } @@ -341,8 +449,8 @@ static int check_gsc_manifest(const struct firmware *fw, u32 *dw = (u32 *)fw->data; u32 version = dw[HUC_GSC_VERSION_DW]; - uc_fw->major_ver_found = FIELD_GET(HUC_GSC_MAJOR_VER_MASK, version); - uc_fw->minor_ver_found = FIELD_GET(HUC_GSC_MINOR_VER_MASK, version); + uc_fw->file_selected.major_ver = FIELD_GET(HUC_GSC_MAJOR_VER_MASK, version); + uc_fw->file_selected.minor_ver = FIELD_GET(HUC_GSC_MINOR_VER_MASK, version); return 0; } @@ -357,7 +465,7 @@ static int check_ccs_header(struct drm_i915_private *i915, /* Check the size of the blob before examining buffer contents */ if (unlikely(fw->size < sizeof(struct uc_css_header))) { drm_warn(&i915->drm, "%s firmware %s: invalid size: %zu < %zu\n", - intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, + intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path, fw->size, sizeof(struct uc_css_header)); return -ENODATA; } @@ -370,7 +478,7 @@ static int check_ccs_header(struct drm_i915_private *i915, if (unlikely(size != sizeof(struct uc_css_header))) { drm_warn(&i915->drm, "%s firmware %s: unexpected header size: %zu != %zu\n", - intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, + intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path, fw->size, sizeof(struct uc_css_header)); return -EPROTO; } @@ -385,7 +493,7 @@ static int check_ccs_header(struct drm_i915_private *i915, size = sizeof(struct uc_css_header) + uc_fw->ucode_size + uc_fw->rsa_size; if (unlikely(fw->size < size)) { drm_warn(&i915->drm, "%s firmware %s: invalid size: %zu < %zu\n", - intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, + intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path, fw->size, size); return -ENOEXEC; } @@ -394,16 +502,16 @@ static int check_ccs_header(struct drm_i915_private *i915, size = __intel_uc_fw_get_upload_size(uc_fw); if (unlikely(size >= i915->wopcm.size)) { drm_warn(&i915->drm, "%s firmware %s: invalid size: %zu > %zu\n", - intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, + intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path, size, (size_t)i915->wopcm.size); return -E2BIG; } /* Get version numbers from the CSS header */ - uc_fw->major_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, - css->sw_version); - uc_fw->minor_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MINOR, - css->sw_version); + uc_fw->file_selected.major_ver = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, + css->sw_version); + uc_fw->file_selected.minor_ver = FIELD_GET(CSS_SW_VERSION_UC_MINOR, + css->sw_version); if (uc_fw->type == INTEL_UC_FW_TYPE_GUC) uc_fw->private_data_size = css->private_data_size; @@ -422,9 +530,11 @@ static int check_ccs_header(struct drm_i915_private *i915, int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) { struct drm_i915_private *i915 = __uc_fw_to_gt(uc_fw)->i915; + struct intel_uc_fw_file file_ideal; struct device *dev = i915->drm.dev; struct drm_i915_gem_object *obj; const struct firmware *fw = NULL; + bool old_ver = false; int err; GEM_BUG_ON(!i915->wopcm.size); @@ -437,27 +547,33 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) __force_fw_fetch_failures(uc_fw, -EINVAL); __force_fw_fetch_failures(uc_fw, -ESTALE); - err = firmware_request_nowarn(&fw, uc_fw->path, dev); - if (err && !intel_uc_fw_is_overridden(uc_fw) && uc_fw->fallback.path) { - err = firmware_request_nowarn(&fw, uc_fw->fallback.path, dev); - if (!err) { - drm_notice(&i915->drm, - "%s firmware %s is recommended, but only %s was found\n", - intel_uc_fw_type_repr(uc_fw->type), - uc_fw->wanted_path, - uc_fw->fallback.path); - drm_info(&i915->drm, - "Consider updating your linux-firmware pkg or downloading from %s\n", - INTEL_UC_FIRMWARE_URL); - - uc_fw->path = uc_fw->fallback.path; - uc_fw->major_ver_wanted = uc_fw->fallback.major_ver; - uc_fw->minor_ver_wanted = uc_fw->fallback.minor_ver; + err = firmware_request_nowarn(&fw, uc_fw->file_selected.path, dev); + memcpy(&file_ideal, &uc_fw->file_wanted, sizeof(file_ideal)); + if (!err || intel_uc_fw_is_overridden(uc_fw)) + goto done; + + while (err == -ENOENT) { + __uc_fw_auto_select(i915, uc_fw); + if (!uc_fw->file_selected.path) { + /* + * No more options! But set the path back to something + * valid just in case it gets dereferenced. + */ + uc_fw->file_selected.path = file_ideal.path; + + /* Also, preserve the version that was really wanted */ + memcpy(&uc_fw->file_wanted, &file_ideal, sizeof(uc_fw->file_wanted)); + break; } + + err = firmware_request_nowarn(&fw, uc_fw->file_selected.path, dev); } + if (err) goto fail; + old_ver = true; +done: if (uc_fw->loaded_via_gsc) err = check_gsc_manifest(fw, uc_fw); else @@ -465,18 +581,39 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) if (err) goto fail; - if (uc_fw->major_ver_found != uc_fw->major_ver_wanted || - uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) { - drm_notice(&i915->drm, "%s firmware %s: unexpected version: %u.%u != %u.%u\n", - intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, - uc_fw->major_ver_found, uc_fw->minor_ver_found, - uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted); - if (!intel_uc_fw_is_overridden(uc_fw)) { - err = -ENOEXEC; - goto fail; + if (uc_fw->file_wanted.major_ver) { + /* Check the file's major version was as it claimed */ + if (uc_fw->file_selected.major_ver != uc_fw->file_wanted.major_ver) { + drm_notice(&i915->drm, "%s firmware %s: unexpected version: %u.%u != %u.%u\n", + intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path, + uc_fw->file_selected.major_ver, uc_fw->file_selected.minor_ver, + uc_fw->file_wanted.major_ver, uc_fw->file_wanted.minor_ver); + if (!intel_uc_fw_is_overridden(uc_fw)) { + err = -ENOEXEC; + goto fail; + } + } else { + if (uc_fw->file_selected.minor_ver < uc_fw->file_wanted.minor_ver) + old_ver = true; } } + if (old_ver) { + /* Preserve the version that was really wanted */ + memcpy(&uc_fw->file_wanted, &file_ideal, sizeof(uc_fw->file_wanted)); + + drm_notice(&i915->drm, + "%s firmware %s (%d.%d) is recommended, but only %s (%d.%d) was found\n", + intel_uc_fw_type_repr(uc_fw->type), + uc_fw->file_wanted.path, + uc_fw->file_wanted.major_ver, uc_fw->file_wanted.minor_ver, + uc_fw->file_selected.path, + uc_fw->file_selected.major_ver, uc_fw->file_selected.minor_ver); + drm_info(&i915->drm, + "Consider updating your linux-firmware pkg or downloading from %s\n", + INTEL_UC_FIRMWARE_URL); + } + if (HAS_LMEM(i915)) { obj = i915_gem_object_create_lmem_from_data(i915, fw->data, fw->size); if (!IS_ERR(obj)) @@ -503,7 +640,7 @@ fail: INTEL_UC_FIRMWARE_ERROR); i915_probe_error(i915, "%s firmware %s: fetch failed with error %d\n", - intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, err); + intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path, err); drm_info(&i915->drm, "%s firmware(s) can be downloaded from %s\n", intel_uc_fw_type_repr(uc_fw->type), INTEL_UC_FIRMWARE_URL); @@ -645,7 +782,7 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, u32 dst_offset, u32 dma_flags) fail: i915_probe_error(gt->i915, "Failed to load %s firmware %s (%d)\n", - intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, + intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path, err); intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_LOAD_FAIL); return err; @@ -864,18 +1001,19 @@ size_t intel_uc_fw_copy_rsa(struct intel_uc_fw *uc_fw, void *dst, u32 max_len) void intel_uc_fw_dump(const struct intel_uc_fw *uc_fw, struct drm_printer *p) { drm_printf(p, "%s firmware: %s\n", - intel_uc_fw_type_repr(uc_fw->type), uc_fw->wanted_path); - if (uc_fw->fallback.path) { - drm_printf(p, "%s firmware fallback: %s\n", - intel_uc_fw_type_repr(uc_fw->type), uc_fw->fallback.path); - drm_printf(p, "fallback selected: %s\n", - str_yes_no(uc_fw->path == uc_fw->fallback.path)); - } + intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path); + if (uc_fw->file_selected.path != uc_fw->file_wanted.path) + drm_printf(p, "%s firmware wanted: %s\n", + intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_wanted.path); drm_printf(p, "\tstatus: %s\n", intel_uc_fw_status_repr(uc_fw->status)); - drm_printf(p, "\tversion: wanted %u.%u, found %u.%u\n", - uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted, - uc_fw->major_ver_found, uc_fw->minor_ver_found); + if (uc_fw->file_wanted.major_ver) + drm_printf(p, "\tversion: wanted %u.%u, found %u.%u\n", + uc_fw->file_wanted.major_ver, uc_fw->file_wanted.minor_ver, + uc_fw->file_selected.major_ver, uc_fw->file_selected.minor_ver); + else + drm_printf(p, "\tversion: found %u.%u\n", + uc_fw->file_selected.major_ver, uc_fw->file_selected.minor_ver); drm_printf(p, "\tuCode: %u bytes\n", uc_fw->ucode_size); drm_printf(p, "\tRSA: %u bytes\n", uc_fw->rsa_size); } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h index 7aa2644400b9..344763c942e3 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h @@ -64,6 +64,17 @@ enum intel_uc_fw_type { }; #define INTEL_UC_FW_NUM_TYPES 2 +/* + * The firmware build process will generate a version header file with major and + * minor version defined. The versions are built into CSS header of firmware. + * i915 kernel driver set the minimal firmware version required per platform. + */ +struct intel_uc_fw_file { + const char *path; + u16 major_ver; + u16 minor_ver; +}; + /* * This structure encapsulates all the data needed during the process * of fetching, caching, and loading the firmware image into the uC. @@ -74,11 +85,12 @@ struct intel_uc_fw { const enum intel_uc_fw_status status; enum intel_uc_fw_status __status; /* no accidental overwrites */ }; - const char *wanted_path; - const char *path; + struct intel_uc_fw_file file_wanted; + struct intel_uc_fw_file file_selected; bool user_overridden; size_t size; struct drm_i915_gem_object *obj; + /** * @dummy: A vma used in binding the uc fw to ggtt. We can't define this * vma on the stack as it can lead to a stack overflow, so we define it @@ -89,25 +101,8 @@ struct intel_uc_fw { struct i915_vma_resource dummy; struct i915_vma *rsa_data; - /* - * The firmware build process will generate a version header file with major and - * minor version defined. The versions are built into CSS header of firmware. - * i915 kernel driver set the minimal firmware version required per platform. - */ - u16 major_ver_wanted; - u16 minor_ver_wanted; - u16 major_ver_found; - u16 minor_ver_found; - - struct { - const char *path; - u16 major_ver; - u16 minor_ver; - } fallback; - u32 rsa_size; u32 ucode_size; - u32 private_data_size; bool loaded_via_gsc; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 543ba63f958e..16d8b7ba65dc 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1023,8 +1023,10 @@ static void cleanup_params(struct i915_gpu_coredump *error) static void cleanup_uc(struct intel_uc_coredump *uc) { - kfree(uc->guc_fw.path); - kfree(uc->huc_fw.path); + kfree(uc->guc_fw.file_selected.path); + kfree(uc->huc_fw.file_selected.path); + kfree(uc->guc_fw.file_wanted.path); + kfree(uc->huc_fw.file_wanted.path); i915_vma_coredump_free(uc->guc.vma_log); i915_vma_coredump_free(uc->guc.vma_ctb); @@ -1706,12 +1708,10 @@ gt_record_uc(struct intel_gt_coredump *gt, memcpy(&error_uc->guc_fw, &uc->guc.fw, sizeof(uc->guc.fw)); memcpy(&error_uc->huc_fw, &uc->huc.fw, sizeof(uc->huc.fw)); - /* Non-default firmware paths will be specified by the modparam. - * As modparams are generally accesible from the userspace make - * explicit copies of the firmware paths. - */ - error_uc->guc_fw.path = kstrdup(uc->guc.fw.path, ALLOW_FAIL); - error_uc->huc_fw.path = kstrdup(uc->huc.fw.path, ALLOW_FAIL); + error_uc->guc_fw.file_selected.path = kstrdup(uc->guc.fw.file_selected.path, ALLOW_FAIL); + error_uc->huc_fw.file_selected.path = kstrdup(uc->huc.fw.file_selected.path, ALLOW_FAIL); + error_uc->guc_fw.file_wanted.path = kstrdup(uc->guc.fw.file_wanted.path, ALLOW_FAIL); + error_uc->huc_fw.file_wanted.path = kstrdup(uc->huc.fw.file_wanted.path, ALLOW_FAIL); /* * Save the GuC log and include a timestamp reference for converting the -- cgit v1.2.3 From 65332a5b9fbd5c72c0db009b17ef4304d4c242dd Mon Sep 17 00:00:00 2001 From: John Harrison Date: Tue, 6 Sep 2022 16:01:47 -0700 Subject: drm/i915/uc: Add patch level version number support With the move to un-versioned filenames, it becomes more difficult to know exactly what version of a given firmware is being used. So add the patch level version number to the debugfs output. Also, support matching by patch level when selecting code paths for firmware compatibility. While a patch level change cannot be backwards breaking, it is potentially possible that a new feature only works from a given patch level onwards (even though it was theoretically added in an earlier version that bumped the major or minor version). Signed-off-by: John Harrison Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20220906230147.479945-2-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 10 +++---- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 6 ++-- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 36 +++++++++++++++++------ drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h | 6 ++++ drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h | 8 +++-- 5 files changed, 47 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 04393932623c..64c4e83153f4 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1868,7 +1868,7 @@ int intel_guc_submission_init(struct intel_guc *guc) if (guc->submission_initialized) return 0; - if (guc->fw.file_selected.major_ver < 70) { + if (GET_UC_VER(guc) < MAKE_UC_VER(70, 0, 0)) { ret = guc_lrc_desc_pool_create_v69(guc); if (ret) return ret; @@ -2303,7 +2303,7 @@ static int register_context(struct intel_context *ce, bool loop) GEM_BUG_ON(intel_context_is_child(ce)); trace_intel_context_register(ce); - if (guc->fw.file_selected.major_ver >= 70) + if (GET_UC_VER(guc) >= MAKE_UC_VER(70, 0, 0)) ret = register_context_v70(guc, ce, loop); else ret = register_context_v69(guc, ce, loop); @@ -2315,7 +2315,7 @@ static int register_context(struct intel_context *ce, bool loop) set_context_registered(ce); spin_unlock_irqrestore(&ce->guc_state.lock, flags); - if (guc->fw.file_selected.major_ver >= 70) + if (GET_UC_VER(guc) >= MAKE_UC_VER(70, 0, 0)) guc_context_policy_init_v70(ce, loop); } @@ -2921,7 +2921,7 @@ static void __guc_context_set_preemption_timeout(struct intel_guc *guc, u16 guc_id, u32 preemption_timeout) { - if (guc->fw.file_selected.major_ver >= 70) { + if (GET_UC_VER(guc) >= MAKE_UC_VER(70, 0, 0)) { struct context_policy policy; __guc_context_policy_start_klv(&policy, guc_id); @@ -3186,7 +3186,7 @@ static int guc_context_alloc(struct intel_context *ce) static void __guc_context_set_prio(struct intel_guc *guc, struct intel_context *ce) { - if (guc->fw.file_selected.major_ver >= 70) { + if (GET_UC_VER(guc) >= MAKE_UC_VER(70, 0, 0)) { struct context_policy policy; __guc_context_policy_start_klv(&policy, ce->guc_id.id); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index d965ac4832d6..abf4e142596d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -435,9 +435,11 @@ static void print_fw_ver(struct intel_uc *uc, struct intel_uc_fw *fw) { struct drm_i915_private *i915 = uc_to_gt(uc)->i915; - drm_info(&i915->drm, "%s firmware %s version %u.%u\n", + drm_info(&i915->drm, "%s firmware %s version %u.%u.%u\n", intel_uc_fw_type_repr(fw->type), fw->file_selected.path, - fw->file_selected.major_ver, fw->file_selected.minor_ver); + fw->file_selected.major_ver, + fw->file_selected.minor_ver, + fw->file_selected.patch_ver); } static int __uc_init_hw(struct intel_uc *uc) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 610f36f1b989..af425916cdf6 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -447,10 +447,12 @@ static int check_gsc_manifest(const struct firmware *fw, struct intel_uc_fw *uc_fw) { u32 *dw = (u32 *)fw->data; - u32 version = dw[HUC_GSC_VERSION_DW]; + u32 version_hi = dw[HUC_GSC_VERSION_HI_DW]; + u32 version_lo = dw[HUC_GSC_VERSION_LO_DW]; - uc_fw->file_selected.major_ver = FIELD_GET(HUC_GSC_MAJOR_VER_MASK, version); - uc_fw->file_selected.minor_ver = FIELD_GET(HUC_GSC_MINOR_VER_MASK, version); + uc_fw->file_selected.major_ver = FIELD_GET(HUC_GSC_MAJOR_VER_HI_MASK, version_hi); + uc_fw->file_selected.minor_ver = FIELD_GET(HUC_GSC_MINOR_VER_HI_MASK, version_hi); + uc_fw->file_selected.patch_ver = FIELD_GET(HUC_GSC_PATCH_VER_LO_MASK, version_lo); return 0; } @@ -512,6 +514,8 @@ static int check_ccs_header(struct drm_i915_private *i915, css->sw_version); uc_fw->file_selected.minor_ver = FIELD_GET(CSS_SW_VERSION_UC_MINOR, css->sw_version); + uc_fw->file_selected.patch_ver = FIELD_GET(CSS_SW_VERSION_UC_PATCH, + css->sw_version); if (uc_fw->type == INTEL_UC_FW_TYPE_GUC) uc_fw->private_data_size = css->private_data_size; @@ -1000,6 +1004,8 @@ size_t intel_uc_fw_copy_rsa(struct intel_uc_fw *uc_fw, void *dst, u32 max_len) */ void intel_uc_fw_dump(const struct intel_uc_fw *uc_fw, struct drm_printer *p) { + u32 ver_sel, ver_want; + drm_printf(p, "%s firmware: %s\n", intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path); if (uc_fw->file_selected.path != uc_fw->file_wanted.path) @@ -1007,13 +1013,25 @@ void intel_uc_fw_dump(const struct intel_uc_fw *uc_fw, struct drm_printer *p) intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_wanted.path); drm_printf(p, "\tstatus: %s\n", intel_uc_fw_status_repr(uc_fw->status)); - if (uc_fw->file_wanted.major_ver) - drm_printf(p, "\tversion: wanted %u.%u, found %u.%u\n", - uc_fw->file_wanted.major_ver, uc_fw->file_wanted.minor_ver, - uc_fw->file_selected.major_ver, uc_fw->file_selected.minor_ver); + ver_sel = MAKE_UC_VER(uc_fw->file_selected.major_ver, + uc_fw->file_selected.minor_ver, + uc_fw->file_selected.patch_ver); + ver_want = MAKE_UC_VER(uc_fw->file_wanted.major_ver, + uc_fw->file_wanted.minor_ver, + uc_fw->file_wanted.patch_ver); + if (ver_sel < ver_want) + drm_printf(p, "\tversion: wanted %u.%u.%u, found %u.%u.%u\n", + uc_fw->file_wanted.major_ver, + uc_fw->file_wanted.minor_ver, + uc_fw->file_wanted.patch_ver, + uc_fw->file_selected.major_ver, + uc_fw->file_selected.minor_ver, + uc_fw->file_selected.patch_ver); else - drm_printf(p, "\tversion: found %u.%u\n", - uc_fw->file_selected.major_ver, uc_fw->file_selected.minor_ver); + drm_printf(p, "\tversion: found %u.%u.%u\n", + uc_fw->file_selected.major_ver, + uc_fw->file_selected.minor_ver, + uc_fw->file_selected.patch_ver); drm_printf(p, "\tuCode: %u bytes\n", uc_fw->ucode_size); drm_printf(p, "\tRSA: %u bytes\n", uc_fw->rsa_size); } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h index 344763c942e3..cb586f7df270 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h @@ -73,6 +73,7 @@ struct intel_uc_fw_file { const char *path; u16 major_ver; u16 minor_ver; + u16 patch_ver; }; /* @@ -108,6 +109,11 @@ struct intel_uc_fw { bool loaded_via_gsc; }; +#define MAKE_UC_VER(maj, min, pat) ((pat) | ((min) << 8) | ((maj) << 16)) +#define GET_UC_VER(uc) (MAKE_UC_VER((uc)->fw.file_selected.major_ver, \ + (uc)->fw.file_selected.minor_ver, \ + (uc)->fw.file_selected.patch_ver)) + #ifdef CONFIG_DRM_I915_DEBUG_GUC void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, enum intel_uc_fw_status status); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h index b05e0e35b734..7a411178bdbf 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h @@ -83,8 +83,10 @@ struct uc_css_header { } __packed; static_assert(sizeof(struct uc_css_header) == 128); -#define HUC_GSC_VERSION_DW 44 -#define HUC_GSC_MAJOR_VER_MASK (0xFF << 0) -#define HUC_GSC_MINOR_VER_MASK (0xFF << 16) +#define HUC_GSC_VERSION_HI_DW 44 +#define HUC_GSC_MAJOR_VER_HI_MASK (0xFF << 0) +#define HUC_GSC_MINOR_VER_HI_MASK (0xFF << 16) +#define HUC_GSC_VERSION_LO_DW 45 +#define HUC_GSC_PATCH_VER_LO_MASK (0xFF << 0) #endif /* _INTEL_UC_FW_ABI_H */ -- cgit v1.2.3 From 3bb6a44251b4d066d73faf43dc17bad05963ae16 Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Thu, 1 Sep 2022 11:38:54 -0700 Subject: drm/i915: Rename ggtt_view as gtt_view So far, different views (normal, partial, rotated and remapped) into the same object are only supported for GGTT mappings. But with the upcoming VM_BIND feature, PPGTT will also use the partial view mapping. Hence rename ggtt_view to more generic gtt_view. Signed-off-by: Niranjana Vishwanathapura Acked-by: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20220901183854.3446-1-niranjana.vishwanathapura@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 2 +- drivers/gpu/drm/i915/display/intel_display.h | 2 +- drivers/gpu/drm/i915/display/intel_display_types.h | 2 +- drivers/gpu/drm/i915/display/intel_fb.c | 18 +++--- drivers/gpu/drm/i915/display/intel_fb_pin.c | 4 +- drivers/gpu/drm/i915/display/intel_fb_pin.h | 4 +- drivers/gpu/drm/i915/display/intel_fbdev.c | 4 +- drivers/gpu/drm/i915/gem/i915_gem_domain.c | 4 +- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 16 ++--- drivers/gpu/drm/i915/gem/i915_gem_object.h | 2 +- drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 4 +- drivers/gpu/drm/i915/gt/intel_reset.c | 2 +- drivers/gpu/drm/i915/i915_debugfs.c | 56 +++++++++--------- drivers/gpu/drm/i915/i915_drv.h | 4 +- drivers/gpu/drm/i915/i915_gem.c | 6 +- drivers/gpu/drm/i915/i915_vma.c | 40 ++++++------- drivers/gpu/drm/i915/i915_vma.h | 18 +++--- drivers/gpu/drm/i915/i915_vma_types.h | 42 ++++++------- drivers/gpu/drm/i915/selftests/i915_vma.c | 68 +++++++++++----------- 19 files changed, 149 insertions(+), 149 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 9f105250f474..85866f9f50ee 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -672,7 +672,7 @@ bool intel_plane_uses_fence(const struct intel_plane_state *plane_state) return DISPLAY_VER(dev_priv) < 4 || (plane->fbc && - plane_state->view.gtt.type == I915_GGTT_VIEW_NORMAL); + plane_state->view.gtt.type == I915_GTT_VIEW_NORMAL); } /* diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index 187910d94ec6..e1daa7df3e58 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -45,7 +45,7 @@ struct drm_modeset_acquire_ctx; struct drm_plane; struct drm_plane_state; struct i915_address_space; -struct i915_ggtt_view; +struct i915_gtt_view; struct intel_atomic_state; struct intel_crtc; struct intel_crtc_state; diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index cfd042117b10..e7da4a154c06 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -104,7 +104,7 @@ struct intel_fb_view { * In the normal view the FB object's backing store sg list is used * directly and hence the remap information here is not used. */ - struct i915_ggtt_view gtt; + struct i915_gtt_view gtt; /* * The GTT view (gtt.type) specific information for each FB color diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index 9f5a6b79e95b..ff69e5510a13 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -1394,7 +1394,7 @@ static u32 calc_plane_remap_info(const struct intel_framebuffer *fb, int color_p plane_view_height_tiles(fb, color_plane, dims, y)); } - if (view->gtt.type == I915_GGTT_VIEW_ROTATED) { + if (view->gtt.type == I915_GTT_VIEW_ROTATED) { drm_WARN_ON(&i915->drm, remap_info->linear); check_array_bounds(i915, view->gtt.rotated.plane, color_plane); @@ -1419,7 +1419,7 @@ static u32 calc_plane_remap_info(const struct intel_framebuffer *fb, int color_p /* rotate the tile dimensions to match the GTT view */ swap(tile_width, tile_height); } else { - drm_WARN_ON(&i915->drm, view->gtt.type != I915_GGTT_VIEW_REMAPPED); + drm_WARN_ON(&i915->drm, view->gtt.type != I915_GTT_VIEW_REMAPPED); check_array_bounds(i915, view->gtt.remapped.plane, color_plane); @@ -1502,12 +1502,12 @@ calc_plane_normal_size(const struct intel_framebuffer *fb, int color_plane, } static void intel_fb_view_init(struct drm_i915_private *i915, struct intel_fb_view *view, - enum i915_ggtt_view_type view_type) + enum i915_gtt_view_type view_type) { memset(view, 0, sizeof(*view)); view->gtt.type = view_type; - if (view_type == I915_GGTT_VIEW_REMAPPED && IS_ALDERLAKE_P(i915)) + if (view_type == I915_GTT_VIEW_REMAPPED && IS_ALDERLAKE_P(i915)) view->gtt.remapped.plane_alignment = SZ_2M / PAGE_SIZE; } @@ -1529,16 +1529,16 @@ int intel_fill_fb_info(struct drm_i915_private *i915, struct intel_framebuffer * int i, num_planes = fb->base.format->num_planes; unsigned int tile_size = intel_tile_size(i915); - intel_fb_view_init(i915, &fb->normal_view, I915_GGTT_VIEW_NORMAL); + intel_fb_view_init(i915, &fb->normal_view, I915_GTT_VIEW_NORMAL); drm_WARN_ON(&i915->drm, intel_fb_supports_90_270_rotation(fb) && intel_fb_needs_pot_stride_remap(fb)); if (intel_fb_supports_90_270_rotation(fb)) - intel_fb_view_init(i915, &fb->rotated_view, I915_GGTT_VIEW_ROTATED); + intel_fb_view_init(i915, &fb->rotated_view, I915_GTT_VIEW_ROTATED); if (intel_fb_needs_pot_stride_remap(fb)) - intel_fb_view_init(i915, &fb->remapped_view, I915_GGTT_VIEW_REMAPPED); + intel_fb_view_init(i915, &fb->remapped_view, I915_GTT_VIEW_REMAPPED); for (i = 0; i < num_planes; i++) { struct fb_plane_view_dims view_dims; @@ -1619,8 +1619,8 @@ static void intel_plane_remap_gtt(struct intel_plane_state *plane_state) u32 gtt_offset = 0; intel_fb_view_init(i915, &plane_state->view, - drm_rotation_90_or_270(rotation) ? I915_GGTT_VIEW_ROTATED : - I915_GGTT_VIEW_REMAPPED); + drm_rotation_90_or_270(rotation) ? I915_GTT_VIEW_ROTATED : + I915_GTT_VIEW_REMAPPED); src_x = plane_state->uapi.src.x1 >> 16; src_y = plane_state->uapi.src.y1 >> 16; diff --git a/drivers/gpu/drm/i915/display/intel_fb_pin.c b/drivers/gpu/drm/i915/display/intel_fb_pin.c index bd6e7c98e751..c86e5d4ee016 100644 --- a/drivers/gpu/drm/i915/display/intel_fb_pin.c +++ b/drivers/gpu/drm/i915/display/intel_fb_pin.c @@ -18,7 +18,7 @@ static struct i915_vma * intel_pin_fb_obj_dpt(struct drm_framebuffer *fb, - const struct i915_ggtt_view *view, + const struct i915_gtt_view *view, bool uses_fence, unsigned long *out_flags, struct i915_address_space *vm) @@ -79,7 +79,7 @@ err: struct i915_vma * intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, bool phys_cursor, - const struct i915_ggtt_view *view, + const struct i915_gtt_view *view, bool uses_fence, unsigned long *out_flags) { diff --git a/drivers/gpu/drm/i915/display/intel_fb_pin.h b/drivers/gpu/drm/i915/display/intel_fb_pin.h index e4fcd0218d9d..de0efaa25905 100644 --- a/drivers/gpu/drm/i915/display/intel_fb_pin.h +++ b/drivers/gpu/drm/i915/display/intel_fb_pin.h @@ -11,12 +11,12 @@ struct drm_framebuffer; struct i915_vma; struct intel_plane_state; -struct i915_ggtt_view; +struct i915_gtt_view; struct i915_vma * intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, bool phys_cursor, - const struct i915_ggtt_view *view, + const struct i915_gtt_view *view, bool uses_fence, unsigned long *out_flags); diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c index 221336178991..1922f62d04c0 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev.c @@ -198,8 +198,8 @@ static int intelfb_create(struct drm_fb_helper *helper, struct drm_i915_private *dev_priv = to_i915(dev); struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev); struct i915_ggtt *ggtt = to_gt(dev_priv)->ggtt; - const struct i915_ggtt_view view = { - .type = I915_GGTT_VIEW_NORMAL, + const struct i915_gtt_view view = { + .type = I915_GTT_VIEW_NORMAL, }; intel_wakeref_t wakeref; struct fb_info *info; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index 1674b0c5802b..d44a152ce680 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -397,7 +397,7 @@ struct i915_vma * i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, struct i915_gem_ww_ctx *ww, u32 alignment, - const struct i915_ggtt_view *view, + const struct i915_gtt_view *view, unsigned int flags) { struct drm_i915_private *i915 = to_i915(obj->base.dev); @@ -434,7 +434,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, */ vma = ERR_PTR(-ENOSPC); if ((flags & PIN_MAPPABLE) == 0 && - (!view || view->type == I915_GGTT_VIEW_NORMAL)) + (!view || view->type == I915_GTT_VIEW_NORMAL)) vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0, alignment, flags | PIN_MAPPABLE | PIN_NONBLOCK); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 0c5c43852e24..3218981488cc 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -194,17 +194,17 @@ int i915_gem_mmap_gtt_version(void) return 4; } -static inline struct i915_ggtt_view +static inline struct i915_gtt_view compute_partial_view(const struct drm_i915_gem_object *obj, pgoff_t page_offset, unsigned int chunk) { - struct i915_ggtt_view view; + struct i915_gtt_view view; if (i915_gem_object_is_tiled(obj)) chunk = roundup(chunk, tile_row_pages(obj) ?: 1); - view.type = I915_GGTT_VIEW_PARTIAL; + view.type = I915_GTT_VIEW_PARTIAL; view.partial.offset = rounddown(page_offset, chunk); view.partial.size = min_t(unsigned int, chunk, @@ -212,7 +212,7 @@ compute_partial_view(const struct drm_i915_gem_object *obj, /* If the partial covers the entire object, just create a normal VMA. */ if (chunk >= obj->base.size >> PAGE_SHIFT) - view.type = I915_GGTT_VIEW_NORMAL; + view.type = I915_GTT_VIEW_NORMAL; return view; } @@ -341,12 +341,12 @@ retry: PIN_NOEVICT); if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK)) { /* Use a partial view if it is bigger than available space */ - struct i915_ggtt_view view = + struct i915_gtt_view view = compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES); unsigned int flags; flags = PIN_MAPPABLE | PIN_NOSEARCH; - if (view.type == I915_GGTT_VIEW_NORMAL) + if (view.type == I915_GTT_VIEW_NORMAL) flags |= PIN_NONBLOCK; /* avoid warnings for pinned */ /* @@ -357,7 +357,7 @@ retry: vma = i915_gem_object_ggtt_pin_ww(obj, &ww, &view, 0, 0, flags); if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK)) { flags = PIN_MAPPABLE; - view.type = I915_GGTT_VIEW_PARTIAL; + view.type = I915_GTT_VIEW_PARTIAL; vma = i915_gem_object_ggtt_pin_ww(obj, &ww, &view, 0, 0, flags); } @@ -394,7 +394,7 @@ retry: /* Finally, remap it using the new GTT offset */ ret = remap_io_mapping(area, - area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT), + area->vm_start + (vma->gtt_view.partial.offset << PAGE_SHIFT), (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT, min_t(u64, vma->size, area->vm_end - area->vm_start), &ggtt->iomap); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 6f0a3ce35567..7317d4102955 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -543,7 +543,7 @@ struct i915_vma * __must_check i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, struct i915_gem_ww_ctx *ww, u32 alignment, - const struct i915_ggtt_view *view, + const struct i915_gtt_view *view, unsigned int flags); void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 3cff08f04f6c..55bf23dc0e54 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -93,7 +93,7 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj, { const unsigned long npages = obj->base.size / PAGE_SIZE; struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_ggtt_view view; + struct i915_gtt_view view; struct i915_vma *vma; unsigned long page; u32 __iomem *io; @@ -210,7 +210,7 @@ static int check_partial_mappings(struct drm_i915_gem_object *obj, } for_each_prime_number_from(page, 1, npages) { - struct i915_ggtt_view view = + struct i915_gtt_view view = compute_partial_view(obj, page, MIN_CHUNK_PAGES); u32 __iomem *io; struct page *p; diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 1211774e1d91..b36674356986 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -776,7 +776,7 @@ static void revoke_mmaps(struct intel_gt *gt) continue; node = &vma->mmo->vma_node; - vma_offset = vma->ggtt_view.partial.offset << PAGE_SHIFT; + vma_offset = vma->gtt_view.partial.offset << PAGE_SHIFT; unmap_mapping_range(gt->i915->drm.anon_inode->i_mapping, drm_vma_node_offset_addr(node) + vma_offset, diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 94e5c29d2ee3..3a8450058548 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -188,47 +188,47 @@ i915_debugfs_describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) stringify_page_sizes(vma->resource->page_sizes_gtt, NULL, 0)); if (i915_vma_is_ggtt(vma) || i915_vma_is_dpt(vma)) { - switch (vma->ggtt_view.type) { - case I915_GGTT_VIEW_NORMAL: + switch (vma->gtt_view.type) { + case I915_GTT_VIEW_NORMAL: seq_puts(m, ", normal"); break; - case I915_GGTT_VIEW_PARTIAL: + case I915_GTT_VIEW_PARTIAL: seq_printf(m, ", partial [%08llx+%x]", - vma->ggtt_view.partial.offset << PAGE_SHIFT, - vma->ggtt_view.partial.size << PAGE_SHIFT); + vma->gtt_view.partial.offset << PAGE_SHIFT, + vma->gtt_view.partial.size << PAGE_SHIFT); break; - case I915_GGTT_VIEW_ROTATED: + case I915_GTT_VIEW_ROTATED: seq_printf(m, ", rotated [(%ux%u, src_stride=%u, dst_stride=%u, offset=%u), (%ux%u, src_stride=%u, dst_stride=%u, offset=%u)]", - vma->ggtt_view.rotated.plane[0].width, - vma->ggtt_view.rotated.plane[0].height, - vma->ggtt_view.rotated.plane[0].src_stride, - vma->ggtt_view.rotated.plane[0].dst_stride, - vma->ggtt_view.rotated.plane[0].offset, - vma->ggtt_view.rotated.plane[1].width, - vma->ggtt_view.rotated.plane[1].height, - vma->ggtt_view.rotated.plane[1].src_stride, - vma->ggtt_view.rotated.plane[1].dst_stride, - vma->ggtt_view.rotated.plane[1].offset); + vma->gtt_view.rotated.plane[0].width, + vma->gtt_view.rotated.plane[0].height, + vma->gtt_view.rotated.plane[0].src_stride, + vma->gtt_view.rotated.plane[0].dst_stride, + vma->gtt_view.rotated.plane[0].offset, + vma->gtt_view.rotated.plane[1].width, + vma->gtt_view.rotated.plane[1].height, + vma->gtt_view.rotated.plane[1].src_stride, + vma->gtt_view.rotated.plane[1].dst_stride, + vma->gtt_view.rotated.plane[1].offset); break; - case I915_GGTT_VIEW_REMAPPED: + case I915_GTT_VIEW_REMAPPED: seq_printf(m, ", remapped [(%ux%u, src_stride=%u, dst_stride=%u, offset=%u), (%ux%u, src_stride=%u, dst_stride=%u, offset=%u)]", - vma->ggtt_view.remapped.plane[0].width, - vma->ggtt_view.remapped.plane[0].height, - vma->ggtt_view.remapped.plane[0].src_stride, - vma->ggtt_view.remapped.plane[0].dst_stride, - vma->ggtt_view.remapped.plane[0].offset, - vma->ggtt_view.remapped.plane[1].width, - vma->ggtt_view.remapped.plane[1].height, - vma->ggtt_view.remapped.plane[1].src_stride, - vma->ggtt_view.remapped.plane[1].dst_stride, - vma->ggtt_view.remapped.plane[1].offset); + vma->gtt_view.remapped.plane[0].width, + vma->gtt_view.remapped.plane[0].height, + vma->gtt_view.remapped.plane[0].src_stride, + vma->gtt_view.remapped.plane[0].dst_stride, + vma->gtt_view.remapped.plane[0].offset, + vma->gtt_view.remapped.plane[1].width, + vma->gtt_view.remapped.plane[1].height, + vma->gtt_view.remapped.plane[1].src_stride, + vma->gtt_view.remapped.plane[1].dst_stride, + vma->gtt_view.remapped.plane[1].offset); break; default: - MISSING_CASE(vma->ggtt_view.type); + MISSING_CASE(vma->gtt_view.type); break; } } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 66871be58a47..0f3566901542 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1468,12 +1468,12 @@ static inline void i915_gem_drain_workqueue(struct drm_i915_private *i915) struct i915_vma * __must_check i915_gem_object_ggtt_pin_ww(struct drm_i915_gem_object *obj, struct i915_gem_ww_ctx *ww, - const struct i915_ggtt_view *view, + const struct i915_gtt_view *view, u64 size, u64 alignment, u64 flags); struct i915_vma * __must_check i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, - const struct i915_ggtt_view *view, + const struct i915_gtt_view *view, u64 size, u64 alignment, u64 flags); int i915_gem_object_unbind(struct drm_i915_gem_object *obj, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 702e5b89be22..a3373699835d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -885,7 +885,7 @@ static void discard_ggtt_vma(struct i915_vma *vma) struct i915_vma * i915_gem_object_ggtt_pin_ww(struct drm_i915_gem_object *obj, struct i915_gem_ww_ctx *ww, - const struct i915_ggtt_view *view, + const struct i915_gtt_view *view, u64 size, u64 alignment, u64 flags) { struct drm_i915_private *i915 = to_i915(obj->base.dev); @@ -896,7 +896,7 @@ i915_gem_object_ggtt_pin_ww(struct drm_i915_gem_object *obj, GEM_WARN_ON(!ww); if (flags & PIN_MAPPABLE && - (!view || view->type == I915_GGTT_VIEW_NORMAL)) { + (!view || view->type == I915_GTT_VIEW_NORMAL)) { /* * If the required space is larger than the available * aperture, we will not able to find a slot for the @@ -987,7 +987,7 @@ new_vma: struct i915_vma * __must_check i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, - const struct i915_ggtt_view *view, + const struct i915_gtt_view *view, u64 size, u64 alignment, u64 flags) { struct i915_gem_ww_ctx ww; diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 260371716490..e300f8070c1d 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -109,7 +109,7 @@ static void __i915_vma_retire(struct i915_active *ref) static struct i915_vma * vma_create(struct drm_i915_gem_object *obj, struct i915_address_space *vm, - const struct i915_ggtt_view *view) + const struct i915_gtt_view *view) { struct i915_vma *pos = ERR_PTR(-E2BIG); struct i915_vma *vma; @@ -141,9 +141,9 @@ vma_create(struct drm_i915_gem_object *obj, INIT_LIST_HEAD(&vma->obj_link); RB_CLEAR_NODE(&vma->obj_node); - if (view && view->type != I915_GGTT_VIEW_NORMAL) { - vma->ggtt_view = *view; - if (view->type == I915_GGTT_VIEW_PARTIAL) { + if (view && view->type != I915_GTT_VIEW_NORMAL) { + vma->gtt_view = *view; + if (view->type == I915_GTT_VIEW_PARTIAL) { GEM_BUG_ON(range_overflows_t(u64, view->partial.offset, view->partial.size, @@ -151,10 +151,10 @@ vma_create(struct drm_i915_gem_object *obj, vma->size = view->partial.size; vma->size <<= PAGE_SHIFT; GEM_BUG_ON(vma->size > obj->base.size); - } else if (view->type == I915_GGTT_VIEW_ROTATED) { + } else if (view->type == I915_GTT_VIEW_ROTATED) { vma->size = intel_rotation_info_size(&view->rotated); vma->size <<= PAGE_SHIFT; - } else if (view->type == I915_GGTT_VIEW_REMAPPED) { + } else if (view->type == I915_GTT_VIEW_REMAPPED) { vma->size = intel_remapped_info_size(&view->remapped); vma->size <<= PAGE_SHIFT; } @@ -248,7 +248,7 @@ err_vma: static struct i915_vma * i915_vma_lookup(struct drm_i915_gem_object *obj, struct i915_address_space *vm, - const struct i915_ggtt_view *view) + const struct i915_gtt_view *view) { struct rb_node *rb; @@ -286,7 +286,7 @@ i915_vma_lookup(struct drm_i915_gem_object *obj, struct i915_vma * i915_vma_instance(struct drm_i915_gem_object *obj, struct i915_address_space *vm, - const struct i915_ggtt_view *view) + const struct i915_gtt_view *view) { struct i915_vma *vma; @@ -1203,7 +1203,7 @@ err_st_alloc: } static noinline struct sg_table * -intel_partial_pages(const struct i915_ggtt_view *view, +intel_partial_pages(const struct i915_gtt_view *view, struct drm_i915_gem_object *obj) { struct sg_table *st; @@ -1247,33 +1247,33 @@ __i915_vma_get_pages(struct i915_vma *vma) */ GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj)); - switch (vma->ggtt_view.type) { + switch (vma->gtt_view.type) { default: - GEM_BUG_ON(vma->ggtt_view.type); + GEM_BUG_ON(vma->gtt_view.type); fallthrough; - case I915_GGTT_VIEW_NORMAL: + case I915_GTT_VIEW_NORMAL: pages = vma->obj->mm.pages; break; - case I915_GGTT_VIEW_ROTATED: + case I915_GTT_VIEW_ROTATED: pages = - intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj); + intel_rotate_pages(&vma->gtt_view.rotated, vma->obj); break; - case I915_GGTT_VIEW_REMAPPED: + case I915_GTT_VIEW_REMAPPED: pages = - intel_remap_pages(&vma->ggtt_view.remapped, vma->obj); + intel_remap_pages(&vma->gtt_view.remapped, vma->obj); break; - case I915_GGTT_VIEW_PARTIAL: - pages = intel_partial_pages(&vma->ggtt_view, vma->obj); + case I915_GTT_VIEW_PARTIAL: + pages = intel_partial_pages(&vma->gtt_view, vma->obj); break; } if (IS_ERR(pages)) { drm_err(&vma->vm->i915->drm, "Failed to get pages for VMA view type %u (%ld)!\n", - vma->ggtt_view.type, PTR_ERR(pages)); + vma->gtt_view.type, PTR_ERR(pages)); return PTR_ERR(pages); } @@ -1806,7 +1806,7 @@ void i915_vma_revoke_mmap(struct i915_vma *vma) GEM_BUG_ON(!vma->obj->userfault_count); node = &vma->mmo->vma_node; - vma_offset = vma->ggtt_view.partial.offset << PAGE_SHIFT; + vma_offset = vma->gtt_view.partial.offset << PAGE_SHIFT; unmap_mapping_range(vma->vm->i915->drm.anon_inode->i_mapping, drm_vma_node_offset_addr(node) + vma_offset, vma->size, diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 33a58f605d75..aecd9c64486b 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -43,7 +43,7 @@ struct i915_vma * i915_vma_instance(struct drm_i915_gem_object *obj, struct i915_address_space *vm, - const struct i915_ggtt_view *view); + const struct i915_gtt_view *view); void i915_vma_unpin_and_release(struct i915_vma **p_vma, unsigned int flags); #define I915_VMA_RELEASE_MAP BIT(0) @@ -160,7 +160,7 @@ static inline void i915_vma_put(struct i915_vma *vma) static inline long i915_vma_compare(struct i915_vma *vma, struct i915_address_space *vm, - const struct i915_ggtt_view *view) + const struct i915_gtt_view *view) { ptrdiff_t cmp; @@ -170,8 +170,8 @@ i915_vma_compare(struct i915_vma *vma, if (cmp) return cmp; - BUILD_BUG_ON(I915_GGTT_VIEW_NORMAL != 0); - cmp = vma->ggtt_view.type; + BUILD_BUG_ON(I915_GTT_VIEW_NORMAL != 0); + cmp = vma->gtt_view.type; if (!view) return cmp; @@ -181,7 +181,7 @@ i915_vma_compare(struct i915_vma *vma, assert_i915_gem_gtt_types(); - /* ggtt_view.type also encodes its size so that we both distinguish + /* gtt_view.type also encodes its size so that we both distinguish * different views using it as a "type" and also use a compact (no * accessing of uninitialised padding bytes) memcmp without storing * an extra parameter or adding more code. @@ -191,14 +191,14 @@ i915_vma_compare(struct i915_vma *vma, * we assert above that all branches have the same address, and that * each branch has a unique type/size. */ - BUILD_BUG_ON(I915_GGTT_VIEW_NORMAL >= I915_GGTT_VIEW_PARTIAL); - BUILD_BUG_ON(I915_GGTT_VIEW_PARTIAL >= I915_GGTT_VIEW_ROTATED); - BUILD_BUG_ON(I915_GGTT_VIEW_ROTATED >= I915_GGTT_VIEW_REMAPPED); + BUILD_BUG_ON(I915_GTT_VIEW_NORMAL >= I915_GTT_VIEW_PARTIAL); + BUILD_BUG_ON(I915_GTT_VIEW_PARTIAL >= I915_GTT_VIEW_ROTATED); + BUILD_BUG_ON(I915_GTT_VIEW_ROTATED >= I915_GTT_VIEW_REMAPPED); BUILD_BUG_ON(offsetof(typeof(*view), rotated) != offsetof(typeof(*view), partial)); BUILD_BUG_ON(offsetof(typeof(*view), rotated) != offsetof(typeof(*view), remapped)); - return memcmp(&vma->ggtt_view.partial, &view->partial, view->type); + return memcmp(&vma->gtt_view.partial, &view->partial, view->type); } struct i915_vma_work *i915_vma_work(void); diff --git a/drivers/gpu/drm/i915/i915_vma_types.h b/drivers/gpu/drm/i915/i915_vma_types.h index be6e028c3b57..ec0f6c9f57d0 100644 --- a/drivers/gpu/drm/i915/i915_vma_types.h +++ b/drivers/gpu/drm/i915/i915_vma_types.h @@ -67,30 +67,30 @@ enum i915_cache_level; * Implementation and usage * * GGTT views are implemented using VMAs and are distinguished via enum - * i915_ggtt_view_type and struct i915_ggtt_view. + * i915_gtt_view_type and struct i915_gtt_view. * * A new flavour of core GEM functions which work with GGTT bound objects were * added with the _ggtt_ infix, and sometimes with _view postfix to avoid - * renaming in large amounts of code. They take the struct i915_ggtt_view + * renaming in large amounts of code. They take the struct i915_gtt_view * parameter encapsulating all metadata required to implement a view. * * As a helper for callers which are only interested in the normal view, - * globally const i915_ggtt_view_normal singleton instance exists. All old core + * globally const i915_gtt_view_normal singleton instance exists. All old core * GEM API functions, the ones not taking the view parameter, are operating on, * or with the normal GGTT view. * * Code wanting to add or use a new GGTT view needs to: * * 1. Add a new enum with a suitable name. - * 2. Extend the metadata in the i915_ggtt_view structure if required. + * 2. Extend the metadata in the i915_gtt_view structure if required. * 3. Add support to i915_get_vma_pages(). * * New views are required to build a scatter-gather table from within the - * i915_get_vma_pages function. This table is stored in the vma.ggtt_view and + * i915_get_vma_pages function. This table is stored in the vma.gtt_view and * exists for the lifetime of an VMA. * * Core API is designed to have copy semantics which means that passed in - * struct i915_ggtt_view does not need to be persistent (left around after + * struct i915_gtt_view does not need to be persistent (left around after * calling the core API functions). * */ @@ -130,11 +130,11 @@ struct intel_partial_info { unsigned int size; } __packed; -enum i915_ggtt_view_type { - I915_GGTT_VIEW_NORMAL = 0, - I915_GGTT_VIEW_ROTATED = sizeof(struct intel_rotation_info), - I915_GGTT_VIEW_PARTIAL = sizeof(struct intel_partial_info), - I915_GGTT_VIEW_REMAPPED = sizeof(struct intel_remapped_info), +enum i915_gtt_view_type { + I915_GTT_VIEW_NORMAL = 0, + I915_GTT_VIEW_ROTATED = sizeof(struct intel_rotation_info), + I915_GTT_VIEW_PARTIAL = sizeof(struct intel_partial_info), + I915_GTT_VIEW_REMAPPED = sizeof(struct intel_remapped_info), }; static inline void assert_i915_gem_gtt_types(void) @@ -152,18 +152,18 @@ static inline void assert_i915_gem_gtt_types(void) /* As we encode the size of each branch inside the union into its type, * we have to be careful that each branch has a unique size. */ - switch ((enum i915_ggtt_view_type)0) { - case I915_GGTT_VIEW_NORMAL: - case I915_GGTT_VIEW_PARTIAL: - case I915_GGTT_VIEW_ROTATED: - case I915_GGTT_VIEW_REMAPPED: + switch ((enum i915_gtt_view_type)0) { + case I915_GTT_VIEW_NORMAL: + case I915_GTT_VIEW_PARTIAL: + case I915_GTT_VIEW_ROTATED: + case I915_GTT_VIEW_REMAPPED: /* gcc complains if these are identical cases */ break; } } -struct i915_ggtt_view { - enum i915_ggtt_view_type type; +struct i915_gtt_view { + enum i915_gtt_view_type type; union { /* Members need to contain no holes/padding */ struct intel_partial_info partial; @@ -280,11 +280,11 @@ struct i915_vma { /** * Support different GGTT views into the same object. * This means there can be multiple VMA mappings per object and per VM. - * i915_ggtt_view_type is used to distinguish between those entries. - * The default one of zero (I915_GGTT_VIEW_NORMAL) is default and also + * i915_gtt_view_type is used to distinguish between those entries. + * The default one of zero (I915_GTT_VIEW_NORMAL) is default and also * assumed in GEM functions which take no ggtt view parameter. */ - struct i915_ggtt_view ggtt_view; + struct i915_gtt_view gtt_view; /** This object's place on the active/inactive lists */ struct list_head vm_link; diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index e3821398a5b0..71b52d5efef4 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -51,9 +51,9 @@ static bool assert_vma(struct i915_vma *vma, ok = false; } - if (vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) { + if (vma->gtt_view.type != I915_GTT_VIEW_NORMAL) { pr_err("VMA created with wrong type [%d]\n", - vma->ggtt_view.type); + vma->gtt_view.type); ok = false; } @@ -63,7 +63,7 @@ static bool assert_vma(struct i915_vma *vma, static struct i915_vma * checked_vma_instance(struct drm_i915_gem_object *obj, struct i915_address_space *vm, - const struct i915_ggtt_view *view) + const struct i915_gtt_view *view) { struct i915_vma *vma; bool ok = true; @@ -91,7 +91,7 @@ checked_vma_instance(struct drm_i915_gem_object *obj, } if (i915_vma_compare(vma, vma->vm, - i915_vma_is_ggtt(vma) ? &vma->ggtt_view : NULL)) { + i915_vma_is_ggtt(vma) ? &vma->gtt_view : NULL)) { pr_err("i915_vma_compare failed with itself\n"); return ERR_PTR(-EINVAL); } @@ -530,12 +530,12 @@ assert_remapped(struct drm_i915_gem_object *obj, return sg; } -static unsigned int remapped_size(enum i915_ggtt_view_type view_type, +static unsigned int remapped_size(enum i915_gtt_view_type view_type, const struct intel_remapped_plane_info *a, const struct intel_remapped_plane_info *b) { - if (view_type == I915_GGTT_VIEW_ROTATED) + if (view_type == I915_GTT_VIEW_ROTATED) return a->dst_stride * a->width + b->dst_stride * b->width; else return a->dst_stride * a->height + b->dst_stride * b->height; @@ -569,9 +569,9 @@ static int igt_vma_rotate_remap(void *arg) { } }, *a, *b; - enum i915_ggtt_view_type types[] = { - I915_GGTT_VIEW_ROTATED, - I915_GGTT_VIEW_REMAPPED, + enum i915_gtt_view_type types[] = { + I915_GTT_VIEW_ROTATED, + I915_GTT_VIEW_REMAPPED, 0, }, *t; const unsigned int max_pages = 64; @@ -588,7 +588,7 @@ static int igt_vma_rotate_remap(void *arg) for (t = types; *t; t++) { for (a = planes; a->width; a++) { for (b = planes + ARRAY_SIZE(planes); b-- != planes; ) { - struct i915_ggtt_view view = { + struct i915_gtt_view view = { .type = *t, .remapped.plane[0] = *a, .remapped.plane[1] = *b, @@ -602,11 +602,11 @@ static int igt_vma_rotate_remap(void *arg) max_offset = max_pages - max_offset; if (!plane_info[0].dst_stride) - plane_info[0].dst_stride = view.type == I915_GGTT_VIEW_ROTATED ? + plane_info[0].dst_stride = view.type == I915_GTT_VIEW_ROTATED ? plane_info[0].height : plane_info[0].width; if (!plane_info[1].dst_stride) - plane_info[1].dst_stride = view.type == I915_GGTT_VIEW_ROTATED ? + plane_info[1].dst_stride = view.type == I915_GTT_VIEW_ROTATED ? plane_info[1].height : plane_info[1].width; @@ -630,7 +630,7 @@ static int igt_vma_rotate_remap(void *arg) expected_pages = remapped_size(view.type, &plane_info[0], &plane_info[1]); - if (view.type == I915_GGTT_VIEW_ROTATED && + if (view.type == I915_GTT_VIEW_ROTATED && vma->size != expected_pages * PAGE_SIZE) { pr_err("VMA is wrong size, expected %lu, found %llu\n", PAGE_SIZE * expected_pages, vma->size); @@ -638,7 +638,7 @@ static int igt_vma_rotate_remap(void *arg) goto out_object; } - if (view.type == I915_GGTT_VIEW_REMAPPED && + if (view.type == I915_GTT_VIEW_REMAPPED && vma->size > expected_pages * PAGE_SIZE) { pr_err("VMA is wrong size, expected %lu, found %llu\n", PAGE_SIZE * expected_pages, vma->size); @@ -668,13 +668,13 @@ static int igt_vma_rotate_remap(void *arg) sg = vma->pages->sgl; for (n = 0; n < ARRAY_SIZE(view.rotated.plane); n++) { - if (view.type == I915_GGTT_VIEW_ROTATED) + if (view.type == I915_GTT_VIEW_ROTATED) sg = assert_rotated(obj, &view.rotated, n, sg); else sg = assert_remapped(obj, &view.remapped, n, sg); if (IS_ERR(sg)) { pr_err("Inconsistent %s VMA pages for plane %d: [(%d, %d, %d, %d, %d), (%d, %d, %d, %d, %d)]\n", - view.type == I915_GGTT_VIEW_ROTATED ? + view.type == I915_GTT_VIEW_ROTATED ? "rotated" : "remapped", n, plane_info[0].width, plane_info[0].height, @@ -741,7 +741,7 @@ static bool assert_partial(struct drm_i915_gem_object *obj, } static bool assert_pin(struct i915_vma *vma, - struct i915_ggtt_view *view, + struct i915_gtt_view *view, u64 size, const char *name) { @@ -759,8 +759,8 @@ static bool assert_pin(struct i915_vma *vma, ok = false; } - if (view && view->type != I915_GGTT_VIEW_NORMAL) { - if (memcmp(&vma->ggtt_view, view, sizeof(*view))) { + if (view && view->type != I915_GTT_VIEW_NORMAL) { + if (memcmp(&vma->gtt_view, view, sizeof(*view))) { pr_err("(%s) VMA mismatch upon creation!\n", name); ok = false; @@ -772,9 +772,9 @@ static bool assert_pin(struct i915_vma *vma, ok = false; } } else { - if (vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) { + if (vma->gtt_view.type != I915_GTT_VIEW_NORMAL) { pr_err("Not the normal ggtt view! Found %d\n", - vma->ggtt_view.type); + vma->gtt_view.type); ok = false; } @@ -818,14 +818,14 @@ static int igt_vma_partial(void *arg) nvma = 0; for_each_prime_number_from(sz, 1, npages) { for_each_prime_number_from(offset, 0, npages - sz) { - struct i915_ggtt_view view; + struct i915_gtt_view view; - view.type = I915_GGTT_VIEW_PARTIAL; + view.type = I915_GTT_VIEW_PARTIAL; view.partial.offset = offset; view.partial.size = sz; if (sz == npages) - view.type = I915_GGTT_VIEW_NORMAL; + view.type = I915_GTT_VIEW_NORMAL; vma = checked_vma_instance(obj, vm, &view); if (IS_ERR(vma)) { @@ -976,9 +976,9 @@ static int igt_vma_remapped_gtt(void *arg) { } }, *p; - enum i915_ggtt_view_type types[] = { - I915_GGTT_VIEW_ROTATED, - I915_GGTT_VIEW_REMAPPED, + enum i915_gtt_view_type types[] = { + I915_GTT_VIEW_ROTATED, + I915_GTT_VIEW_REMAPPED, 0, }, *t; struct drm_i915_gem_object *obj; @@ -996,7 +996,7 @@ static int igt_vma_remapped_gtt(void *arg) for (t = types; *t; t++) { for (p = planes; p->width; p++) { - struct i915_ggtt_view view = { + struct i915_gtt_view view = { .type = *t, .rotated.plane[0] = *p, }; @@ -1012,7 +1012,7 @@ static int igt_vma_remapped_gtt(void *arg) goto out; if (!plane_info[0].dst_stride) - plane_info[0].dst_stride = *t == I915_GGTT_VIEW_ROTATED ? + plane_info[0].dst_stride = *t == I915_GTT_VIEW_ROTATED ? p->height : p->width; vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE); @@ -1021,7 +1021,7 @@ static int igt_vma_remapped_gtt(void *arg) goto out; } - GEM_BUG_ON(vma->ggtt_view.type != *t); + GEM_BUG_ON(vma->gtt_view.type != *t); map = i915_vma_pin_iomap(vma); i915_vma_unpin(vma); @@ -1035,7 +1035,7 @@ static int igt_vma_remapped_gtt(void *arg) unsigned int offset; u32 val = y << 16 | x; - if (*t == I915_GGTT_VIEW_ROTATED) + if (*t == I915_GTT_VIEW_ROTATED) offset = (x * plane_info[0].dst_stride + y) * PAGE_SIZE; else offset = (y * plane_info[0].dst_stride + x) * PAGE_SIZE; @@ -1052,7 +1052,7 @@ static int igt_vma_remapped_gtt(void *arg) goto out; } - GEM_BUG_ON(vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL); + GEM_BUG_ON(vma->gtt_view.type != I915_GTT_VIEW_NORMAL); map = i915_vma_pin_iomap(vma); i915_vma_unpin(vma); @@ -1067,7 +1067,7 @@ static int igt_vma_remapped_gtt(void *arg) u32 exp = y << 16 | x; u32 val; - if (*t == I915_GGTT_VIEW_ROTATED) + if (*t == I915_GTT_VIEW_ROTATED) src_idx = rotated_index(&view.rotated, 0, x, y); else src_idx = remapped_index(&view.remapped, 0, x, y); @@ -1076,7 +1076,7 @@ static int igt_vma_remapped_gtt(void *arg) val = ioread32(&map[offset / sizeof(*map)]); if (val != exp) { pr_err("%s VMA write test failed, expected 0x%x, found 0x%x\n", - *t == I915_GGTT_VIEW_ROTATED ? "Rotated" : "Remapped", + *t == I915_GTT_VIEW_ROTATED ? "Rotated" : "Remapped", exp, val); i915_vma_unpin_iomap(vma); err = -EINVAL; -- cgit v1.2.3 From 04f7eb3d4582a0a4da67c86e55fda7de2df86d91 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Wed, 7 Sep 2022 19:26:41 +0200 Subject: drm/i915: Set correct domains values at _i915_vma_move_to_active Fix regression introduced by commit: "drm/i915: Individualize fences before adding to dma_resv obj" which sets obj->read_domains to 0 for both read and write paths. Also set obj->write_domain to 0 on read path which was removed by the commit. References: https://gitlab.freedesktop.org/drm/intel/-/issues/6639 Fixes: 420a07b841d0 ("drm/i915: Individualize fences before adding to dma_resv obj") Signed-off-by: Nirmoy Das Cc: # v5.16+ Cc: Matthew Auld Cc: Andrzej Hajda Reviewed-by: Andrzej Hajda Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20220907172641.12555-1-nirmoy.das@intel.com --- drivers/gpu/drm/i915/i915_vma.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index e300f8070c1d..f17c09ead7d7 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -1882,12 +1882,13 @@ int _i915_vma_move_to_active(struct i915_vma *vma, enum dma_resv_usage usage; int idx; - obj->read_domains = 0; if (flags & EXEC_OBJECT_WRITE) { usage = DMA_RESV_USAGE_WRITE; obj->write_domain = I915_GEM_DOMAIN_RENDER; + obj->read_domains = 0; } else { usage = DMA_RESV_USAGE_READ; + obj->write_domain = 0; } dma_fence_array_for_each(curr, idx, fence) -- cgit v1.2.3