From 4e0d64dba816adf18c17488d38ede67a3d0e9b40 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 17 May 2018 22:26:30 +0100 Subject: drm/i915: Move request->ctx aside In the next patch, we want to store the intel_context pointer inside i915_request, as it is frequently accessed via a convoluted dance when submitting the request to hw. Having two context pointers inside i915_request leads to confusion so first rename the existing i915_gem_context pointer to i915_request.gem_context. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180517212633.24934-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 8928894dd9c7..fe8810a6a339 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -384,7 +384,7 @@ static void __retire_engine_request(struct intel_engine_cs *engine, */ if (engine->last_retired_context) intel_context_unpin(engine->last_retired_context, engine); - engine->last_retired_context = rq->ctx; + engine->last_retired_context = rq->gem_context; } static void __retire_engine_upto(struct intel_engine_cs *engine, @@ -455,8 +455,8 @@ static void i915_request_retire(struct i915_request *request) i915_request_remove_from_client(request); /* Retirement decays the ban score as it is a sign of ctx progress */ - atomic_dec_if_positive(&request->ctx->ban_score); - intel_context_unpin(request->ctx, request->engine); + atomic_dec_if_positive(&request->gem_context->ban_score); + intel_context_unpin(request->gem_context, request->engine); __retire_engine_upto(request->engine, request); @@ -760,7 +760,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) INIT_LIST_HEAD(&rq->active_list); 
rq->i915 = i915; rq->engine = engine; - rq->ctx = ctx; + rq->gem_context = ctx; rq->ring = ring; rq->timeline = ring->timeline; GEM_BUG_ON(rq->timeline == &engine->timeline); @@ -814,7 +814,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) goto err_unwind; /* Keep a second pin for the dual retirement along engine and ring */ - __intel_context_pin(rq->ctx, engine); + __intel_context_pin(rq->gem_context, engine); /* Check that we didn't interrupt ourselves with a new request */ GEM_BUG_ON(rq->timeline->seqno != rq->fence.seqno); @@ -1113,7 +1113,7 @@ void __i915_request_add(struct i915_request *request, bool flush_caches) local_bh_disable(); rcu_read_lock(); /* RCU serialisation for set-wedged protection */ if (engine->schedule) - engine->schedule(request, &request->ctx->sched); + engine->schedule(request, &request->gem_context->sched); rcu_read_unlock(); i915_sw_fence_commit(&request->submit); local_bh_enable(); /* Kick the execlists tasklet if just scheduled */ -- cgit v1.2.3 From 1fc44d9b1afb0afe46acd99bdfdf793805a850e1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 17 May 2018 22:26:32 +0100 Subject: drm/i915: Store a pointer to intel_context in i915_request To ease the frequent and ugly pointer dance of &request->gem_context->engine[request->engine->id] during request submission, store that pointer as request->hw_context. One major advantage that we will exploit later is that this decouples the logical context state from the engine itself. v2: Set mock_context->ops so we don't crash and burn in selftests. Cleanups from Tvrtko. 
Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Acked-by: Zhenyu Wang Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180517212633.24934-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gvt/mmio_context.c | 6 +- drivers/gpu/drm/i915/gvt/mmio_context.h | 2 +- drivers/gpu/drm/i915/gvt/scheduler.c | 141 ++++++++++---------------- drivers/gpu/drm/i915/gvt/scheduler.h | 1 - drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem.c | 12 +-- drivers/gpu/drm/i915/i915_gem_context.c | 17 ++-- drivers/gpu/drm/i915/i915_gem_context.h | 21 ++-- drivers/gpu/drm/i915/i915_gpu_error.c | 3 +- drivers/gpu/drm/i915/i915_perf.c | 25 ++--- drivers/gpu/drm/i915/i915_request.c | 34 +++---- drivers/gpu/drm/i915/i915_request.h | 1 + drivers/gpu/drm/i915/intel_engine_cs.c | 54 ++++++---- drivers/gpu/drm/i915/intel_guc_submission.c | 10 +- drivers/gpu/drm/i915/intel_lrc.c | 125 +++++++++++++---------- drivers/gpu/drm/i915/intel_lrc.h | 7 -- drivers/gpu/drm/i915/intel_ringbuffer.c | 100 +++++++++++------- drivers/gpu/drm/i915/intel_ringbuffer.h | 9 +- drivers/gpu/drm/i915/selftests/mock_context.c | 7 ++ drivers/gpu/drm/i915/selftests/mock_engine.c | 41 +++++--- 20 files changed, 321 insertions(+), 296 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c index 0f949554d118..708170e61625 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.c +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c @@ -446,9 +446,9 @@ static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next, #define CTX_CONTEXT_CONTROL_VAL 0x03 -bool is_inhibit_context(struct i915_gem_context *ctx, int ring_id) +bool is_inhibit_context(struct intel_context *ce) { - u32 *reg_state = ctx->__engine[ring_id].lrc_reg_state; + const u32 *reg_state = ce->lrc_reg_state; u32 inhibit_mask = _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); @@ -501,7 +501,7 @@ static void 
switch_mmio(struct intel_vgpu *pre, * itself. */ if (mmio->in_context && - !is_inhibit_context(s->shadow_ctx, ring_id)) + !is_inhibit_context(&s->shadow_ctx->__engine[ring_id])) continue; if (mmio->mask) diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.h b/drivers/gpu/drm/i915/gvt/mmio_context.h index 0439eb8057a8..5c3b9ff9f96a 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.h +++ b/drivers/gpu/drm/i915/gvt/mmio_context.h @@ -49,7 +49,7 @@ void intel_gvt_switch_mmio(struct intel_vgpu *pre, void intel_gvt_init_engine_mmio_context(struct intel_gvt *gvt); -bool is_inhibit_context(struct i915_gem_context *ctx, int ring_id); +bool is_inhibit_context(struct intel_context *ce); int intel_vgpu_restore_inhibit_context(struct intel_vgpu *vgpu, struct i915_request *req); diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 17f9f8d7e148..e1760030dda1 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -54,11 +54,8 @@ static void set_context_pdp_root_pointer( static void update_shadow_pdps(struct intel_vgpu_workload *workload) { - struct intel_vgpu *vgpu = workload->vgpu; - int ring_id = workload->ring_id; - struct i915_gem_context *shadow_ctx = vgpu->submission.shadow_ctx; struct drm_i915_gem_object *ctx_obj = - shadow_ctx->__engine[ring_id].state->obj; + workload->req->hw_context->state->obj; struct execlist_ring_context *shadow_ring_context; struct page *page; @@ -128,9 +125,8 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) struct intel_vgpu *vgpu = workload->vgpu; struct intel_gvt *gvt = vgpu->gvt; int ring_id = workload->ring_id; - struct i915_gem_context *shadow_ctx = vgpu->submission.shadow_ctx; struct drm_i915_gem_object *ctx_obj = - shadow_ctx->__engine[ring_id].state->obj; + workload->req->hw_context->state->obj; struct execlist_ring_context *shadow_ring_context; struct page *page; void *dst; @@ -280,10 +276,8 @@ static int 
shadow_context_status_change(struct notifier_block *nb, return NOTIFY_OK; } -static void shadow_context_descriptor_update(struct i915_gem_context *ctx, - struct intel_engine_cs *engine) +static void shadow_context_descriptor_update(struct intel_context *ce) { - struct intel_context *ce = to_intel_context(ctx, engine); u64 desc = 0; desc = ce->lrc_desc; @@ -292,7 +286,7 @@ static void shadow_context_descriptor_update(struct i915_gem_context *ctx, * like GEN8_CTX_* cached in desc_template */ desc &= U64_MAX << 12; - desc |= ctx->desc_template & ((1ULL << 12) - 1); + desc |= ce->gem_context->desc_template & ((1ULL << 12) - 1); ce->lrc_desc = desc; } @@ -300,12 +294,11 @@ static void shadow_context_descriptor_update(struct i915_gem_context *ctx, static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; + struct i915_request *req = workload->req; void *shadow_ring_buffer_va; u32 *cs; - struct i915_request *req = workload->req; - if (IS_KABYLAKE(req->i915) && - is_inhibit_context(req->gem_context, req->engine->id)) + if (IS_KABYLAKE(req->i915) && is_inhibit_context(req->hw_context)) intel_vgpu_restore_inhibit_context(vgpu, req); /* allocate shadow ring buffer */ @@ -353,60 +346,56 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) struct intel_vgpu_submission *s = &vgpu->submission; struct i915_gem_context *shadow_ctx = s->shadow_ctx; struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - int ring_id = workload->ring_id; - struct intel_engine_cs *engine = dev_priv->engine[ring_id]; - struct intel_ring *ring; + struct intel_engine_cs *engine = dev_priv->engine[workload->ring_id]; + struct intel_context *ce; int ret; lockdep_assert_held(&dev_priv->drm.struct_mutex); - if (workload->shadowed) + if (workload->req) return 0; + /* pin shadow context by gvt even the shadow context will be pinned + * when i915 alloc request. 
That is because gvt will update the guest + * context from shadow context when workload is completed, and at that + * moment, i915 may already unpined the shadow context to make the + * shadow_ctx pages invalid. So gvt need to pin itself. After update + * the guest context, gvt can unpin the shadow_ctx safely. + */ + ce = intel_context_pin(shadow_ctx, engine); + if (IS_ERR(ce)) { + gvt_vgpu_err("fail to pin shadow context\n"); + return PTR_ERR(ce); + } + shadow_ctx->desc_template &= ~(0x3 << GEN8_CTX_ADDRESSING_MODE_SHIFT); shadow_ctx->desc_template |= workload->ctx_desc.addressing_mode << GEN8_CTX_ADDRESSING_MODE_SHIFT; - if (!test_and_set_bit(ring_id, s->shadow_ctx_desc_updated)) - shadow_context_descriptor_update(shadow_ctx, - dev_priv->engine[ring_id]); + if (!test_and_set_bit(workload->ring_id, s->shadow_ctx_desc_updated)) + shadow_context_descriptor_update(ce); ret = intel_gvt_scan_and_shadow_ringbuffer(workload); if (ret) - goto err_scan; + goto err_unpin; if ((workload->ring_id == RCS) && (workload->wa_ctx.indirect_ctx.size != 0)) { ret = intel_gvt_scan_and_shadow_wa_ctx(&workload->wa_ctx); if (ret) - goto err_scan; - } - - /* pin shadow context by gvt even the shadow context will be pinned - * when i915 alloc request. That is because gvt will update the guest - * context from shadow context when workload is completed, and at that - * moment, i915 may already unpined the shadow context to make the - * shadow_ctx pages invalid. So gvt need to pin itself. After update - * the guest context, gvt can unpin the shadow_ctx safely. 
- */ - ring = intel_context_pin(shadow_ctx, engine); - if (IS_ERR(ring)) { - ret = PTR_ERR(ring); - gvt_vgpu_err("fail to pin shadow context\n"); - goto err_shadow; + goto err_shadow; } ret = populate_shadow_context(workload); if (ret) - goto err_unpin; - workload->shadowed = true; + goto err_shadow; + return 0; -err_unpin: - intel_context_unpin(shadow_ctx, engine); err_shadow: release_shadow_wa_ctx(&workload->wa_ctx); -err_scan: +err_unpin: + intel_context_unpin(ce); return ret; } @@ -414,7 +403,6 @@ static int intel_gvt_generate_request(struct intel_vgpu_workload *workload) { int ring_id = workload->ring_id; struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv; - struct intel_engine_cs *engine = dev_priv->engine[ring_id]; struct i915_request *rq; struct intel_vgpu *vgpu = workload->vgpu; struct intel_vgpu_submission *s = &vgpu->submission; @@ -437,7 +425,6 @@ static int intel_gvt_generate_request(struct intel_vgpu_workload *workload) return 0; err_unpin: - intel_context_unpin(shadow_ctx, engine); release_shadow_wa_ctx(&workload->wa_ctx); return ret; } @@ -517,21 +504,13 @@ err: return ret; } -static int update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx) +static void update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx) { - struct intel_vgpu_workload *workload = container_of(wa_ctx, - struct intel_vgpu_workload, - wa_ctx); - int ring_id = workload->ring_id; - struct intel_vgpu_submission *s = &workload->vgpu->submission; - struct i915_gem_context *shadow_ctx = s->shadow_ctx; - struct drm_i915_gem_object *ctx_obj = - shadow_ctx->__engine[ring_id].state->obj; - struct execlist_ring_context *shadow_ring_context; - struct page *page; - - page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN); - shadow_ring_context = kmap_atomic(page); + struct intel_vgpu_workload *workload = + container_of(wa_ctx, struct intel_vgpu_workload, wa_ctx); + struct i915_request *rq = workload->req; + struct execlist_ring_context *shadow_ring_context = + 
(struct execlist_ring_context *)rq->hw_context->lrc_reg_state; shadow_ring_context->bb_per_ctx_ptr.val = (shadow_ring_context->bb_per_ctx_ptr.val & @@ -539,9 +518,6 @@ static int update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx) shadow_ring_context->rcs_indirect_ctx.val = (shadow_ring_context->rcs_indirect_ctx.val & (~INDIRECT_CTX_ADDR_MASK)) | wa_ctx->indirect_ctx.shadow_gma; - - kunmap_atomic(shadow_ring_context); - return 0; } static int prepare_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) @@ -670,12 +646,9 @@ err_unpin_mm: static int dispatch_workload(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; - struct intel_vgpu_submission *s = &vgpu->submission; - struct i915_gem_context *shadow_ctx = s->shadow_ctx; struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; int ring_id = workload->ring_id; - struct intel_engine_cs *engine = dev_priv->engine[ring_id]; - int ret = 0; + int ret; gvt_dbg_sched("ring id %d prepare to dispatch workload %p\n", ring_id, workload); @@ -687,10 +660,6 @@ static int dispatch_workload(struct intel_vgpu_workload *workload) goto out; ret = prepare_workload(workload); - if (ret) { - intel_context_unpin(shadow_ctx, engine); - goto out; - } out: if (ret) @@ -765,27 +734,23 @@ out: static void update_guest_context(struct intel_vgpu_workload *workload) { + struct i915_request *rq = workload->req; struct intel_vgpu *vgpu = workload->vgpu; struct intel_gvt *gvt = vgpu->gvt; - struct intel_vgpu_submission *s = &vgpu->submission; - struct i915_gem_context *shadow_ctx = s->shadow_ctx; - int ring_id = workload->ring_id; - struct drm_i915_gem_object *ctx_obj = - shadow_ctx->__engine[ring_id].state->obj; + struct drm_i915_gem_object *ctx_obj = rq->hw_context->state->obj; struct execlist_ring_context *shadow_ring_context; struct page *page; void *src; unsigned long context_gpa, context_page_num; int i; - gvt_dbg_sched("ring id %d workload lrca %x\n", ring_id, - workload->ctx_desc.lrca); - - 
context_page_num = gvt->dev_priv->engine[ring_id]->context_size; + gvt_dbg_sched("ring id %d workload lrca %x\n", rq->engine->id, + workload->ctx_desc.lrca); + context_page_num = rq->engine->context_size; context_page_num = context_page_num >> PAGE_SHIFT; - if (IS_BROADWELL(gvt->dev_priv) && ring_id == RCS) + if (IS_BROADWELL(gvt->dev_priv) && rq->engine->id == RCS) context_page_num = 19; i = 2; @@ -858,6 +823,7 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) scheduler->current_workload[ring_id]; struct intel_vgpu *vgpu = workload->vgpu; struct intel_vgpu_submission *s = &vgpu->submission; + struct i915_request *rq; int event; mutex_lock(&gvt->lock); @@ -866,11 +832,8 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) * switch to make sure request is completed. * For the workload w/o request, directly complete the workload. */ - if (workload->req) { - struct drm_i915_private *dev_priv = - workload->vgpu->gvt->dev_priv; - struct intel_engine_cs *engine = - dev_priv->engine[workload->ring_id]; + rq = fetch_and_zero(&workload->req); + if (rq) { wait_event(workload->shadow_ctx_status_wq, !atomic_read(&workload->shadow_ctx_active)); @@ -886,8 +849,6 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) workload->status = 0; } - i915_request_put(fetch_and_zero(&workload->req)); - if (!workload->status && !(vgpu->resetting_eng & ENGINE_MASK(ring_id))) { update_guest_context(workload); @@ -896,10 +857,13 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) INTEL_GVT_EVENT_MAX) intel_vgpu_trigger_virtual_event(vgpu, event); } - mutex_lock(&dev_priv->drm.struct_mutex); + /* unpin shadow ctx as the shadow_ctx update is done */ - intel_context_unpin(s->shadow_ctx, engine); - mutex_unlock(&dev_priv->drm.struct_mutex); + mutex_lock(&rq->i915->drm.struct_mutex); + intel_context_unpin(rq->hw_context); + mutex_unlock(&rq->i915->drm.struct_mutex); + + i915_request_put(rq); } 
gvt_dbg_sched("ring id %d complete workload %p status %d\n", @@ -1270,7 +1234,6 @@ alloc_workload(struct intel_vgpu *vgpu) atomic_set(&workload->shadow_ctx_active, 0); workload->status = -EINPROGRESS; - workload->shadowed = false; workload->vgpu = vgpu; return workload; diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h index 6c644782193e..21eddab4a9cd 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.h +++ b/drivers/gpu/drm/i915/gvt/scheduler.h @@ -83,7 +83,6 @@ struct intel_vgpu_workload { struct i915_request *req; /* if this workload has been dispatched to i915? */ bool dispatched; - bool shadowed; int status; struct intel_vgpu_mm *shadow_mm; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 34c125e2d90c..e33c380b43e3 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1950,6 +1950,7 @@ struct drm_i915_private { */ struct i915_perf_stream *exclusive_stream; + struct intel_context *pinned_ctx; u32 specific_ctx_id; struct hrtimer poll_check_timer; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index a20f8db5729d..03874b50ada9 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3181,14 +3181,14 @@ void i915_gem_reset(struct drm_i915_private *dev_priv, i915_retire_requests(dev_priv); for_each_engine(engine, dev_priv, id) { - struct i915_gem_context *ctx; + struct intel_context *ce; i915_gem_reset_engine(engine, engine->hangcheck.active_request, stalled_mask & ENGINE_MASK(id)); - ctx = fetch_and_zero(&engine->last_retired_context); - if (ctx) - intel_context_unpin(ctx, engine); + ce = fetch_and_zero(&engine->last_retired_context); + if (ce) + intel_context_unpin(ce); /* * Ostensibily, we always want a context loaded for powersaving, @@ -4897,13 +4897,13 @@ void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj) static void assert_kernel_context_is_current(struct 
drm_i915_private *i915) { - struct i915_gem_context *kernel_context = i915->kernel_context; + struct i915_gem_context *kctx = i915->kernel_context; struct intel_engine_cs *engine; enum intel_engine_id id; for_each_engine(engine, i915, id) { GEM_BUG_ON(__i915_gem_active_peek(&engine->timeline.last_request)); - GEM_BUG_ON(engine->last_retired_context != kernel_context); + GEM_BUG_ON(engine->last_retired_context->gem_context != kctx); } } diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 9e70f4dfa703..b69b18ef8120 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -127,14 +127,8 @@ static void i915_gem_context_free(struct i915_gem_context *ctx) for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++) { struct intel_context *ce = &ctx->__engine[n]; - if (!ce->state) - continue; - - WARN_ON(ce->pin_count); - if (ce->ring) - intel_ring_free(ce->ring); - - __i915_gem_object_release_unless_active(ce->state->obj); + if (ce->ops) + ce->ops->destroy(ce); } kfree(ctx->name); @@ -266,6 +260,7 @@ __create_hw_context(struct drm_i915_private *dev_priv, struct drm_i915_file_private *file_priv) { struct i915_gem_context *ctx; + unsigned int n; int ret; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); @@ -283,6 +278,12 @@ __create_hw_context(struct drm_i915_private *dev_priv, ctx->i915 = dev_priv; ctx->sched.priority = I915_PRIORITY_NORMAL; + for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++) { + struct intel_context *ce = &ctx->__engine[n]; + + ce->gem_context = ctx; + } + INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); INIT_LIST_HEAD(&ctx->handles_list); diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index ace3b129c189..749a4ff566f5 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -45,6 +45,11 @@ struct intel_ring; #define DEFAULT_CONTEXT_HANDLE 0 +struct intel_context_ops { + void (*unpin)(struct 
intel_context *ce); + void (*destroy)(struct intel_context *ce); +}; + /** * struct i915_gem_context - client state * @@ -144,11 +149,14 @@ struct i915_gem_context { /** engine: per-engine logical HW state */ struct intel_context { + struct i915_gem_context *gem_context; struct i915_vma *state; struct intel_ring *ring; u32 *lrc_reg_state; u64 lrc_desc; int pin_count; + + const struct intel_context_ops *ops; } __engine[I915_NUM_ENGINES]; /** ring_size: size for allocating the per-engine ring buffer */ @@ -263,25 +271,22 @@ to_intel_context(struct i915_gem_context *ctx, return &ctx->__engine[engine->id]; } -static inline struct intel_ring * +static inline struct intel_context * intel_context_pin(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { return engine->context_pin(engine, ctx); } -static inline void __intel_context_pin(struct i915_gem_context *ctx, - const struct intel_engine_cs *engine) +static inline void __intel_context_pin(struct intel_context *ce) { - struct intel_context *ce = to_intel_context(ctx, engine); - GEM_BUG_ON(!ce->pin_count); ce->pin_count++; } -static inline void intel_context_unpin(struct i915_gem_context *ctx, - struct intel_engine_cs *engine) +static inline void intel_context_unpin(struct intel_context *ce) { - engine->context_unpin(engine, ctx); + GEM_BUG_ON(!ce->ops); + ce->ops->unpin(ce); } /* i915_gem_context.c */ diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 37c9a42654ba..47721437a4c5 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1485,8 +1485,7 @@ static void gem_record_rings(struct i915_gpu_state *error) ee->ctx = i915_error_object_create(i915, - to_intel_context(ctx, - engine)->state); + request->hw_context->state); error->simulated |= i915_gem_context_no_error_capture(ctx); diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 019bd2d073ad..4f0eb84b3c00 100644 --- 
a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1221,7 +1221,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) dev_priv->perf.oa.specific_ctx_id = stream->ctx->hw_id; } else { struct intel_engine_cs *engine = dev_priv->engine[RCS]; - struct intel_ring *ring; + struct intel_context *ce; int ret; ret = i915_mutex_lock_interruptible(&dev_priv->drm); @@ -1234,19 +1234,19 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) * * NB: implied RCS engine... */ - ring = intel_context_pin(stream->ctx, engine); + ce = intel_context_pin(stream->ctx, engine); mutex_unlock(&dev_priv->drm.struct_mutex); - if (IS_ERR(ring)) - return PTR_ERR(ring); + if (IS_ERR(ce)) + return PTR_ERR(ce); + dev_priv->perf.oa.pinned_ctx = ce; /* * Explicitly track the ID (instead of calling * i915_ggtt_offset() on the fly) considering the difference * with gen8+ and execlists */ - dev_priv->perf.oa.specific_ctx_id = - i915_ggtt_offset(to_intel_context(stream->ctx, engine)->state); + dev_priv->perf.oa.specific_ctx_id = i915_ggtt_offset(ce->state); } return 0; @@ -1262,17 +1262,14 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) static void oa_put_render_ctx_id(struct i915_perf_stream *stream) { struct drm_i915_private *dev_priv = stream->dev_priv; + struct intel_context *ce; - if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) { - dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID; - } else { - struct intel_engine_cs *engine = dev_priv->engine[RCS]; + dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID; + ce = fetch_and_zero(&dev_priv->perf.oa.pinned_ctx); + if (ce) { mutex_lock(&dev_priv->drm.struct_mutex); - - dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID; - intel_context_unpin(stream->ctx, engine); - + intel_context_unpin(ce); mutex_unlock(&dev_priv->drm.struct_mutex); } } diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index fe8810a6a339..fc499bcbd105 100644 --- 
a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -383,8 +383,8 @@ static void __retire_engine_request(struct intel_engine_cs *engine, * the subsequent request. */ if (engine->last_retired_context) - intel_context_unpin(engine->last_retired_context, engine); - engine->last_retired_context = rq->gem_context; + intel_context_unpin(engine->last_retired_context); + engine->last_retired_context = rq->hw_context; } static void __retire_engine_upto(struct intel_engine_cs *engine, @@ -456,7 +456,7 @@ static void i915_request_retire(struct i915_request *request) /* Retirement decays the ban score as it is a sign of ctx progress */ atomic_dec_if_positive(&request->gem_context->ban_score); - intel_context_unpin(request->gem_context, request->engine); + intel_context_unpin(request->hw_context); __retire_engine_upto(request->engine, request); @@ -657,7 +657,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) { struct drm_i915_private *i915 = engine->i915; struct i915_request *rq; - struct intel_ring *ring; + struct intel_context *ce; int ret; lockdep_assert_held(&i915->drm.struct_mutex); @@ -681,22 +681,21 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) * GGTT space, so do this first before we reserve a seqno for * ourselves. */ - ring = intel_context_pin(ctx, engine); - if (IS_ERR(ring)) - return ERR_CAST(ring); - GEM_BUG_ON(!ring); + ce = intel_context_pin(ctx, engine); + if (IS_ERR(ce)) + return ERR_CAST(ce); ret = reserve_gt(i915); if (ret) goto err_unpin; - ret = intel_ring_wait_for_space(ring, MIN_SPACE_FOR_ADD_REQUEST); + ret = intel_ring_wait_for_space(ce->ring, MIN_SPACE_FOR_ADD_REQUEST); if (ret) goto err_unreserve; /* Move our oldest request to the slab-cache (if not in use!) 
*/ - rq = list_first_entry(&ring->request_list, typeof(*rq), ring_link); - if (!list_is_last(&rq->ring_link, &ring->request_list) && + rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link); + if (!list_is_last(&rq->ring_link, &ce->ring->request_list) && i915_request_completed(rq)) i915_request_retire(rq); @@ -761,8 +760,9 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) rq->i915 = i915; rq->engine = engine; rq->gem_context = ctx; - rq->ring = ring; - rq->timeline = ring->timeline; + rq->hw_context = ce; + rq->ring = ce->ring; + rq->timeline = ce->ring->timeline; GEM_BUG_ON(rq->timeline == &engine->timeline); spin_lock_init(&rq->lock); @@ -814,14 +814,14 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) goto err_unwind; /* Keep a second pin for the dual retirement along engine and ring */ - __intel_context_pin(rq->gem_context, engine); + __intel_context_pin(ce); /* Check that we didn't interrupt ourselves with a new request */ GEM_BUG_ON(rq->timeline->seqno != rq->fence.seqno); return rq; err_unwind: - rq->ring->emit = rq->head; + ce->ring->emit = rq->head; /* Make sure we didn't add ourselves to external state before freeing */ GEM_BUG_ON(!list_empty(&rq->active_list)); @@ -832,7 +832,7 @@ err_unwind: err_unreserve: unreserve_gt(i915); err_unpin: - intel_context_unpin(ctx, engine); + intel_context_unpin(ce); return ERR_PTR(ret); } @@ -1018,8 +1018,8 @@ i915_request_await_object(struct i915_request *to, void __i915_request_add(struct i915_request *request, bool flush_caches) { struct intel_engine_cs *engine = request->engine; - struct intel_ring *ring = request->ring; struct i915_timeline *timeline = request->timeline; + struct intel_ring *ring = request->ring; struct i915_request *prev; u32 *cs; int err; diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index dddecd9ffd0c..1bbbb7a9fa03 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ 
b/drivers/gpu/drm/i915/i915_request.h @@ -95,6 +95,7 @@ struct i915_request { */ struct i915_gem_context *gem_context; struct intel_engine_cs *engine; + struct intel_context *hw_context; struct intel_ring *ring; struct i915_timeline *timeline; struct intel_signal_node signaling; diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 9e618aab6568..26f9f8aab949 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -645,6 +645,12 @@ static int init_phys_status_page(struct intel_engine_cs *engine) return 0; } +static void __intel_context_unpin(struct i915_gem_context *ctx, + struct intel_engine_cs *engine) +{ + intel_context_unpin(to_intel_context(ctx, engine)); +} + /** * intel_engines_init_common - initialize cengine state which might require hw access * @engine: Engine to initialize. @@ -658,7 +664,8 @@ static int init_phys_status_page(struct intel_engine_cs *engine) */ int intel_engine_init_common(struct intel_engine_cs *engine) { - struct intel_ring *ring; + struct drm_i915_private *i915 = engine->i915; + struct intel_context *ce; int ret; engine->set_default_submission(engine); @@ -670,18 +677,18 @@ int intel_engine_init_common(struct intel_engine_cs *engine) * be available. To avoid this we always pin the default * context. */ - ring = intel_context_pin(engine->i915->kernel_context, engine); - if (IS_ERR(ring)) - return PTR_ERR(ring); + ce = intel_context_pin(i915->kernel_context, engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); /* * Similarly the preempt context must always be available so that * we can interrupt the engine at any time. 
*/ - if (engine->i915->preempt_context) { - ring = intel_context_pin(engine->i915->preempt_context, engine); - if (IS_ERR(ring)) { - ret = PTR_ERR(ring); + if (i915->preempt_context) { + ce = intel_context_pin(i915->preempt_context, engine); + if (IS_ERR(ce)) { + ret = PTR_ERR(ce); goto err_unpin_kernel; } } @@ -690,7 +697,7 @@ int intel_engine_init_common(struct intel_engine_cs *engine) if (ret) goto err_unpin_preempt; - if (HWS_NEEDS_PHYSICAL(engine->i915)) + if (HWS_NEEDS_PHYSICAL(i915)) ret = init_phys_status_page(engine); else ret = init_status_page(engine); @@ -702,10 +709,11 @@ int intel_engine_init_common(struct intel_engine_cs *engine) err_breadcrumbs: intel_engine_fini_breadcrumbs(engine); err_unpin_preempt: - if (engine->i915->preempt_context) - intel_context_unpin(engine->i915->preempt_context, engine); + if (i915->preempt_context) + __intel_context_unpin(i915->preempt_context, engine); + err_unpin_kernel: - intel_context_unpin(engine->i915->kernel_context, engine); + __intel_context_unpin(i915->kernel_context, engine); return ret; } @@ -718,6 +726,8 @@ err_unpin_kernel: */ void intel_engine_cleanup_common(struct intel_engine_cs *engine) { + struct drm_i915_private *i915 = engine->i915; + intel_engine_cleanup_scratch(engine); if (HWS_NEEDS_PHYSICAL(engine->i915)) @@ -732,9 +742,9 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) if (engine->default_state) i915_gem_object_put(engine->default_state); - if (engine->i915->preempt_context) - intel_context_unpin(engine->i915->preempt_context, engine); - intel_context_unpin(engine->i915->kernel_context, engine); + if (i915->preempt_context) + __intel_context_unpin(i915->preempt_context, engine); + __intel_context_unpin(i915->kernel_context, engine); i915_timeline_fini(&engine->timeline); } @@ -1007,8 +1017,8 @@ bool intel_engines_are_idle(struct drm_i915_private *dev_priv) */ bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine) { - const struct i915_gem_context * const 
kernel_context = - engine->i915->kernel_context; + const struct intel_context *kernel_context = + to_intel_context(engine->i915->kernel_context, engine); struct i915_request *rq; lockdep_assert_held(&engine->i915->drm.struct_mutex); @@ -1020,7 +1030,7 @@ bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine) */ rq = __i915_gem_active_peek(&engine->timeline.last_request); if (rq) - return rq->gem_context == kernel_context; + return rq->hw_context == kernel_context; else return engine->last_retired_context == kernel_context; } @@ -1107,16 +1117,16 @@ void intel_engines_unpark(struct drm_i915_private *i915) */ void intel_engine_lost_context(struct intel_engine_cs *engine) { - struct i915_gem_context *ctx; + struct intel_context *ce; lockdep_assert_held(&engine->i915->drm.struct_mutex); engine->legacy_active_context = NULL; engine->legacy_active_ppgtt = NULL; - ctx = fetch_and_zero(&engine->last_retired_context); - if (ctx) - intel_context_unpin(ctx, engine); + ce = fetch_and_zero(&engine->last_retired_context); + if (ce) + intel_context_unpin(ce); } bool intel_engine_can_store_dword(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index a432a193f3c4..133367a17863 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -513,9 +513,7 @@ static void guc_add_request(struct intel_guc *guc, struct i915_request *rq) { struct intel_guc_client *client = guc->execbuf_client; struct intel_engine_cs *engine = rq->engine; - u32 ctx_desc = - lower_32_bits(intel_lr_context_descriptor(rq->gem_context, - engine)); + u32 ctx_desc = lower_32_bits(rq->hw_context->lrc_desc); u32 ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64); spin_lock(&client->wq_lock); @@ -553,8 +551,8 @@ static void inject_preempt_context(struct work_struct *work) preempt_work[engine->id]); struct intel_guc_client *client = 
guc->preempt_client; struct guc_stage_desc *stage_desc = __get_stage_desc(client); - u32 ctx_desc = lower_32_bits(intel_lr_context_descriptor(client->owner, - engine)); + u32 ctx_desc = lower_32_bits(to_intel_context(client->owner, + engine)->lrc_desc); u32 data[7]; /* @@ -726,7 +724,7 @@ static bool __guc_dequeue(struct intel_engine_cs *engine) struct i915_request *rq, *rn; list_for_each_entry_safe(rq, rn, &p->requests, sched.link) { - if (last && rq->gem_context != last->gem_context) { + if (last && rq->hw_context != last->hw_context) { if (port == last_port) { __list_del_many(&p->requests, &rq->sched.link); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 1e9cc55d785c..b97c5d4c7877 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -164,7 +164,8 @@ #define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS) static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, - struct intel_engine_cs *engine); + struct intel_engine_cs *engine, + struct intel_context *ce); static void execlists_init_reg_state(u32 *reg_state, struct i915_gem_context *ctx, struct intel_engine_cs *engine, @@ -189,12 +190,7 @@ static inline bool need_preempt(const struct intel_engine_cs *engine, !i915_request_completed(last)); } -/** - * intel_lr_context_descriptor_update() - calculate & cache the descriptor - * descriptor for a pinned context - * @ctx: Context to work on - * @engine: Engine the descriptor will be used with - * +/* * The context descriptor encodes various attributes of a context, * including its GTT address and some flags. 
Because it's fairly * expensive to calculate, we'll just do it once and cache the result, @@ -222,9 +218,9 @@ static inline bool need_preempt(const struct intel_engine_cs *engine, */ static void intel_lr_context_descriptor_update(struct i915_gem_context *ctx, - struct intel_engine_cs *engine) + struct intel_engine_cs *engine, + struct intel_context *ce) { - struct intel_context *ce = to_intel_context(ctx, engine); u64 desc; BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH))); @@ -418,8 +414,7 @@ execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state) static u64 execlists_update_context(struct i915_request *rq) { - struct intel_context *ce = - to_intel_context(rq->gem_context, rq->engine); + struct intel_context *ce = rq->hw_context; struct i915_hw_ppgtt *ppgtt = rq->gem_context->ppgtt ?: rq->i915->mm.aliasing_ppgtt; u32 *reg_state = ce->lrc_reg_state; @@ -496,14 +491,14 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK); } -static bool ctx_single_port_submission(const struct i915_gem_context *ctx) +static bool ctx_single_port_submission(const struct intel_context *ce) { return (IS_ENABLED(CONFIG_DRM_I915_GVT) && - i915_gem_context_force_single_submission(ctx)); + i915_gem_context_force_single_submission(ce->gem_context)); } -static bool can_merge_ctx(const struct i915_gem_context *prev, - const struct i915_gem_context *next) +static bool can_merge_ctx(const struct intel_context *prev, + const struct intel_context *next) { if (prev != next) return false; @@ -680,8 +675,8 @@ static bool __execlists_dequeue(struct intel_engine_cs *engine) * second request, and so we never need to tell the * hardware about the first. 
*/ - if (last && !can_merge_ctx(rq->gem_context, - last->gem_context)) { + if (last && + !can_merge_ctx(rq->hw_context, last->hw_context)) { /* * If we are on the second port and cannot * combine this request with the last, then we @@ -700,14 +695,14 @@ static bool __execlists_dequeue(struct intel_engine_cs *engine) * the same context (even though a different * request) to the second port. */ - if (ctx_single_port_submission(last->gem_context) || - ctx_single_port_submission(rq->gem_context)) { + if (ctx_single_port_submission(last->hw_context) || + ctx_single_port_submission(rq->hw_context)) { __list_del_many(&p->requests, &rq->sched.link); goto done; } - GEM_BUG_ON(last->gem_context == rq->gem_context); + GEM_BUG_ON(last->hw_context == rq->hw_context); if (submit) port_assign(port, last); @@ -1339,6 +1334,37 @@ static void execlists_schedule(struct i915_request *request, spin_unlock_irq(&engine->timeline.lock); } +static void execlists_context_destroy(struct intel_context *ce) +{ + GEM_BUG_ON(!ce->state); + GEM_BUG_ON(ce->pin_count); + + intel_ring_free(ce->ring); + __i915_gem_object_release_unless_active(ce->state->obj); +} + +static void __execlists_context_unpin(struct intel_context *ce) +{ + intel_ring_unpin(ce->ring); + + ce->state->obj->pin_global--; + i915_gem_object_unpin_map(ce->state->obj); + i915_vma_unpin(ce->state); + + i915_gem_context_put(ce->gem_context); +} + +static void execlists_context_unpin(struct intel_context *ce) +{ + lockdep_assert_held(&ce->gem_context->i915->drm.struct_mutex); + GEM_BUG_ON(ce->pin_count == 0); + + if (--ce->pin_count) + return; + + __execlists_context_unpin(ce); +} + static int __context_pin(struct i915_gem_context *ctx, struct i915_vma *vma) { unsigned int flags; @@ -1362,21 +1388,15 @@ static int __context_pin(struct i915_gem_context *ctx, struct i915_vma *vma) return i915_vma_pin(vma, 0, GEN8_LR_CONTEXT_ALIGN, flags); } -static struct intel_ring * -execlists_context_pin(struct intel_engine_cs *engine, - struct 
i915_gem_context *ctx) +static struct intel_context * +__execlists_context_pin(struct intel_engine_cs *engine, + struct i915_gem_context *ctx, + struct intel_context *ce) { - struct intel_context *ce = to_intel_context(ctx, engine); void *vaddr; int ret; - lockdep_assert_held(&ctx->i915->drm.struct_mutex); - - if (likely(ce->pin_count++)) - goto out; - GEM_BUG_ON(!ce->pin_count); /* no overflow please! */ - - ret = execlists_context_deferred_alloc(ctx, engine); + ret = execlists_context_deferred_alloc(ctx, engine, ce); if (ret) goto err; GEM_BUG_ON(!ce->state); @@ -1395,7 +1415,7 @@ execlists_context_pin(struct intel_engine_cs *engine, if (ret) goto unpin_map; - intel_lr_context_descriptor_update(ctx, engine); + intel_lr_context_descriptor_update(ctx, engine, ce); ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; ce->lrc_reg_state[CTX_RING_BUFFER_START+1] = @@ -1404,8 +1424,7 @@ execlists_context_pin(struct intel_engine_cs *engine, ce->state->obj->pin_global++; i915_gem_context_get(ctx); -out: - return ce->ring; + return ce; unpin_map: i915_gem_object_unpin_map(ce->state->obj); @@ -1416,33 +1435,33 @@ err: return ERR_PTR(ret); } -static void execlists_context_unpin(struct intel_engine_cs *engine, - struct i915_gem_context *ctx) +static const struct intel_context_ops execlists_context_ops = { + .unpin = execlists_context_unpin, + .destroy = execlists_context_destroy, +}; + +static struct intel_context * +execlists_context_pin(struct intel_engine_cs *engine, + struct i915_gem_context *ctx) { struct intel_context *ce = to_intel_context(ctx, engine); lockdep_assert_held(&ctx->i915->drm.struct_mutex); - GEM_BUG_ON(ce->pin_count == 0); - if (--ce->pin_count) - return; - - intel_ring_unpin(ce->ring); + if (likely(ce->pin_count++)) + return ce; + GEM_BUG_ON(!ce->pin_count); /* no overflow please! 
*/ - ce->state->obj->pin_global--; - i915_gem_object_unpin_map(ce->state->obj); - i915_vma_unpin(ce->state); + ce->ops = &execlists_context_ops; - i915_gem_context_put(ctx); + return __execlists_context_pin(engine, ctx, ce); } static int execlists_request_alloc(struct i915_request *request) { - struct intel_context *ce = - to_intel_context(request->gem_context, request->engine); int ret; - GEM_BUG_ON(!ce->pin_count); + GEM_BUG_ON(!request->hw_context->pin_count); /* Flush enough space to reduce the likelihood of waiting after * we start building the request - in which case we will just @@ -1956,7 +1975,7 @@ static void execlists_reset(struct intel_engine_cs *engine, * future request will be after userspace has had the opportunity * to recreate its own state. */ - regs = to_intel_context(request->gem_context, engine)->lrc_reg_state; + regs = request->hw_context->lrc_reg_state; if (engine->default_state) { void *defaults; @@ -2327,8 +2346,6 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) engine->reset.finish = execlists_reset_finish; engine->context_pin = execlists_context_pin; - engine->context_unpin = execlists_context_unpin; - engine->request_alloc = execlists_request_alloc; engine->emit_flush = gen8_emit_flush; @@ -2563,7 +2580,7 @@ static void execlists_init_reg_state(u32 *regs, struct drm_i915_private *dev_priv = engine->i915; struct i915_hw_ppgtt *ppgtt = ctx->ppgtt ?: dev_priv->mm.aliasing_ppgtt; u32 base = engine->mmio_base; - bool rcs = engine->id == RCS; + bool rcs = engine->class == RENDER_CLASS; /* A context is actually a big batch buffer with several * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. 
The @@ -2710,10 +2727,10 @@ err_unpin_ctx: } static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, - struct intel_engine_cs *engine) + struct intel_engine_cs *engine, + struct intel_context *ce) { struct drm_i915_gem_object *ctx_obj; - struct intel_context *ce = to_intel_context(ctx, engine); struct i915_vma *vma; uint32_t context_size; struct intel_ring *ring; diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 4ec7d8dd13c8..1593194e930c 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -104,11 +104,4 @@ struct i915_gem_context; void intel_lr_context_resume(struct drm_i915_private *dev_priv); -static inline uint64_t -intel_lr_context_descriptor(struct i915_gem_context *ctx, - struct intel_engine_cs *engine) -{ - return to_intel_context(ctx, engine)->lrc_desc; -} - #endif /* _INTEL_LRC_H_ */ diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 53703012ec75..0c0c9f531e4e 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -571,8 +571,7 @@ static void reset_ring(struct intel_engine_cs *engine, */ if (request) { struct drm_i915_private *dev_priv = request->i915; - struct intel_context *ce = - to_intel_context(request->gem_context, engine); + struct intel_context *ce = request->hw_context; struct i915_hw_ppgtt *ppgtt; if (ce->state) { @@ -1186,7 +1185,31 @@ intel_ring_free(struct intel_ring *ring) kfree(ring); } -static int context_pin(struct intel_context *ce) +static void intel_ring_context_destroy(struct intel_context *ce) +{ + GEM_BUG_ON(ce->pin_count); + + if (ce->state) + __i915_gem_object_release_unless_active(ce->state->obj); +} + +static void intel_ring_context_unpin(struct intel_context *ce) +{ + lockdep_assert_held(&ce->gem_context->i915->drm.struct_mutex); + GEM_BUG_ON(ce->pin_count == 0); + + if (--ce->pin_count) + return; + + if (ce->state) { + 
ce->state->obj->pin_global--; + i915_vma_unpin(ce->state); + } + + i915_gem_context_put(ce->gem_context); +} + +static int __context_pin(struct intel_context *ce) { struct i915_vma *vma = ce->state; int ret; @@ -1275,25 +1298,19 @@ err_obj: return ERR_PTR(err); } -static struct intel_ring * -intel_ring_context_pin(struct intel_engine_cs *engine, - struct i915_gem_context *ctx) +static struct intel_context * +__ring_context_pin(struct intel_engine_cs *engine, + struct i915_gem_context *ctx, + struct intel_context *ce) { - struct intel_context *ce = to_intel_context(ctx, engine); - int ret; - - lockdep_assert_held(&ctx->i915->drm.struct_mutex); - - if (likely(ce->pin_count++)) - goto out; - GEM_BUG_ON(!ce->pin_count); /* no overflow please! */ + int err; if (!ce->state && engine->context_size) { struct i915_vma *vma; vma = alloc_context_vma(engine); if (IS_ERR(vma)) { - ret = PTR_ERR(vma); + err = PTR_ERR(vma); goto err; } @@ -1301,8 +1318,8 @@ intel_ring_context_pin(struct intel_engine_cs *engine, } if (ce->state) { - ret = context_pin(ce); - if (ret) + err = __context_pin(ce); + if (err) goto err; ce->state->obj->pin_global++; @@ -1310,32 +1327,37 @@ intel_ring_context_pin(struct intel_engine_cs *engine, i915_gem_context_get(ctx); -out: /* One ringbuffer to rule them all */ - return engine->buffer; + GEM_BUG_ON(!engine->buffer); + ce->ring = engine->buffer; + + return ce; err: ce->pin_count = 0; - return ERR_PTR(ret); + return ERR_PTR(err); } -static void intel_ring_context_unpin(struct intel_engine_cs *engine, - struct i915_gem_context *ctx) +static const struct intel_context_ops ring_context_ops = { + .unpin = intel_ring_context_unpin, + .destroy = intel_ring_context_destroy, +}; + +static struct intel_context * +intel_ring_context_pin(struct intel_engine_cs *engine, + struct i915_gem_context *ctx) { struct intel_context *ce = to_intel_context(ctx, engine); lockdep_assert_held(&ctx->i915->drm.struct_mutex); - GEM_BUG_ON(ce->pin_count == 0); - if (--ce->pin_count) 
- return; + if (likely(ce->pin_count++)) + return ce; + GEM_BUG_ON(!ce->pin_count); /* no overflow please! */ - if (ce->state) { - ce->state->obj->pin_global--; - i915_vma_unpin(ce->state); - } + ce->ops = &ring_context_ops; - i915_gem_context_put(ctx); + return __ring_context_pin(engine, ctx, ce); } static int intel_init_ring_buffer(struct intel_engine_cs *engine) @@ -1346,10 +1368,6 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine) intel_engine_setup_common(engine); - err = intel_engine_init_common(engine); - if (err) - goto err; - timeline = i915_timeline_create(engine->i915, engine->name); if (IS_ERR(timeline)) { err = PTR_ERR(timeline); @@ -1371,8 +1389,14 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine) GEM_BUG_ON(engine->buffer); engine->buffer = ring; + err = intel_engine_init_common(engine); + if (err) + goto err_unpin; + return 0; +err_unpin: + intel_ring_unpin(ring); err_ring: intel_ring_free(ring); err: @@ -1458,7 +1482,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) *cs++ = MI_NOOP; *cs++ = MI_SET_CONTEXT; - *cs++ = i915_ggtt_offset(to_intel_context(rq->gem_context, engine)->state) | flags; + *cs++ = i915_ggtt_offset(rq->hw_context->state) | flags; /* * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP * WaMiSetContext_Hang:snb,ivb,vlv @@ -1549,7 +1573,7 @@ static int switch_context(struct i915_request *rq) hw_flags = MI_FORCE_RESTORE; } - if (to_intel_context(to_ctx, engine)->state && + if (rq->hw_context->state && (to_ctx != from_ctx || hw_flags & MI_FORCE_RESTORE)) { GEM_BUG_ON(engine->id != RCS); @@ -1597,7 +1621,7 @@ static int ring_request_alloc(struct i915_request *request) { int ret; - GEM_BUG_ON(!to_intel_context(request->gem_context, request->engine)->pin_count); + GEM_BUG_ON(!request->hw_context->pin_count); /* Flush enough space to reduce the likelihood of waiting after * we start building the request - in which case we will just @@ -2028,8 +2052,6 @@ static void 
intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, engine->reset.finish = reset_finish; engine->context_pin = intel_ring_context_pin; - engine->context_unpin = intel_ring_context_unpin; - engine->request_alloc = ring_request_alloc; engine->emit_breadcrumb = i9xx_emit_breadcrumb; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 2b16185e36c4..20c4e13efc0d 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -436,10 +436,9 @@ struct intel_engine_cs { void (*set_default_submission)(struct intel_engine_cs *engine); - struct intel_ring *(*context_pin)(struct intel_engine_cs *engine, - struct i915_gem_context *ctx); - void (*context_unpin)(struct intel_engine_cs *engine, - struct i915_gem_context *ctx); + struct intel_context *(*context_pin)(struct intel_engine_cs *engine, + struct i915_gem_context *ctx); + int (*request_alloc)(struct i915_request *rq); int (*init_context)(struct i915_request *rq); @@ -555,7 +554,7 @@ struct intel_engine_cs { * to the kernel context and trash it as the save may not happen * before the hardware is powered down. */ - struct i915_gem_context *last_retired_context; + struct intel_context *last_retired_context; /* We track the current MI_SET_CONTEXT in order to eliminate * redudant context switches. 
This presumes that requests are not diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c index 501becc47c0c..8904f1ce64e3 100644 --- a/drivers/gpu/drm/i915/selftests/mock_context.c +++ b/drivers/gpu/drm/i915/selftests/mock_context.c @@ -30,6 +30,7 @@ mock_context(struct drm_i915_private *i915, const char *name) { struct i915_gem_context *ctx; + unsigned int n; int ret; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); @@ -43,6 +44,12 @@ mock_context(struct drm_i915_private *i915, INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); INIT_LIST_HEAD(&ctx->handles_list); + for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++) { + struct intel_context *ce = &ctx->__engine[n]; + + ce->gem_context = ctx; + } + ret = ida_simple_get(&i915->contexts.hw_ida, 0, MAX_CONTEXT_HW_ID, GFP_KERNEL); if (ret < 0) diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index 26bf29d97007..33eddfc1f8ce 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -72,25 +72,37 @@ static void hw_delay_complete(struct timer_list *t) spin_unlock(&engine->hw_lock); } -static struct intel_ring * -mock_context_pin(struct intel_engine_cs *engine, - struct i915_gem_context *ctx) +static void mock_context_unpin(struct intel_context *ce) { - struct intel_context *ce = to_intel_context(ctx, engine); + if (--ce->pin_count) + return; - if (!ce->pin_count++) - i915_gem_context_get(ctx); + i915_gem_context_put(ce->gem_context); +} - return engine->buffer; +static void mock_context_destroy(struct intel_context *ce) +{ + GEM_BUG_ON(ce->pin_count); } -static void mock_context_unpin(struct intel_engine_cs *engine, - struct i915_gem_context *ctx) +static const struct intel_context_ops mock_context_ops = { + .unpin = mock_context_unpin, + .destroy = mock_context_destroy, +}; + +static struct intel_context * +mock_context_pin(struct intel_engine_cs *engine, + struct 
i915_gem_context *ctx) { struct intel_context *ce = to_intel_context(ctx, engine); - if (!--ce->pin_count) - i915_gem_context_put(ctx); + if (!ce->pin_count++) { + i915_gem_context_get(ctx); + ce->ring = engine->buffer; + ce->ops = &mock_context_ops; + } + + return ce; } static int mock_request_alloc(struct i915_request *request) @@ -185,7 +197,6 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, engine->base.status_page.page_addr = (void *)(engine + 1); engine->base.context_pin = mock_context_pin; - engine->base.context_unpin = mock_context_unpin; engine->base.request_alloc = mock_request_alloc; engine->base.emit_flush = mock_emit_flush; engine->base.emit_breadcrumb = mock_emit_breadcrumb; @@ -238,11 +249,13 @@ void mock_engine_free(struct intel_engine_cs *engine) { struct mock_engine *mock = container_of(engine, typeof(*mock), base); + struct intel_context *ce; GEM_BUG_ON(timer_pending(&mock->hw_delay)); - if (engine->last_retired_context) - intel_context_unpin(engine->last_retired_context, engine); + ce = fetch_and_zero(&engine->last_retired_context); + if (ce) + intel_context_unpin(ce); mock_ring_free(engine->buffer); -- cgit v1.2.3 From 09a4c02e58c1b3d9748f78242962b7f63c68477e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 24 May 2018 09:11:35 +0100 Subject: drm/i915: Look for an active kernel context before switching We were not very carefully checking to see if an older request on the engine was an earlier switch-to-kernel-context before deciding to emit a new switch. The end result would be that we could get into a permanent loop of trying to emit a new request to perform the switch simply to flush the existing switch. What we need is a means of tracking the completion of each timeline versus the kernel context, that is to detect if a more recent request has been submitted that would result in a switch away from the kernel context. 
To realise this, we need only to look in our syncmap on the kernel context and check that we have synchronized against all active rings. v2: Since all ringbuffer clients currently share the same timeline, we do have to use the gem_context to distinguish clients. As a bonus, include all the tracing used to debug the death inside suspend. v3: Test, test, test. Construct a selftest to exercise and assert the expected behaviour that multiple switch-to-contexts do not emit redundant requests. Reported-by: Mika Kuoppala Fixes: a89d1f921c15 ("drm/i915: Split i915_gem_timeline into individual timelines") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20180524081135.15278-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 7 + drivers/gpu/drm/i915/i915_gem.h | 3 + drivers/gpu/drm/i915/i915_gem_context.c | 86 ++++++++++-- drivers/gpu/drm/i915/i915_request.c | 5 +- drivers/gpu/drm/i915/selftests/i915_gem_context.c | 144 +++++++++++++++++++++ .../gpu/drm/i915/selftests/i915_mock_selftests.h | 1 + 6 files changed, 231 insertions(+), 15 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 03874b50ada9..05f44ca35a06 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3703,6 +3703,9 @@ static int wait_for_engines(struct drm_i915_private *i915) int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) { + GEM_TRACE("flags=%x (%s)\n", + flags, flags & I915_WAIT_LOCKED ? 
"locked" : "unlocked"); + /* If the device is asleep, we have no requests outstanding */ if (!READ_ONCE(i915->gt.awake)) return 0; @@ -3719,6 +3722,7 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) return err; } i915_retire_requests(i915); + GEM_BUG_ON(i915->gt.active_requests); return wait_for_engines(i915); } else { @@ -4901,6 +4905,7 @@ static void assert_kernel_context_is_current(struct drm_i915_private *i915) struct intel_engine_cs *engine; enum intel_engine_id id; + GEM_BUG_ON(i915->gt.active_requests); for_each_engine(engine, i915, id) { GEM_BUG_ON(__i915_gem_active_peek(&engine->timeline.last_request)); GEM_BUG_ON(engine->last_retired_context->gem_context != kctx); @@ -4932,6 +4937,8 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv) struct drm_device *dev = &dev_priv->drm; int ret; + GEM_TRACE("\n"); + intel_runtime_pm_get(dev_priv); intel_suspend_gt_powersave(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index 5bf24cfc218c..62ee4e385365 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -63,9 +63,12 @@ struct drm_i915_private; #if IS_ENABLED(CONFIG_DRM_I915_TRACE_GEM) #define GEM_TRACE(...) trace_printk(__VA_ARGS__) #define GEM_TRACE_DUMP() ftrace_dump(DUMP_ALL) +#define GEM_TRACE_DUMP_ON(expr) \ + do { if (expr) ftrace_dump(DUMP_ALL); } while (0) #else #define GEM_TRACE(...) 
do { } while (0) #define GEM_TRACE_DUMP() do { } while (0) +#define GEM_TRACE_DUMP_ON(expr) BUILD_BUG_ON_INVALID(expr) #endif #define I915_NUM_ENGINES 8 diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index b69b18ef8120..45393f6e0208 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -576,30 +576,72 @@ last_request_on_engine(struct i915_timeline *timeline, { struct i915_request *rq; - if (timeline == &engine->timeline) - return NULL; + GEM_BUG_ON(timeline == &engine->timeline); rq = i915_gem_active_raw(&timeline->last_request, &engine->i915->drm.struct_mutex); - if (rq && rq->engine == engine) + if (rq && rq->engine == engine) { + GEM_TRACE("last request for %s on engine %s: %llx:%d\n", + timeline->name, engine->name, + rq->fence.context, rq->fence.seqno); + GEM_BUG_ON(rq->timeline != timeline); return rq; + } return NULL; } -static bool engine_has_idle_kernel_context(struct intel_engine_cs *engine) +static bool engine_has_kernel_context_barrier(struct intel_engine_cs *engine) { - struct list_head * const active_rings = &engine->i915->gt.active_rings; + struct drm_i915_private *i915 = engine->i915; + const struct intel_context * const ce = + to_intel_context(i915->kernel_context, engine); + struct i915_timeline *barrier = ce->ring->timeline; struct intel_ring *ring; + bool any_active = false; - lockdep_assert_held(&engine->i915->drm.struct_mutex); + lockdep_assert_held(&i915->drm.struct_mutex); + list_for_each_entry(ring, &i915->gt.active_rings, active_link) { + struct i915_request *rq; + + rq = last_request_on_engine(ring->timeline, engine); + if (!rq) + continue; - list_for_each_entry(ring, active_rings, active_link) { - if (last_request_on_engine(ring->timeline, engine)) + any_active = true; + + if (rq->gem_context == i915->kernel_context) + continue; + + /* + * Was this request submitted after the previous + * switch-to-kernel-context? 
+ */ + if (!i915_timeline_sync_is_later(barrier, &rq->fence)) { + GEM_TRACE("%s needs barrier for %llx:%d\n", + ring->timeline->name, + rq->fence.context, + rq->fence.seqno); return false; + } + + GEM_TRACE("%s has barrier after %llx:%d\n", + ring->timeline->name, + rq->fence.context, + rq->fence.seqno); } - return intel_engine_has_kernel_context(engine); + /* + * If any other timeline was still active and behind the last barrier, + * then our last switch-to-kernel-context must still be queued and + * will run last (leaving the engine in the kernel context when it + * eventually idles). + */ + if (any_active) + return true; + + /* The engine is idle; check that it is idling in the kernel context. */ + return engine->last_retired_context == ce; } int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915) @@ -607,7 +649,10 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915) struct intel_engine_cs *engine; enum intel_engine_id id; + GEM_TRACE("\n"); + lockdep_assert_held(&i915->drm.struct_mutex); + GEM_BUG_ON(!i915->kernel_context); i915_retire_requests(i915); @@ -615,9 +660,12 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915) struct intel_ring *ring; struct i915_request *rq; - if (engine_has_idle_kernel_context(engine)) + GEM_BUG_ON(!to_intel_context(i915->kernel_context, engine)); + if (engine_has_kernel_context_barrier(engine)) continue; + GEM_TRACE("emit barrier on %s\n", engine->name); + rq = i915_request_alloc(engine, i915->kernel_context); if (IS_ERR(rq)) return PTR_ERR(rq); @@ -627,10 +675,20 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915) struct i915_request *prev; prev = last_request_on_engine(ring->timeline, engine); - if (prev) - i915_sw_fence_await_sw_fence_gfp(&rq->submit, - &prev->submit, - I915_FENCE_GFP); + if (!prev) + continue; + + if (prev->gem_context == i915->kernel_context) + continue; + + GEM_TRACE("add barrier on %s for %llx:%d\n", + engine->name, + 
prev->fence.context, + prev->fence.seqno); + i915_sw_fence_await_sw_fence_gfp(&rq->submit, + &prev->submit, + I915_FENCE_GFP); + i915_timeline_sync_set(rq->timeline, &prev->fence); } /* diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index fc499bcbd105..f187250e60c6 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -320,6 +320,7 @@ static void advance_ring(struct i915_request *request) * is just about to be. Either works, if we miss the last two * noops - they are safe to be replayed on a reset. */ + GEM_TRACE("marking %s as inactive\n", ring->timeline->name); tail = READ_ONCE(request->tail); list_del(&ring->active_link); } else { @@ -1095,8 +1096,10 @@ void __i915_request_add(struct i915_request *request, bool flush_caches) i915_gem_active_set(&timeline->last_request, request); list_add_tail(&request->ring_link, &ring->request_list); - if (list_is_first(&request->ring_link, &ring->request_list)) + if (list_is_first(&request->ring_link, &ring->request_list)) { + GEM_TRACE("marking %s as active\n", ring->timeline->name); list_add(&ring->active_link, &request->i915->gt.active_rings); + } request->emitted_jiffies = jiffies; /* diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index ddb03f009232..b39392a00a6f 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -26,6 +26,7 @@ #include "igt_flush_test.h" #include "mock_drm.h" +#include "mock_gem_device.h" #include "huge_gem_object.h" #define DW_PER_PAGE (PAGE_SIZE / sizeof(u32)) @@ -420,6 +421,130 @@ out_unlock: return err; } +static __maybe_unused const char * +__engine_name(struct drm_i915_private *i915, unsigned int engines) +{ + struct intel_engine_cs *engine; + unsigned int tmp; + + if (engines == ALL_ENGINES) + return "all"; + + for_each_engine_masked(engine, i915, engines, tmp) + return engine->name; 
+ + return "none"; +} + +static int __igt_switch_to_kernel_context(struct drm_i915_private *i915, + struct i915_gem_context *ctx, + unsigned int engines) +{ + struct intel_engine_cs *engine; + unsigned int tmp; + int err; + + GEM_TRACE("Testing %s\n", __engine_name(i915, engines)); + for_each_engine_masked(engine, i915, engines, tmp) { + struct i915_request *rq; + + rq = i915_request_alloc(engine, ctx); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + i915_request_add(rq); + } + + err = i915_gem_switch_to_kernel_context(i915); + if (err) + return err; + + for_each_engine_masked(engine, i915, engines, tmp) { + if (!engine_has_kernel_context_barrier(engine)) { + pr_err("kernel context not last on engine %s!\n", + engine->name); + return -EINVAL; + } + } + + err = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED); + if (err) + return err; + + GEM_BUG_ON(i915->gt.active_requests); + for_each_engine_masked(engine, i915, engines, tmp) { + if (engine->last_retired_context->gem_context != i915->kernel_context) { + pr_err("engine %s not idling in kernel context!\n", + engine->name); + return -EINVAL; + } + } + + err = i915_gem_switch_to_kernel_context(i915); + if (err) + return err; + + if (i915->gt.active_requests) { + pr_err("switch-to-kernel-context emitted %d requests even though it should already be idling in the kernel context\n", + i915->gt.active_requests); + return -EINVAL; + } + + for_each_engine_masked(engine, i915, engines, tmp) { + if (!intel_engine_has_kernel_context(engine)) { + pr_err("kernel context not last on engine %s!\n", + engine->name); + return -EINVAL; + } + } + + return 0; +} + +static int igt_switch_to_kernel_context(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + struct i915_gem_context *ctx; + enum intel_engine_id id; + int err; + + /* + * A core premise of switching to the kernel context is that + * if an engine is already idling in the kernel context, we + * do not emit another request and wake it up. 
The other being + * that we do indeed end up idling in the kernel context. + */ + + mutex_lock(&i915->drm.struct_mutex); + ctx = kernel_context(i915); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out_unlock; + } + + /* First check idling each individual engine */ + for_each_engine(engine, i915, id) { + err = __igt_switch_to_kernel_context(i915, ctx, BIT(id)); + if (err) + goto out_unlock; + } + + /* Now en masse */ + err = __igt_switch_to_kernel_context(i915, ctx, ALL_ENGINES); + if (err) + goto out_unlock; + +out_unlock: + GEM_TRACE_DUMP_ON(err); + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; + mutex_unlock(&i915->drm.struct_mutex); + + kernel_context_close(ctx); + return err; +} + static int fake_aliasing_ppgtt_enable(struct drm_i915_private *i915) { struct drm_i915_gem_object *obj; @@ -447,9 +572,28 @@ static void fake_aliasing_ppgtt_disable(struct drm_i915_private *i915) i915_gem_fini_aliasing_ppgtt(i915); } +int i915_gem_context_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_switch_to_kernel_context), + }; + struct drm_i915_private *i915; + int err; + + i915 = mock_gem_device(); + if (!i915) + return -ENOMEM; + + err = i915_subtests(tests, i915); + + drm_dev_unref(&i915->drm); + return err; +} + int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv) { static const struct i915_subtest tests[] = { + SUBTEST(igt_switch_to_kernel_context), SUBTEST(igt_ctx_exec), }; bool fake_alias = false; diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index d16d74178e9d..1b70208eeea7 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -24,3 +24,4 @@ selftest(vma, i915_vma_mock_selftests) selftest(evict, i915_gem_evict_mock_selftests) selftest(gtt, i915_gem_gtt_mock_selftests) selftest(hugepages, i915_gem_huge_page_mock_selftests) +selftest(contexts, 
i915_gem_context_mock_selftests) -- cgit v1.2.3 From b3ee09a4de33259a89d30aca6b2ebb0bc26640af Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 11 Jun 2018 12:08:44 +0100 Subject: drm/i915/ringbuffer: Fix context restore upon reset The discovery with trying to enable full-ppgtt was that we were completely failing to load both the mm and context following the reset. Although we were performing mmio to set the PP_DIR (per-process GTT) and CCID (context), these were taking no effect (the assumption was that this would trigger reload of the context and restore the page tables). It was not until we performed the LRI + MI_SET_CONTEXT in a following context switch that anything would occur. Since we are then required to reset the context image and PP_DIR using CS commands, we place those commands into every batch. The hardware should recognise the no-ops and eliminate the expensive context loads, but we still have to pay the cost of using cross-powerwell register writes. In practice, this has no effect on actual context switch times, and only adds a few hundred nanoseconds to no-op switches. We can improve the latter by eliminating the w/a around known no-op switches, but there is an ulterior motive to keeping them. Always emitting the context switch at the beginning of the request (and relying on HW to skip unneeded switches) does have one key advantage. Should we implement request reordering on Haswell, we will not know in advance what the previous executing context was on the GPU and so we would not be able to elide the MI_SET_CONTEXT commands ourselves and always have to emit them. Having our hand forced now actually prepares us for later. Now since that context and mm follow the request, we no longer (and not for a long time since requests took over!) require a trace point to tell when we write the switch into the ring, since it is always. (This is even more important when you remember that simply writing into the ring bears no relation to the current mm.) 
v2: Sandybridge has to agree to use LRI as well. Testcase: igt/drv_selftests/live_hangcheck Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Mika Kuoppala Cc: Matthew Auld Cc: Tvrtko Ursulin Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20180611110845.31890-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 45 ------------ drivers/gpu/drm/i915/i915_gem_gtt.h | 2 - drivers/gpu/drm/i915/i915_request.c | 2 + drivers/gpu/drm/i915/i915_request.h | 3 + drivers/gpu/drm/i915/i915_trace.h | 33 --------- drivers/gpu/drm/i915/intel_engine_cs.c | 3 - drivers/gpu/drm/i915/intel_ringbuffer.c | 125 ++++++++++++++++---------------- drivers/gpu/drm/i915/intel_ringbuffer.h | 9 --- 8 files changed, 66 insertions(+), 156 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 7ccfdbc8f9b4..ac75e0c5735c 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1712,45 +1712,6 @@ static void gen6_write_page_range(struct i915_hw_ppgtt *ppgtt, wmb(); } -static inline u32 get_pd_offset(struct i915_hw_ppgtt *ppgtt) -{ - GEM_BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f); - return ppgtt->pd.base.ggtt_offset << 10; -} - -static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, - struct i915_request *rq) -{ - struct intel_engine_cs *engine = rq->engine; - u32 *cs; - - /* NB: TLBs must be flushed and invalidated before a switch */ - cs = intel_ring_begin(rq, 6); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_LOAD_REGISTER_IMM(2); - *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine)); - *cs++ = PP_DIR_DCLV_2G; - *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine)); - *cs++ = get_pd_offset(ppgtt); - *cs++ = MI_NOOP; - intel_ring_advance(rq, cs); - - return 0; -} - -static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt, - struct i915_request *rq) -{ - struct intel_engine_cs *engine = rq->engine; - 
struct drm_i915_private *dev_priv = rq->i915; - - I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G); - I915_WRITE(RING_PP_DIR_BASE(engine), get_pd_offset(ppgtt)); - return 0; -} - static void gen8_ppgtt_enable(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; @@ -2024,12 +1985,6 @@ static struct i915_hw_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915) ppgtt->vm.dma = &i915->drm.pdev->dev; ppgtt->vm.pte_encode = ggtt->vm.pte_encode; - if (IS_GEN6(i915)) - ppgtt->switch_mm = gen6_mm_switch; - else if (IS_GEN7(i915)) - ppgtt->switch_mm = gen7_mm_switch; - else - BUG(); err = gen6_ppgtt_alloc(ppgtt); if (err) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 16307ba7e303..e70f6abcd0f2 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -406,8 +406,6 @@ struct i915_hw_ppgtt { gen6_pte_t __iomem *pd_addr; - int (*switch_mm)(struct i915_hw_ppgtt *ppgtt, - struct i915_request *rq); void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file *m); }; diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index f187250e60c6..9092f5464c24 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -817,6 +817,8 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) /* Keep a second pin for the dual retirement along engine and ring */ __intel_context_pin(ce); + rq->infix = rq->ring->emit; /* end of header; start of user payload */ + /* Check that we didn't interrupt ourselves with a new request */ GEM_BUG_ON(rq->timeline->seqno != rq->fence.seqno); return rq; diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 491ff81d0fea..0e9aba53d0e4 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -134,6 +134,9 @@ struct i915_request { /** Position in the ring of the start of the request */ u32 
head; + /** Position in the ring of the start of the user packets */ + u32 infix; + /** * Position in the ring of the start of the postfix. * This is required to calculate the maximum available ring space diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 1472f48ab2e8..b50c6b829715 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -973,39 +973,6 @@ DEFINE_EVENT(i915_context, i915_context_free, TP_ARGS(ctx) ); -/** - * DOC: switch_mm tracepoint - * - * This tracepoint allows tracking of the mm switch, which is an important point - * in the lifetime of the vm in the legacy submission path. This tracepoint is - * called only if full ppgtt is enabled. - */ -TRACE_EVENT(switch_mm, - TP_PROTO(struct intel_engine_cs *engine, struct i915_gem_context *to), - - TP_ARGS(engine, to), - - TP_STRUCT__entry( - __field(u16, class) - __field(u16, instance) - __field(struct i915_gem_context *, to) - __field(struct i915_address_space *, vm) - __field(u32, dev) - ), - - TP_fast_assign( - __entry->class = engine->uabi_class; - __entry->instance = engine->instance; - __entry->to = to; - __entry->vm = to->ppgtt ? 
&to->ppgtt->vm : NULL; - __entry->dev = engine->i915->drm.primary->index; - ), - - TP_printk("dev=%u, engine=%u:%u, ctx=%p, ctx_vm=%p", - __entry->dev, __entry->class, __entry->instance, __entry->to, - __entry->vm) -); - #endif /* _I915_TRACE_H_ */ /* This part must be outside protection */ diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 2ec2e60dc670..d1cf8b4926ab 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1168,9 +1168,6 @@ void intel_engine_lost_context(struct intel_engine_cs *engine) lockdep_assert_held(&engine->i915->drm.struct_mutex); - engine->legacy_active_context = NULL; - engine->legacy_active_ppgtt = NULL; - ce = fetch_and_zero(&engine->last_retired_context); if (ce) intel_context_unpin(ce); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 5bc53a5f4504..d72a6a5ff3ac 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -541,11 +541,23 @@ static struct i915_request *reset_prepare(struct intel_engine_cs *engine) return i915_gem_find_active_request(engine); } -static void reset_ring(struct intel_engine_cs *engine, - struct i915_request *request) +static void skip_request(struct i915_request *rq) { - GEM_TRACE("%s seqno=%x\n", - engine->name, request ? request->global_seqno : 0); + void *vaddr = rq->ring->vaddr; + u32 head; + + head = rq->infix; + if (rq->postfix < head) { + memset32(vaddr + head, MI_NOOP, + (rq->ring->size - head) / sizeof(u32)); + head = 0; + } + memset32(vaddr + head, MI_NOOP, (rq->postfix - head) / sizeof(u32)); +} + +static void reset_ring(struct intel_engine_cs *engine, struct i915_request *rq) +{ + GEM_TRACE("%s seqno=%x\n", engine->name, rq ? 
rq->global_seqno : 0); /* * RC6 must be prevented until the reset is complete and the engine @@ -569,43 +581,11 @@ static void reset_ring(struct intel_engine_cs *engine, * If the request was innocent, we try to replay the request with * the restored context. */ - if (request) { - struct drm_i915_private *dev_priv = request->i915; - struct intel_context *ce = request->hw_context; - struct i915_hw_ppgtt *ppgtt; - - if (ce->state) { - I915_WRITE(CCID, - i915_ggtt_offset(ce->state) | - BIT(8) /* must be set! */ | - CCID_EXTENDED_STATE_SAVE | - CCID_EXTENDED_STATE_RESTORE | - CCID_EN); - } - - ppgtt = request->gem_context->ppgtt ?: engine->i915->mm.aliasing_ppgtt; - if (ppgtt) { - u32 pd_offset = ppgtt->pd.base.ggtt_offset << 10; - - I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G); - I915_WRITE(RING_PP_DIR_BASE(engine), pd_offset); - - /* Wait for the PD reload to complete */ - if (intel_wait_for_register(dev_priv, - RING_PP_DIR_BASE(engine), - BIT(0), 0, - 10)) - DRM_ERROR("Wait for reload of ppgtt page-directory timed out\n"); - - ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine); - } - + if (rq) { /* If the rq hung, jump to its breadcrumb and skip the batch */ - if (request->fence.error == -EIO) - request->ring->head = request->postfix; - } else { - engine->legacy_active_context = NULL; - engine->legacy_active_ppgtt = NULL; + rq->ring->head = intel_ring_wrap(rq->ring, rq->head); + if (rq->fence.error == -EIO) + skip_request(rq); } } @@ -1446,6 +1426,29 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv) intel_ring_reset(engine->buffer, 0); } +static int load_pd_dir(struct i915_request *rq, + const struct i915_hw_ppgtt *ppgtt) +{ + const struct intel_engine_cs * const engine = rq->engine; + u32 *cs; + + cs = intel_ring_begin(rq, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine)); + *cs++ = PP_DIR_DCLV_2G; + + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = 
i915_mmio_reg_offset(RING_PP_DIR_BASE(engine)); + *cs++ = ppgtt->pd.base.ggtt_offset << 10; + + intel_ring_advance(rq, cs); + + return 0; +} + static inline int mi_set_context(struct i915_request *rq, u32 flags) { struct drm_i915_private *i915 = rq->i915; @@ -1590,31 +1593,28 @@ static int remap_l3(struct i915_request *rq, int slice) static int switch_context(struct i915_request *rq) { struct intel_engine_cs *engine = rq->engine; - struct i915_gem_context *to_ctx = rq->gem_context; - struct i915_hw_ppgtt *to_mm = - to_ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt; - struct i915_gem_context *from_ctx = engine->legacy_active_context; - struct i915_hw_ppgtt *from_mm = engine->legacy_active_ppgtt; + struct i915_gem_context *ctx = rq->gem_context; + struct i915_hw_ppgtt *ppgtt = ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt; + unsigned int unwind_mm = 0; u32 hw_flags = 0; int ret, i; lockdep_assert_held(&rq->i915->drm.struct_mutex); GEM_BUG_ON(HAS_EXECLISTS(rq->i915)); - if (to_mm != from_mm || - (to_mm && intel_engine_flag(engine) & to_mm->pd_dirty_rings)) { - trace_switch_mm(engine, to_ctx); - ret = to_mm->switch_mm(to_mm, rq); + if (ppgtt) { + ret = load_pd_dir(rq, ppgtt); if (ret) goto err; - to_mm->pd_dirty_rings &= ~intel_engine_flag(engine); - engine->legacy_active_ppgtt = to_mm; - hw_flags = MI_FORCE_RESTORE; + if (intel_engine_flag(engine) & ppgtt->pd_dirty_rings) { + unwind_mm = intel_engine_flag(engine); + ppgtt->pd_dirty_rings &= ~unwind_mm; + hw_flags = MI_FORCE_RESTORE; + } } - if (rq->hw_context->state && - (to_ctx != from_ctx || hw_flags & MI_FORCE_RESTORE)) { + if (rq->hw_context->state) { GEM_BUG_ON(engine->id != RCS); /* @@ -1624,35 +1624,32 @@ static int switch_context(struct i915_request *rq) * as nothing actually executes using the kernel context; it * is purely used for flushing user contexts. 
*/ - if (i915_gem_context_is_kernel(to_ctx)) + if (i915_gem_context_is_kernel(ctx)) hw_flags = MI_RESTORE_INHIBIT; ret = mi_set_context(rq, hw_flags); if (ret) goto err_mm; - - engine->legacy_active_context = to_ctx; } - if (to_ctx->remap_slice) { + if (ctx->remap_slice) { for (i = 0; i < MAX_L3_SLICES; i++) { - if (!(to_ctx->remap_slice & BIT(i))) + if (!(ctx->remap_slice & BIT(i))) continue; ret = remap_l3(rq, i); if (ret) - goto err_ctx; + goto err_mm; } - to_ctx->remap_slice = 0; + ctx->remap_slice = 0; } return 0; -err_ctx: - engine->legacy_active_context = from_ctx; err_mm: - engine->legacy_active_ppgtt = from_mm; + if (unwind_mm) + ppgtt->pd_dirty_rings |= unwind_mm; err: return ret; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index acef385c4c80..b44c67849749 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -557,15 +557,6 @@ struct intel_engine_cs { */ struct intel_context *last_retired_context; - /* We track the current MI_SET_CONTEXT in order to eliminate - * redudant context switches. This presumes that requests are not - * reordered! Or when they are the tracking is updated along with - * the emission of individual requests into the legacy command - * stream (ring). - */ - struct i915_gem_context *legacy_active_context; - struct i915_hw_ppgtt *legacy_active_ppgtt; - /* status_notifier: list of callbacks for context-switch changes */ struct atomic_notifier_head context_status_notifier; -- cgit v1.2.3 From 697b9a8714cb4631fd0526b3c78955d5422c24ba Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 12 Jun 2018 11:51:35 +0100 Subject: drm/i915: Make closing request flush mandatory For symmetry, simplicity and ensuring the request is always truly idle upon its completion, always emit the closing flush prior to emitting the request breadcrumb. 
Previously, we would only emit the flush if we had started a user batch, but this just leaves all the other paths open to speculation (do they affect the GPU caches or not?) With mm switching, a key requirement is that the GPU is flushed and invalidated before hand, so for absolute safety, we want that closing flush be mandatory. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20180612105135.4459-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 4 ++-- drivers/gpu/drm/i915/i915_gem_context.c | 9 +-------- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 4 ++-- drivers/gpu/drm/i915/i915_request.c | 18 ++---------------- drivers/gpu/drm/i915/i915_request.h | 4 +--- drivers/gpu/drm/i915/selftests/huge_pages.c | 2 +- drivers/gpu/drm/i915/selftests/i915_gem_coherency.c | 4 ++-- drivers/gpu/drm/i915/selftests/i915_gem_context.c | 4 ++-- drivers/gpu/drm/i915/selftests/i915_request.c | 2 +- drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 16 ++++++++-------- drivers/gpu/drm/i915/selftests/intel_lrc.c | 2 +- drivers/gpu/drm/i915/selftests/intel_workarounds.c | 2 +- 12 files changed, 24 insertions(+), 47 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 93efd92362db..8dd4d35655af 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3213,7 +3213,7 @@ void i915_gem_reset(struct drm_i915_private *dev_priv, rq = i915_request_alloc(engine, dev_priv->kernel_context); if (!IS_ERR(rq)) - __i915_request_add(rq, false); + i915_request_add(rq); } } @@ -5332,7 +5332,7 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915) if (engine->init_context) err = engine->init_context(rq); - __i915_request_add(rq, true); + i915_request_add(rq); if (err) goto err_active; } diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 
b2c7ac1b074d..ef6ea4bcd773 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -700,14 +700,7 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915) i915_timeline_sync_set(rq->timeline, &prev->fence); } - /* - * Force a flush after the switch to ensure that all rendering - * and operations prior to switching to the kernel context hits - * memory. This should be guaranteed by the previous request, - * but an extra layer of paranoia before we declare the system - * idle (on suspend etc) is advisable! - */ - __i915_request_add(rq, true); + i915_request_add(rq); } return 0; diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 2d2eb3075960..60dc2a865f5f 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -921,7 +921,7 @@ static void reloc_gpu_flush(struct reloc_cache *cache) i915_gem_object_unpin_map(cache->rq->batch->obj); i915_gem_chipset_flush(cache->rq->i915); - __i915_request_add(cache->rq, true); + i915_request_add(cache->rq); cache->rq = NULL; } @@ -2438,7 +2438,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, trace_i915_request_queue(eb.request, eb.batch_flags); err = eb_submit(&eb); err_request: - __i915_request_add(eb.request, err == 0); + i915_request_add(eb.request); add_to_client(eb.request, file); if (fences) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 9092f5464c24..e1dbb544046f 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1018,14 +1018,13 @@ i915_request_await_object(struct i915_request *to, * request is not being tracked for completion but the work itself is * going to happen on the hardware. This would be a Bad Thing(tm). 
*/ -void __i915_request_add(struct i915_request *request, bool flush_caches) +void i915_request_add(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; struct i915_timeline *timeline = request->timeline; struct intel_ring *ring = request->ring; struct i915_request *prev; u32 *cs; - int err; GEM_TRACE("%s fence %llx:%d\n", engine->name, request->fence.context, request->fence.seqno); @@ -1046,20 +1045,7 @@ void __i915_request_add(struct i915_request *request, bool flush_caches) * know that it is time to use that space up. */ request->reserved_space = 0; - - /* - * Emit any outstanding flushes - execbuf can fail to emit the flush - * after having emitted the batchbuffer command. Hence we need to fix - * things up similar to emitting the lazy request. The difference here - * is that the flush _must_ happen before the next request, no matter - * what. - */ - if (flush_caches) { - err = engine->emit_flush(request, EMIT_FLUSH); - - /* Not allowed to fail! */ - WARN(err, "engine->emit_flush() failed: %d!\n", err); - } + engine->emit_flush(request, EMIT_FLUSH); /* * Record the position of the start of the breadcrumb so that diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 0e9aba53d0e4..7ee220ded9c9 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -253,9 +253,7 @@ int i915_request_await_object(struct i915_request *to, int i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence); -void __i915_request_add(struct i915_request *rq, bool flush_caches); -#define i915_request_add(rq) \ - __i915_request_add(rq, false) +void i915_request_add(struct i915_request *rq); void __i915_request_submit(struct i915_request *request); void i915_request_submit(struct i915_request *request); diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c index 7846ea4a99bc..fbe4324116d7 100644 --- 
a/drivers/gpu/drm/i915/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/selftests/huge_pages.c @@ -1003,7 +1003,7 @@ static int gpu_write(struct i915_vma *vma, reservation_object_unlock(vma->resv); err_request: - __i915_request_add(rq, err == 0); + i915_request_add(rq); return err; } diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c index 340a98c0c804..a4900091ae3d 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c @@ -199,7 +199,7 @@ static int gpu_set(struct drm_i915_gem_object *obj, cs = intel_ring_begin(rq, 4); if (IS_ERR(cs)) { - __i915_request_add(rq, false); + i915_request_add(rq); i915_vma_unpin(vma); return PTR_ERR(cs); } @@ -229,7 +229,7 @@ static int gpu_set(struct drm_i915_gem_object *obj, reservation_object_add_excl_fence(obj->resv, &rq->fence); reservation_object_unlock(obj->resv); - __i915_request_add(rq, true); + i915_request_add(rq); return 0; } diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index 708e8d721448..836f1af8b833 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -182,12 +182,12 @@ static int gpu_fill(struct drm_i915_gem_object *obj, reservation_object_add_excl_fence(obj->resv, &rq->fence); reservation_object_unlock(obj->resv); - __i915_request_add(rq, true); + i915_request_add(rq); return 0; err_request: - __i915_request_add(rq, false); + i915_request_add(rq); err_batch: i915_vma_unpin(batch); err_vma: diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index a3a89aadeccb..f5d00332bb31 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -466,7 +466,7 @@ empty_request(struct intel_engine_cs *engine, goto out_request; out_request: - 
__i915_request_add(request, err == 0); + i915_request_add(request); return err ? ERR_PTR(err) : request; } diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index 390a157b37c3..fe7d3190ebfe 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -245,7 +245,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine) err = emit_recurse_batch(h, rq); if (err) { - __i915_request_add(rq, false); + i915_request_add(rq); return ERR_PTR(err); } @@ -318,7 +318,7 @@ static int igt_hang_sanitycheck(void *arg) *h.batch = MI_BATCH_BUFFER_END; i915_gem_chipset_flush(i915); - __i915_request_add(rq, true); + i915_request_add(rq); timeout = i915_request_wait(rq, I915_WAIT_LOCKED, @@ -464,7 +464,7 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) } i915_request_get(rq); - __i915_request_add(rq, true); + i915_request_add(rq); mutex_unlock(&i915->drm.struct_mutex); if (!wait_until_running(&h, rq)) { @@ -742,7 +742,7 @@ static int __igt_reset_engines(struct drm_i915_private *i915, } i915_request_get(rq); - __i915_request_add(rq, true); + i915_request_add(rq); mutex_unlock(&i915->drm.struct_mutex); if (!wait_until_running(&h, rq)) { @@ -942,7 +942,7 @@ static int igt_wait_reset(void *arg) } i915_request_get(rq); - __i915_request_add(rq, true); + i915_request_add(rq); if (!wait_until_running(&h, rq)) { struct drm_printer p = drm_info_printer(i915->drm.dev); @@ -1037,7 +1037,7 @@ static int igt_reset_queue(void *arg) } i915_request_get(prev); - __i915_request_add(prev, true); + i915_request_add(prev); count = 0; do { @@ -1051,7 +1051,7 @@ static int igt_reset_queue(void *arg) } i915_request_get(rq); - __i915_request_add(rq, true); + i915_request_add(rq); /* * XXX We don't handle resetting the kernel context @@ -1184,7 +1184,7 @@ static int igt_handle_error(void *arg) } i915_request_get(rq); - __i915_request_add(rq, true); + 
i915_request_add(rq); if (!wait_until_running(&h, rq)) { struct drm_printer p = drm_info_printer(i915->drm.dev); diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c index 0b6da08c8cae..ea27c7cfbf96 100644 --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c +++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c @@ -155,7 +155,7 @@ spinner_create_request(struct spinner *spin, err = emit_recurse_batch(spin, rq, arbitration_command); if (err) { - __i915_request_add(rq, false); + i915_request_add(rq); return ERR_PTR(err); } diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c index f1cfb0fb6bea..e1ea2d2bedd2 100644 --- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c +++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c @@ -75,7 +75,7 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine) i915_gem_object_get(result); i915_gem_object_set_active_reference(result); - __i915_request_add(rq, true); + i915_request_add(rq); i915_vma_unpin(vma); return result; -- cgit v1.2.3 From 78796877c37cb2c3898c4bcd2a12238d83858287 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 27 Jun 2018 21:13:03 +0100 Subject: drm/i915: Move the irq_counter inside the spinlock Rather than have multiple locked instructions inside the notify_ring() irq handler, move them inside the spinlock and reduce their intrinsic locking. 
Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20180627201304.15817-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_irq.c | 4 ++-- drivers/gpu/drm/i915/i915_request.c | 4 ++-- drivers/gpu/drm/i915/intel_breadcrumbs.c | 11 +++++++---- drivers/gpu/drm/i915/intel_ringbuffer.h | 2 +- 4 files changed, 12 insertions(+), 9 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 4f137eeaf395..c81b4c1877cc 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1162,8 +1162,6 @@ static void notify_ring(struct intel_engine_cs *engine) if (unlikely(!engine->breadcrumbs.irq_armed)) return; - atomic_inc(&engine->irq_count); - rcu_read_lock(); spin_lock(&engine->breadcrumbs.irq_lock); @@ -1198,6 +1196,8 @@ static void notify_ring(struct intel_engine_cs *engine) tsk = wait->tsk; } } + + engine->breadcrumbs.irq_count++; } else { if (engine->breadcrumbs.irq_armed) __intel_engine_disarm_breadcrumbs(engine); diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index e1dbb544046f..39b296878ba2 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1196,7 +1196,7 @@ static bool __i915_spin_request(const struct i915_request *rq, * takes to sleep on a request, on the order of a microsecond. */ - irq = atomic_read(&engine->irq_count); + irq = READ_ONCE(engine->breadcrumbs.irq_count); timeout_us += local_clock_us(&cpu); do { if (i915_seqno_passed(intel_engine_get_seqno(engine), seqno)) @@ -1208,7 +1208,7 @@ static bool __i915_spin_request(const struct i915_request *rq, * assume we won't see one in the near future but require * the engine->seqno_barrier() to fixup coherency. 
*/ - if (atomic_read(&engine->irq_count) != irq) + if (READ_ONCE(engine->breadcrumbs.irq_count) != irq) break; if (signal_pending_state(state, current)) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 86a987b8ac66..1db6ba7d926e 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -98,12 +98,14 @@ static void intel_breadcrumbs_hangcheck(struct timer_list *t) struct intel_engine_cs *engine = from_timer(engine, t, breadcrumbs.hangcheck); struct intel_breadcrumbs *b = &engine->breadcrumbs; + unsigned int irq_count; if (!b->irq_armed) return; - if (b->hangcheck_interrupts != atomic_read(&engine->irq_count)) { - b->hangcheck_interrupts = atomic_read(&engine->irq_count); + irq_count = READ_ONCE(b->irq_count); + if (b->hangcheck_interrupts != irq_count) { + b->hangcheck_interrupts = irq_count; mod_timer(&b->hangcheck, wait_timeout()); return; } @@ -272,13 +274,14 @@ static bool use_fake_irq(const struct intel_breadcrumbs *b) if (!test_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings)) return false; - /* Only start with the heavy weight fake irq timer if we have not + /* + * Only start with the heavy weight fake irq timer if we have not * seen any interrupts since enabling it the first time. If the * interrupts are still arriving, it means we made a mistake in our * engine->seqno_barrier(), a timing error that should be transient * and unlikely to reoccur. 
*/ - return atomic_read(&engine->irq_count) == b->hangcheck_interrupts; + return READ_ONCE(b->irq_count) == b->hangcheck_interrupts; } static void enable_fake_irq(struct intel_breadcrumbs *b) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index a0bc7a8222b4..44ac90ec540c 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -345,7 +345,6 @@ struct intel_engine_cs { struct drm_i915_gem_object *default_state; void *pinned_default_state; - atomic_t irq_count; unsigned long irq_posted; #define ENGINE_IRQ_BREADCRUMB 0 #define ENGINE_IRQ_EXECLIST 1 @@ -380,6 +379,7 @@ struct intel_engine_cs { unsigned int hangcheck_interrupts; unsigned int irq_enabled; + unsigned int irq_count; bool irq_armed : 1; I915_SELFTEST_DECLARE(bool mock : 1); -- cgit v1.2.3 From e3be4079ea91c8b7bcb97cf44889ec5663c55fb4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 27 Jun 2018 21:13:04 +0100 Subject: drm/i915: Only signal from interrupt when requested Avoid calling dma_fence_signal() from inside the interrupt if we haven't enabled signaling on the request. 
Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20180627201304.15817-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_irq.c | 8 ++++++-- drivers/gpu/drm/i915/i915_request.c | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.h | 5 ++--- 3 files changed, 9 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index c81b4c1877cc..4be56aec99b3 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1182,7 +1182,8 @@ static void notify_ring(struct intel_engine_cs *engine) if (i915_seqno_passed(seqno, wait->seqno)) { struct i915_request *waiter = wait->request; - if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, + if (waiter && + !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &waiter->fence.flags) && intel_wait_check_request(wait, waiter)) rq = i915_request_get(waiter); @@ -1205,8 +1206,11 @@ static void notify_ring(struct intel_engine_cs *engine) spin_unlock(&engine->breadcrumbs.irq_lock); if (rq) { - dma_fence_signal(&rq->fence); + spin_lock(&rq->lock); + dma_fence_signal_locked(&rq->fence); GEM_BUG_ON(!i915_request_completed(rq)); + spin_unlock(&rq->lock); + i915_request_put(rq); } diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 39b296878ba2..a2f7e9358450 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1285,7 +1285,7 @@ long i915_request_wait(struct i915_request *rq, if (flags & I915_WAIT_LOCKED) add_wait_queue(errq, &reset); - intel_wait_init(&wait, rq); + intel_wait_init(&wait); restart: do { diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 44ac90ec540c..78f01a35823a 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -928,11 +928,10 @@ static inline u32 intel_hws_preempt_done_address(struct 
intel_engine_cs *engine) /* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */ int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); -static inline void intel_wait_init(struct intel_wait *wait, - struct i915_request *rq) +static inline void intel_wait_init(struct intel_wait *wait) { wait->tsk = current; - wait->request = rq; + wait->request = NULL; } static inline void intel_wait_init_for_seqno(struct intel_wait *wait, u32 seqno) -- cgit v1.2.3 From 6dd7526f6f6c73961eecec8a4b9b717d414010f8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 6 Jul 2018 11:39:43 +0100 Subject: drm/i915: Export i915_request_skip() In the next patch, we will want to start skipping requests on failing to complete their payloads. So export the utility function currently used to make requests inoperable following a failed gpu reset. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180706103947.15919-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 25 +++---------------------- drivers/gpu/drm/i915/i915_request.c | 21 +++++++++++++++++++++ drivers/gpu/drm/i915/i915_request.h | 2 ++ 3 files changed, 26 insertions(+), 22 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index be63e8bbb6d2..2e05cf114083 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3085,25 +3085,6 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv) return err; } -static void skip_request(struct i915_request *request) -{ - void *vaddr = request->ring->vaddr; - u32 head; - - /* As this request likely depends on state from the lost - * context, clear out all the user operations leaving the - * breadcrumb at the end (so we get the fence notifications). 
- */ - head = request->head; - if (request->postfix < head) { - memset(vaddr + head, 0, request->ring->size - head); - head = 0; - } - memset(vaddr + head, 0, request->postfix - head); - - dma_fence_set_error(&request->fence, -EIO); -} - static void engine_skip_context(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; @@ -3118,10 +3099,10 @@ static void engine_skip_context(struct i915_request *request) list_for_each_entry_continue(request, &engine->timeline.requests, link) if (request->gem_context == hung_ctx) - skip_request(request); + i915_request_skip(request, -EIO); list_for_each_entry(request, &timeline->requests, link) - skip_request(request); + i915_request_skip(request, -EIO); spin_unlock(&timeline->lock); spin_unlock_irqrestore(&engine->timeline.lock, flags); @@ -3164,7 +3145,7 @@ i915_gem_reset_request(struct intel_engine_cs *engine, if (stalled) { i915_gem_context_mark_guilty(request->gem_context); - skip_request(request); + i915_request_skip(request, -EIO); /* If this context is now banned, skip all pending requests. */ if (i915_gem_context_is_banned(request->gem_context)) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index a2f7e9358450..7ae08b68121e 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1013,6 +1013,27 @@ i915_request_await_object(struct i915_request *to, return ret; } +void i915_request_skip(struct i915_request *rq, int error) +{ + void *vaddr = rq->ring->vaddr; + u32 head; + + GEM_BUG_ON(!IS_ERR_VALUE((long)error)); + dma_fence_set_error(&rq->fence, error); + + /* + * As this request likely depends on state from the lost + * context, clear out all the user operations leaving the + * breadcrumb at the end (so we get the fence notifications). 
+ */ + head = rq->infix; + if (rq->postfix < head) { + memset(vaddr + head, 0, rq->ring->size - head); + head = 0; + } + memset(vaddr + head, 0, rq->postfix - head); +} + /* * NB: This function is not allowed to fail. Doing so would mean the the * request is not being tracked for completion but the work itself is diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 7ee220ded9c9..a355a081485f 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -258,6 +258,8 @@ void i915_request_add(struct i915_request *rq); void __i915_request_submit(struct i915_request *request); void i915_request_submit(struct i915_request *request); +void i915_request_skip(struct i915_request *request, int error); + void __i915_request_unsubmit(struct i915_request *request); void i915_request_unsubmit(struct i915_request *request); -- cgit v1.2.3 From 890fd185d53037d05d10a0825950c4b038e39d4a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 6 Jul 2018 22:07:10 +0100 Subject: drm/i915: Replace nested subclassing with explicit subclasses In the next patch, we will want a third distinct class of timeline that may overlap with the current pair of client and engine timeline classes. Rather than use the ad hoc markup of SINGLE_DEPTH_NESTING, initialise the different timeline classes with an explicit subclass. 
Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180706210710.16251-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 2 +- drivers/gpu/drm/i915/i915_request.c | 2 +- drivers/gpu/drm/i915/i915_timeline.h | 2 ++ drivers/gpu/drm/i915/intel_engine_cs.c | 1 + drivers/gpu/drm/i915/selftests/mock_engine.c | 2 ++ 5 files changed, 7 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 2e05cf114083..1a9dab302433 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3095,7 +3095,7 @@ static void engine_skip_context(struct i915_request *request) GEM_BUG_ON(timeline == &engine->timeline); spin_lock_irqsave(&engine->timeline.lock, flags); - spin_lock_nested(&timeline->lock, SINGLE_DEPTH_NESTING); + spin_lock(&timeline->lock); list_for_each_entry_continue(request, &engine->timeline.requests, link) if (request->gem_context == hung_ctx) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 7ae08b68121e..3248369dbcfb 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -503,7 +503,7 @@ static void move_to_timeline(struct i915_request *request, GEM_BUG_ON(request->timeline == &request->engine->timeline); lockdep_assert_held(&request->engine->timeline.lock); - spin_lock_nested(&request->timeline->lock, SINGLE_DEPTH_NESTING); + spin_lock(&request->timeline->lock); list_move_tail(&request->link, &timeline->requests); spin_unlock(&request->timeline->lock); } diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h index dc2a4632faa7..a2c2c3ab5fb0 100644 --- a/drivers/gpu/drm/i915/i915_timeline.h +++ b/drivers/gpu/drm/i915/i915_timeline.h @@ -37,6 +37,8 @@ struct i915_timeline { u32 seqno; spinlock_t lock; +#define TIMELINE_CLIENT 0 /* default subclass */ 
+#define TIMELINE_ENGINE 1 /** * List of breadcrumbs associated with GPU requests currently diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index e2f562853aee..0ac497275a51 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -483,6 +483,7 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine) void intel_engine_setup_common(struct intel_engine_cs *engine) { i915_timeline_init(engine->i915, &engine->timeline, engine->name); + lockdep_set_subclass(&engine->timeline.lock, TIMELINE_ENGINE); intel_engine_init_execlist(engine); intel_engine_init_hangcheck(engine); diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index c2a0451336cf..22a73da45ad5 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -200,6 +200,8 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, engine->base.submit_request = mock_submit_request; i915_timeline_init(i915, &engine->base.timeline, engine->base.name); + lockdep_set_subclass(&engine->base.timeline.lock, TIMELINE_ENGINE); + intel_engine_init_breadcrumbs(&engine->base); engine->base.breadcrumbs.mock = true; /* prevent touching HW for irqs */ -- cgit v1.2.3 From ec625fb932bb057e2d3c2ed28eee56a827385ab8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 9 Jul 2018 13:20:42 +0100 Subject: drm/i915: Provide a timeout to i915_gem_wait_for_idle() Usually we have no idea about the upper bound we need to wait to catch up with userspace when idling the device, but in a few situations we know the system was idle beforehand and can provide a short timeout in order to very quickly catch a failure, long before hangcheck kicks in. In the following patches, we will use the timeout to curtail two overly long waits, where we know we can expect the GPU to complete within a reasonable time or declare it broken. 
In particular, with a broken GPU we expect it to fail during the initial GPU setup where we do a couple of context switches to record the defaults. This is a task that takes a few milliseconds even on the slowest of devices, but we may have to wait 60s for hangcheck to give in and declare the machine inoperable. This is a case where any gpu hang is unacceptable, both from a timeliness and practical standpoint. The other improvement is that in selftests, we do not need to arm an independent timer to inject a wedge, as we can just limit the timeout on the wait directly. v2: Include the timeout parameter in the trace. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Mika Kuoppala Cc: Tvrtko Ursulin Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20180709122044.7028-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 6 ++- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_gem.c | 48 ++++++++++++++--------- drivers/gpu/drm/i915/i915_gem_evict.c | 3 +- drivers/gpu/drm/i915/i915_gem_gtt.c | 2 +- drivers/gpu/drm/i915/i915_gem_shrinker.c | 11 ++++-- drivers/gpu/drm/i915/i915_perf.c | 4 +- drivers/gpu/drm/i915/i915_request.c | 6 ++- drivers/gpu/drm/i915/selftests/i915_gem_context.c | 4 +- drivers/gpu/drm/i915/selftests/i915_request.c | 4 +- drivers/gpu/drm/i915/selftests/igt_flush_test.c | 2 +- 11 files changed, 59 insertions(+), 33 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 544e5e7f011f..099f97ef2303 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -4105,7 +4105,8 @@ fault_irq_set(struct drm_i915_private *i915, err = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED | - I915_WAIT_INTERRUPTIBLE); + I915_WAIT_INTERRUPTIBLE, + MAX_SCHEDULE_TIMEOUT); if (err) goto err_unlock; @@ -4210,7 +4211,8 @@ i915_drop_caches_set(void *data, u64 val) if (val & DROP_ACTIVE) 
ret = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED); + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); if (val & DROP_RETIRE) i915_retire_requests(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c79008177708..fcb8f49a9b8a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3157,7 +3157,7 @@ void i915_gem_init_swizzling(struct drm_i915_private *dev_priv); void i915_gem_fini(struct drm_i915_private *dev_priv); void i915_gem_cleanup_engines(struct drm_i915_private *dev_priv); int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv, - unsigned int flags); + unsigned int flags, long timeout); int __must_check i915_gem_suspend(struct drm_i915_private *dev_priv); void i915_gem_suspend_late(struct drm_i915_private *dev_priv); void i915_gem_resume(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1a9dab302433..91d705a67d38 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2267,7 +2267,9 @@ static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) /* Attempt to reap some mmap space from dead objects */ do { - err = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE); + err = i915_gem_wait_for_idle(dev_priv, + I915_WAIT_INTERRUPTIBLE, + MAX_SCHEDULE_TIMEOUT); if (err) break; @@ -3742,14 +3744,14 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) return ret; } -static int wait_for_timeline(struct i915_timeline *tl, unsigned int flags) +static long wait_for_timeline(struct i915_timeline *tl, + unsigned int flags, long timeout) { struct i915_request *rq; - long ret; rq = i915_gem_active_get_unlocked(&tl->last_request); if (!rq) - return 0; + return timeout; /* * "Race-to-idle". 
@@ -3763,10 +3765,10 @@ static int wait_for_timeline(struct i915_timeline *tl, unsigned int flags) if (flags & I915_WAIT_FOR_IDLE_BOOST) gen6_rps_boost(rq, NULL); - ret = i915_request_wait(rq, flags, MAX_SCHEDULE_TIMEOUT); + timeout = i915_request_wait(rq, flags, timeout); i915_request_put(rq); - return ret < 0 ? ret : 0; + return timeout; } static int wait_for_engines(struct drm_i915_private *i915) @@ -3782,10 +3784,12 @@ static int wait_for_engines(struct drm_i915_private *i915) return 0; } -int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) +int i915_gem_wait_for_idle(struct drm_i915_private *i915, + unsigned int flags, long timeout) { - GEM_TRACE("flags=%x (%s)\n", - flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked"); + GEM_TRACE("flags=%x (%s), timeout=%ld%s\n", + flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked", + timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : ""); /* If the device is asleep, we have no requests outstanding */ if (!READ_ONCE(i915->gt.awake)) @@ -3798,9 +3802,9 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) lockdep_assert_held(&i915->drm.struct_mutex); list_for_each_entry(tl, &i915->gt.timelines, link) { - err = wait_for_timeline(tl, flags); - if (err) - return err; + timeout = wait_for_timeline(tl, flags, timeout); + if (timeout < 0) + return timeout; } err = wait_for_engines(i915); @@ -3812,12 +3816,13 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) } else { struct intel_engine_cs *engine; enum intel_engine_id id; - int err; for_each_engine(engine, i915, id) { - err = wait_for_timeline(&engine->timeline, flags); - if (err) - return err; + struct i915_timeline *tl = &engine->timeline; + + timeout = wait_for_timeline(tl, flags, timeout); + if (timeout < 0) + return timeout; } } @@ -5052,7 +5057,8 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv) ret = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE | 
I915_WAIT_LOCKED | - I915_WAIT_FOR_IDLE_BOOST); + I915_WAIT_FOR_IDLE_BOOST, + MAX_SCHEDULE_TIMEOUT); if (ret && ret != -EIO) goto err_unlock; @@ -5356,7 +5362,9 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915) if (err) goto err_active; - err = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED); + err = i915_gem_wait_for_idle(i915, + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); if (err) goto err_active; @@ -5421,7 +5429,9 @@ err_active: if (WARN_ON(i915_gem_switch_to_kernel_context(i915))) goto out_ctx; - if (WARN_ON(i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED))) + if (WARN_ON(i915_gem_wait_for_idle(i915, + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT))) goto out_ctx; i915_gem_contexts_lost(i915); diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 54814a196ee4..02b83a5ed96c 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -69,7 +69,8 @@ static int ggtt_flush(struct drm_i915_private *i915) err = i915_gem_wait_for_idle(i915, I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED); + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); if (err) return err; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 4db31aaaa9d3..210baf3c8d11 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2793,7 +2793,7 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj, struct i915_ggtt *ggtt = &dev_priv->ggtt; if (unlikely(ggtt->do_idle_maps)) { - if (i915_gem_wait_for_idle(dev_priv, 0)) { + if (i915_gem_wait_for_idle(dev_priv, 0, MAX_SCHEDULE_TIMEOUT)) { DRM_ERROR("Failed to wait for idle; VT'd may hang.\n"); /* Wait a bit, in hopes it avoids the hang */ udelay(10); diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 55e84e71f526..c61f5b80fee3 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ 
-172,7 +172,9 @@ i915_gem_shrink(struct drm_i915_private *i915, * we will free as much as we can and hope to get a second chance. */ if (flags & I915_SHRINK_ACTIVE) - i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED); + i915_gem_wait_for_idle(i915, + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); trace_i915_gem_shrink(i915, target, flags); i915_retire_requests(i915); @@ -392,7 +394,8 @@ shrinker_lock_uninterruptible(struct drm_i915_private *i915, bool *unlock, unsigned long timeout = jiffies + msecs_to_jiffies_timeout(timeout_ms); do { - if (i915_gem_wait_for_idle(i915, 0) == 0 && + if (i915_gem_wait_for_idle(i915, + 0, MAX_SCHEDULE_TIMEOUT) == 0 && shrinker_lock(i915, unlock)) break; @@ -466,7 +469,9 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr return NOTIFY_DONE; /* Force everything onto the inactive lists */ - ret = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED); + ret = i915_gem_wait_for_idle(i915, + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); if (ret) goto out; diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 447407fee3b8..6bf10952c724 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1836,7 +1836,9 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, * So far the best way to work around this issue seems to be draining * the GPU from any submitted work. 
*/ - ret = i915_gem_wait_for_idle(dev_priv, wait_flags); + ret = i915_gem_wait_for_idle(dev_priv, + wait_flags, + MAX_SCHEDULE_TIMEOUT); if (ret) goto out; diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 3248369dbcfb..5c2c93cbab12 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -206,7 +206,8 @@ static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) /* Carefully retire all requests without writing to the rings */ ret = i915_gem_wait_for_idle(i915, I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED); + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); if (ret) return ret; @@ -735,7 +736,8 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) /* Ratelimit ourselves to prevent oom from malicious clients */ ret = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED | - I915_WAIT_INTERRUPTIBLE); + I915_WAIT_INTERRUPTIBLE, + MAX_SCHEDULE_TIMEOUT); if (ret) goto err_unreserve; diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index 5fbe15f4effd..ab2590242033 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -478,7 +478,9 @@ static int __igt_switch_to_kernel_context(struct drm_i915_private *i915, } } - err = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED); + err = i915_gem_wait_for_idle(i915, + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); if (err) return err; diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index 43995fc3534d..c4aac6141e04 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -286,7 +286,9 @@ static int begin_live_test(struct live_test *t, t->func = func; t->name = name; - err = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED); + err = i915_gem_wait_for_idle(i915, + I915_WAIT_LOCKED, + 
MAX_SCHEDULE_TIMEOUT); if (err) { pr_err("%s(%s): failed to idle before, with err=%d!", func, name, err); diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c index 0d06f559243f..09ab037ce803 100644 --- a/drivers/gpu/drm/i915/selftests/igt_flush_test.c +++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c @@ -64,7 +64,7 @@ int igt_flush_test(struct drm_i915_private *i915, unsigned int flags) } wedge_on_timeout(&w, i915, HZ) - i915_gem_wait_for_idle(i915, flags); + i915_gem_wait_for_idle(i915, flags, MAX_SCHEDULE_TIMEOUT); return i915_terminally_wedged(&i915->gpu_error) ? -EIO : 0; } -- cgit v1.2.3