From 3b2cc8ab64bf5d643ac9a917653e0cb6fce32ffc Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Wed, 11 Jun 2014 16:17:16 +0100 Subject: drm/i915/bdw: Do not write the Semaphore Sync Registers in GEN8+ These do not exist anymore. Spotted while reading through intel_ringbuffer.c Signed-off-by: Oscar Mateo Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 279488addf3f..0eaaaec78bae 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1746,14 +1746,15 @@ int intel_ring_cacheline_align(struct intel_engine_cs *ring) void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno) { - struct drm_i915_private *dev_priv = ring->dev->dev_private; + struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; BUG_ON(ring->outstanding_lazy_seqno); - if (INTEL_INFO(ring->dev)->gen >= 6) { + if (INTEL_INFO(dev)->gen == 6 || INTEL_INFO(dev)->gen == 7) { I915_WRITE(RING_SYNC_0(ring->mmio_base), 0); I915_WRITE(RING_SYNC_1(ring->mmio_base), 0); - if (HAS_VEBOX(ring->dev)) + if (HAS_VEBOX(dev)) I915_WRITE(RING_SYNC_2(ring->mmio_base), 0); } -- cgit v1.2.3 From 24f3a8cf7766e52a087904b4346794c7b410f957 Mon Sep 17 00:00:00 2001 From: Akash Goel Date: Tue, 17 Jun 2014 10:59:42 +0530 Subject: drm/i915: Added write-enable pte bit supportt This adds support for a write-enable bit in the entry of GTT. This is handled via a read-only flag in the GEM buffer object which is then used to see how to set the bit when writing the GTT entries. Currently by default the Batch buffer & Ring buffers are marked as read only. v2: Moved the pte override code for read-only bit to 'byt_pte_encode'. (Chris) Fixed the issue of leaving 'gt_old_ro' as unused. (Chris) v3: Removed the 'gt_old_ro' field, now setting RO bit only for Ring Buffers(Daniel). v4: Added a new 'flags' parameter to all the pte(gen6) encode & insert_entries functions, in lieu of overloading the cache_level enum (Daniel). v5: Removed the superfluous VLV check & changed the definition location of PTE_READ_ONLY flag (Imre) Reviewed-by: Imre Deak Signed-off-by: Akash Goel Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 6 +++++ drivers/gpu/drm/i915/i915_gem_gtt.c | 48 ++++++++++++++++++++------------- drivers/gpu/drm/i915/i915_gem_gtt.h | 5 ++-- drivers/gpu/drm/i915/intel_ringbuffer.c | 3 +++ 4 files changed, 41 insertions(+), 21 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index faef035f5a49..c5b02159898b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1660,6 +1660,12 @@ struct drm_i915_gem_object { unsigned int pin_mappable:1; unsigned int pin_display:1; + /* + * Is the object to be mapped as read-only to the GPU + * Only honoured if hardware has relevant pte bit + */ + unsigned long gt_ro:1; + /* * Is the GPU currently using a fence to access this buffer, */ diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 6f5d5a1e7c32..743ddfcf2696 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -116,7 +116,7 @@ static inline gen8_ppgtt_pde_t gen8_pde_encode(struct drm_device *dev, static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr, enum i915_cache_level level, - bool valid) + bool valid, u32 unused) { gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0; pte |= GEN6_PTE_ADDR_ENCODE(addr); @@ -138,7 +138,7 @@ static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr, static gen6_gtt_pte_t ivb_pte_encode(dma_addr_t addr, enum i915_cache_level level, - bool valid) + bool valid, u32 unused) { gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0; pte |= GEN6_PTE_ADDR_ENCODE(addr); @@ -162,7 +162,7 @@ static gen6_gtt_pte_t ivb_pte_encode(dma_addr_t addr, static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr, enum i915_cache_level level, - bool valid) + bool valid, u32 flags) { gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0; pte |= GEN6_PTE_ADDR_ENCODE(addr); @@ -170,7 +170,8 @@ static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr, /* Mark the page as writeable. Other platforms don't have a * setting for read-only/writable, so this matches that behavior. */ - pte |= BYT_PTE_WRITEABLE; + if (!(flags & PTE_READ_ONLY)) + pte |= BYT_PTE_WRITEABLE; if (level != I915_CACHE_NONE) pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES; @@ -180,7 +181,7 @@ static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr, static gen6_gtt_pte_t hsw_pte_encode(dma_addr_t addr, enum i915_cache_level level, - bool valid) + bool valid, u32 unused) { gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0; pte |= HSW_PTE_ADDR_ENCODE(addr); @@ -193,7 +194,7 @@ static gen6_gtt_pte_t hsw_pte_encode(dma_addr_t addr, static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr, enum i915_cache_level level, - bool valid) + bool valid, u32 unused) { gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0; pte |= HSW_PTE_ADDR_ENCODE(addr); @@ -307,7 +308,7 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm, static void gen8_ppgtt_insert_entries(struct i915_address_space *vm, struct sg_table *pages, uint64_t start, - enum i915_cache_level cache_level) + enum i915_cache_level cache_level, u32 unused) { struct i915_hw_ppgtt *ppgtt = container_of(vm, struct i915_hw_ppgtt, base); @@ -645,7 +646,7 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) uint32_t pd_entry; int pte, pde; - scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true); + scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true, 0); pd_addr = (gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm + ppgtt->pd_offset / sizeof(gen6_gtt_pte_t); @@ -947,7 +948,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm, unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES; unsigned last_pte, i; - scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true); + scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true, 0); while (num_entries) { last_pte = first_pte + num_entries; @@ -970,7 +971,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm, static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, struct sg_table *pages, uint64_t start, - enum i915_cache_level cache_level) + enum i915_cache_level cache_level, u32 flags) { struct i915_hw_ppgtt *ppgtt = container_of(vm, struct i915_hw_ppgtt, base); @@ -987,7 +988,8 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, pt_vaddr[act_pte] = vm->pte_encode(sg_page_iter_dma_address(&sg_iter), - cache_level, true); + cache_level, true, flags); + if (++act_pte == I915_PPGTT_PT_ENTRIES) { kunmap_atomic(pt_vaddr); pt_vaddr = NULL; @@ -1224,8 +1226,12 @@ ppgtt_bind_vma(struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags) { + /* Currently applicable only to VLV */ + if (vma->obj->gt_ro) + flags |= PTE_READ_ONLY; + vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start, - cache_level); + cache_level, flags); } static void ppgtt_unbind_vma(struct i915_vma *vma) @@ -1400,7 +1406,7 @@ static inline void gen8_set_pte(void __iomem *addr, gen8_gtt_pte_t pte) static void gen8_ggtt_insert_entries(struct i915_address_space *vm, struct sg_table *st, uint64_t start, - enum i915_cache_level level) + enum i915_cache_level level, u32 unused) { struct drm_i915_private *dev_priv = vm->dev->dev_private; unsigned first_entry = start >> PAGE_SHIFT; @@ -1446,7 +1452,7 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm, static void gen6_ggtt_insert_entries(struct i915_address_space *vm, struct sg_table *st, uint64_t start, - enum i915_cache_level level) + enum i915_cache_level level, u32 flags) { struct drm_i915_private *dev_priv = vm->dev->dev_private; unsigned first_entry = start >> PAGE_SHIFT; @@ -1458,7 +1464,7 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) { addr = sg_page_iter_dma_address(&sg_iter); - iowrite32(vm->pte_encode(addr, level, true), >t_entries[i]); + iowrite32(vm->pte_encode(addr, level, true, flags), >t_entries[i]); i++; } @@ -1470,7 +1476,7 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, */ if (i != 0) WARN_ON(readl(>t_entries[i-1]) != - vm->pte_encode(addr, level, true)); + vm->pte_encode(addr, level, true, flags)); /* This next bit makes the above posting read even more important. We * want to flush the TLBs only after we're certain all the PTE updates @@ -1524,7 +1530,7 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm, first_entry, num_entries, max_entries)) num_entries = max_entries; - scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, use_scratch); + scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, use_scratch, 0); for (i = 0; i < num_entries; i++) iowrite32(scratch_pte, >t_base[i]); @@ -1573,6 +1579,10 @@ static void ggtt_bind_vma(struct i915_vma *vma, struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_object *obj = vma->obj; + /* Currently applicable only to VLV */ + if (obj->gt_ro) + flags |= PTE_READ_ONLY; + /* If there is no aliasing PPGTT, or the caller needs a global mapping, * or we have a global mapping already but the cacheability flags have * changed, set the global PTEs. @@ -1589,7 +1599,7 @@ static void ggtt_bind_vma(struct i915_vma *vma, (cache_level != obj->cache_level)) { vma->vm->insert_entries(vma->vm, obj->pages, vma->node.start, - cache_level); + cache_level, flags); obj->has_global_gtt_mapping = 1; } } @@ -1601,7 +1611,7 @@ static void ggtt_bind_vma(struct i915_vma *vma, appgtt->base.insert_entries(&appgtt->base, vma->obj->pages, vma->node.start, - cache_level); + cache_level, flags); vma->obj->has_aliasing_ppgtt_mapping = 1; } } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 1b96a06be3cb..8d6f7c18c404 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -154,6 +154,7 @@ struct i915_vma { void (*unbind_vma)(struct i915_vma *vma); /* Map an object into an address space with the given cache flags. */ #define GLOBAL_BIND (1<<0) +#define PTE_READ_ONLY (1<<1) void (*bind_vma)(struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags); @@ -197,7 +198,7 @@ struct i915_address_space { /* FIXME: Need a more generic return type */ gen6_gtt_pte_t (*pte_encode)(dma_addr_t addr, enum i915_cache_level level, - bool valid); /* Create a valid PTE */ + bool valid, u32 flags); /* Create a valid PTE */ void (*clear_range)(struct i915_address_space *vm, uint64_t start, uint64_t length, @@ -205,7 +206,7 @@ struct i915_address_space { void (*insert_entries)(struct i915_address_space *vm, struct sg_table *st, uint64_t start, - enum i915_cache_level cache_level); + enum i915_cache_level cache_level, u32 flags); void (*cleanup)(struct i915_address_space *vm); }; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 0eaaaec78bae..b96edaf905de 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1397,6 +1397,9 @@ static int allocate_ring_buffer(struct intel_engine_cs *ring) if (obj == NULL) return -ENOMEM; + /* mark ring buffers as read-only from GPU side by default */ + obj->gt_ro = 1; + ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE); if (ret) goto err_unref; -- cgit v1.2.3 From 9c33baa6b3bbb01c1a88dceba986b20e6642cf31 Mon Sep 17 00:00:00 2001 From: Konrad Zapalowicz Date: Thu, 19 Jun 2014 19:07:15 +0200 Subject: drivers/i915: Fix unnoticed failure of init_ring_common() This commit add check for return value of init_ring_common() in the init_render_ring(). Now, when failure is detected the error code is propagated to the caller instead of being ignored. Signed-off-by: Konrad Zapalowicz Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index b96edaf905de..2faef2605e97 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -604,6 +604,8 @@ static int init_render_ring(struct intel_engine_cs *ring) struct drm_device *dev = ring->dev; struct drm_i915_private *dev_priv = dev->dev_private; int ret = init_ring_common(ring); + if (ret) + return ret; /* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */ if (INTEL_INFO(dev)->gen >= 4 && INTEL_INFO(dev)->gen < 7) -- cgit v1.2.3 From f7b64236854b0862e9967090f227cf08d81c71f4 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Tue, 1 Jul 2014 02:41:36 -0700 Subject: drm/i915: Fix VCS2's ring name. It just fix a typo. v2: removing underscore to let this like all other ring names (Oscar) Cc: Oscar Mateo Reviewed-by (v1): Ben Widawsky Signed-off-by: Rodrigo Vivi Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 2faef2605e97..22c2b9a217b8 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2224,7 +2224,7 @@ int intel_init_bsd2_ring_buffer(struct drm_device *dev) return -EINVAL; } - ring->name = "bds2_ring"; + ring->name = "bsd2 ring"; ring->id = VCS2; ring->write_tail = ring_write_tail; -- cgit v1.2.3 From 707d9cf9935cfba2d62dd80dc01dc5dc4530d4ca Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Mon, 30 Jun 2014 09:53:36 -0700 Subject: drm/i915: gen specific ring init Gen8 has already had some differentiation with how it handles rings. Semaphores bring yet more differences, and now is as good a time as any to do the split. Also, since gen8 doesn't actually use semaphores up until this point, put the proper "NULL" values in for the mbox info. v2: v1 had a stale commit message v3: Move everything in the is_semaphore_enabled() check v4: VCS2 rebase Remove double assignment of signal in render ring (Ville) v5: Adding missed VCS2 signal init on gen8+ (Rodrigo) Reviewed-by: Rodrigo Vivi Signed-off-by: Ben Widawsky Signed-off-by: Rodrigo Vivi Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 227 +++++++++++++++++++++----------- 1 file changed, 151 insertions(+), 76 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 22c2b9a217b8..7ca1b946d8de 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -729,7 +729,11 @@ gen6_add_request(struct intel_engine_cs *ring) { int ret; - ret = ring->semaphore.signal(ring, 4); + if (ring->semaphore.signal) + ret = ring->semaphore.signal(ring, 4); + else + ret = intel_ring_begin(ring, 4); + if (ret) return ret; @@ -1952,40 +1956,59 @@ int intel_init_render_ring_buffer(struct drm_device *dev) ring->id = RCS; ring->mmio_base = RENDER_RING_BASE; - if (INTEL_INFO(dev)->gen >= 6) { + if (INTEL_INFO(dev)->gen >= 8) { + ring->add_request = gen6_add_request; + ring->flush = gen8_render_ring_flush; + ring->irq_get = gen8_ring_get_irq; + ring->irq_put = gen8_ring_put_irq; + ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT; + ring->get_seqno = gen6_ring_get_seqno; + ring->set_seqno = ring_set_seqno; + if (i915_semaphore_is_enabled(dev)) { + ring->semaphore.sync_to = gen6_ring_sync; + ring->semaphore.signal = gen6_signal; + ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_INVALID; + ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_INVALID; + ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_INVALID; + ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_INVALID; + ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID; + ring->semaphore.mbox.signal[RCS] = GEN6_NOSYNC; + ring->semaphore.mbox.signal[VCS] = GEN6_NOSYNC; + ring->semaphore.mbox.signal[BCS] = GEN6_NOSYNC; + ring->semaphore.mbox.signal[VECS] = GEN6_NOSYNC; + ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC; + } + } else if (INTEL_INFO(dev)->gen >= 6) { ring->add_request = gen6_add_request; ring->flush = gen7_render_ring_flush; if (INTEL_INFO(dev)->gen == 6) ring->flush = gen6_render_ring_flush; - if (INTEL_INFO(dev)->gen >= 8) { - ring->flush = gen8_render_ring_flush; - ring->irq_get = gen8_ring_get_irq; - ring->irq_put = gen8_ring_put_irq; - } else { - ring->irq_get = gen6_ring_get_irq; - ring->irq_put = gen6_ring_put_irq; - } + ring->irq_get = gen6_ring_get_irq; + ring->irq_put = gen6_ring_put_irq; ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT; ring->get_seqno = gen6_ring_get_seqno; ring->set_seqno = ring_set_seqno; - ring->semaphore.sync_to = gen6_ring_sync; - ring->semaphore.signal = gen6_signal; - /* - * The current semaphore is only applied on pre-gen8 platform. - * And there is no VCS2 ring on the pre-gen8 platform. So the - * semaphore between RCS and VCS2 is initialized as INVALID. - * Gen8 will initialize the sema between VCS2 and RCS later. - */ - ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_INVALID; - ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_RV; - ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_RB; - ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_RVE; - ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID; - ring->semaphore.mbox.signal[RCS] = GEN6_NOSYNC; - ring->semaphore.mbox.signal[VCS] = GEN6_VRSYNC; - ring->semaphore.mbox.signal[BCS] = GEN6_BRSYNC; - ring->semaphore.mbox.signal[VECS] = GEN6_VERSYNC; - ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC; + if (i915_semaphore_is_enabled(dev)) { + ring->semaphore.sync_to = gen6_ring_sync; + ring->semaphore.signal = gen6_signal; + /* + * The current semaphore is only applied on pre-gen8 + * platform. And there is no VCS2 ring on the pre-gen8 + * platform. So the semaphore between RCS and VCS2 is + * initialized as INVALID. Gen8 will initialize the + * sema between VCS2 and RCS later. + */ + ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_INVALID; + ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_RV; + ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_RB; + ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_RVE; + ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID; + ring->semaphore.mbox.signal[RCS] = GEN6_NOSYNC; + ring->semaphore.mbox.signal[VCS] = GEN6_VRSYNC; + ring->semaphore.mbox.signal[BCS] = GEN6_BRSYNC; + ring->semaphore.mbox.signal[VECS] = GEN6_VERSYNC; + ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC; + } } else if (IS_GEN5(dev)) { ring->add_request = pc_render_add_request; ring->flush = gen4_render_ring_flush; @@ -2013,6 +2036,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev) ring->irq_enable_mask = I915_USER_INTERRUPT; } ring->write_tail = ring_write_tail; + if (IS_HASWELL(dev)) ring->dispatch_execbuffer = hsw_ring_dispatch_execbuffer; else if (IS_GEN8(dev)) @@ -2163,31 +2187,49 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev) ring->irq_put = gen8_ring_put_irq; ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer; + if (i915_semaphore_is_enabled(dev)) { + ring->semaphore.sync_to = gen6_ring_sync; + ring->semaphore.signal = gen6_signal; + /* + * The current semaphore is only applied on + * pre-gen8 platform. And there is no VCS2 ring + * on the pre-gen8 platform. So the semaphore + * between VCS and VCS2 is initialized as + * INVALID. Gen8 will initialize the sema + * between VCS2 and VCS later. + */ + ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_INVALID; + ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_INVALID; + ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_INVALID; + ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_INVALID; + ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID; + ring->semaphore.mbox.signal[RCS] = GEN6_NOSYNC; + ring->semaphore.mbox.signal[VCS] = GEN6_NOSYNC; + ring->semaphore.mbox.signal[BCS] = GEN6_NOSYNC; + ring->semaphore.mbox.signal[VECS] = GEN6_NOSYNC; + ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC; + } } else { ring->irq_enable_mask = GT_BSD_USER_INTERRUPT; ring->irq_get = gen6_ring_get_irq; ring->irq_put = gen6_ring_put_irq; ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer; + if (i915_semaphore_is_enabled(dev)) { + ring->semaphore.sync_to = gen6_ring_sync; + ring->semaphore.signal = gen6_signal; + ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_VR; + ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_INVALID; + ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_VB; + ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_VVE; + ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID; + ring->semaphore.mbox.signal[RCS] = GEN6_RVSYNC; + ring->semaphore.mbox.signal[VCS] = GEN6_NOSYNC; + ring->semaphore.mbox.signal[BCS] = GEN6_BVSYNC; + ring->semaphore.mbox.signal[VECS] = GEN6_VEVSYNC; + ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC; + } } - ring->semaphore.sync_to = gen6_ring_sync; - ring->semaphore.signal = gen6_signal; - /* - * The current semaphore is only applied on pre-gen8 platform. - * And there is no VCS2 ring on the pre-gen8 platform. So the - * semaphore between VCS and VCS2 is initialized as INVALID. - * Gen8 will initialize the sema between VCS2 and VCS later. - */ - ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_VR; - ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_INVALID; - ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_VB; - ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_VVE; - ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID; - ring->semaphore.mbox.signal[RCS] = GEN6_RVSYNC; - ring->semaphore.mbox.signal[VCS] = GEN6_NOSYNC; - ring->semaphore.mbox.signal[BCS] = GEN6_BVSYNC; - ring->semaphore.mbox.signal[VECS] = GEN6_VEVSYNC; - ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC; } else { ring->mmio_base = BSD_RING_BASE; ring->flush = bsd_ring_flush; @@ -2283,30 +2325,47 @@ int intel_init_blt_ring_buffer(struct drm_device *dev) ring->irq_get = gen8_ring_get_irq; ring->irq_put = gen8_ring_put_irq; ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer; + if (i915_semaphore_is_enabled(dev)) { + ring->semaphore.sync_to = gen6_ring_sync; + ring->semaphore.signal = gen6_signal; + ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_INVALID; + ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_INVALID; + ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_INVALID; + ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_INVALID; + ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID; + ring->semaphore.mbox.signal[RCS] = GEN6_NOSYNC; + ring->semaphore.mbox.signal[VCS] = GEN6_NOSYNC; + ring->semaphore.mbox.signal[BCS] = GEN6_NOSYNC; + ring->semaphore.mbox.signal[VECS] = GEN6_NOSYNC; + ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC; + } } else { ring->irq_enable_mask = GT_BLT_USER_INTERRUPT; ring->irq_get = gen6_ring_get_irq; ring->irq_put = gen6_ring_put_irq; ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer; + if (i915_semaphore_is_enabled(dev)) { + ring->semaphore.signal = gen6_signal; + ring->semaphore.sync_to = gen6_ring_sync; + /* + * The current semaphore is only applied on pre-gen8 + * platform. And there is no VCS2 ring on the pre-gen8 + * platform. So the semaphore between BCS and VCS2 is + * initialized as INVALID. Gen8 will initialize the + * sema between BCS and VCS2 later. + */ + ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_BR; + ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_BV; + ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_INVALID; + ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_BVE; + ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID; + ring->semaphore.mbox.signal[RCS] = GEN6_RBSYNC; + ring->semaphore.mbox.signal[VCS] = GEN6_VBSYNC; + ring->semaphore.mbox.signal[BCS] = GEN6_NOSYNC; + ring->semaphore.mbox.signal[VECS] = GEN6_VEBSYNC; + ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC; + } } - ring->semaphore.sync_to = gen6_ring_sync; - ring->semaphore.signal = gen6_signal; - /* - * The current semaphore is only applied on pre-gen8 platform. And - * there is no VCS2 ring on the pre-gen8 platform. So the semaphore - * between BCS and VCS2 is initialized as INVALID. - * Gen8 will initialize the sema between BCS and VCS2 later. - */ - ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_BR; - ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_BV; - ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_INVALID; - ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_BVE; - ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID; - ring->semaphore.mbox.signal[RCS] = GEN6_RBSYNC; - ring->semaphore.mbox.signal[VCS] = GEN6_VBSYNC; - ring->semaphore.mbox.signal[BCS] = GEN6_NOSYNC; - ring->semaphore.mbox.signal[VECS] = GEN6_VEBSYNC; - ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC; ring->init = init_ring_common; return intel_init_ring_buffer(dev, ring); @@ -2333,24 +2392,40 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev) ring->irq_get = gen8_ring_get_irq; ring->irq_put = gen8_ring_put_irq; ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer; + if (i915_semaphore_is_enabled(dev)) { + ring->semaphore.sync_to = gen6_ring_sync; + ring->semaphore.signal = gen6_signal; + ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_INVALID; + ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_INVALID; + ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_INVALID; + ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_INVALID; + ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID; + ring->semaphore.mbox.signal[RCS] = GEN6_NOSYNC; + ring->semaphore.mbox.signal[VCS] = GEN6_NOSYNC; + ring->semaphore.mbox.signal[BCS] = GEN6_NOSYNC; + ring->semaphore.mbox.signal[VECS] = GEN6_NOSYNC; + ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC; + } } else { ring->irq_enable_mask = PM_VEBOX_USER_INTERRUPT; ring->irq_get = hsw_vebox_get_irq; ring->irq_put = hsw_vebox_put_irq; ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer; + if (i915_semaphore_is_enabled(dev)) { + ring->semaphore.sync_to = gen6_ring_sync; + ring->semaphore.signal = gen6_signal; + ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_VER; + ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_VEV; + ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_VEB; + ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_INVALID; + ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID; + ring->semaphore.mbox.signal[RCS] = GEN6_RVESYNC; + ring->semaphore.mbox.signal[VCS] = GEN6_VVESYNC; + ring->semaphore.mbox.signal[BCS] = GEN6_BVESYNC; + ring->semaphore.mbox.signal[VECS] = GEN6_NOSYNC; + ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC; + } } - ring->semaphore.sync_to = gen6_ring_sync; - ring->semaphore.signal = gen6_signal; - ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_VER; - ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_VEV; - ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_VEB; - ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_INVALID; - ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID; - ring->semaphore.mbox.signal[RCS] = GEN6_RVESYNC; - ring->semaphore.mbox.signal[VCS] = GEN6_VVESYNC; - ring->semaphore.mbox.signal[BCS] = GEN6_BVESYNC; - ring->semaphore.mbox.signal[VECS] = GEN6_NOSYNC; - ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC; ring->init = init_ring_common; return intel_init_ring_buffer(dev, ring); -- cgit v1.2.3 From a1444b79fec2e95b705c5505c1f609306538dc49 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Mon, 30 Jun 2014 09:53:35 -0700 Subject: drm/i915: Make semaphore updates more precise MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the ring mask we now have an easy way to know the number of rings in the system, and therefore can accurately predict the number of dwords to emit for semaphore signalling. This was not possible (easily) previously. There should be no functional impact, simply fewer instructions emitted. While we're here, simply do the round up to 2 instead of the fancier rounding we did before, which rounding up per mbox, ie 4. This also allows us to drop the unnecessary MI_NOOP, so not really 4, 3. v2: Use 3 dwords instead of 4 (Ville) Do the proper calculation to get the number of dwords to emit (Ville) Conditionally set .sync_to when semaphores are enabled (Ville) v3: Rebased on VCS2 Replace hweight_long with hweight32 (Ville) v4: Pull out the accidentally squashed hunk from the next patch after rebase (Daniel). v5: Fix conflict after rebase (Rodrigo) Reviewed-by: Rodrigo Vivi Reviewed-by: Ville Syrjälä (v1) Signed-off-by: Ben Widawsky Signed-off-by: Rodrigo Vivi Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 7ca1b946d8de..e9c8814bed36 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -679,23 +679,16 @@ static int gen6_signal(struct intel_engine_cs *signaller, struct drm_device *dev = signaller->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_engine_cs *useless; - int i, ret; + int i, ret, num_rings; - /* NB: In order to be able to do semaphore MBOX updates for varying - * number of rings, it's easiest if we round up each individual update - * to a multiple of 2 (since ring updates must always be a multiple of - * 2) even though the actual update only requires 3 dwords. - */ -#define MBOX_UPDATE_DWORDS 4 - if (i915_semaphore_is_enabled(dev)) - num_dwords += ((I915_NUM_RINGS-1) * MBOX_UPDATE_DWORDS); - else - return intel_ring_begin(signaller, num_dwords); +#define MBOX_UPDATE_DWORDS 3 + num_rings = hweight32(INTEL_INFO(dev)->ring_mask); + num_dwords += round_up((num_rings-1) * MBOX_UPDATE_DWORDS, 2); +#undef MBOX_UPDATE_DWORDS ret = intel_ring_begin(signaller, num_dwords); if (ret) return ret; -#undef MBOX_UPDATE_DWORDS for_each_ring(useless, dev_priv, i) { u32 mbox_reg = signaller->semaphore.mbox.signal[i]; @@ -703,15 +696,13 @@ static int gen6_signal(struct intel_engine_cs *signaller, intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1)); intel_ring_emit(signaller, mbox_reg); intel_ring_emit(signaller, signaller->outstanding_lazy_seqno); - intel_ring_emit(signaller, MI_NOOP); - } else { - intel_ring_emit(signaller, MI_NOOP); - intel_ring_emit(signaller, MI_NOOP); - intel_ring_emit(signaller, MI_NOOP); - intel_ring_emit(signaller, MI_NOOP); } } + /* If num_dwords was rounded, make sure the tail pointer is correct */ + if (num_rings % 2 == 0) + intel_ring_emit(signaller, MI_NOOP); + return 0; } -- cgit v1.2.3 From 3e78998a588c64d6447849deabf630e837505a24 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Mon, 30 Jun 2014 09:53:37 -0700 Subject: drm/i915/bdw: implement semaphore signal Semaphore signalling works similarly to previous GENs with the exception that the per ring mailboxes no longer exist. Instead you must define your own space, somewhere in the GTT. The comments in the code define the layout I've opted for, which should be fairly future proof. Ie. I tried to define offsets in abstract terms (NUM_RINGS, seqno size, etc). NOTE: If one wanted to move this to the HWSP they could. I've decided one 4k object would be easier to deal with, and provide potential wins with cache locality, but that's all speculative. v2: Update the macro to not need the other ring's ring->id (Chris) Update the comment to use the correct formula (Chris) v3: Move the macros the ringbuffer.h to prevent churn in next patch (Ville) v4: Fixed compilation rebase conflict commit 1ec9e26ddab06459e89a890431b2de064c5d1056 Author: Daniel Vetter Date: Fri Feb 14 14:01:11 2014 +0100 drm/i915: Consolidate binding parameters into flags v5: VCS2 rebase Replace hweight_long with hweight32 v6 (Rodrigo): * Add missed VC2 gen8 ring signal init * fixing conflicst on rebase * minor fixes on address table * remove WARN_ON Reviewed-by: Rodrigo Vivi Signed-off-by: Ben Widawsky Signed-off-by: Rodrigo Vivi [danvet: s/BUG_ON/WARN_ON/] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_reg.h | 5 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 185 +++++++++++++++++++------------- drivers/gpu/drm/i915/intel_ringbuffer.h | 78 ++++++++++++-- 4 files changed, 189 insertions(+), 80 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 204de4032a4a..7ef67b656e2f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1375,6 +1375,7 @@ struct drm_i915_private { struct pci_dev *bridge_dev; struct intel_engine_cs ring[I915_NUM_RINGS]; + struct drm_i915_gem_object *semaphore_obj; uint32_t last_seqno, next_seqno; drm_dma_handle_t *status_page_dmah; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 66151ff1535d..218ca7b7bb7c 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -240,7 +240,7 @@ #define MI_DISPLAY_FLIP_IVB_SPRITE_B (3 << 19) #define MI_DISPLAY_FLIP_IVB_PLANE_C (4 << 19) #define MI_DISPLAY_FLIP_IVB_SPRITE_C (5 << 19) -#define MI_SEMAPHORE_MBOX MI_INSTR(0x16, 1) /* gen6+ */ +#define MI_SEMAPHORE_MBOX MI_INSTR(0x16, 1) /* gen6, gen7 */ #define MI_SEMAPHORE_GLOBAL_GTT (1<<22) #define MI_SEMAPHORE_UPDATE (1<<21) #define MI_SEMAPHORE_COMPARE (1<<20) @@ -266,6 +266,8 @@ #define MI_RESTORE_EXT_STATE_EN (1<<2) #define MI_FORCE_RESTORE (1<<1) #define MI_RESTORE_INHIBIT (1<<0) +#define MI_SEMAPHORE_SIGNAL MI_INSTR(0x1b, 0) /* GEN8+ */ +#define MI_SEMAPHORE_TARGET(engine) ((engine)<<15) #define MI_STORE_DWORD_IMM MI_INSTR(0x20, 1) #define MI_MEM_VIRTUAL (1 << 22) /* 965+ only */ #define MI_STORE_DWORD_INDEX MI_INSTR(0x21, 1) @@ -360,6 +362,7 @@ #define PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE (1<<10) /* GM45+ only */ #define PIPE_CONTROL_INDIRECT_STATE_DISABLE (1<<9) #define PIPE_CONTROL_NOTIFY (1<<8) +#define PIPE_CONTROL_FLUSH_ENABLE (1<<7) /* gen7+ */ #define PIPE_CONTROL_VF_CACHE_INVALIDATE (1<<4) #define PIPE_CONTROL_CONST_CACHE_INVALIDATE (1<<3) #define PIPE_CONTROL_STATE_CACHE_INVALIDATE (1<<2) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index e9c8814bed36..e1aac25fc84c 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -660,6 +660,13 @@ static int init_render_ring(struct intel_engine_cs *ring) static void render_ring_cleanup(struct intel_engine_cs *ring) { struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + + if (dev_priv->semaphore_obj) { + i915_gem_object_ggtt_unpin(dev_priv->semaphore_obj); + drm_gem_object_unreference(&dev_priv->semaphore_obj->base); + dev_priv->semaphore_obj = NULL; + } if (ring->scratch.obj == NULL) return; @@ -673,6 +680,80 @@ static void render_ring_cleanup(struct intel_engine_cs *ring) ring->scratch.obj = NULL; } +static int gen8_rcs_signal(struct intel_engine_cs *signaller, + unsigned int num_dwords) +{ +#define MBOX_UPDATE_DWORDS 8 + struct drm_device *dev = signaller->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_engine_cs *waiter; + int i, ret, num_rings; + + num_rings = hweight32(INTEL_INFO(dev)->ring_mask); + num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS; +#undef MBOX_UPDATE_DWORDS + + ret = intel_ring_begin(signaller, num_dwords); + if (ret) + return ret; + + for_each_ring(waiter, dev_priv, i) { + u64 gtt_offset = signaller->semaphore.signal_ggtt[i]; + if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID) + continue; + + intel_ring_emit(signaller, GFX_OP_PIPE_CONTROL(6)); + intel_ring_emit(signaller, PIPE_CONTROL_GLOBAL_GTT_IVB | + PIPE_CONTROL_QW_WRITE | + PIPE_CONTROL_FLUSH_ENABLE); + intel_ring_emit(signaller, lower_32_bits(gtt_offset)); + intel_ring_emit(signaller, upper_32_bits(gtt_offset)); + intel_ring_emit(signaller, signaller->outstanding_lazy_seqno); + intel_ring_emit(signaller, 0); + intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL | + MI_SEMAPHORE_TARGET(waiter->id)); + intel_ring_emit(signaller, 0); + } + + return 0; +} + +static int gen8_xcs_signal(struct intel_engine_cs *signaller, + unsigned int num_dwords) +{ +#define MBOX_UPDATE_DWORDS 6 + struct drm_device *dev = signaller->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_engine_cs *waiter; + int i, ret, num_rings; + + num_rings = hweight32(INTEL_INFO(dev)->ring_mask); + num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS; +#undef MBOX_UPDATE_DWORDS + + ret = intel_ring_begin(signaller, num_dwords); + if (ret) + return ret; + + for_each_ring(waiter, dev_priv, i) { + u64 gtt_offset = signaller->semaphore.signal_ggtt[i]; + if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID) + continue; + + intel_ring_emit(signaller, (MI_FLUSH_DW + 1) | + MI_FLUSH_DW_OP_STOREDW); + intel_ring_emit(signaller, lower_32_bits(gtt_offset) | + MI_FLUSH_DW_USE_GTT); + intel_ring_emit(signaller, upper_32_bits(gtt_offset)); + intel_ring_emit(signaller, signaller->outstanding_lazy_seqno); + intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL | + MI_SEMAPHORE_TARGET(waiter->id)); + intel_ring_emit(signaller, 0); + } + + return 0; +} + static int gen6_signal(struct intel_engine_cs *signaller, unsigned int num_dwords) { @@ -1942,12 +2023,30 @@ int intel_init_render_ring_buffer(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; struct intel_engine_cs *ring = &dev_priv->ring[RCS]; + struct drm_i915_gem_object *obj; + int ret; ring->name = "render ring"; ring->id = RCS; ring->mmio_base = RENDER_RING_BASE; if (INTEL_INFO(dev)->gen >= 8) { + if (i915_semaphore_is_enabled(dev)) { + obj = i915_gem_alloc_object(dev, 4096); + if (obj == NULL) { + DRM_ERROR("Failed to allocate semaphore bo. Disabling semaphores\n"); + i915.semaphores = 0; + } else { + i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); + ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_NONBLOCK); + if (ret != 0) { + drm_gem_object_unreference(&obj->base); + DRM_ERROR("Failed to pin semaphore bo. Disabling semaphores\n"); + i915.semaphores = 0; + } else + dev_priv->semaphore_obj = obj; + } + } ring->add_request = gen6_add_request; ring->flush = gen8_render_ring_flush; ring->irq_get = gen8_ring_get_irq; @@ -1956,18 +2055,10 @@ int intel_init_render_ring_buffer(struct drm_device *dev) ring->get_seqno = gen6_ring_get_seqno; ring->set_seqno = ring_set_seqno; if (i915_semaphore_is_enabled(dev)) { + WARN_ON(!dev_priv->semaphore_obj); ring->semaphore.sync_to = gen6_ring_sync; - ring->semaphore.signal = gen6_signal; - ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_INVALID; - ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_INVALID; - ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_INVALID; - ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_INVALID; - ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID; - ring->semaphore.mbox.signal[RCS] = GEN6_NOSYNC; - ring->semaphore.mbox.signal[VCS] = GEN6_NOSYNC; - ring->semaphore.mbox.signal[BCS] = GEN6_NOSYNC; - ring->semaphore.mbox.signal[VECS] = GEN6_NOSYNC; - ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC; + ring->semaphore.signal = gen8_rcs_signal; + GEN8_RING_SEMAPHORE_INIT; } } else if (INTEL_INFO(dev)->gen >= 6) { ring->add_request = gen6_add_request; @@ -2045,9 +2136,6 @@ int intel_init_render_ring_buffer(struct drm_device *dev) /* Workaround batchbuffer to combat CS tlb bug. */ if (HAS_BROKEN_CS_TLB(dev)) { - struct drm_i915_gem_object *obj; - int ret; - obj = i915_gem_alloc_object(dev, I830_BATCH_LIMIT); if (obj == NULL) { DRM_ERROR("Failed to allocate batch bo\n"); @@ -2180,25 +2268,8 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev) gen8_ring_dispatch_execbuffer; if (i915_semaphore_is_enabled(dev)) { ring->semaphore.sync_to = gen6_ring_sync; - ring->semaphore.signal = gen6_signal; - /* - * The current semaphore is only applied on - * pre-gen8 platform. And there is no VCS2 ring - * on the pre-gen8 platform. So the semaphore - * between VCS and VCS2 is initialized as - * INVALID. Gen8 will initialize the sema - * between VCS2 and VCS later. - */ - ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_INVALID; - ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_INVALID; - ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_INVALID; - ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_INVALID; - ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID; - ring->semaphore.mbox.signal[RCS] = GEN6_NOSYNC; - ring->semaphore.mbox.signal[VCS] = GEN6_NOSYNC; - ring->semaphore.mbox.signal[BCS] = GEN6_NOSYNC; - ring->semaphore.mbox.signal[VECS] = GEN6_NOSYNC; - ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC; + ring->semaphore.signal = gen8_xcs_signal; + GEN8_RING_SEMAPHORE_INIT; } } else { ring->irq_enable_mask = GT_BSD_USER_INTERRUPT; @@ -2273,24 +2344,10 @@ int intel_init_bsd2_ring_buffer(struct drm_device *dev) ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer; ring->semaphore.sync_to = gen6_ring_sync; - ring->semaphore.signal = gen6_signal; - /* - * The current semaphore is only applied on the pre-gen8. And there - * is no bsd2 ring on the pre-gen8. So now the semaphore_register - * between VCS2 and other ring is initialized as invalid. - * Gen8 will initialize the sema between VCS2 and other ring later. - */ - ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_INVALID; - ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_INVALID; - ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_INVALID; - ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_INVALID; - ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID; - ring->semaphore.mbox.signal[RCS] = GEN6_NOSYNC; - ring->semaphore.mbox.signal[VCS] = GEN6_NOSYNC; - ring->semaphore.mbox.signal[BCS] = GEN6_NOSYNC; - ring->semaphore.mbox.signal[VECS] = GEN6_NOSYNC; - ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC; - + if (i915_semaphore_is_enabled(dev)) { + ring->semaphore.signal = gen8_xcs_signal; + GEN8_RING_SEMAPHORE_INIT; + } ring->init = init_ring_common; return intel_init_ring_buffer(dev, ring); @@ -2318,17 +2375,8 @@ int intel_init_blt_ring_buffer(struct drm_device *dev) ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer; if (i915_semaphore_is_enabled(dev)) { ring->semaphore.sync_to = gen6_ring_sync; - ring->semaphore.signal = gen6_signal; - ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_INVALID; - ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_INVALID; - ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_INVALID; - ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_INVALID; - ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID; - ring->semaphore.mbox.signal[RCS] = GEN6_NOSYNC; - ring->semaphore.mbox.signal[VCS] = GEN6_NOSYNC; - ring->semaphore.mbox.signal[BCS] = GEN6_NOSYNC; - ring->semaphore.mbox.signal[VECS] = GEN6_NOSYNC; - ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC; + ring->semaphore.signal = gen8_xcs_signal; + GEN8_RING_SEMAPHORE_INIT; } } else { ring->irq_enable_mask = GT_BLT_USER_INTERRUPT; @@ -2385,17 +2433,8 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev) ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer; if (i915_semaphore_is_enabled(dev)) { ring->semaphore.sync_to = gen6_ring_sync; - ring->semaphore.signal = gen6_signal; - ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_INVALID; - ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_INVALID; - ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_INVALID; - ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_INVALID; - ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID; - ring->semaphore.mbox.signal[RCS] = GEN6_NOSYNC; - ring->semaphore.mbox.signal[VCS] = GEN6_NOSYNC; - ring->semaphore.mbox.signal[BCS] = GEN6_NOSYNC; - ring->semaphore.mbox.signal[VECS] = GEN6_NOSYNC; - ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC; + ring->semaphore.signal = gen8_xcs_signal; + GEN8_RING_SEMAPHORE_INIT; } } else { ring->irq_enable_mask = PM_VEBOX_USER_INTERRUPT; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 2e8b51645bcb..17ef9cdf1b8d 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -40,6 +40,32 @@ struct intel_hw_status_page { #define I915_READ_MODE(ring) I915_READ(RING_MI_MODE((ring)->mmio_base)) #define I915_WRITE_MODE(ring, val) I915_WRITE(RING_MI_MODE((ring)->mmio_base), val) +/* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to + * do the writes, and that must have qw aligned offsets, simply pretend it's 8b. + */ +#define i915_semaphore_seqno_size sizeof(uint64_t) +#define GEN8_SIGNAL_OFFSET(__ring, to) \ + (i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj) + \ + ((__ring)->id * I915_NUM_RINGS * i915_semaphore_seqno_size) + \ + (i915_semaphore_seqno_size * (to))) + +#define GEN8_WAIT_OFFSET(__ring, from) \ + (i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj) + \ + ((from) * I915_NUM_RINGS * i915_semaphore_seqno_size) + \ + (i915_semaphore_seqno_size * (__ring)->id)) + +#define GEN8_RING_SEMAPHORE_INIT do { \ + if (!dev_priv->semaphore_obj) { \ + break; \ + } \ + ring->semaphore.signal_ggtt[RCS] = GEN8_SIGNAL_OFFSET(ring, RCS); \ + ring->semaphore.signal_ggtt[VCS] = GEN8_SIGNAL_OFFSET(ring, VCS); \ + ring->semaphore.signal_ggtt[BCS] = GEN8_SIGNAL_OFFSET(ring, BCS); \ + ring->semaphore.signal_ggtt[VECS] = GEN8_SIGNAL_OFFSET(ring, VECS); \ + ring->semaphore.signal_ggtt[VCS2] = GEN8_SIGNAL_OFFSET(ring, VCS2); \ + ring->semaphore.signal_ggtt[ring->id] = MI_SEMAPHORE_SYNC_INVALID; \ + } while(0) + enum intel_ring_hangcheck_action { HANGCHECK_IDLE = 0, HANGCHECK_WAIT, @@ -127,15 +153,55 @@ struct intel_engine_cs { #define I915_DISPATCH_PINNED 0x2 void (*cleanup)(struct intel_engine_cs *ring); + /* GEN8 signal/wait table - never trust comments! + * signal to signal to signal to signal to signal to + * RCS VCS BCS VECS VCS2 + * -------------------------------------------------------------------- + * RCS | NOP (0x00) | VCS (0x08) | BCS (0x10) | VECS (0x18) | VCS2 (0x20) | + * |------------------------------------------------------------------- + * VCS | RCS (0x28) | NOP (0x30) | BCS (0x38) | VECS (0x40) | VCS2 (0x48) | + * |------------------------------------------------------------------- + * BCS | RCS (0x50) | VCS (0x58) | NOP (0x60) | VECS (0x68) | VCS2 (0x70) | + * |------------------------------------------------------------------- + * VECS | RCS (0x78) | VCS (0x80) | BCS (0x88) | NOP (0x90) | VCS2 (0x98) | + * |------------------------------------------------------------------- + * VCS2 | RCS (0xa0) | VCS (0xa8) | BCS (0xb0) | VECS (0xb8) | NOP (0xc0) | + * |------------------------------------------------------------------- + * + * Generalization: + * f(x, y) := (x->id * NUM_RINGS * seqno_size) + (seqno_size * y->id) + * ie. transpose of g(x, y) + * + * sync from sync from sync from sync from sync from + * RCS VCS BCS VECS VCS2 + * -------------------------------------------------------------------- + * RCS | NOP (0x00) | VCS (0x28) | BCS (0x50) | VECS (0x78) | VCS2 (0xa0) | + * |------------------------------------------------------------------- + * VCS | RCS (0x08) | NOP (0x30) | BCS (0x58) | VECS (0x80) | VCS2 (0xa8) | + * |------------------------------------------------------------------- + * BCS | RCS (0x10) | VCS (0x38) | NOP (0x60) | VECS (0x88) | VCS2 (0xb0) | + * |------------------------------------------------------------------- + * VECS | RCS (0x18) | VCS (0x40) | BCS (0x68) | NOP (0x90) | VCS2 (0xb8) | + * |------------------------------------------------------------------- + * VCS2 | RCS (0x20) | VCS (0x48) | BCS (0x70) | VECS (0x98) | NOP (0xc0) | + * |------------------------------------------------------------------- + * + * Generalization: + * g(x, y) := (y->id * NUM_RINGS * seqno_size) + (seqno_size * x->id) + * ie. transpose of f(x, y) + */ struct { u32 sync_seqno[I915_NUM_RINGS-1]; - struct { - /* our mbox written by others */ - u32 wait[I915_NUM_RINGS]; - /* mboxes this ring signals to */ - u32 signal[I915_NUM_RINGS]; - } mbox; + union { + struct { + /* our mbox written by others */ + u32 wait[I915_NUM_RINGS]; + /* mboxes this ring signals to */ + u32 signal[I915_NUM_RINGS]; + } mbox; + u64 signal_ggtt[I915_NUM_RINGS]; + }; /* AKA wait() */ int (*sync_to)(struct intel_engine_cs *ring, -- cgit v1.2.3 From 5ee426ca135c91e071d23853e876b957526841c5 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Mon, 30 Jun 2014 09:53:38 -0700 Subject: drm/i915/bdw: implement semaphore wait Semaphore waits use a new instruction, MI_SEMAPHORE_WAIT. The seqno to wait on is all well defined by the table in the previous patch. There is nothing else different from previous GEN's semaphore synchronization code. v2: Update macros to not require the other ring's ring->id (Chris) v3: Add missing VCS2 gen8_ring_wait init besides s/ring_buffer/engine_cs (Rodrigo) Reviewed-by: Rodrigo Vivi Signed-off-by: Ben Widawsky Signed-off-by: Rodrigo Vivi Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 3 +++ drivers/gpu/drm/i915/intel_ringbuffer.c | 35 ++++++++++++++++++++++++++++----- 2 files changed, 33 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 218ca7b7bb7c..8c0f70de9fd7 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -268,6 +268,9 @@ #define MI_RESTORE_INHIBIT (1<<0) #define MI_SEMAPHORE_SIGNAL MI_INSTR(0x1b, 0) /* GEN8+ */ #define MI_SEMAPHORE_TARGET(engine) ((engine)<<15) +#define MI_SEMAPHORE_WAIT MI_INSTR(0x1c, 2) /* GEN8+ */ +#define MI_SEMAPHORE_POLL (1<<15) +#define MI_SEMAPHORE_SAD_GTE_SDD (1<<12) #define MI_STORE_DWORD_IMM MI_INSTR(0x20, 1) #define MI_MEM_VIRTUAL (1 << 22) /* 965+ only */ #define MI_STORE_DWORD_INDEX MI_INSTR(0x21, 1) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index e1aac25fc84c..88196808cc40 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -832,6 +832,31 @@ static inline bool i915_gem_has_seqno_wrapped(struct drm_device *dev, * @signaller - ring which has, or will signal * @seqno - seqno which the waiter will block on */ + +static int +gen8_ring_sync(struct intel_engine_cs *waiter, + struct intel_engine_cs *signaller, + u32 seqno) +{ + struct drm_i915_private *dev_priv = waiter->dev->dev_private; + int ret; + + ret = intel_ring_begin(waiter, 4); + if (ret) + return ret; + + intel_ring_emit(waiter, MI_SEMAPHORE_WAIT | + MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_SAD_GTE_SDD); + intel_ring_emit(waiter, seqno); + intel_ring_emit(waiter, + lower_32_bits(GEN8_WAIT_OFFSET(waiter, signaller->id))); + intel_ring_emit(waiter, + upper_32_bits(GEN8_WAIT_OFFSET(waiter, signaller->id))); + intel_ring_advance(waiter); + return 0; +} + static int gen6_ring_sync(struct intel_engine_cs *waiter, struct intel_engine_cs *signaller, @@ -2056,7 +2081,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev) ring->set_seqno = ring_set_seqno; if (i915_semaphore_is_enabled(dev)) { WARN_ON(!dev_priv->semaphore_obj); - ring->semaphore.sync_to = gen6_ring_sync; + ring->semaphore.sync_to = gen8_ring_sync; ring->semaphore.signal = gen8_rcs_signal; GEN8_RING_SEMAPHORE_INIT; } @@ -2267,7 +2292,7 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev) ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer; if (i915_semaphore_is_enabled(dev)) { - ring->semaphore.sync_to = gen6_ring_sync; + ring->semaphore.sync_to = gen8_ring_sync; ring->semaphore.signal = gen8_xcs_signal; GEN8_RING_SEMAPHORE_INIT; } @@ -2343,8 +2368,8 @@ int intel_init_bsd2_ring_buffer(struct drm_device *dev) ring->irq_put = gen8_ring_put_irq; ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer; - ring->semaphore.sync_to = gen6_ring_sync; if (i915_semaphore_is_enabled(dev)) { + ring->semaphore.sync_to = gen8_ring_sync; ring->semaphore.signal = gen8_xcs_signal; GEN8_RING_SEMAPHORE_INIT; } @@ -2374,7 +2399,7 @@ int intel_init_blt_ring_buffer(struct drm_device *dev) ring->irq_put = gen8_ring_put_irq; ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer; if (i915_semaphore_is_enabled(dev)) { - ring->semaphore.sync_to = gen6_ring_sync; + ring->semaphore.sync_to = gen8_ring_sync; ring->semaphore.signal = gen8_xcs_signal; GEN8_RING_SEMAPHORE_INIT; } @@ -2432,7 +2457,7 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev) ring->irq_put = gen8_ring_put_irq; ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer; if (i915_semaphore_is_enabled(dev)) { - ring->semaphore.sync_to = gen6_ring_sync; + ring->semaphore.sync_to = gen8_ring_sync; ring->semaphore.signal = gen8_xcs_signal; GEN8_RING_SEMAPHORE_INIT; } -- cgit v1.2.3 From bae4fcd2c7b778a3f69e03e244221cd4932b8f67 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Mon, 30 Jun 2014 09:53:43 -0700 Subject: drm/i915/bdw: poll semaphores As Ville points out, it's possible/probable we don't actually need this. Potentially, this validates the letter of the spec, and not the spirit. Ville: > I discussed this on irc w/ Ben, and I was suggesting we don't need to > poll. Polling apparently can be used as a workaround for certain > hardware issues, but it looks like those issues shouldn't affect us, > for the momemnt at least. So my suggestion was to try w/o polling > first (since there could be some power cost to polling) and add the > poll bit if problems arise. Rodrigo: Spec suggests this as an W/A for GT3. However semaphores didn't worked in my BDW GT2 on Signal Mode. So pool mode is definitely needed. Reviewed-by: Rodrigo Vivi Tested-by: Rodrigo Vivi Signed-off-by: Ben Widawsky Signed-off-by: Rodrigo Vivi Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 88196808cc40..e18ed05dc0d5 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -847,6 +847,7 @@ gen8_ring_sync(struct intel_engine_cs *waiter, intel_ring_emit(waiter, MI_SEMAPHORE_WAIT | MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_POLL | MI_SEMAPHORE_SAD_GTE_SDD); intel_ring_emit(waiter, seqno); intel_ring_emit(waiter, -- cgit v1.2.3 From 2919d2913c515fcfef6d50a46aae31128d454ce9 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 3 Jul 2014 16:28:02 +0100 Subject: drm/i915: Extract ringbuffer destroy & generalize alloc to take a ringbuf More prep work: with Execlists, we are going to start creating a lot of extra ringbuffers soon, so these functions are handy. No functional changes. v2: rename allocate/destroy_ring_buffer to alloc/destroy_ringbuffer_obj because the name is more meaningful and to mirror a similar function in the context world: i915_gem_alloc_context_obj(). Change suggested by Brad Volkin. Reviewed-by: Jesse Barnes Signed-off-by: Oscar Mateo Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index e18ed05dc0d5..40b17f246a69 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1482,15 +1482,25 @@ static int init_phys_status_page(struct intel_engine_cs *ring) return 0; } -static int allocate_ring_buffer(struct intel_engine_cs *ring) +static void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf) +{ + if (!ringbuf->obj) + return; + + iounmap(ringbuf->virtual_start); + i915_gem_object_ggtt_unpin(ringbuf->obj); + drm_gem_object_unreference(&ringbuf->obj->base); + ringbuf->obj = NULL; +} + +static int intel_alloc_ringbuffer_obj(struct drm_device *dev, + struct intel_ringbuffer *ringbuf) { - struct drm_device *dev = ring->dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_ringbuffer *ringbuf = ring->buffer; struct drm_i915_gem_object *obj; int ret; - if (intel_ring_initialized(ring)) + if (ringbuf->obj) return 0; obj = NULL; @@ -1562,7 +1572,7 @@ static int intel_init_ring_buffer(struct drm_device *dev, goto error; } - ret = allocate_ring_buffer(ring); + ret = intel_alloc_ringbuffer_obj(dev, ringbuf); if (ret) { DRM_ERROR("Failed to allocate ringbuffer %s: %d\n", ring->name, ret); goto error; @@ -1603,11 +1613,7 @@ void intel_cleanup_ring_buffer(struct intel_engine_cs *ring) intel_stop_ring_buffer(ring); WARN_ON(!IS_GEN2(ring->dev) && (I915_READ_MODE(ring) & MODE_IDLE) == 0); - iounmap(ringbuf->virtual_start); - - i915_gem_object_ggtt_unpin(ringbuf->obj); - drm_gem_object_unreference(&ringbuf->obj->base); - ringbuf->obj = NULL; + intel_destroy_ringbuffer_obj(ringbuf); ring->preallocated_lazy_request = NULL; ring->outstanding_lazy_seqno = 0; -- cgit v1.2.3 From 64c58f2c48da85223f0db438fe4854bc3ef24353 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 3 Jul 2014 16:28:03 +0100 Subject: drm/i915: Generalize ring_space to take a ringbuf It's simple enough that it doesn't need to know anything about the engine. Trivial change. Reviewed-by: Jesse Barnes Signed-off-by: Oscar Mateo Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 40b17f246a69..738140c0f5ad 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -48,9 +48,8 @@ static inline int __ring_space(int head, int tail, int size) return space; } -static inline int ring_space(struct intel_engine_cs *ring) +static inline int ring_space(struct intel_ringbuffer *ringbuf) { - struct intel_ringbuffer *ringbuf = ring->buffer; return __ring_space(ringbuf->head & HEAD_ADDR, ringbuf->tail, ringbuf->size); } @@ -545,7 +544,7 @@ static int init_ring_common(struct intel_engine_cs *ring) else { ringbuf->head = I915_READ_HEAD(ring); ringbuf->tail = I915_READ_TAIL(ring) & TAIL_ADDR; - ringbuf->space = ring_space(ring); + ringbuf->space = ring_space(ringbuf); ringbuf->last_retired_head = -1; } @@ -1639,7 +1638,7 @@ static int intel_ring_wait_request(struct intel_engine_cs *ring, int n) ringbuf->head = ringbuf->last_retired_head; ringbuf->last_retired_head = -1; - ringbuf->space = ring_space(ring); + ringbuf->space = ring_space(ringbuf); if (ringbuf->space >= n) return 0; } @@ -1662,7 +1661,7 @@ static int intel_ring_wait_request(struct intel_engine_cs *ring, int n) ringbuf->head = ringbuf->last_retired_head; ringbuf->last_retired_head = -1; - ringbuf->space = ring_space(ring); + ringbuf->space = ring_space(ringbuf); return 0; } @@ -1691,7 +1690,7 @@ static int ring_wait_for_space(struct intel_engine_cs *ring, int n) trace_i915_ring_wait_begin(ring); do { ringbuf->head = I915_READ_HEAD(ring); - ringbuf->space = ring_space(ring); + ringbuf->space = ring_space(ringbuf); if (ringbuf->space >= n) { ret = 0; break; @@ -1743,7 +1742,7 @@ static int intel_wrap_ring_buffer(struct intel_engine_cs *ring) iowrite32(MI_NOOP, virt++); ringbuf->tail = 0; - ringbuf->space = ring_space(ring); + ringbuf->space = ring_space(ringbuf); return 0; } -- cgit v1.2.3 From 1f767e02d69f6a37a86aa94fd2c7e2528ae1161e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 3 Jul 2014 17:33:03 -0400 Subject: drm/i915: HWS must be in the mappable region for g33 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On g33, the documentation states "HWS_PGA: Format = Bits 28:12 of graphics memory address (bits 31:29 MBZ)." which translates to that the address of the HWS must be below 256MiB, which is conveniently the mappable aperture. This also appears to be true (but not documented as so) for gen4 and gen5. To generalise we force it into the low mappable region for all non-LLC platforms. If we locate the HWS at the top of the GTT the machine will hard hang during boot (fails on pnv, gm45, ilk and byt, but works on snb, ivb, hsw). v2: Add comments to explain why use PIN_MAPPABLE even though we have no intention of mapping the object. (Ville) Signed-off-by: Chris Wilson Cc: Ville Syrjälä Signed-off-by: Rodrigo Vivi Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 738140c0f5ad..599709e80a16 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1432,6 +1432,7 @@ static int init_status_page(struct intel_engine_cs *ring) struct drm_i915_gem_object *obj; if ((obj = ring->status_page.obj) == NULL) { + unsigned flags; int ret; obj = i915_gem_alloc_object(ring->dev, 4096); @@ -1444,7 +1445,20 @@ static int init_status_page(struct intel_engine_cs *ring) if (ret) goto err_unref; - ret = i915_gem_obj_ggtt_pin(obj, 4096, 0); + flags = 0; + if (!HAS_LLC(ring->dev)) + /* On g33, we cannot place HWS above 256MiB, so + * restrict its pinning to the low mappable arena. + * Though this restriction is not documented for + * gen4, gen5, or byt, they also behave similarly + * and hang if the HWS is placed at the top of the + * GTT. To generalise, it appears that all !llc + * platforms have issues with us placing the HWS + * above the mappable region (even though we never + * actualy map it). + */ + flags |= PIN_MAPPABLE; + ret = i915_gem_obj_ggtt_pin(obj, 4096, flags); if (ret) { err_unref: drm_gem_object_unreference(&obj->base); -- cgit v1.2.3 From 480c80338618867851659710d1a27c9cc85833d2 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 16 Jul 2014 09:49:40 +0200 Subject: drm/i915: Use genX_ prefix for gt irq enable/disable functions Traditionally we use genX_ for GT/render stuff and the codenames for display stuff. But the gt and pm interrupt handling functions on gen5/6+ stuck out as exceptions, so convert them. Looking at the diff this nicely realigns our ducks since almost all the callers are already platform-specific functions following the genX_ pattern. Spotted while reviewing some internal rps patches. No function change in this patch. Acked-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_irq.c | 24 ++++++++++++------------ drivers/gpu/drm/i915/intel_drv.h | 12 ++++++------ drivers/gpu/drm/i915/intel_pm.c | 4 ++-- drivers/gpu/drm/i915/intel_ringbuffer.c | 12 ++++++------ 4 files changed, 26 insertions(+), 26 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 7d61ca2a01df..dfe923a3cb92 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -182,12 +182,12 @@ static void ilk_update_gt_irq(struct drm_i915_private *dev_priv, POSTING_READ(GTIMR); } -void ilk_enable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask) +void gen5_enable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask) { ilk_update_gt_irq(dev_priv, mask, mask); } -void ilk_disable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask) +void gen5_disable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask) { ilk_update_gt_irq(dev_priv, mask, 0); } @@ -220,12 +220,12 @@ static void snb_update_pm_irq(struct drm_i915_private *dev_priv, } } -void snb_enable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask) +void gen6_enable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask) { snb_update_pm_irq(dev_priv, mask, mask); } -void snb_disable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask) +void gen6_disable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask) { snb_update_pm_irq(dev_priv, mask, 0); } @@ -278,12 +278,12 @@ static void bdw_update_pm_irq(struct drm_i915_private *dev_priv, } } -void bdw_enable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask) +void gen8_enable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask) { bdw_update_pm_irq(dev_priv, mask, mask); } -void bdw_disable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask) +void gen8_disable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask) { bdw_update_pm_irq(dev_priv, mask, 0); } @@ -1408,10 +1408,10 @@ static void gen6_pm_rps_work(struct work_struct *work) pm_iir = dev_priv->rps.pm_iir; dev_priv->rps.pm_iir = 0; if (INTEL_INFO(dev_priv->dev)->gen >= 8) - bdw_enable_pm_irq(dev_priv, dev_priv->pm_rps_events); + gen8_enable_pm_irq(dev_priv, dev_priv->pm_rps_events); else { /* Make sure not to corrupt PMIMR state used by ringbuffer */ - snb_enable_pm_irq(dev_priv, dev_priv->pm_rps_events); + gen6_enable_pm_irq(dev_priv, dev_priv->pm_rps_events); } spin_unlock_irq(&dev_priv->irq_lock); @@ -1553,7 +1553,7 @@ static void ivybridge_parity_work(struct work_struct *work) out: WARN_ON(dev_priv->l3_parity.which_slice); spin_lock_irqsave(&dev_priv->irq_lock, flags); - ilk_enable_gt_irq(dev_priv, GT_PARITY_ERROR(dev_priv->dev)); + gen5_enable_gt_irq(dev_priv, GT_PARITY_ERROR(dev_priv->dev)); spin_unlock_irqrestore(&dev_priv->irq_lock, flags); mutex_unlock(&dev_priv->dev->struct_mutex); @@ -1567,7 +1567,7 @@ static void ivybridge_parity_error_irq_handler(struct drm_device *dev, u32 iir) return; spin_lock(&dev_priv->irq_lock); - ilk_disable_gt_irq(dev_priv, GT_PARITY_ERROR(dev)); + gen5_disable_gt_irq(dev_priv, GT_PARITY_ERROR(dev)); spin_unlock(&dev_priv->irq_lock); iir &= GT_PARITY_ERROR(dev); @@ -1622,7 +1622,7 @@ static void gen8_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir) spin_lock(&dev_priv->irq_lock); dev_priv->rps.pm_iir |= pm_iir & dev_priv->pm_rps_events; - bdw_disable_pm_irq(dev_priv, pm_iir & dev_priv->pm_rps_events); + gen8_disable_pm_irq(dev_priv, pm_iir & dev_priv->pm_rps_events); spin_unlock(&dev_priv->irq_lock); queue_work(dev_priv->wq, &dev_priv->rps.work); @@ -1969,7 +1969,7 @@ static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir) if (pm_iir & dev_priv->pm_rps_events) { spin_lock(&dev_priv->irq_lock); dev_priv->rps.pm_iir |= pm_iir & dev_priv->pm_rps_events; - snb_disable_pm_irq(dev_priv, pm_iir & dev_priv->pm_rps_events); + gen6_disable_pm_irq(dev_priv, pm_iir & dev_priv->pm_rps_events); spin_unlock(&dev_priv->irq_lock); queue_work(dev_priv->wq, &dev_priv->rps.work); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 6093ebdeb7cf..9d97a50cae4b 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -683,12 +683,12 @@ bool intel_set_cpu_fifo_underrun_reporting(struct drm_device *dev, bool intel_set_pch_fifo_underrun_reporting(struct drm_device *dev, enum transcoder pch_transcoder, bool enable); -void ilk_enable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask); -void ilk_disable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask); -void snb_enable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask); -void snb_disable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask); -void bdw_enable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask); -void bdw_disable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask); +void gen5_enable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask); +void gen5_disable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask); +void gen6_enable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask); +void gen6_disable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask); +void gen8_enable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask); +void gen8_disable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask); void intel_runtime_pm_disable_interrupts(struct drm_device *dev); void intel_runtime_pm_restore_interrupts(struct drm_device *dev); int intel_get_crtc_scanline(struct intel_crtc *crtc); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 6e03851a4fa4..25ae4e6d3dd6 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3474,7 +3474,7 @@ static void gen8_enable_rps_interrupts(struct drm_device *dev) spin_lock_irq(&dev_priv->irq_lock); WARN_ON(dev_priv->rps.pm_iir); - bdw_enable_pm_irq(dev_priv, dev_priv->pm_rps_events); + gen8_enable_pm_irq(dev_priv, dev_priv->pm_rps_events); I915_WRITE(GEN8_GT_IIR(2), dev_priv->pm_rps_events); spin_unlock_irq(&dev_priv->irq_lock); } @@ -3485,7 +3485,7 @@ static void gen6_enable_rps_interrupts(struct drm_device *dev) spin_lock_irq(&dev_priv->irq_lock); WARN_ON(dev_priv->rps.pm_iir); - snb_enable_pm_irq(dev_priv, dev_priv->pm_rps_events); + gen6_enable_pm_irq(dev_priv, dev_priv->pm_rps_events); I915_WRITE(GEN6_PMIIR, dev_priv->pm_rps_events); spin_unlock_irq(&dev_priv->irq_lock); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 599709e80a16..b3d8f766fa7f 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1004,7 +1004,7 @@ gen5_ring_get_irq(struct intel_engine_cs *ring) spin_lock_irqsave(&dev_priv->irq_lock, flags); if (ring->irq_refcount++ == 0) - ilk_enable_gt_irq(dev_priv, ring->irq_enable_mask); + gen5_enable_gt_irq(dev_priv, ring->irq_enable_mask); spin_unlock_irqrestore(&dev_priv->irq_lock, flags); return true; @@ -1019,7 +1019,7 @@ gen5_ring_put_irq(struct intel_engine_cs *ring) spin_lock_irqsave(&dev_priv->irq_lock, flags); if (--ring->irq_refcount == 0) - ilk_disable_gt_irq(dev_priv, ring->irq_enable_mask); + gen5_disable_gt_irq(dev_priv, ring->irq_enable_mask); spin_unlock_irqrestore(&dev_priv->irq_lock, flags); } @@ -1212,7 +1212,7 @@ gen6_ring_get_irq(struct intel_engine_cs *ring) GT_PARITY_ERROR(dev))); else I915_WRITE_IMR(ring, ~ring->irq_enable_mask); - ilk_enable_gt_irq(dev_priv, ring->irq_enable_mask); + gen5_enable_gt_irq(dev_priv, ring->irq_enable_mask); } spin_unlock_irqrestore(&dev_priv->irq_lock, flags); @@ -1232,7 +1232,7 @@ gen6_ring_put_irq(struct intel_engine_cs *ring) I915_WRITE_IMR(ring, ~GT_PARITY_ERROR(dev)); else I915_WRITE_IMR(ring, ~0); - ilk_disable_gt_irq(dev_priv, ring->irq_enable_mask); + gen5_disable_gt_irq(dev_priv, ring->irq_enable_mask); } spin_unlock_irqrestore(&dev_priv->irq_lock, flags); } @@ -1250,7 +1250,7 @@ hsw_vebox_get_irq(struct intel_engine_cs *ring) spin_lock_irqsave(&dev_priv->irq_lock, flags); if (ring->irq_refcount++ == 0) { I915_WRITE_IMR(ring, ~ring->irq_enable_mask); - snb_enable_pm_irq(dev_priv, ring->irq_enable_mask); + gen6_enable_pm_irq(dev_priv, ring->irq_enable_mask); } spin_unlock_irqrestore(&dev_priv->irq_lock, flags); @@ -1270,7 +1270,7 @@ hsw_vebox_put_irq(struct intel_engine_cs *ring) spin_lock_irqsave(&dev_priv->irq_lock, flags); if (--ring->irq_refcount == 0) { I915_WRITE_IMR(ring, ~0); - snb_disable_pm_irq(dev_priv, ring->irq_enable_mask); + gen6_disable_pm_irq(dev_priv, ring->irq_enable_mask); } spin_unlock_irqrestore(&dev_priv->irq_lock, flags); } -- cgit v1.2.3 From 884ceacee308f0e4616d0c933518af2639f7b1d8 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Sat, 28 Jun 2014 02:04:20 +0300 Subject: drm/i915: Refactor Broadwell PIPE_CONTROL emission into a helper. We'll want to reuse this for a workaround. Signed-off-by: Kenneth Graunke Reviewed-by: Jesse Barnes [danvet: Rmove now unused int.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 37 ++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 15 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index b3d8f766fa7f..2908896334f5 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -379,13 +379,33 @@ gen7_render_ring_flush(struct intel_engine_cs *ring, return 0; } +static int +gen8_emit_pipe_control(struct intel_engine_cs *ring, + u32 flags, u32 scratch_addr) +{ + int ret; + + ret = intel_ring_begin(ring, 6); + if (ret) + return ret; + + intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); + intel_ring_emit(ring, flags); + intel_ring_emit(ring, scratch_addr); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_advance(ring); + + return 0; +} + static int gen8_render_ring_flush(struct intel_engine_cs *ring, u32 invalidate_domains, u32 flush_domains) { u32 flags = 0; u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES; - int ret; flags |= PIPE_CONTROL_CS_STALL; @@ -404,20 +424,7 @@ gen8_render_ring_flush(struct intel_engine_cs *ring, flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; } - ret = intel_ring_begin(ring, 6); - if (ret) - return ret; - - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); - intel_ring_emit(ring, flags); - intel_ring_emit(ring, scratch_addr); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_advance(ring); - - return 0; - + return gen8_emit_pipe_control(ring, flags, scratch_addr); } static void ring_write_tail(struct intel_engine_cs *ring, -- cgit v1.2.3 From 02c9f7e3cfe76a7f54ef03438c36aade86cc1c8b Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Mon, 27 Jan 2014 14:20:16 -0800 Subject: drm/i915: Add the WaCsStallBeforeStateCacheInvalidate:bdw workaround. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On Broadwell, any PIPE_CONTROL with the "State Cache Invalidate" bit set must be preceded by a PIPE_CONTROL with the "CS Stall" bit set. Documented on the BSpec 3D workarounds page. Reviewed-by: Rafael Barbalho Signed-off-by: Kenneth Graunke [vsyrjala: add chv w/a note too] Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 2908896334f5..05969f03c0c1 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -406,6 +406,7 @@ gen8_render_ring_flush(struct intel_engine_cs *ring, { u32 flags = 0; u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES; + int ret; flags |= PIPE_CONTROL_CS_STALL; @@ -422,6 +423,14 @@ gen8_render_ring_flush(struct intel_engine_cs *ring, flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; flags |= PIPE_CONTROL_QW_WRITE; flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; + + /* WaCsStallBeforeStateCacheInvalidate:bdw,chv */ + ret = gen8_emit_pipe_control(ring, + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_STALL_AT_SCOREBOARD, + 0); + if (ret) + return ret; } return gen8_emit_pipe_control(ring, flags, scratch_addr); -- cgit v1.2.3 From ece4a17d237a79f63fbfaf3f724a12b6d500555c Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Thu, 7 Aug 2014 16:29:53 +0200 Subject: drm/i915: read HEAD register back in init_ring_common() to enforce ordering Withtout this, ring initialization fails reliabily during resume with [drm:init_ring_common] *ERROR* render ring initialization failed ctl 0001f001 head ffffff8804 tail 00000000 start 000e4000 This is not a complete fix, but it is verified to make the ring initialization failures during resume much less likely. We were not able to root-cause this bug (likely HW-specific to Gen4 chips) yet. This is therefore used as a ducttape before problem is fully understood and proper fix created, so that people don't suffer from completely unusable systems in the meantime. The discussion and debugging is happening at https://bugs.freedesktop.org/show_bug.cgi?id=76554 Signed-off-by: Jiri Kosina Cc: stable@vger.kernel.org Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 05969f03c0c1..16371a444426 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -532,6 +532,9 @@ static int init_ring_common(struct intel_engine_cs *ring) else ring_setup_phys_status_page(ring); + /* Enforce ordering by reading HEAD register back */ + I915_READ_HEAD(ring); + /* Initialize the ring. This must happen _after_ we've cleared the ring * registers with the above sequence (the readback of the HEAD registers * also enforces ordering), otherwise the hw might lose the new ring -- cgit v1.2.3 From c4d69da167fa967749aeb70bc0e94a457e5d00c1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 8 Sep 2014 14:25:41 +0100 Subject: drm/i915: Evict CS TLBs between batches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Running igt, I was encountering the invalid TLB bug on my 845g, despite that it was using the CS workaround. Examining the w/a buffer in the error state, showed that the copy from the user batch into the workaround itself was suffering from the invalid TLB bug (the first cacheline was broken with the first two words reversed). Time to try a fresh approach. This extends the workaround to write into each page of our scratch buffer in order to overflow the TLB and evict the invalid entries. This could be refined to only do so after we update the GTT, but for simplicity, we do it before each batch. I suspect this supersedes our current workaround, but for safety keep doing both. v2: The magic number shall be 2. This doesn't conclusively prove that it is the mythical TLB bug we've been trying to workaround for so long, that it requires touching a number of pages to prevent the corruption indicates to me that it is TLB related, but the corruption (the reversed cacheline) is more subtle than a TLB bug, where we would expect it to read the wrong page entirely. Oh well, it prevents a reliable hang for me and so probably for others as well. Signed-off-by: Chris Wilson Cc: Daniel Vetter Cc: Ville Syrjälä Cc: stable@vger.kernel.org Reviewed-by: Daniel Vetter Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_reg.h | 12 ++++-- drivers/gpu/drm/i915/intel_ringbuffer.c | 66 +++++++++++++++++++-------------- 2 files changed, 47 insertions(+), 31 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index e4d7607da2c4..f29b44c86a2f 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -334,16 +334,20 @@ #define GFX_OP_DESTBUFFER_INFO ((0x3<<29)|(0x1d<<24)|(0x8e<<16)|1) #define GFX_OP_DRAWRECT_INFO ((0x3<<29)|(0x1d<<24)|(0x80<<16)|(0x3)) #define GFX_OP_DRAWRECT_INFO_I965 ((0x7900<<16)|0x2) -#define SRC_COPY_BLT_CMD ((2<<29)|(0x43<<22)|4) + +#define COLOR_BLT_CMD (2<<29 | 0x40<<22 | (5-2)) +#define SRC_COPY_BLT_CMD ((2<<29)|(0x43<<22)|4) #define XY_SRC_COPY_BLT_CMD ((2<<29)|(0x53<<22)|6) #define XY_MONO_SRC_COPY_IMM_BLT ((2<<29)|(0x71<<22)|5) -#define XY_SRC_COPY_BLT_WRITE_ALPHA (1<<21) -#define XY_SRC_COPY_BLT_WRITE_RGB (1<<20) +#define BLT_WRITE_A (2<<20) +#define BLT_WRITE_RGB (1<<20) +#define BLT_WRITE_RGBA (BLT_WRITE_RGB | BLT_WRITE_A) #define BLT_DEPTH_8 (0<<24) #define BLT_DEPTH_16_565 (1<<24) #define BLT_DEPTH_16_1555 (2<<24) #define BLT_DEPTH_32 (3<<24) -#define BLT_ROP_GXCOPY (0xcc<<16) +#define BLT_ROP_SRC_COPY (0xcc<<16) +#define BLT_ROP_COLOR_COPY (0xf0<<16) #define XY_SRC_COPY_BLT_SRC_TILED (1<<15) /* 965+ only */ #define XY_SRC_COPY_BLT_DST_TILED (1<<11) /* 965+ only */ #define CMD_OP_DISPLAYBUFFER_INFO ((0x0<<29)|(0x14<<23)|2) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 16371a444426..2d068edd1adc 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1363,54 +1363,66 @@ i965_dispatch_execbuffer(struct intel_engine_cs *ring, /* Just userspace ABI convention to limit the wa batch bo to a resonable size */ #define I830_BATCH_LIMIT (256*1024) +#define I830_TLB_ENTRIES (2) +#define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT) static int i830_dispatch_execbuffer(struct intel_engine_cs *ring, u64 offset, u32 len, unsigned flags) { + u32 cs_offset = ring->scratch.gtt_offset; int ret; - if (flags & I915_DISPATCH_PINNED) { - ret = intel_ring_begin(ring, 4); - if (ret) - return ret; + ret = intel_ring_begin(ring, 6); + if (ret) + return ret; - intel_ring_emit(ring, MI_BATCH_BUFFER); - intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE)); - intel_ring_emit(ring, offset + len - 8); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); - } else { - u32 cs_offset = ring->scratch.gtt_offset; + /* Evict the invalid PTE TLBs */ + intel_ring_emit(ring, COLOR_BLT_CMD | BLT_WRITE_RGBA); + intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096); + intel_ring_emit(ring, I830_TLB_ENTRIES << 16 | 4); /* load each page */ + intel_ring_emit(ring, cs_offset); + intel_ring_emit(ring, 0xdeadbeef); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); + if ((flags & I915_DISPATCH_PINNED) == 0) { if (len > I830_BATCH_LIMIT) return -ENOSPC; - ret = intel_ring_begin(ring, 9+3); + ret = intel_ring_begin(ring, 6 + 2); if (ret) return ret; - /* Blit the batch (which has now all relocs applied) to the stable batch - * scratch bo area (so that the CS never stumbles over its tlb - * invalidation bug) ... */ - intel_ring_emit(ring, XY_SRC_COPY_BLT_CMD | - XY_SRC_COPY_BLT_WRITE_ALPHA | - XY_SRC_COPY_BLT_WRITE_RGB); - intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_GXCOPY | 4096); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, (DIV_ROUND_UP(len, 4096) << 16) | 1024); + + /* Blit the batch (which has now all relocs applied) to the + * stable batch scratch bo area (so that the CS never + * stumbles over its tlb invalidation bug) ... + */ + intel_ring_emit(ring, SRC_COPY_BLT_CMD | BLT_WRITE_RGBA); + intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096); + intel_ring_emit(ring, DIV_ROUND_UP(len, 4096) << 16 | 1024); intel_ring_emit(ring, cs_offset); - intel_ring_emit(ring, 0); intel_ring_emit(ring, 4096); intel_ring_emit(ring, offset); + intel_ring_emit(ring, MI_FLUSH); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); /* ... and execute it. */ - intel_ring_emit(ring, MI_BATCH_BUFFER); - intel_ring_emit(ring, cs_offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE)); - intel_ring_emit(ring, cs_offset + len - 8); - intel_ring_advance(ring); + offset = cs_offset; } + ret = intel_ring_begin(ring, 4); + if (ret) + return ret; + + intel_ring_emit(ring, MI_BATCH_BUFFER); + intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE)); + intel_ring_emit(ring, offset + len - 8); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); + return 0; } @@ -2200,7 +2212,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev) /* Workaround batchbuffer to combat CS tlb bug. */ if (HAS_BROKEN_CS_TLB(dev)) { - obj = i915_gem_alloc_object(dev, I830_BATCH_LIMIT); + obj = i915_gem_alloc_object(dev, I830_WA_SIZE); if (obj == NULL) { DRM_ERROR("Failed to allocate batch bo\n"); return -ENOMEM; -- cgit v1.2.3 From 611a7a4fd8b5fb6b25ab1f8bdcde61800a7feacf Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 12 Sep 2014 07:37:42 +0100 Subject: drm/i915: Fix SRC_COPY width on 830/845g One small change I forgot to make in commit c4d69da167fa967749aeb70bc0e94a457e5d00c1 Author: Chris Wilson Date: Mon Sep 8 14:25:41 2014 +0100 drm/i915: Evict CS TLBs between batches was to update the copy width for the compact BLT copy instruction. Reported-by: Thomas Richter Signed-off-by: Chris Wilson Cc: Thomas Richter Cc: Jani Nikula Cc: stable@vger.kernel.org Tested-by: Thomas Richter Acked-by: Daniel Vetter Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 2d068edd1adc..47a126a0493f 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1400,7 +1400,7 @@ i830_dispatch_execbuffer(struct intel_engine_cs *ring, */ intel_ring_emit(ring, SRC_COPY_BLT_CMD | BLT_WRITE_RGBA); intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096); - intel_ring_emit(ring, DIV_ROUND_UP(len, 4096) << 16 | 1024); + intel_ring_emit(ring, DIV_ROUND_UP(len, 4096) << 16 | 4096); intel_ring_emit(ring, cs_offset); intel_ring_emit(ring, 4096); intel_ring_emit(ring, offset); -- cgit v1.2.3