From 38a8791aa7bfaa497e12e56b904a6b45986162d8 Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Thu, 17 Aug 2017 16:37:49 -0400
Subject: drm/amdgpu: Fix huge page updates with CPU
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Correctly detect system memory mappings when using CPU and don't use
huge pages for them.

Avoid incorrectly translating a physical page table GPU address when
splitting a huge page while mapping system memory.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 6b1343e5541d..ba475af99332 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1277,7 +1277,7 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
 	/* In the case of a mixed PT the PDE must point to it*/
 	if (p->adev->asic_type < CHIP_VEGA10 ||
 	    nptes != AMDGPU_VM_PTE_COUNT(p->adev) ||
-	    p->func == amdgpu_vm_do_copy_ptes ||
+	    p->src ||
 	    !(flags & AMDGPU_PTE_VALID)) {
 
 		dst = amdgpu_bo_gpu_offset(entry->bo);
@@ -1294,9 +1294,23 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
 	entry->addr = (dst | flags);
 
 	if (use_cpu_update) {
+		/* In case a huge page is replaced with a system
+		 * memory mapping, p->pages_addr != NULL and
+		 * amdgpu_vm_cpu_set_ptes would try to translate dst
+		 * through amdgpu_vm_map_gart. But dst is already a
+		 * GPU address (of the page table). Disable
+		 * amdgpu_vm_map_gart temporarily.
+		 */
+		dma_addr_t *tmp;
+
+		tmp = p->pages_addr;
+		p->pages_addr = NULL;
+
 		pd_addr = (unsigned long)amdgpu_bo_kptr(parent->bo);
 		pde = pd_addr + (entry - parent->entries) * 8;
 		amdgpu_vm_cpu_set_ptes(p, pde, dst, 1, 0, flags);
+
+		p->pages_addr = tmp;
 	} else {
 		if (parent->bo->shadow) {
 			pd_addr = amdgpu_bo_gpu_offset(parent->bo->shadow);
-- 
cgit v1.2.3


From cf273a59ca3068caced2adaf2deeb44d2013c8a9 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Fri, 18 Aug 2017 15:50:17 +0200
Subject: drm/amdgpu: fix and cleanup shadow handling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Set the shadow flag on the shadow and not the parent, always bind shadow BOs
during allocation instead of manually, use the reservation_object wrappers
to grab the lock.

This fixes a couple of issues with binding the shadow BOs as well as correctly
evicting them when memory becomes tight.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  6 ----
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 46 +++++++++++++++---------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c     |  8 ------
 3 files changed, 23 insertions(+), 37 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index f7ffb029f6d5..e630d918fefc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2622,12 +2622,6 @@ static int amdgpu_recover_vram_from_shadow(struct amdgpu_device *adev,
 			goto err;
 		}
 
-		r = amdgpu_ttm_bind(&bo->shadow->tbo, &bo->shadow->tbo.mem);
-		if (r) {
-			DRM_ERROR("%p bind failed\n", bo->shadow);
-			goto err;
-		}
-
 		r = amdgpu_bo_restore_from_shadow(adev, ring, bo,
 						 NULL, fence, true);
 		if (r) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index e7e899190bef..9e495da0bb03 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -91,7 +91,10 @@ static void amdgpu_ttm_placement_init(struct amdgpu_device *adev,
 
 	if (domain & AMDGPU_GEM_DOMAIN_GTT) {
 		places[c].fpfn = 0;
-		places[c].lpfn = 0;
+		if (flags & AMDGPU_GEM_CREATE_SHADOW)
+			places[c].lpfn = adev->mc.gart_size >> PAGE_SHIFT;
+		else
+			places[c].lpfn = 0;
 		places[c].flags = TTM_PL_FLAG_TT;
 		if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
 			places[c].flags |= TTM_PL_FLAG_WC |
@@ -446,17 +449,16 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
 	if (bo->shadow)
 		return 0;
 
-	bo->flags |= AMDGPU_GEM_CREATE_SHADOW;
-	memset(&placements, 0,
-	       (AMDGPU_GEM_DOMAIN_MAX + 1) * sizeof(struct ttm_place));
-
-	amdgpu_ttm_placement_init(adev, &placement,
-				  placements, AMDGPU_GEM_DOMAIN_GTT,
-				  AMDGPU_GEM_CREATE_CPU_GTT_USWC);
+	memset(&placements, 0, sizeof(placements));
+	amdgpu_ttm_placement_init(adev, &placement, placements,
+				  AMDGPU_GEM_DOMAIN_GTT,
+				  AMDGPU_GEM_CREATE_CPU_GTT_USWC |
+				  AMDGPU_GEM_CREATE_SHADOW);
 
 	r = amdgpu_bo_create_restricted(adev, size, byte_align, true,
 					AMDGPU_GEM_DOMAIN_GTT,
-					AMDGPU_GEM_CREATE_CPU_GTT_USWC,
+					AMDGPU_GEM_CREATE_CPU_GTT_USWC |
+					AMDGPU_GEM_CREATE_SHADOW,
 					NULL, &placement,
 					bo->tbo.resv,
 					0,
@@ -484,30 +486,28 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
 {
 	struct ttm_placement placement = {0};
 	struct ttm_place placements[AMDGPU_GEM_DOMAIN_MAX + 1];
+	uint64_t parent_flags = flags & ~AMDGPU_GEM_CREATE_SHADOW;
 	int r;
 
-	memset(&placements, 0,
-	       (AMDGPU_GEM_DOMAIN_MAX + 1) * sizeof(struct ttm_place));
+	memset(&placements, 0, sizeof(placements));
+	amdgpu_ttm_placement_init(adev, &placement, placements,
+				  domain, parent_flags);
 
-	amdgpu_ttm_placement_init(adev, &placement,
-				  placements, domain, flags);
-
-	r = amdgpu_bo_create_restricted(adev, size, byte_align, kernel,
-					domain, flags, sg, &placement,
-					resv, init_value, bo_ptr);
+	r = amdgpu_bo_create_restricted(adev, size, byte_align, kernel, domain,
+					parent_flags, sg, &placement, resv,
+					init_value, bo_ptr);
 	if (r)
 		return r;
 
-	if (amdgpu_need_backup(adev) && (flags & AMDGPU_GEM_CREATE_SHADOW)) {
-		if (!resv) {
-			r = ww_mutex_lock(&(*bo_ptr)->tbo.resv->lock, NULL);
-			WARN_ON(r != 0);
-		}
+	if ((flags & AMDGPU_GEM_CREATE_SHADOW) && amdgpu_need_backup(adev)) {
+		if (!resv)
+			WARN_ON(reservation_object_lock((*bo_ptr)->tbo.resv,
+							NULL));
 
 		r = amdgpu_bo_create_shadow(adev, size, byte_align, (*bo_ptr));
 
 		if (!resv)
-			ww_mutex_unlock(&(*bo_ptr)->tbo.resv->lock);
+			reservation_object_unlock((*bo_ptr)->tbo.resv);
 
 		if (r)
 			amdgpu_bo_unref(bo_ptr);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index ba475af99332..96ec4e2b56e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -165,14 +165,6 @@ static int amdgpu_vm_validate_level(struct amdgpu_vm_pt *parent,
 	unsigned i;
 	int r;
 
-	if (parent->bo->shadow) {
-		struct amdgpu_bo *shadow = parent->bo->shadow;
-
-		r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem);
-		if (r)
-			return r;
-	}
-
 	if (use_cpu_for_update) {
 		r = amdgpu_bo_kmap(parent->bo, NULL);
 		if (r)
-- 
cgit v1.2.3


From febb84a60304e7257d42b90cc191ef84279e5152 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Tue, 22 Aug 2017 12:50:46 +0200
Subject: drm/amdgpu: remove the GART copy hack
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This isn't used since we don't map evicted BOs to GART any more.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Roger He <Hongbo.He@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 22 +++++-----------------
 1 file changed, 5 insertions(+), 17 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 96ec4e2b56e9..3bd430e180b5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1616,7 +1616,6 @@ error_free:
  *
  * @adev: amdgpu_device pointer
  * @exclusive: fence we need to sync to
- * @gtt_flags: flags as they are used for GTT
  * @pages_addr: DMA addresses to use for mapping
  * @vm: requested vm
  * @mapping: mapped range and flags to use for the update
@@ -1630,7 +1629,6 @@ error_free:
  */
 static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
 				      struct dma_fence *exclusive,
-				      uint64_t gtt_flags,
 				      dma_addr_t *pages_addr,
 				      struct amdgpu_vm *vm,
 				      struct amdgpu_bo_va_mapping *mapping,
@@ -1685,11 +1683,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
 		}
 
 		if (pages_addr) {
-			if (flags == gtt_flags)
-				src = adev->gart.table_addr +
-					(addr >> AMDGPU_GPU_PAGE_SHIFT) * 8;
-			else
-				max_entries = min(max_entries, 16ull * 1024ull);
+			max_entries = min(max_entries, 16ull * 1024ull);
 			addr = 0;
 		} else if (flags & AMDGPU_PTE_VALID) {
 			addr += adev->vm_manager.vram_base_offset;
@@ -1734,10 +1728,10 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 	struct amdgpu_vm *vm = bo_va->base.vm;
 	struct amdgpu_bo_va_mapping *mapping;
 	dma_addr_t *pages_addr = NULL;
-	uint64_t gtt_flags, flags;
 	struct ttm_mem_reg *mem;
 	struct drm_mm_node *nodes;
 	struct dma_fence *exclusive;
+	uint64_t flags;
 	int r;
 
 	if (clear || !bo_va->base.bo) {
@@ -1757,15 +1751,10 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 		exclusive = reservation_object_get_excl(bo->tbo.resv);
 	}
 
-	if (bo) {
+	if (bo)
 		flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);
-		gtt_flags = (amdgpu_ttm_is_bound(bo->tbo.ttm) &&
-			adev == amdgpu_ttm_adev(bo->tbo.bdev)) ?
-			flags : 0;
-	} else {
+	else
 		flags = 0x0;
-		gtt_flags = ~0x0;
-	}
 
 	spin_lock(&vm->status_lock);
 	if (!list_empty(&bo_va->base.vm_status))
@@ -1773,8 +1762,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 	spin_unlock(&vm->status_lock);
 
 	list_for_each_entry(mapping, &bo_va->invalids, list) {
-		r = amdgpu_vm_bo_split_mapping(adev, exclusive,
-					       gtt_flags, pages_addr, vm,
+		r = amdgpu_vm_bo_split_mapping(adev, exclusive, pages_addr, vm,
 					       mapping, flags, nodes,
 					       &bo_va->last_pt_update);
 		if (r)
-- 
cgit v1.2.3


From 87f64a76b38acaa081b2bc46c3169884b9ccd6f2 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Wed, 23 Aug 2017 14:05:48 +0200
Subject: drm/amdgpu: fix amdgpu_vm_bo_map trace point
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

That somehow got lost.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 3bd430e180b5..db63ff5c80f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2111,6 +2111,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
 
 	if (flags & AMDGPU_PTE_PRT)
 		amdgpu_vm_prt_get(adev);
+	trace_amdgpu_vm_bo_map(bo_va, mapping);
 
 	return 0;
 }
@@ -2176,6 +2177,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
 
 	if (flags & AMDGPU_PTE_PRT)
 		amdgpu_vm_prt_get(adev);
+	trace_amdgpu_vm_bo_map(bo_va, mapping);
 
 	return 0;
 }
-- 
cgit v1.2.3


From 34d7be5dc28041b95254da517fd0f0f740544ece Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Thu, 24 Aug 2017 12:32:55 +0200
Subject: drm/amdgpu: fix and cleanup VM ready check
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Stop checking the mapped BO itself, cause that one is
certainly not a page table.

Additional to that move the code into amdgpu_vm.c

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 33 ++-------------------------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 32 ++++++++++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  1 +
 3 files changed, 35 insertions(+), 31 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 7171968f261e..9b1b6bdd4841 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -127,35 +127,6 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj,
 	return 0;
 }
 
-static int amdgpu_gem_vm_check(void *param, struct amdgpu_bo *bo)
-{
-	/* if anything is swapped out don't swap it in here,
-	   just abort and wait for the next CS */
-	if (!amdgpu_bo_gpu_accessible(bo))
-		return -ERESTARTSYS;
-
-	if (bo->shadow && !amdgpu_bo_gpu_accessible(bo->shadow))
-		return -ERESTARTSYS;
-
-	return 0;
-}
-
-static bool amdgpu_gem_vm_ready(struct amdgpu_device *adev,
-				struct amdgpu_vm *vm,
-				struct list_head *list)
-{
-	struct ttm_validate_buffer *entry;
-
-	list_for_each_entry(entry, list, head) {
-		struct amdgpu_bo *bo =
-			container_of(entry->bo, struct amdgpu_bo, tbo);
-		if (amdgpu_gem_vm_check(NULL, bo))
-			return false;
-	}
-
-	return !amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_gem_vm_check, NULL);
-}
-
 void amdgpu_gem_object_close(struct drm_gem_object *obj,
 			     struct drm_file *file_priv)
 {
@@ -189,7 +160,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
 	if (bo_va && --bo_va->ref_count == 0) {
 		amdgpu_vm_bo_rmv(adev, bo_va);
 
-		if (amdgpu_gem_vm_ready(adev, vm, &list)) {
+		if (amdgpu_vm_ready(adev, vm)) {
 			struct dma_fence *fence = NULL;
 
 			r = amdgpu_vm_clear_freed(adev, vm, &fence);
@@ -513,7 +484,7 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
 {
 	int r = -ERESTARTSYS;
 
-	if (!amdgpu_gem_vm_ready(adev, vm, list))
+	if (!amdgpu_vm_ready(adev, vm))
 		goto error;
 
 	r = amdgpu_vm_update_directories(adev, vm);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index db63ff5c80f2..67c37b22f8ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -231,6 +231,38 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 					adev->mman.bdev.glob);
 }
 
+/**
+ * amdgpu_vm_check - helper for amdgpu_vm_ready
+ */
+static int amdgpu_vm_check(void *param, struct amdgpu_bo *bo)
+{
+	/* if anything is swapped out don't swap it in here,
+	   just abort and wait for the next CS */
+	if (!amdgpu_bo_gpu_accessible(bo))
+		return -ERESTARTSYS;
+
+	if (bo->shadow && !amdgpu_bo_gpu_accessible(bo->shadow))
+		return -ERESTARTSYS;
+
+	return 0;
+}
+
+/**
+ * amdgpu_vm_ready - check VM is ready for updates
+ *
+ * @adev: amdgpu device
+ * @vm: VM to check
+ *
+ * Check if all VM PDs/PTs are ready for updates
+ */
+bool amdgpu_vm_ready(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+{
+	if (amdgpu_vm_check(NULL, vm->root.bo))
+		return false;
+
+	return !amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_vm_check, NULL);
+}
+
 /**
  * amdgpu_vm_alloc_levels - allocate the PD/PT levels
  *
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index ba6691b58ee7..9347d28c3c1e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -225,6 +225,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
 void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 			 struct list_head *validated,
 			 struct amdgpu_bo_list_entry *entry);
+bool amdgpu_vm_ready(struct amdgpu_device *adev, struct amdgpu_vm *vm);
 int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 			      int (*callback)(void *p, struct amdgpu_bo *bo),
 			      void *param);
-- 
cgit v1.2.3


From 3d7d4d3a1b9f67c0caecf2b2aa8d7c347f074a33 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Wed, 23 Aug 2017 16:13:33 +0200
Subject: drm/amdgpu: rework moved handling in the VM v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Instead of using the vm_state use a separate flag to note
that the BO was moved.

v2: reorder patches to avoid temporary lockless access

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 13 ++++++++++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  3 +++
 2 files changed, 13 insertions(+), 3 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 67c37b22f8ef..1b36c62997b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1788,10 +1788,16 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 	else
 		flags = 0x0;
 
-	spin_lock(&vm->status_lock);
-	if (!list_empty(&bo_va->base.vm_status))
+	if (!clear && bo_va->base.moved) {
+		bo_va->base.moved = false;
 		list_splice_init(&bo_va->valids, &bo_va->invalids);
-	spin_unlock(&vm->status_lock);
+
+	} else {
+		spin_lock(&vm->status_lock);
+		if (!list_empty(&bo_va->base.vm_status))
+			list_splice_init(&bo_va->valids, &bo_va->invalids);
+		spin_unlock(&vm->status_lock);
+	}
 
 	list_for_each_entry(mapping, &bo_va->invalids, list) {
 		r = amdgpu_vm_bo_split_mapping(adev, exclusive, pages_addr, vm,
@@ -2419,6 +2425,7 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
 	struct amdgpu_vm_bo_base *bo_base;
 
 	list_for_each_entry(bo_base, &bo->va, bo_list) {
+		bo_base->moved = true;
 		spin_lock(&bo_base->vm->status_lock);
 		if (list_empty(&bo_base->vm_status))
 			list_add(&bo_base->vm_status,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 9347d28c3c1e..1b478e62a948 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -105,6 +105,9 @@ struct amdgpu_vm_bo_base {
 
 	/* protected by spinlock */
 	struct list_head		vm_status;
+
+	/* protected by the BO being reserved */
+	bool				moved;
 };
 
 struct amdgpu_vm_pt {
-- 
cgit v1.2.3


From cb7b6ec2f8b8759b6b5beb4d17ea6984867a3296 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Tue, 15 Aug 2017 17:08:12 +0200
Subject: drm/amdgpu: add bo_va cleared flag again v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We changed this to use an extra list a while back, but for the next
series I need a separate flag again.

v2: reorder to avoid unlocked list access

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  3 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c     | 35 +++++++++++-------------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h     |  3 ---
 3 files changed, 16 insertions(+), 25 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index a288fa6d72c8..e613ba42f167 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -55,6 +55,9 @@ struct amdgpu_bo_va {
 	/* mappings for this bo_va */
 	struct list_head		invalids;
 	struct list_head		valids;
+
+	/* If the mappings are cleared or filled */
+	bool				cleared;
 };
 
 struct amdgpu_bo {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 1b36c62997b3..1334bbb82634 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1792,11 +1792,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 		bo_va->base.moved = false;
 		list_splice_init(&bo_va->valids, &bo_va->invalids);
 
-	} else {
-		spin_lock(&vm->status_lock);
-		if (!list_empty(&bo_va->base.vm_status))
-			list_splice_init(&bo_va->valids, &bo_va->invalids);
-		spin_unlock(&vm->status_lock);
+	} else if (bo_va->cleared != clear) {
+		list_splice_init(&bo_va->valids, &bo_va->invalids);
 	}
 
 	list_for_each_entry(mapping, &bo_va->invalids, list) {
@@ -1807,25 +1804,22 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 			return r;
 	}
 
-	if (trace_amdgpu_vm_bo_mapping_enabled()) {
-		list_for_each_entry(mapping, &bo_va->valids, list)
-			trace_amdgpu_vm_bo_mapping(mapping);
-
-		list_for_each_entry(mapping, &bo_va->invalids, list)
-			trace_amdgpu_vm_bo_mapping(mapping);
+	if (vm->use_cpu_for_update) {
+		/* Flush HDP */
+		mb();
+		amdgpu_gart_flush_gpu_tlb(adev, 0);
 	}
 
 	spin_lock(&vm->status_lock);
-	list_splice_init(&bo_va->invalids, &bo_va->valids);
 	list_del_init(&bo_va->base.vm_status);
-	if (clear)
-		list_add(&bo_va->base.vm_status, &vm->cleared);
 	spin_unlock(&vm->status_lock);
 
-	if (vm->use_cpu_for_update) {
-		/* Flush HDP */
-		mb();
-		amdgpu_gart_flush_gpu_tlb(adev, 0);
+	list_splice_init(&bo_va->invalids, &bo_va->valids);
+	bo_va->cleared = clear;
+
+	if (trace_amdgpu_vm_bo_mapping_enabled()) {
+		list_for_each_entry(mapping, &bo_va->valids, list)
+			trace_amdgpu_vm_bo_mapping(mapping);
 	}
 
 	return 0;
@@ -2427,9 +2421,7 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
 	list_for_each_entry(bo_base, &bo->va, bo_list) {
 		bo_base->moved = true;
 		spin_lock(&bo_base->vm->status_lock);
-		if (list_empty(&bo_base->vm_status))
-			list_add(&bo_base->vm_status,
-				 &bo_base->vm->moved);
+		list_move(&bo_base->vm_status, &bo_base->vm->moved);
 		spin_unlock(&bo_base->vm->status_lock);
 	}
 }
@@ -2516,7 +2508,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		vm->reserved_vmid[i] = NULL;
 	spin_lock_init(&vm->status_lock);
 	INIT_LIST_HEAD(&vm->moved);
-	INIT_LIST_HEAD(&vm->cleared);
 	INIT_LIST_HEAD(&vm->freed);
 
 	/* create scheduler entity for page table updates */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 1b478e62a948..ff093d4b5e11 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -129,9 +129,6 @@ struct amdgpu_vm {
 	/* BOs moved, but not yet updated in the PT */
 	struct list_head	moved;
 
-	/* BOs cleared in the PT because of a move */
-	struct list_head	cleared;
-
 	/* BO mappings freed, but not yet updated in the PT */
 	struct list_head	freed;
 
-- 
cgit v1.2.3


From 3f3333f8a0e90ac26f84ed7b0aa344efce695c08 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Thu, 3 Aug 2017 14:02:13 +0200
Subject: drm/amdgpu: track evicted page tables v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Instead of validating all page tables when one was evicted,
track which one needs a validation.

v2: simplify amdgpu_vm_ready as well

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com> (v1)
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c     |   7 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c    |   8 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c     | 227 +++++++++++++----------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h     |  16 +-
 5 files changed, 119 insertions(+), 141 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index d6ddd5562c16..8bf178a912f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -636,9 +636,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 
 	amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
 				     p->bytes_moved_vis);
-	fpriv->vm.last_eviction_counter =
-		atomic64_read(&p->adev->num_evictions);
-
 	if (p->bo_list) {
 		struct amdgpu_bo *gds = p->bo_list->gds_obj;
 		struct amdgpu_bo *gws = p->bo_list->gws_obj;
@@ -835,7 +832,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
 			if (!bo)
 				continue;
 
-			amdgpu_vm_bo_invalidate(adev, bo);
+			amdgpu_vm_bo_invalidate(adev, bo, false);
 		}
 	}
 
@@ -860,7 +857,7 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
 	}
 
 	if (p->job->vm) {
-		p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->root.bo);
+		p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->root.base.bo);
 
 		r = amdgpu_bo_vm_update_pte(p);
 		if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index ba012933e6aa..d02880640ee7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -160,7 +160,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
 	if (bo_va && --bo_va->ref_count == 0) {
 		amdgpu_vm_bo_rmv(adev, bo_va);
 
-		if (amdgpu_vm_ready(adev, vm)) {
+		if (amdgpu_vm_ready(vm)) {
 			struct dma_fence *fence = NULL;
 
 			r = amdgpu_vm_clear_freed(adev, vm, &fence);
@@ -481,10 +481,10 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
 				    struct list_head *list,
 				    uint32_t operation)
 {
-	int r = -ERESTARTSYS;
+	int r;
 
-	if (!amdgpu_vm_ready(adev, vm))
-		goto error;
+	if (!amdgpu_vm_ready(vm))
+		return;
 
 	r = amdgpu_vm_update_directories(adev, vm);
 	if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 9e495da0bb03..52d0109c0d9c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -929,7 +929,7 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
 		return;
 
 	abo = container_of(bo, struct amdgpu_bo, tbo);
-	amdgpu_vm_bo_invalidate(adev, abo);
+	amdgpu_vm_bo_invalidate(adev, abo, evict);
 
 	amdgpu_bo_kunmap(abo);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 1334bbb82634..6ff3c1bf035e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -140,7 +140,7 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 			 struct list_head *validated,
 			 struct amdgpu_bo_list_entry *entry)
 {
-	entry->robj = vm->root.bo;
+	entry->robj = vm->root.base.bo;
 	entry->priority = 0;
 	entry->tv.bo = &entry->robj->tbo;
 	entry->tv.shared = true;
@@ -148,61 +148,6 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 	list_add(&entry->tv.head, validated);
 }
 
-/**
- * amdgpu_vm_validate_layer - validate a single page table level
- *
- * @parent: parent page table level
- * @validate: callback to do the validation
- * @param: parameter for the validation callback
- *
- * Validate the page table BOs on command submission if neccessary.
- */
-static int amdgpu_vm_validate_level(struct amdgpu_vm_pt *parent,
-				    int (*validate)(void *, struct amdgpu_bo *),
-				    void *param, bool use_cpu_for_update,
-				    struct ttm_bo_global *glob)
-{
-	unsigned i;
-	int r;
-
-	if (use_cpu_for_update) {
-		r = amdgpu_bo_kmap(parent->bo, NULL);
-		if (r)
-			return r;
-	}
-
-	if (!parent->entries)
-		return 0;
-
-	for (i = 0; i <= parent->last_entry_used; ++i) {
-		struct amdgpu_vm_pt *entry = &parent->entries[i];
-
-		if (!entry->bo)
-			continue;
-
-		r = validate(param, entry->bo);
-		if (r)
-			return r;
-
-		spin_lock(&glob->lru_lock);
-		ttm_bo_move_to_lru_tail(&entry->bo->tbo);
-		if (entry->bo->shadow)
-			ttm_bo_move_to_lru_tail(&entry->bo->shadow->tbo);
-		spin_unlock(&glob->lru_lock);
-
-		/*
-		 * Recurse into the sub directory. This is harmless because we
-		 * have only a maximum of 5 layers.
-		 */
-		r = amdgpu_vm_validate_level(entry, validate, param,
-					     use_cpu_for_update, glob);
-		if (r)
-			return r;
-	}
-
-	return r;
-}
-
 /**
  * amdgpu_vm_validate_pt_bos - validate the page table BOs
  *
@@ -217,32 +162,43 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 			      int (*validate)(void *p, struct amdgpu_bo *bo),
 			      void *param)
 {
-	uint64_t num_evictions;
+	struct ttm_bo_global *glob = adev->mman.bdev.glob;
+	int r;
 
-	/* We only need to validate the page tables
-	 * if they aren't already valid.
-	 */
-	num_evictions = atomic64_read(&adev->num_evictions);
-	if (num_evictions == vm->last_eviction_counter)
-		return 0;
+	spin_lock(&vm->status_lock);
+	while (!list_empty(&vm->evicted)) {
+		struct amdgpu_vm_bo_base *bo_base;
+		struct amdgpu_bo *bo;
 
-	return amdgpu_vm_validate_level(&vm->root, validate, param,
-					vm->use_cpu_for_update,
-					adev->mman.bdev.glob);
-}
+		bo_base = list_first_entry(&vm->evicted,
+					   struct amdgpu_vm_bo_base,
+					   vm_status);
+		spin_unlock(&vm->status_lock);
 
-/**
- * amdgpu_vm_check - helper for amdgpu_vm_ready
- */
-static int amdgpu_vm_check(void *param, struct amdgpu_bo *bo)
-{
-	/* if anything is swapped out don't swap it in here,
-	   just abort and wait for the next CS */
-	if (!amdgpu_bo_gpu_accessible(bo))
-		return -ERESTARTSYS;
+		bo = bo_base->bo;
+		BUG_ON(!bo);
+		if (bo->parent) {
+			r = validate(param, bo);
+			if (r)
+				return r;
 
-	if (bo->shadow && !amdgpu_bo_gpu_accessible(bo->shadow))
-		return -ERESTARTSYS;
+			spin_lock(&glob->lru_lock);
+			ttm_bo_move_to_lru_tail(&bo->tbo);
+			if (bo->shadow)
+				ttm_bo_move_to_lru_tail(&bo->shadow->tbo);
+			spin_unlock(&glob->lru_lock);
+		}
+
+		if (vm->use_cpu_for_update) {
+			r = amdgpu_bo_kmap(bo, NULL);
+			if (r)
+				return r;
+		}
+
+		spin_lock(&vm->status_lock);
+		list_del_init(&bo_base->vm_status);
+	}
+	spin_unlock(&vm->status_lock);
 
 	return 0;
 }
@@ -250,17 +206,19 @@ static int amdgpu_vm_check(void *param, struct amdgpu_bo *bo)
 /**
  * amdgpu_vm_ready - check VM is ready for updates
  *
- * @adev: amdgpu device
  * @vm: VM to check
  *
  * Check if all VM PDs/PTs are ready for updates
  */
-bool amdgpu_vm_ready(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+bool amdgpu_vm_ready(struct amdgpu_vm *vm)
 {
-	if (amdgpu_vm_check(NULL, vm->root.bo))
-		return false;
+	bool ready;
+
+	spin_lock(&vm->status_lock);
+	ready = list_empty(&vm->evicted);
+	spin_unlock(&vm->status_lock);
 
-	return !amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_vm_check, NULL);
+	return ready;
 }
 
 /**
@@ -326,11 +284,11 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 
 	/* walk over the address space and allocate the page tables */
 	for (pt_idx = from; pt_idx <= to; ++pt_idx) {
-		struct reservation_object *resv = vm->root.bo->tbo.resv;
+		struct reservation_object *resv = vm->root.base.bo->tbo.resv;
 		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
 		struct amdgpu_bo *pt;
 
-		if (!entry->bo) {
+		if (!entry->base.bo) {
 			r = amdgpu_bo_create(adev,
 					     amdgpu_vm_bo_size(adev, level),
 					     AMDGPU_GPU_PAGE_SIZE, true,
@@ -351,9 +309,12 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 			/* Keep a reference to the root directory to avoid
 			* freeing them up in the wrong order.
 			*/
-			pt->parent = amdgpu_bo_ref(vm->root.bo);
+			pt->parent = amdgpu_bo_ref(vm->root.base.bo);
 
-			entry->bo = pt;
+			entry->base.vm = vm;
+			entry->base.bo = pt;
+			list_add_tail(&entry->base.bo_list, &pt->va);
+			INIT_LIST_HEAD(&entry->base.vm_status);
 			entry->addr = 0;
 		}
 
@@ -1020,7 +981,7 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	int r;
 
 	amdgpu_sync_create(&sync);
-	amdgpu_sync_resv(adev, &sync, vm->root.bo->tbo.resv, owner);
+	amdgpu_sync_resv(adev, &sync, vm->root.base.bo->tbo.resv, owner);
 	r = amdgpu_sync_wait(&sync, true);
 	amdgpu_sync_free(&sync);
 
@@ -1059,10 +1020,10 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 
 	memset(&params, 0, sizeof(params));
 	params.adev = adev;
-	shadow = parent->bo->shadow;
+	shadow = parent->base.bo->shadow;
 
 	if (vm->use_cpu_for_update) {
-		pd_addr = (unsigned long)amdgpu_bo_kptr(parent->bo);
+		pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo);
 		r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM);
 		if (unlikely(r))
 			return r;
@@ -1078,7 +1039,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 		/* assume the worst case */
 		ndw += parent->last_entry_used * 6;
 
-		pd_addr = amdgpu_bo_gpu_offset(parent->bo);
+		pd_addr = amdgpu_bo_gpu_offset(parent->base.bo);
 
 		if (shadow) {
 			shadow_addr = amdgpu_bo_gpu_offset(shadow);
@@ -1098,7 +1059,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 
 	/* walk over the address space and update the directory */
 	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
-		struct amdgpu_bo *bo = parent->entries[pt_idx].bo;
+		struct amdgpu_bo *bo = parent->entries[pt_idx].base.bo;
 		uint64_t pde, pt;
 
 		if (bo == NULL)
@@ -1141,7 +1102,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 	}
 
 	if (count) {
-		if (vm->root.bo->shadow)
+		if (vm->root.base.bo->shadow)
 			params.func(&params, last_shadow, last_pt,
 				    count, incr, AMDGPU_PTE_VALID);
 
@@ -1154,7 +1115,8 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 			amdgpu_job_free(job);
 		} else {
 			amdgpu_ring_pad_ib(ring, params.ib);
-			amdgpu_sync_resv(adev, &job->sync, parent->bo->tbo.resv,
+			amdgpu_sync_resv(adev, &job->sync,
+					 parent->base.bo->tbo.resv,
 					 AMDGPU_FENCE_OWNER_VM);
 			if (shadow)
 				amdgpu_sync_resv(adev, &job->sync,
@@ -1167,7 +1129,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 			if (r)
 				goto error_free;
 
-			amdgpu_bo_fence(parent->bo, fence, true);
+			amdgpu_bo_fence(parent->base.bo, fence, true);
 			dma_fence_put(vm->last_dir_update);
 			vm->last_dir_update = dma_fence_get(fence);
 			dma_fence_put(fence);
@@ -1180,7 +1142,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
 		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
 
-		if (!entry->bo)
+		if (!entry->base.bo)
 			continue;
 
 		r = amdgpu_vm_update_level(adev, vm, entry, level + 1);
@@ -1213,7 +1175,7 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_vm_pt *parent)
 	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
 		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
 
-		if (!entry->bo)
+		if (!entry->base.bo)
 			continue;
 
 		entry->addr = ~0ULL;
@@ -1268,7 +1230,7 @@ void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr,
 	*entry = &p->vm->root;
 	while ((*entry)->entries) {
 		idx = addr >> (p->adev->vm_manager.block_size * level--);
-		idx %= amdgpu_bo_size((*entry)->bo) / 8;
+		idx %= amdgpu_bo_size((*entry)->base.bo) / 8;
 		*parent = *entry;
 		*entry = &(*entry)->entries[idx];
 	}
@@ -1304,7 +1266,7 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
 	    p->src ||
 	    !(flags & AMDGPU_PTE_VALID)) {
 
-		dst = amdgpu_bo_gpu_offset(entry->bo);
+		dst = amdgpu_bo_gpu_offset(entry->base.bo);
 		dst = amdgpu_gart_get_vm_pde(p->adev, dst);
 		flags = AMDGPU_PTE_VALID;
 	} else {
@@ -1330,18 +1292,18 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
 		tmp = p->pages_addr;
 		p->pages_addr = NULL;
 
-		pd_addr = (unsigned long)amdgpu_bo_kptr(parent->bo);
+		pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo);
 		pde = pd_addr + (entry - parent->entries) * 8;
 		amdgpu_vm_cpu_set_ptes(p, pde, dst, 1, 0, flags);
 
 		p->pages_addr = tmp;
 	} else {
-		if (parent->bo->shadow) {
-			pd_addr = amdgpu_bo_gpu_offset(parent->bo->shadow);
+		if (parent->base.bo->shadow) {
+			pd_addr = amdgpu_bo_gpu_offset(parent->base.bo->shadow);
 			pde = pd_addr + (entry - parent->entries) * 8;
 			amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
 		}
-		pd_addr = amdgpu_bo_gpu_offset(parent->bo);
+		pd_addr = amdgpu_bo_gpu_offset(parent->base.bo);
 		pde = pd_addr + (entry - parent->entries) * 8;
 		amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
 	}
@@ -1392,7 +1354,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 		if (entry->addr & AMDGPU_PDE_PTE)
 			continue;
 
-		pt = entry->bo;
+		pt = entry->base.bo;
 		if (use_cpu_update) {
 			pe_start = (unsigned long)amdgpu_bo_kptr(pt);
 		} else {
@@ -1612,12 +1574,12 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	if (r)
 		goto error_free;
 
-	r = amdgpu_sync_resv(adev, &job->sync, vm->root.bo->tbo.resv,
+	r = amdgpu_sync_resv(adev, &job->sync, vm->root.base.bo->tbo.resv,
 			     owner);
 	if (r)
 		goto error_free;
 
-	r = reservation_object_reserve_shared(vm->root.bo->tbo.resv);
+	r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv);
 	if (r)
 		goto error_free;
 
@@ -1632,7 +1594,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	if (r)
 		goto error_free;
 
-	amdgpu_bo_fence(vm->root.bo, f, true);
+	amdgpu_bo_fence(vm->root.base.bo, f, true);
 	dma_fence_put(*fence);
 	*fence = f;
 	return 0;
@@ -1927,7 +1889,7 @@ static void amdgpu_vm_free_mapping(struct amdgpu_device *adev,
  */
 static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 {
-	struct reservation_object *resv = vm->root.bo->tbo.resv;
+	struct reservation_object *resv = vm->root.base.bo->tbo.resv;
 	struct dma_fence *excl, **shared;
 	unsigned i, shared_count;
 	int r;
@@ -2414,12 +2376,25 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
  * Mark @bo as invalid.
  */
 void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
-			     struct amdgpu_bo *bo)
+			     struct amdgpu_bo *bo, bool evicted)
 {
 	struct amdgpu_vm_bo_base *bo_base;
 
 	list_for_each_entry(bo_base, &bo->va, bo_list) {
+		struct amdgpu_vm *vm = bo_base->vm;
+
 		bo_base->moved = true;
+		if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
+			spin_lock(&bo_base->vm->status_lock);
+			list_move(&bo_base->vm_status, &vm->evicted);
+			spin_unlock(&bo_base->vm->status_lock);
+			continue;
+		}
+
+		/* Don't add page tables to the moved state */
+		if (bo->tbo.type == ttm_bo_type_kernel)
+			continue;
+
 		spin_lock(&bo_base->vm->status_lock);
 		list_move(&bo_base->vm_status, &bo_base->vm->moved);
 		spin_unlock(&bo_base->vm->status_lock);
@@ -2507,6 +2482,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
 		vm->reserved_vmid[i] = NULL;
 	spin_lock_init(&vm->status_lock);
+	INIT_LIST_HEAD(&vm->evicted);
 	INIT_LIST_HEAD(&vm->moved);
 	INIT_LIST_HEAD(&vm->freed);
 
@@ -2551,30 +2527,31 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true,
 			     AMDGPU_GEM_DOMAIN_VRAM,
 			     flags,
-			     NULL, NULL, init_pde_value, &vm->root.bo);
+			     NULL, NULL, init_pde_value, &vm->root.base.bo);
 	if (r)
 		goto error_free_sched_entity;
 
-	r = amdgpu_bo_reserve(vm->root.bo, false);
-	if (r)
-		goto error_free_root;
-
-	vm->last_eviction_counter = atomic64_read(&adev->num_evictions);
+	vm->root.base.vm = vm;
+	list_add_tail(&vm->root.base.bo_list, &vm->root.base.bo->va);
+	INIT_LIST_HEAD(&vm->root.base.vm_status);
 
 	if (vm->use_cpu_for_update) {
-		r = amdgpu_bo_kmap(vm->root.bo, NULL);
+		r = amdgpu_bo_reserve(vm->root.base.bo, false);
 		if (r)
 			goto error_free_root;
-	}
 
-	amdgpu_bo_unreserve(vm->root.bo);
+		r = amdgpu_bo_kmap(vm->root.base.bo, NULL);
+		if (r)
+			goto error_free_root;
+		amdgpu_bo_unreserve(vm->root.base.bo);
+	}
 
 	return 0;
 
 error_free_root:
-	amdgpu_bo_unref(&vm->root.bo->shadow);
-	amdgpu_bo_unref(&vm->root.bo);
-	vm->root.bo = NULL;
+	amdgpu_bo_unref(&vm->root.base.bo->shadow);
+	amdgpu_bo_unref(&vm->root.base.bo);
+	vm->root.base.bo = NULL;
 
 error_free_sched_entity:
 	amd_sched_entity_fini(&ring->sched, &vm->entity);
@@ -2593,9 +2570,11 @@ static void amdgpu_vm_free_levels(struct amdgpu_vm_pt *level)
 {
 	unsigned i;
 
-	if (level->bo) {
-		amdgpu_bo_unref(&level->bo->shadow);
-		amdgpu_bo_unref(&level->bo);
+	if (level->base.bo) {
+		list_del(&level->base.bo_list);
+		list_del(&level->base.vm_status);
+		amdgpu_bo_unref(&level->base.bo->shadow);
+		amdgpu_bo_unref(&level->base.bo);
 	}
 
 	if (level->entries)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index ff093d4b5e11..4e465e817fe8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -111,12 +111,12 @@ struct amdgpu_vm_bo_base {
 };
 
 struct amdgpu_vm_pt {
-	struct amdgpu_bo	*bo;
-	uint64_t		addr;
+	struct amdgpu_vm_bo_base	base;
+	uint64_t			addr;
 
 	/* array of page tables, one for each directory entry */
-	struct amdgpu_vm_pt	*entries;
-	unsigned		last_entry_used;
+	struct amdgpu_vm_pt		*entries;
+	unsigned			last_entry_used;
 };
 
 struct amdgpu_vm {
@@ -126,6 +126,9 @@ struct amdgpu_vm {
 	/* protecting invalidated */
 	spinlock_t		status_lock;
 
+	/* BOs who needs a validation */
+	struct list_head	evicted;
+
 	/* BOs moved, but not yet updated in the PT */
 	struct list_head	moved;
 
@@ -135,7 +138,6 @@ struct amdgpu_vm {
 	/* contains the page directory */
 	struct amdgpu_vm_pt     root;
 	struct dma_fence	*last_dir_update;
-	uint64_t		last_eviction_counter;
 
 	/* protecting freed */
 	spinlock_t		freed_lock;
@@ -225,7 +227,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
 void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 			 struct list_head *validated,
 			 struct amdgpu_bo_list_entry *entry);
-bool amdgpu_vm_ready(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+bool amdgpu_vm_ready(struct amdgpu_vm *vm);
 int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 			      int (*callback)(void *p, struct amdgpu_bo *bo),
 			      void *param);
@@ -250,7 +252,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 			struct amdgpu_bo_va *bo_va,
 			bool clear);
 void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
-			     struct amdgpu_bo *bo);
+			     struct amdgpu_bo *bo, bool evicted);
 struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
 				       struct amdgpu_bo *bo);
 struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
-- 
cgit v1.2.3


From ea09729c930223edf492d0ca647c27e7eb0ccb12 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Wed, 9 Aug 2017 14:15:46 +0200
Subject: drm/amdgpu: rework page directory filling v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Keep track off relocated PDs/PTs instead of walking and checking all PDs.

v2: fix root PD handling

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com> (v1)
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 89 +++++++++++++++++++++++-----------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  3 ++
 2 files changed, 63 insertions(+), 29 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 6ff3c1bf035e..faa08d5728da 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -196,7 +196,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		}
 
 		spin_lock(&vm->status_lock);
-		list_del_init(&bo_base->vm_status);
+		list_move(&bo_base->vm_status, &vm->relocated);
 	}
 	spin_unlock(&vm->status_lock);
 
@@ -314,8 +314,10 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 			entry->base.vm = vm;
 			entry->base.bo = pt;
 			list_add_tail(&entry->base.bo_list, &pt->va);
-			INIT_LIST_HEAD(&entry->base.vm_status);
-			entry->addr = 0;
+			spin_lock(&vm->status_lock);
+			list_add(&entry->base.vm_status, &vm->relocated);
+			spin_unlock(&vm->status_lock);
+			entry->addr = ~0ULL;
 		}
 
 		if (level < adev->vm_manager.num_level) {
@@ -1000,18 +1002,17 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
  */
 static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 				  struct amdgpu_vm *vm,
-				  struct amdgpu_vm_pt *parent,
-				  unsigned level)
+				  struct amdgpu_vm_pt *parent)
 {
 	struct amdgpu_bo *shadow;
 	struct amdgpu_ring *ring = NULL;
 	uint64_t pd_addr, shadow_addr = 0;
-	uint32_t incr = amdgpu_vm_bo_size(adev, level + 1);
 	uint64_t last_pde = ~0, last_pt = ~0, last_shadow = ~0;
 	unsigned count = 0, pt_idx, ndw = 0;
 	struct amdgpu_job *job;
 	struct amdgpu_pte_update_params params;
 	struct dma_fence *fence = NULL;
+	uint32_t incr;
 
 	int r;
 
@@ -1059,12 +1060,17 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 
 	/* walk over the address space and update the directory */
 	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
-		struct amdgpu_bo *bo = parent->entries[pt_idx].base.bo;
+		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
+		struct amdgpu_bo *bo = entry->base.bo;
 		uint64_t pde, pt;
 
 		if (bo == NULL)
 			continue;
 
+		spin_lock(&vm->status_lock);
+		list_del_init(&entry->base.vm_status);
+		spin_unlock(&vm->status_lock);
+
 		pt = amdgpu_bo_gpu_offset(bo);
 		pt = amdgpu_gart_get_vm_pde(adev, pt);
 		/* Don't update huge pages here */
@@ -1075,6 +1081,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 		parent->entries[pt_idx].addr = pt | AMDGPU_PTE_VALID;
 
 		pde = pd_addr + pt_idx * 8;
+		incr = amdgpu_bo_size(bo);
 		if (((last_pde + 8 * count) != pde) ||
 		    ((last_pt + incr * count) != pt) ||
 		    (count == AMDGPU_VM_MAX_UPDATE_SIZE)) {
@@ -1135,20 +1142,6 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 			dma_fence_put(fence);
 		}
 	}
-	/*
-	 * Recurse into the subdirectories. This recursion is harmless because
-	 * we only have a maximum of 5 layers.
-	 */
-	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
-		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
-
-		if (!entry->base.bo)
-			continue;
-
-		r = amdgpu_vm_update_level(adev, vm, entry, level + 1);
-		if (r)
-			return r;
-	}
 
 	return 0;
 
@@ -1164,7 +1157,8 @@ error_free:
  *
  * Mark all PD level as invalid after an error.
  */
-static void amdgpu_vm_invalidate_level(struct amdgpu_vm_pt *parent)
+static void amdgpu_vm_invalidate_level(struct amdgpu_vm *vm,
+				       struct amdgpu_vm_pt *parent)
 {
 	unsigned pt_idx;
 
@@ -1179,7 +1173,10 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_vm_pt *parent)
 			continue;
 
 		entry->addr = ~0ULL;
-		amdgpu_vm_invalidate_level(entry);
+		spin_lock(&vm->status_lock);
+		list_move(&entry->base.vm_status, &vm->relocated);
+		spin_unlock(&vm->status_lock);
+		amdgpu_vm_invalidate_level(vm, entry);
 	}
 }
 
@@ -1197,9 +1194,38 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
 {
 	int r;
 
-	r = amdgpu_vm_update_level(adev, vm, &vm->root, 0);
-	if (r)
-		amdgpu_vm_invalidate_level(&vm->root);
+	spin_lock(&vm->status_lock);
+	while (!list_empty(&vm->relocated)) {
+		struct amdgpu_vm_bo_base *bo_base;
+		struct amdgpu_bo *bo;
+
+		bo_base = list_first_entry(&vm->relocated,
+					   struct amdgpu_vm_bo_base,
+					   vm_status);
+		spin_unlock(&vm->status_lock);
+
+		bo = bo_base->bo->parent;
+		if (bo) {
+			struct amdgpu_vm_bo_base *parent;
+			struct amdgpu_vm_pt *pt;
+
+			parent = list_first_entry(&bo->va,
+						  struct amdgpu_vm_bo_base,
+						  bo_list);
+			pt = container_of(parent, struct amdgpu_vm_pt, base);
+
+			r = amdgpu_vm_update_level(adev, vm, pt);
+			if (r) {
+				amdgpu_vm_invalidate_level(vm, &vm->root);
+				return r;
+			}
+			spin_lock(&vm->status_lock);
+		} else {
+			spin_lock(&vm->status_lock);
+			list_del_init(&bo_base->vm_status);
+		}
+	}
+	spin_unlock(&vm->status_lock);
 
 	if (vm->use_cpu_for_update) {
 		/* Flush HDP */
@@ -1601,7 +1627,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 
 error_free:
 	amdgpu_job_free(job);
-	amdgpu_vm_invalidate_level(&vm->root);
+	amdgpu_vm_invalidate_level(vm, &vm->root);
 	return r;
 }
 
@@ -2391,9 +2417,13 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
 			continue;
 		}
 
-		/* Don't add page tables to the moved state */
-		if (bo->tbo.type == ttm_bo_type_kernel)
+		if (bo->tbo.type == ttm_bo_type_kernel) {
+			spin_lock(&bo_base->vm->status_lock);
+			if (list_empty(&bo_base->vm_status))
+				list_add(&bo_base->vm_status, &vm->relocated);
+			spin_unlock(&bo_base->vm->status_lock);
 			continue;
+		}
 
 		spin_lock(&bo_base->vm->status_lock);
 		list_move(&bo_base->vm_status, &bo_base->vm->moved);
@@ -2483,6 +2513,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		vm->reserved_vmid[i] = NULL;
 	spin_lock_init(&vm->status_lock);
 	INIT_LIST_HEAD(&vm->evicted);
+	INIT_LIST_HEAD(&vm->relocated);
 	INIT_LIST_HEAD(&vm->moved);
 	INIT_LIST_HEAD(&vm->freed);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 4e465e817fe8..c3753afe9853 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -129,6 +129,9 @@ struct amdgpu_vm {
 	/* BOs who needs a validation */
 	struct list_head	evicted;
 
+	/* PT BOs which relocated and their parent need an update */
+	struct list_head	relocated;
+
 	/* BOs moved, but not yet updated in the PT */
 	struct list_head	moved;
 
-- 
cgit v1.2.3


From 570144c6522f5f332635d20dfa278cfcc764229c Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Wed, 30 Aug 2017 15:38:45 +0200
Subject: drm/amdgpu: cleanup the VM code a bit more
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The src isn't used any more after GART hack removal.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 20 +++++---------------
 1 file changed, 5 insertions(+), 15 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index faa08d5728da..1582feba9289 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1476,7 +1476,6 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
  *
  * @adev: amdgpu_device pointer
  * @exclusive: fence we need to sync to
- * @src: address where to copy page table entries from
  * @pages_addr: DMA addresses to use for mapping
  * @vm: requested vm
  * @start: start of mapped range
@@ -1490,7 +1489,6 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
  */
 static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 				       struct dma_fence *exclusive,
-				       uint64_t src,
 				       dma_addr_t *pages_addr,
 				       struct amdgpu_vm *vm,
 				       uint64_t start, uint64_t last,
@@ -1508,7 +1506,6 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	memset(&params, 0, sizeof(params));
 	params.adev = adev;
 	params.vm = vm;
-	params.src = src;
 
 	/* sync to everything on unmapping */
 	if (!(flags & AMDGPU_PTE_VALID))
@@ -1548,13 +1545,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	/* one PDE write for each huge page */
 	ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 6;
 
-	if (src) {
-		/* only copy commands needed */
-		ndw += ncmds * 7;
-
-		params.func = amdgpu_vm_do_copy_ptes;
-
-	} else if (pages_addr) {
+	if (pages_addr) {
 		/* copy commands needed */
 		ndw += ncmds * 7;
 
@@ -1579,7 +1570,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 
 	params.ib = &job->ibs[0];
 
-	if (!src && pages_addr) {
+	if (pages_addr) {
 		uint64_t *pte;
 		unsigned i;
 
@@ -1656,7 +1647,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
 				      struct drm_mm_node *nodes,
 				      struct dma_fence **fence)
 {
-	uint64_t pfn, src = 0, start = mapping->start;
+	uint64_t pfn, start = mapping->start;
 	int r;
 
 	/* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here
@@ -1711,8 +1702,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
 		addr += pfn << PAGE_SHIFT;
 
 		last = min((uint64_t)mapping->last, start + max_entries - 1);
-		r = amdgpu_vm_bo_update_mapping(adev, exclusive,
-						src, pages_addr, vm,
+		r = amdgpu_vm_bo_update_mapping(adev, exclusive, pages_addr, vm,
 						start, last, flags, addr,
 						fence);
 		if (r)
@@ -1973,7 +1963,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 		if (vm->pte_support_ats)
 			init_pte_value = AMDGPU_PTE_SYSTEM;
 
-		r = amdgpu_vm_bo_update_mapping(adev, NULL, 0, NULL, vm,
+		r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm,
 						mapping->start, mapping->last,
 						init_pte_value, 0, &f);
 		amdgpu_vm_free_mapping(adev, vm, mapping, f);
-- 
cgit v1.2.3


From 0f2fc435d837213202bec3b8e26fbb67a4d6df24 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Thu, 31 Aug 2017 10:46:20 +0200
Subject: drm/amdgpu: fix new PD update code for Vega10 v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We need to refer to the parent instead of the root BO for multi
level page tables on Vega10. Also don't set the PDE_PTE bit.

v2: Don't set the PDE_PTE bit either.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-and-Tested-by: Roger He <Hongbo.He@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 1582feba9289..d3c48557555c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -309,7 +309,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 			/* Keep a reference to the root directory to avoid
 			* freeing them up in the wrong order.
 			*/
-			pt->parent = amdgpu_bo_ref(vm->root.base.bo);
+			pt->parent = amdgpu_bo_ref(parent->base.bo);
 
 			entry->base.vm = vm;
 			entry->base.bo = pt;
@@ -317,7 +317,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 			spin_lock(&vm->status_lock);
 			list_add(&entry->base.vm_status, &vm->relocated);
 			spin_unlock(&vm->status_lock);
-			entry->addr = ~0ULL;
+			entry->addr = 0;
 		}
 
 		if (level < adev->vm_manager.num_level) {
-- 
cgit v1.2.3


From 73fb16e7ebee12953de32a7a2552e0cf2bf74ebf Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Wed, 16 Aug 2017 11:13:48 +0200
Subject: drm/amdgpu: add support for per VM BOs v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Per VM BOs are handled like VM PDs and PTs. They are always valid and don't
need to be specified in the BO lists.

v2: validate PDs/PTs first

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 79 ++++++++++++++++++++++++----------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  5 ++-
 3 files changed, 60 insertions(+), 26 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 8bf178a912f2..b57adc0723cb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -822,7 +822,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
 
 	}
 
-	r = amdgpu_vm_clear_moved(adev, vm, &p->job->sync);
+	r = amdgpu_vm_handle_moved(adev, vm, &p->job->sync);
 
 	if (amdgpu_vm_debug && p->bo_list) {
 		/* Invalidate all BOs to test for userspace bugs */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index d3c48557555c..26eb7dce5fe5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -189,14 +189,18 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 			spin_unlock(&glob->lru_lock);
 		}
 
-		if (vm->use_cpu_for_update) {
+		if (bo->tbo.type == ttm_bo_type_kernel &&
+		    vm->use_cpu_for_update) {
 			r = amdgpu_bo_kmap(bo, NULL);
 			if (r)
 				return r;
 		}
 
 		spin_lock(&vm->status_lock);
-		list_move(&bo_base->vm_status, &vm->relocated);
+		if (bo->tbo.type != ttm_bo_type_kernel)
+			list_move(&bo_base->vm_status, &vm->moved);
+		else
+			list_move(&bo_base->vm_status, &vm->relocated);
 	}
 	spin_unlock(&vm->status_lock);
 
@@ -1985,20 +1989,23 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 }
 
 /**
- * amdgpu_vm_clear_moved - clear moved BOs in the PT
+ * amdgpu_vm_handle_moved - handle moved BOs in the PT
  *
  * @adev: amdgpu_device pointer
  * @vm: requested vm
+ * @sync: sync object to add fences to
  *
- * Make sure all moved BOs are cleared in the PT.
+ * Make sure all BOs which are moved are updated in the PTs.
  * Returns 0 for success.
  *
- * PTs have to be reserved and mutex must be locked!
+ * PTs have to be reserved!
  */
-int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
-			    struct amdgpu_sync *sync)
+int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
+			   struct amdgpu_vm *vm,
+			   struct amdgpu_sync *sync)
 {
 	struct amdgpu_bo_va *bo_va = NULL;
+	bool clear;
 	int r = 0;
 
 	spin_lock(&vm->status_lock);
@@ -2007,7 +2014,10 @@ int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 			struct amdgpu_bo_va, base.vm_status);
 		spin_unlock(&vm->status_lock);
 
-		r = amdgpu_vm_bo_update(adev, bo_va, true);
+		/* Per VM BOs never need to bo cleared in the page tables */
+		clear = bo_va->base.bo->tbo.resv != vm->root.base.bo->tbo.resv;
+
+		r = amdgpu_vm_bo_update(adev, bo_va, clear);
 		if (r)
 			return r;
 
@@ -2059,6 +2069,37 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
 	return bo_va;
 }
 
+
+/**
+ * amdgpu_vm_bo_insert_mapping - insert a new mapping
+ *
+ * @adev: amdgpu_device pointer
+ * @bo_va: bo_va to store the address
+ * @mapping: the mapping to insert
+ *
+ * Insert a new mapping into all structures.
+ */
+static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
+				    struct amdgpu_bo_va *bo_va,
+				    struct amdgpu_bo_va_mapping *mapping)
+{
+	struct amdgpu_vm *vm = bo_va->base.vm;
+	struct amdgpu_bo *bo = bo_va->base.bo;
+
+	list_add(&mapping->list, &bo_va->invalids);
+	amdgpu_vm_it_insert(mapping, &vm->va);
+
+	if (mapping->flags & AMDGPU_PTE_PRT)
+		amdgpu_vm_prt_get(adev);
+
+	if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
+		spin_lock(&vm->status_lock);
+		list_move(&bo_va->base.vm_status, &vm->moved);
+		spin_unlock(&vm->status_lock);
+	}
+	trace_amdgpu_vm_bo_map(bo_va, mapping);
+}
+
 /**
  * amdgpu_vm_bo_map - map bo inside a vm
  *
@@ -2110,18 +2151,12 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
 	if (!mapping)
 		return -ENOMEM;
 
-	INIT_LIST_HEAD(&mapping->list);
 	mapping->start = saddr;
 	mapping->last = eaddr;
 	mapping->offset = offset;
 	mapping->flags = flags;
 
-	list_add(&mapping->list, &bo_va->invalids);
-	amdgpu_vm_it_insert(mapping, &vm->va);
-
-	if (flags & AMDGPU_PTE_PRT)
-		amdgpu_vm_prt_get(adev);
-	trace_amdgpu_vm_bo_map(bo_va, mapping);
+	amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
 
 	return 0;
 }
@@ -2148,7 +2183,6 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
 {
 	struct amdgpu_bo_va_mapping *mapping;
 	struct amdgpu_bo *bo = bo_va->base.bo;
-	struct amdgpu_vm *vm = bo_va->base.vm;
 	uint64_t eaddr;
 	int r;
 
@@ -2182,12 +2216,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
 	mapping->offset = offset;
 	mapping->flags = flags;
 
-	list_add(&mapping->list, &bo_va->invalids);
-	amdgpu_vm_it_insert(mapping, &vm->va);
-
-	if (flags & AMDGPU_PTE_PRT)
-		amdgpu_vm_prt_get(adev);
-	trace_amdgpu_vm_bo_map(bo_va, mapping);
+	amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
 
 	return 0;
 }
@@ -2402,7 +2431,11 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
 		bo_base->moved = true;
 		if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
 			spin_lock(&bo_base->vm->status_lock);
-			list_move(&bo_base->vm_status, &vm->evicted);
+			if (bo->tbo.type == ttm_bo_type_kernel)
+				list_move(&bo_base->vm_status, &vm->evicted);
+			else
+				list_move_tail(&bo_base->vm_status,
+					       &vm->evicted);
 			spin_unlock(&bo_base->vm->status_lock);
 			continue;
 		}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index c3753afe9853..90b7741d024b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -249,8 +249,9 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
 int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 			  struct amdgpu_vm *vm,
 			  struct dma_fence **fence);
-int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
-			  struct amdgpu_sync *sync);
+int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
+			   struct amdgpu_vm *vm,
+			   struct amdgpu_sync *sync);
 int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 			struct amdgpu_bo_va *bo_va,
 			bool clear);
-- 
cgit v1.2.3


From 6849d47cabc36e8f2697043f8c81e7719876dfd3 Mon Sep 17 00:00:00 2001
From: Roger He <Hongbo.He@amd.com>
Date: Wed, 30 Aug 2017 13:01:19 +0800
Subject: drm/amdgpu: handle all fragment sizes v4
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This can improve performance for some cases.

v2 (chk): handle all sizes, simplify the patch quite a bit
v3 (chk): adjust dw estimation as well
v4 (chk): use single loop, make end mask 64bit

Signed-off-by: Roger He <Hongbo.He@amd.com>
Signed-off-by: Christian König <christian.koenig@amd.com>
Tested-by: Roger He <Hongbo.He@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 55 ++++++++++++++++------------------
 1 file changed, 26 insertions(+), 29 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 26eb7dce5fe5..b83e0fa1f269 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1420,8 +1420,6 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
 				uint64_t start, uint64_t end,
 				uint64_t dst, uint64_t flags)
 {
-	int r;
-
 	/**
 	 * The MC L1 TLB supports variable sized pages, based on a fragment
 	 * field in the PTE. When this field is set to a non-zero value, page
@@ -1440,39 +1438,38 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
 	 * Userspace can support this by aligning virtual base address and
 	 * allocation size to the fragment size.
 	 */
-	unsigned pages_per_frag = params->adev->vm_manager.fragment_size;
-	uint64_t frag_flags = AMDGPU_PTE_FRAG(pages_per_frag);
-	uint64_t frag_align = 1 << pages_per_frag;
-
-	uint64_t frag_start = ALIGN(start, frag_align);
-	uint64_t frag_end = end & ~(frag_align - 1);
+	unsigned max_frag = params->adev->vm_manager.fragment_size;
+	int r;
 
 	/* system pages are non continuously */
-	if (params->src || !(flags & AMDGPU_PTE_VALID) ||
-	    (frag_start >= frag_end))
+	if (params->src || !(flags & AMDGPU_PTE_VALID))
 		return amdgpu_vm_update_ptes(params, start, end, dst, flags);
 
-	/* handle the 4K area at the beginning */
-	if (start != frag_start) {
-		r = amdgpu_vm_update_ptes(params, start, frag_start,
-					  dst, flags);
+	while (start != end) {
+		uint64_t frag_flags, frag_end;
+		unsigned frag;
+
+		/* This intentionally wraps around if no bit is set */
+		frag = min((unsigned)ffs(start) - 1,
+			   (unsigned)fls64(end - start) - 1);
+		if (frag >= max_frag) {
+			frag_flags = AMDGPU_PTE_FRAG(max_frag);
+			frag_end = end & ~((1ULL << max_frag) - 1);
+		} else {
+			frag_flags = AMDGPU_PTE_FRAG(frag);
+			frag_end = start + (1 << frag);
+		}
+
+		r = amdgpu_vm_update_ptes(params, start, frag_end, dst,
+					  flags | frag_flags);
 		if (r)
 			return r;
-		dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE;
-	}
-
-	/* handle the area in the middle */
-	r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst,
-				  flags | frag_flags);
-	if (r)
-		return r;
 
-	/* handle the 4K area at the end */
-	if (frag_end != end) {
-		dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE;
-		r = amdgpu_vm_update_ptes(params, frag_end, end, dst, flags);
+		dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
+		start = frag_end;
 	}
-	return r;
+
+	return 0;
 }
 
 /**
@@ -1562,8 +1559,8 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 		/* set page commands needed */
 		ndw += ncmds * 10;
 
-		/* two extra commands for begin/end of fragment */
-		ndw += 2 * 10;
+		/* extra commands for begin/end fragments */
+		ndw += 2 * 10 * adev->vm_manager.fragment_size;
 
 		params.func = amdgpu_vm_do_set_ptes;
 	}
-- 
cgit v1.2.3


From 481c2e94897eb74abcfb4a3cdb87f5f89499b93f Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Fri, 1 Sep 2017 14:46:19 +0200
Subject: drm/amdgpu: fix moved list handling in the VM
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Only move BOs to the moved/relocated list when they aren't already on a list.

This prevents accidential removal from the evicted list.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index b83e0fa1f269..c9223a5184de 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1178,7 +1178,8 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_vm *vm,
 
 		entry->addr = ~0ULL;
 		spin_lock(&vm->status_lock);
-		list_move(&entry->base.vm_status, &vm->relocated);
+		if (list_empty(&entry->base.vm_status))
+			list_add(&entry->base.vm_status, &vm->relocated);
 		spin_unlock(&vm->status_lock);
 		amdgpu_vm_invalidate_level(vm, entry);
 	}
@@ -2091,7 +2092,8 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
 
 	if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
 		spin_lock(&vm->status_lock);
-		list_move(&bo_va->base.vm_status, &vm->moved);
+		if (list_empty(&bo_va->base.vm_status))
+			list_add(&bo_va->base.vm_status, &vm->moved);
 		spin_unlock(&vm->status_lock);
 	}
 	trace_amdgpu_vm_bo_map(bo_va, mapping);
@@ -2446,7 +2448,8 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
 		}
 
 		spin_lock(&bo_base->vm->status_lock);
-		list_move(&bo_base->vm_status, &bo_base->vm->moved);
+		if (list_empty(&bo_base->vm_status))
+			list_add(&bo_base->vm_status, &vm->moved);
 		spin_unlock(&bo_base->vm->status_lock);
 	}
 }
-- 
cgit v1.2.3


From 862095237c392887819e98018d6dc187d8c78726 Mon Sep 17 00:00:00 2001
From: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Date: Thu, 7 Sep 2017 13:23:21 +0200
Subject: drm/amdgpu: Account for shadow PTs in mapping update IB size.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When amdgpu_vm_frag_ptes calls amdgpu_vm_update_ptes and the pt
has a shadow PT we mirror all the write to the shadow PT too, which
results in twice the commands.

Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index c9223a5184de..545531db66db 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1536,10 +1536,12 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	nptes = last - start + 1;
 
 	/*
-	 * reserve space for one command every (1 << BLOCK_SIZE)
+	 * reserve space for two commands every (1 << BLOCK_SIZE)
 	 *  entries or 2k dwords (whatever is smaller)
+         *
+         * The second command is for the shadow pagetables.
 	 */
-	ncmds = (nptes >> min(adev->vm_manager.block_size, 11u)) + 1;
+	ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1) * 2;
 
 	/* padding, etc. */
 	ndw = 64;
-- 
cgit v1.2.3


From aebc5e6f50f770ec9392c3ca804f18b30797dfa7 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Wed, 6 Sep 2017 16:55:16 +0200
Subject: drm/amdgpu: rework amdgpu_cs_find_mapping
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use the VM instead of the BO list to find the BO for a virtual address.

This fixes UVD/VCE in physical mode with VM local BOs.

Signed-off-by: Christian König <christian.koenig@amd.com>
Acked-by: Leo Liu <leo.liu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c     | 42 +++++++-----------------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c     | 17 ++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h     |  2 ++
 4 files changed, 30 insertions(+), 32 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index c30110a3024a..5f19227b35e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1479,46 +1479,24 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
 			   uint64_t addr, struct amdgpu_bo **bo,
 			   struct amdgpu_bo_va_mapping **map)
 {
+	struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
+	struct amdgpu_vm *vm = &fpriv->vm;
 	struct amdgpu_bo_va_mapping *mapping;
-	unsigned i;
 	int r;
 
-	if (!parser->bo_list)
-		return 0;
-
 	addr /= AMDGPU_GPU_PAGE_SIZE;
 
-	for (i = 0; i < parser->bo_list->num_entries; i++) {
-		struct amdgpu_bo_list_entry *lobj;
-
-		lobj = &parser->bo_list->array[i];
-		if (!lobj->bo_va)
-			continue;
-
-		list_for_each_entry(mapping, &lobj->bo_va->valids, list) {
-			if (mapping->start > addr ||
-			    addr > mapping->last)
-				continue;
-
-			*bo = lobj->bo_va->base.bo;
-			*map = mapping;
-			goto found;
-		}
-
-		list_for_each_entry(mapping, &lobj->bo_va->invalids, list) {
-			if (mapping->start > addr ||
-			    addr > mapping->last)
-				continue;
+	mapping = amdgpu_vm_bo_lookup_mapping(vm, addr);
+	if (!mapping || !mapping->bo_va || !mapping->bo_va->base.bo)
+		return -EINVAL;
 
-			*bo = lobj->bo_va->base.bo;
-			*map = mapping;
-			goto found;
-		}
-	}
+	*bo = mapping->bo_va->base.bo;
+	*map = mapping;
 
-	return -EINVAL;
+	/* Double check that the BO is reserved by this CS */
+	if (READ_ONCE((*bo)->tbo.resv->lock.ctx) != &parser->ticket)
+		return -EINVAL;
 
-found:
 	r = amdgpu_ttm_bind(&(*bo)->tbo, &(*bo)->tbo.mem);
 	if (unlikely(r))
 		return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 42492e63b3a2..a4891bea2ca8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -35,6 +35,7 @@
 
 /* bo virtual addresses in a vm */
 struct amdgpu_bo_va_mapping {
+	struct amdgpu_bo_va		*bo_va;
 	struct list_head		list;
 	struct rb_node			rb;
 	uint64_t			start;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 545531db66db..758bbb9e77f3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2086,6 +2086,7 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
 	struct amdgpu_vm *vm = bo_va->base.vm;
 	struct amdgpu_bo *bo = bo_va->base.bo;
 
+	mapping->bo_va = bo_va;
 	list_add(&mapping->list, &bo_va->invalids);
 	amdgpu_vm_it_insert(mapping, &vm->va);
 
@@ -2263,6 +2264,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
 
 	list_del(&mapping->list);
 	amdgpu_vm_it_remove(mapping, &vm->va);
+	mapping->bo_va = NULL;
 	trace_amdgpu_vm_bo_unmap(bo_va, mapping);
 
 	if (valid)
@@ -2348,6 +2350,7 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
 		if (tmp->last > eaddr)
 		    tmp->last = eaddr;
 
+		tmp->bo_va = NULL;
 		list_add(&tmp->list, &vm->freed);
 		trace_amdgpu_vm_bo_unmap(NULL, tmp);
 	}
@@ -2373,6 +2376,19 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
 	return 0;
 }
 
+/**
+ * amdgpu_vm_bo_lookup_mapping - find mapping by address
+ *
+ * @vm: the requested VM
+ *
+ * Find a mapping by it's address.
+ */
+struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm,
+							 uint64_t addr)
+{
+	return amdgpu_vm_it_iter_first(&vm->va, addr, addr);
+}
+
 /**
  * amdgpu_vm_bo_rmv - remove a bo to a specific vm
  *
@@ -2398,6 +2414,7 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
 	list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
 		list_del(&mapping->list);
 		amdgpu_vm_it_remove(mapping, &vm->va);
+		mapping->bo_va = NULL;
 		trace_amdgpu_vm_bo_unmap(bo_va, mapping);
 		list_add(&mapping->list, &vm->freed);
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 90b7741d024b..c1accd15efc8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -276,6 +276,8 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
 int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
 				struct amdgpu_vm *vm,
 				uint64_t saddr, uint64_t size);
+struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm,
+							 uint64_t addr);
 void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
 		      struct amdgpu_bo_va *bo_va);
 void amdgpu_vm_set_fragment_size(struct amdgpu_device *adev,
-- 
cgit v1.2.3


From d5884513a31df072879c89c80306d544467ee770 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Fri, 8 Sep 2017 14:09:41 +0200
Subject: drm/amdgpu: fix VM sync with always valid BOs v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

All users of a VM must always wait for updates with always
valid BOs to be completed.

v2: remove debugging leftovers, rename struct member

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Roger He <Hongbo.He@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 10 ++++++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 15 ++++++++++-----
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  2 +-
 3 files changed, 17 insertions(+), 10 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 5f19227b35e9..ff61073b7181 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -761,10 +761,6 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
 	if (r)
 		return r;
 
-	r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_dir_update);
-	if (r)
-		return r;
-
 	r = amdgpu_vm_clear_freed(adev, vm, NULL);
 	if (r)
 		return r;
@@ -819,6 +815,12 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
 	}
 
 	r = amdgpu_vm_handle_moved(adev, vm, &p->job->sync);
+	if (r)
+		return r;
+
+	r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_update);
+	if (r)
+		return r;
 
 	if (amdgpu_vm_debug && p->bo_list) {
 		/* Invalidate all BOs to test for userspace bugs */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 758bbb9e77f3..64baa3138965 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1141,9 +1141,8 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 				goto error_free;
 
 			amdgpu_bo_fence(parent->base.bo, fence, true);
-			dma_fence_put(vm->last_dir_update);
-			vm->last_dir_update = dma_fence_get(fence);
-			dma_fence_put(fence);
+			dma_fence_put(vm->last_update);
+			vm->last_update = fence;
 		}
 	}
 
@@ -1804,6 +1803,12 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 			trace_amdgpu_vm_bo_mapping(mapping);
 	}
 
+	if (bo_va->base.bo &&
+	    bo_va->base.bo->tbo.resv == vm->root.base.bo->tbo.resv) {
+		dma_fence_put(vm->last_update);
+		vm->last_update = dma_fence_get(bo_va->last_pt_update);
+	}
+
 	return 0;
 }
 
@@ -2587,7 +2592,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 			 vm->use_cpu_for_update ? "CPU" : "SDMA");
 	WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)),
 		  "CPU update of VM recommended only for large BAR system\n");
-	vm->last_dir_update = NULL;
+	vm->last_update = NULL;
 
 	flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
 			AMDGPU_GEM_CREATE_VRAM_CLEARED;
@@ -2693,7 +2698,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	}
 
 	amdgpu_vm_free_levels(&vm->root);
-	dma_fence_put(vm->last_dir_update);
+	dma_fence_put(vm->last_update);
 	for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
 		amdgpu_vm_free_reserved_vmid(adev, vm, i);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index c1accd15efc8..cb6a6222fc3f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -140,7 +140,7 @@ struct amdgpu_vm {
 
 	/* contains the page directory */
 	struct amdgpu_vm_pt     root;
-	struct dma_fence	*last_dir_update;
+	struct dma_fence	*last_update;
 
 	/* protecting freed */
 	spinlock_t		freed_lock;
-- 
cgit v1.2.3


From 4e55eb3879fea6d8c7d414cebaa5bff1da58b4a1 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Mon, 11 Sep 2017 16:54:59 +0200
Subject: drm/amdgpu: fix amdgpu_vm_handle_moved as well v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There is no guarantee that the last BO_VA actually needed an update.

Additional to that all command submissions must wait for moved BOs to
be cleared, not just the first one.

v2: Don't overwrite any newer fence.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 24 ++++++++++--------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  3 +--
 3 files changed, 12 insertions(+), 17 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index ff61073b7181..9f1202a4182f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -814,7 +814,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
 
 	}
 
-	r = amdgpu_vm_handle_moved(adev, vm, &p->job->sync);
+	r = amdgpu_vm_handle_moved(adev, vm);
 	if (r)
 		return r;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 64baa3138965..2df254cc802e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1743,7 +1743,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 	dma_addr_t *pages_addr = NULL;
 	struct ttm_mem_reg *mem;
 	struct drm_mm_node *nodes;
-	struct dma_fence *exclusive;
+	struct dma_fence *exclusive, **last_update;
 	uint64_t flags;
 	int r;
 
@@ -1769,6 +1769,11 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 	else
 		flags = 0x0;
 
+	if (clear || (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv))
+		last_update = &vm->last_update;
+	else
+		last_update = &bo_va->last_pt_update;
+
 	if (!clear && bo_va->base.moved) {
 		bo_va->base.moved = false;
 		list_splice_init(&bo_va->valids, &bo_va->invalids);
@@ -1780,7 +1785,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 	list_for_each_entry(mapping, &bo_va->invalids, list) {
 		r = amdgpu_vm_bo_split_mapping(adev, exclusive, pages_addr, vm,
 					       mapping, flags, nodes,
-					       &bo_va->last_pt_update);
+					       last_update);
 		if (r)
 			return r;
 	}
@@ -1803,12 +1808,6 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 			trace_amdgpu_vm_bo_mapping(mapping);
 	}
 
-	if (bo_va->base.bo &&
-	    bo_va->base.bo->tbo.resv == vm->root.base.bo->tbo.resv) {
-		dma_fence_put(vm->last_update);
-		vm->last_update = dma_fence_get(bo_va->last_pt_update);
-	}
-
 	return 0;
 }
 
@@ -2006,15 +2005,15 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
  * PTs have to be reserved!
  */
 int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
-			   struct amdgpu_vm *vm,
-			   struct amdgpu_sync *sync)
+			   struct amdgpu_vm *vm)
 {
-	struct amdgpu_bo_va *bo_va = NULL;
 	bool clear;
 	int r = 0;
 
 	spin_lock(&vm->status_lock);
 	while (!list_empty(&vm->moved)) {
+		struct amdgpu_bo_va *bo_va;
+
 		bo_va = list_first_entry(&vm->moved,
 			struct amdgpu_bo_va, base.vm_status);
 		spin_unlock(&vm->status_lock);
@@ -2030,9 +2029,6 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
 	}
 	spin_unlock(&vm->status_lock);
 
-	if (bo_va)
-		r = amdgpu_sync_fence(adev, sync, bo_va->last_pt_update);
-
 	return r;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index cb6a6222fc3f..48c58ae4bb3a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -250,8 +250,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 			  struct amdgpu_vm *vm,
 			  struct dma_fence **fence);
 int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
-			   struct amdgpu_vm *vm,
-			   struct amdgpu_sync *sync);
+			   struct amdgpu_vm *vm);
 int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 			struct amdgpu_bo_va *bo_va,
 			bool clear);
-- 
cgit v1.2.3


From ca290da8f6345c8e8e180256fbe092c751fa9654 Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Fri, 25 Aug 2017 20:15:04 -0400
Subject: drm/amdgpu: Fix error handling in amdgpu_vm_init

Make sure vm->root.bo is not left reserved if amdgpu_bo_kmap fails.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 2df254cc802e..2196bca7331c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2615,9 +2615,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 			goto error_free_root;
 
 		r = amdgpu_bo_kmap(vm->root.base.bo, NULL);
+		amdgpu_bo_unreserve(vm->root.base.bo);
 		if (r)
 			goto error_free_root;
-		amdgpu_bo_unreserve(vm->root.base.bo);
 	}
 
 	return 0;
-- 
cgit v1.2.3


From 02208441cc3a5110191996bb129db39ff10e7395 Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Fri, 25 Aug 2017 20:40:26 -0400
Subject: drm/amdgpu: Add PASID management

Allows assigning a PASID to a VM for identifying VMs involved in page
faults. The global PASID manager is also exported in the KFD
interface so that AMDGPU and KFD can share the PASID space.

PASIDs of different sizes can be requested. On APUs, the PASID size
is deterined by the capabilities of the IOMMU. So KFD must be able
to allocate PASIDs in a smaller range.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c |  2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c |  2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c           |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c            | 75 ++++++++++++++++++++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h            | 14 ++++-
 drivers/gpu/drm/amd/include/kgd_kfd_interface.h   |  6 ++
 6 files changed, 97 insertions(+), 4 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index b9dbbf9cb8b0..dc7e25cce741 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -169,6 +169,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
 	.get_vmem_size = get_vmem_size,
 	.get_gpu_clock_counter = get_gpu_clock_counter,
 	.get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
+	.alloc_pasid = amdgpu_vm_alloc_pasid,
+	.free_pasid = amdgpu_vm_free_pasid,
 	.program_sh_mem_settings = kgd_program_sh_mem_settings,
 	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
 	.init_pipeline = kgd_init_pipeline,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 309f2419c6d8..c678c69936a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -128,6 +128,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
 	.get_vmem_size = get_vmem_size,
 	.get_gpu_clock_counter = get_gpu_clock_counter,
 	.get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
+	.alloc_pasid = amdgpu_vm_alloc_pasid,
+	.free_pasid = amdgpu_vm_free_pasid,
 	.program_sh_mem_settings = kgd_program_sh_mem_settings,
 	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
 	.init_pipeline = kgd_init_pipeline,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index e16229000a98..79d9ab43d42c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -825,7 +825,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
 	}
 
 	r = amdgpu_vm_init(adev, &fpriv->vm,
-			   AMDGPU_VM_CONTEXT_GFX);
+			   AMDGPU_VM_CONTEXT_GFX, 0);
 	if (r) {
 		kfree(fpriv);
 		goto out_suspend;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 2196bca7331c..9b795915cab1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -27,11 +27,58 @@
  */
 #include <linux/dma-fence-array.h>
 #include <linux/interval_tree_generic.h>
+#include <linux/idr.h>
 #include <drm/drmP.h>
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 
+/*
+ * PASID manager
+ *
+ * PASIDs are global address space identifiers that can be shared
+ * between the GPU, an IOMMU and the driver. VMs on different devices
+ * may use the same PASID if they share the same address
+ * space. Therefore PASIDs are allocated using a global IDA. VMs are
+ * looked up from the PASID per amdgpu_device.
+ */
+static DEFINE_IDA(amdgpu_vm_pasid_ida);
+
+/**
+ * amdgpu_vm_alloc_pasid - Allocate a PASID
+ * @bits: Maximum width of the PASID in bits, must be at least 1
+ *
+ * Allocates a PASID of the given width while keeping smaller PASIDs
+ * available if possible.
+ *
+ * Returns a positive integer on success. Returns %-EINVAL if bits==0.
+ * Returns %-ENOSPC if no PASID was available. Returns %-ENOMEM on
+ * memory allocation failure.
+ */
+int amdgpu_vm_alloc_pasid(unsigned int bits)
+{
+	int pasid = -EINVAL;
+
+	for (bits = min(bits, 31U); bits > 0; bits--) {
+		pasid = ida_simple_get(&amdgpu_vm_pasid_ida,
+				       1U << (bits - 1), 1U << bits,
+				       GFP_KERNEL);
+		if (pasid != -ENOSPC)
+			break;
+	}
+
+	return pasid;
+}
+
+/**
+ * amdgpu_vm_free_pasid - Free a PASID
+ * @pasid: PASID to free
+ */
+void amdgpu_vm_free_pasid(unsigned int pasid)
+{
+	ida_simple_remove(&amdgpu_vm_pasid_ida, pasid);
+}
+
 /*
  * GPUVM
  * GPUVM is similar to the legacy gart on older asics, however
@@ -2539,7 +2586,7 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size, uint32_
  * Init @vm fields.
  */
 int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
-		   int vm_context)
+		   int vm_context, unsigned int pasid)
 {
 	const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
 		AMDGPU_VM_PTE_COUNT(adev) * 8);
@@ -2620,6 +2667,19 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 			goto error_free_root;
 	}
 
+	if (pasid) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
+		r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, pasid + 1,
+			      GFP_ATOMIC);
+		spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
+		if (r < 0)
+			goto error_free_root;
+
+		vm->pasid = pasid;
+	}
+
 	return 0;
 
 error_free_root:
@@ -2673,6 +2733,14 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt;
 	int i;
 
+	if (vm->pasid) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
+		idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
+		spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
+	}
+
 	amd_sched_entity_fini(vm->entity.sched, &vm->entity);
 
 	if (!RB_EMPTY_ROOT(&vm->va)) {
@@ -2752,6 +2820,8 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
 	adev->vm_manager.vm_update_mode = 0;
 #endif
 
+	idr_init(&adev->vm_manager.pasid_idr);
+	spin_lock_init(&adev->vm_manager.pasid_lock);
 }
 
 /**
@@ -2765,6 +2835,9 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
 {
 	unsigned i, j;
 
+	WARN_ON(!idr_is_empty(&adev->vm_manager.pasid_idr));
+	idr_destroy(&adev->vm_manager.pasid_idr);
+
 	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
 		struct amdgpu_vm_id_manager *id_mgr =
 			&adev->vm_manager.id_mgr[i];
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 48c58ae4bb3a..7873dfa8c0f9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -25,6 +25,7 @@
 #define __AMDGPU_VM_H__
 
 #include <linux/rbtree.h>
+#include <linux/idr.h>
 
 #include "gpu_scheduler.h"
 #include "amdgpu_sync.h"
@@ -148,8 +149,9 @@ struct amdgpu_vm {
 	/* Scheduler entity for page table updates */
 	struct amd_sched_entity	entity;
 
-	/* client id */
+	/* client id and PASID (TODO: replace client_id with PASID) */
 	u64                     client_id;
+	unsigned int		pasid;
 	/* dedicated to vm */
 	struct amdgpu_vm_id	*reserved_vmid[AMDGPU_MAX_VMHUBS];
 
@@ -220,12 +222,20 @@ struct amdgpu_vm_manager {
 	 * BIT1[= 0] Compute updated by SDMA [= 1] by CPU
 	 */
 	int					vm_update_mode;
+
+	/* PASID to VM mapping, will be used in interrupt context to
+	 * look up VM of a page fault
+	 */
+	struct idr				pasid_idr;
+	spinlock_t				pasid_lock;
 };
 
+int amdgpu_vm_alloc_pasid(unsigned int bits);
+void amdgpu_vm_free_pasid(unsigned int pasid);
 void amdgpu_vm_manager_init(struct amdgpu_device *adev);
 void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
 int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
-		   int vm_context);
+		   int vm_context, unsigned int pasid);
 void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
 void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 			 struct list_head *validated,
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index 94277cb734d2..f516fd10e6ba 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -112,6 +112,9 @@ struct tile_config {
  *
  * @get_max_engine_clock_in_mhz: Retrieves maximum GPU clock in MHz
  *
+ * @alloc_pasid: Allocate a PASID
+ * @free_pasid: Free a PASID
+ *
  * @program_sh_mem_settings: A function that should initiate the memory
  * properties such as main aperture memory type (cache / non cached) and
  * secondary aperture base address, size and memory type.
@@ -160,6 +163,9 @@ struct kfd2kgd_calls {
 
 	uint32_t (*get_max_engine_clock_in_mhz)(struct kgd_dev *kgd);
 
+	int (*alloc_pasid)(unsigned int bits);
+	void (*free_pasid)(unsigned int pasid);
+
 	/* Register access functions */
 	void (*program_sh_mem_settings)(struct kgd_dev *kgd, uint32_t vmid,
 			uint32_t sh_mem_config,	uint32_t sh_mem_ape1_base,
-- 
cgit v1.2.3


From a2f14820e3493145c25095873d4a510a1b25efdc Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Sat, 26 Aug 2017 02:43:06 -0400
Subject: drm/amdgpu: Track pending retry faults in IH and VM (v2)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

IH tracks pending retry faults in a hash table for fast lookup in
interrupt context. Each VM has a short FIFO of pending VM faults for
processing in a bottom half.

The IH prescreening stage adds retry faults and filters out repeated
retry interrupts to minimize the impact of interrupt storms.

It's the VM's responsibility remove pending faults once they are
handled. For now this is only done when the VM is destroyed.

v2:
- Made the hash table smaller and the FIFO longer. I never want the
  FIFO to fill up, because that would make prescreen take longer.
  128 pending page faults should be enough to keep migrations busy.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com> (v1)
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/Kconfig                |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c | 76 +++++++++++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h | 12 ++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c |  7 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  7 +++
 drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 78 +++++++++++++++++++++++++++++++++-
 6 files changed, 180 insertions(+), 1 deletion(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 1989c276138c..7fb8492d8e63 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -184,6 +184,7 @@ config DRM_AMDGPU
 	select BACKLIGHT_CLASS_DEVICE
 	select BACKLIGHT_LCD_SUPPORT
 	select INTERVAL_TREE
+	select CHASH
 	help
 	  Choose this option if you have a recent AMD Radeon graphics card.
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
index c834a40cfad6..f5f27e4f0f7f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
@@ -196,3 +196,79 @@ restart_ih:
 
 	return IRQ_HANDLED;
 }
+
+/**
+ * amdgpu_ih_add_fault - Add a page fault record
+ *
+ * @adev: amdgpu device pointer
+ * @key: 64-bit encoding of PASID and address
+ *
+ * This should be called when a retry page fault interrupt is
+ * received. If this is a new page fault, it will be added to a hash
+ * table. The return value indicates whether this is a new fault, or
+ * a fault that was already known and is already being handled.
+ *
+ * If there are too many pending page faults, this will fail. Retry
+ * interrupts should be ignored in this case until there is enough
+ * free space.
+ *
+ * Returns 0 if the fault was added, 1 if the fault was already known,
+ * -ENOSPC if there are too many pending faults.
+ */
+int amdgpu_ih_add_fault(struct amdgpu_device *adev, u64 key)
+{
+	unsigned long flags;
+	int r = -ENOSPC;
+
+	if (WARN_ON_ONCE(!adev->irq.ih.faults))
+		/* Should be allocated in <IP>_ih_sw_init on GPUs that
+		 * support retry faults and require retry filtering.
+		 */
+		return r;
+
+	spin_lock_irqsave(&adev->irq.ih.faults->lock, flags);
+
+	/* Only let the hash table fill up to 50% for best performance */
+	if (adev->irq.ih.faults->count >= (1 << (AMDGPU_PAGEFAULT_HASH_BITS-1)))
+		goto unlock_out;
+
+	r = chash_table_copy_in(&adev->irq.ih.faults->hash, key, NULL);
+	if (!r)
+		adev->irq.ih.faults->count++;
+
+	/* chash_table_copy_in should never fail unless we're losing count */
+	WARN_ON_ONCE(r < 0);
+
+unlock_out:
+	spin_unlock_irqrestore(&adev->irq.ih.faults->lock, flags);
+	return r;
+}
+
+/**
+ * amdgpu_ih_clear_fault - Remove a page fault record
+ *
+ * @adev: amdgpu device pointer
+ * @key: 64-bit encoding of PASID and address
+ *
+ * This should be called when a page fault has been handled. Any
+ * future interrupt with this key will be processed as a new
+ * page fault.
+ */
+void amdgpu_ih_clear_fault(struct amdgpu_device *adev, u64 key)
+{
+	unsigned long flags;
+	int r;
+
+	if (!adev->irq.ih.faults)
+		return;
+
+	spin_lock_irqsave(&adev->irq.ih.faults->lock, flags);
+
+	r = chash_table_remove(&adev->irq.ih.faults->hash, key, NULL);
+	if (!WARN_ON_ONCE(r < 0)) {
+		adev->irq.ih.faults->count--;
+		WARN_ON_ONCE(adev->irq.ih.faults->count < 0);
+	}
+
+	spin_unlock_irqrestore(&adev->irq.ih.faults->lock, flags);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
index 3de8e74e5b3a..ada89358e220 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
@@ -24,6 +24,8 @@
 #ifndef __AMDGPU_IH_H__
 #define __AMDGPU_IH_H__
 
+#include <linux/chash.h>
+
 struct amdgpu_device;
  /*
   * vega10+ IH clients
@@ -69,6 +71,13 @@ enum amdgpu_ih_clientid
 
 #define AMDGPU_IH_CLIENTID_LEGACY 0
 
+#define AMDGPU_PAGEFAULT_HASH_BITS 8
+struct amdgpu_retryfault_hashtable {
+	DECLARE_CHASH_TABLE(hash, AMDGPU_PAGEFAULT_HASH_BITS, 8, 0);
+	spinlock_t	lock;
+	int		count;
+};
+
 /*
  * R6xx+ IH ring
  */
@@ -87,6 +96,7 @@ struct amdgpu_ih_ring {
 	bool			use_doorbell;
 	bool			use_bus_addr;
 	dma_addr_t		rb_dma_addr; /* only used when use_bus_addr = true */
+	struct amdgpu_retryfault_hashtable *faults;
 };
 
 #define AMDGPU_IH_SRC_DATA_MAX_SIZE_DW 4
@@ -109,5 +119,7 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, unsigned ring_size,
 			bool use_bus_addr);
 void amdgpu_ih_ring_fini(struct amdgpu_device *adev);
 int amdgpu_ih_process(struct amdgpu_device *adev);
+int amdgpu_ih_add_fault(struct amdgpu_device *adev, u64 key);
+void amdgpu_ih_clear_fault(struct amdgpu_device *adev, u64 key);
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 9b795915cab1..6c1133298b17 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2680,6 +2680,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		vm->pasid = pasid;
 	}
 
+	INIT_KFIFO(vm->faults);
+
 	return 0;
 
 error_free_root:
@@ -2731,8 +2733,13 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 {
 	struct amdgpu_bo_va_mapping *mapping, *tmp;
 	bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt;
+	u64 fault;
 	int i;
 
+	/* Clear pending page faults from IH when the VM is destroyed */
+	while (kfifo_get(&vm->faults, &fault))
+		amdgpu_ih_clear_fault(adev, fault);
+
 	if (vm->pasid) {
 		unsigned long flags;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 7873dfa8c0f9..447ed6e7e586 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -120,6 +120,10 @@ struct amdgpu_vm_pt {
 	unsigned			last_entry_used;
 };
 
+#define AMDGPU_VM_FAULT(pasid, addr) (((u64)(pasid) << 48) | (addr))
+#define AMDGPU_VM_FAULT_PASID(fault) ((u64)(fault) >> 48)
+#define AMDGPU_VM_FAULT_ADDR(fault)  ((u64)(fault) & 0xfffffffff000ULL)
+
 struct amdgpu_vm {
 	/* tree of virtual addresses mapped */
 	struct rb_root		va;
@@ -160,6 +164,9 @@ struct amdgpu_vm {
 
 	/* Flag to indicate ATS support from PTE for GFX9 */
 	bool			pte_support_ats;
+
+	/* Up to 128 pending page faults */
+	DECLARE_KFIFO(faults, u64, 128);
 };
 
 struct amdgpu_vm_id {
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
index eda4771e273f..dd6af2176d3e 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -235,8 +235,73 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev)
  */
 static bool vega10_ih_prescreen_iv(struct amdgpu_device *adev)
 {
-	/* TODO: Filter known pending page faults */
+	u32 ring_index = adev->irq.ih.rptr >> 2;
+	u32 dw0, dw3, dw4, dw5;
+	u16 pasid;
+	u64 addr, key;
+	struct amdgpu_vm *vm;
+	int r;
+
+	dw0 = le32_to_cpu(adev->irq.ih.ring[ring_index + 0]);
+	dw3 = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]);
+	dw4 = le32_to_cpu(adev->irq.ih.ring[ring_index + 4]);
+	dw5 = le32_to_cpu(adev->irq.ih.ring[ring_index + 5]);
+
+	/* Filter retry page faults, let only the first one pass. If
+	 * there are too many outstanding faults, ignore them until
+	 * some faults get cleared.
+	 */
+	switch (dw0 & 0xff) {
+	case AMDGPU_IH_CLIENTID_VMC:
+	case AMDGPU_IH_CLIENTID_UTCL2:
+		break;
+	default:
+		/* Not a VM fault */
+		return true;
+	}
+
+	/* Not a retry fault */
+	if (!(dw5 & 0x80))
+		return true;
+
+	pasid = dw3 & 0xffff;
+	/* No PASID, can't identify faulting process */
+	if (!pasid)
+		return true;
+
+	addr = ((u64)(dw5 & 0xf) << 44) | ((u64)dw4 << 12);
+	key = AMDGPU_VM_FAULT(pasid, addr);
+	r = amdgpu_ih_add_fault(adev, key);
+
+	/* Hash table is full or the fault is already being processed,
+	 * ignore further page faults
+	 */
+	if (r != 0)
+		goto ignore_iv;
+
+	/* Track retry faults in per-VM fault FIFO. */
+	spin_lock(&adev->vm_manager.pasid_lock);
+	vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
+	spin_unlock(&adev->vm_manager.pasid_lock);
+	if (WARN_ON_ONCE(!vm)) {
+		/* VM not found, process it normally */
+		amdgpu_ih_clear_fault(adev, key);
+		return true;
+	}
+	/* No locking required with single writer and single reader */
+	r = kfifo_put(&vm->faults, key);
+	if (!r) {
+		/* FIFO is full. Ignore it until there is space */
+		amdgpu_ih_clear_fault(adev, key);
+		goto ignore_iv;
+	}
+
+	/* It's the first fault for this address, process it normally */
 	return true;
+
+ignore_iv:
+	adev->irq.ih.rptr += 32;
+	return false;
 }
 
 /**
@@ -323,6 +388,14 @@ static int vega10_ih_sw_init(void *handle)
 	adev->irq.ih.use_doorbell = true;
 	adev->irq.ih.doorbell_index = AMDGPU_DOORBELL64_IH << 1;
 
+	adev->irq.ih.faults = kmalloc(sizeof(*adev->irq.ih.faults), GFP_KERNEL);
+	if (!adev->irq.ih.faults)
+		return -ENOMEM;
+	INIT_CHASH_TABLE(adev->irq.ih.faults->hash,
+			 AMDGPU_PAGEFAULT_HASH_BITS, 8, 0);
+	spin_lock_init(&adev->irq.ih.faults->lock);
+	adev->irq.ih.faults->count = 0;
+
 	r = amdgpu_irq_init(adev);
 
 	return r;
@@ -335,6 +408,9 @@ static int vega10_ih_sw_fini(void *handle)
 	amdgpu_irq_fini(adev);
 	amdgpu_ih_ring_fini(adev);
 
+	kfree(adev->irq.ih.faults);
+	adev->irq.ih.faults = NULL;
+
 	return 0;
 }
 
-- 
cgit v1.2.3


From 7bdc53f925af085ffa0580f10489f82b36cc2f1c Mon Sep 17 00:00:00 2001
From: Yong Zhao <yong.zhao@amd.com>
Date: Fri, 15 Sep 2017 18:20:37 -0400
Subject: drm/amdgpu: Fix a bug in amdgpu_fill_buffer()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When max_bytes is not 8 bytes aligned and bo size is larger than
max_bytes, the last 8 bytes in a ttm node may be left unchanged.
For example, on pre SDMA 4.0, max_bytes = 0x1fffff, and the bo size
is 0x200000, the problem will happen.

In order to fix the problem, we separately store the max nums of
PTEs/PDEs a single operation can set in amdgpu_vm_pte_funcs
structure, rather than inferring it from bytes limit of SDMA
constant fill, i.e. fill_max_bytes.

Together with the fix, we replace the hard code value "10" in
amdgpu_vm_bo_update_mapping() with the corresponding values from
structure amdgpu_vm_pte_funcs.

Signed-off-by: Yong Zhao <yong.zhao@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h     | 7 +++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 8 ++++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 5 +++--
 drivers/gpu/drm/amd/amdgpu/cik_sdma.c   | 3 +++
 drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c  | 3 +++
 drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c  | 4 ++++
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c  | 3 +++
 drivers/gpu/drm/amd/amdgpu/si_dma.c     | 3 +++
 8 files changed, 30 insertions(+), 6 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index f085c8c9f267..1bf42a8ef23e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -302,6 +302,13 @@ struct amdgpu_vm_pte_funcs {
 	void (*write_pte)(struct amdgpu_ib *ib, uint64_t pe,
 			  uint64_t value, unsigned count,
 			  uint32_t incr);
+
+	/* maximum nums of PTEs/PDEs in a single operation */
+	uint32_t	set_max_nums_pte_pde;
+
+	/* number of dw to reserve per operation */
+	unsigned	set_pte_pde_num_dw;
+
 	/* for linear pte/pde updates without addr mapping */
 	void (*set_pte_pde)(struct amdgpu_ib *ib,
 			    uint64_t pe,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index ce435dbbb398..1086f039d8e3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1527,8 +1527,8 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 		       struct dma_fence **fence)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-	/* max_bytes applies to SDMA_OP_PTEPDE as well as SDMA_OP_CONST_FILL*/
-	uint32_t max_bytes = adev->mman.buffer_funcs->fill_max_bytes;
+	uint32_t max_bytes = 8 *
+			adev->vm_manager.vm_pte_funcs->set_max_nums_pte_pde;
 	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
 
 	struct drm_mm_node *mm_node;
@@ -1560,8 +1560,8 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 		++mm_node;
 	}
 
-	/* 10 double words for each SDMA_OP_PTEPDE cmd */
-	num_dw = num_loops * 10;
+	/* num of dwords for each SDMA_OP_PTEPDE cmd */
+	num_dw = num_loops * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw;
 
 	/* for IB padding */
 	num_dw += 64;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 6c1133298b17..28d16781377f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1606,10 +1606,11 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 
 	} else {
 		/* set page commands needed */
-		ndw += ncmds * 10;
+		ndw += ncmds * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw;
 
 		/* extra commands for begin/end fragments */
-		ndw += 2 * 10 * adev->vm_manager.fragment_size;
+		ndw += 2 * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw
+				* adev->vm_manager.fragment_size;
 
 		params.func = amdgpu_vm_do_set_ptes;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index f508f4d01e4a..c64dcd1883b5 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -1389,6 +1389,9 @@ static void cik_sdma_set_buffer_funcs(struct amdgpu_device *adev)
 static const struct amdgpu_vm_pte_funcs cik_sdma_vm_pte_funcs = {
 	.copy_pte = cik_sdma_vm_copy_pte,
 	.write_pte = cik_sdma_vm_write_pte,
+
+	.set_max_nums_pte_pde = 0x1fffff >> 3,
+	.set_pte_pde_num_dw = 10,
 	.set_pte_pde = cik_sdma_vm_set_pte_pde,
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index f2d0710258cb..c05eb74d3404 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -1326,6 +1326,9 @@ static void sdma_v2_4_set_buffer_funcs(struct amdgpu_device *adev)
 static const struct amdgpu_vm_pte_funcs sdma_v2_4_vm_pte_funcs = {
 	.copy_pte = sdma_v2_4_vm_copy_pte,
 	.write_pte = sdma_v2_4_vm_write_pte,
+
+	.set_max_nums_pte_pde = 0x1fffff >> 3,
+	.set_pte_pde_num_dw = 10,
 	.set_pte_pde = sdma_v2_4_vm_set_pte_pde,
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index 4858c9974c86..2079340656d2 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -1750,6 +1750,10 @@ static void sdma_v3_0_set_buffer_funcs(struct amdgpu_device *adev)
 static const struct amdgpu_vm_pte_funcs sdma_v3_0_vm_pte_funcs = {
 	.copy_pte = sdma_v3_0_vm_copy_pte,
 	.write_pte = sdma_v3_0_vm_write_pte,
+
+	/* not 0x3fffff due to HW limitation */
+	.set_max_nums_pte_pde = 0x3fffe0 >> 3,
+	.set_pte_pde_num_dw = 10,
 	.set_pte_pde = sdma_v3_0_vm_set_pte_pde,
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 3524060f8480..2605faf56dff 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -1716,6 +1716,9 @@ static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev)
 static const struct amdgpu_vm_pte_funcs sdma_v4_0_vm_pte_funcs = {
 	.copy_pte = sdma_v4_0_vm_copy_pte,
 	.write_pte = sdma_v4_0_vm_write_pte,
+
+	.set_max_nums_pte_pde = 0x400000 >> 3,
+	.set_pte_pde_num_dw = 10,
 	.set_pte_pde = sdma_v4_0_vm_set_pte_pde,
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c
index 112969f3301a..adb6ae7d63ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
@@ -889,6 +889,9 @@ static void si_dma_set_buffer_funcs(struct amdgpu_device *adev)
 static const struct amdgpu_vm_pte_funcs si_dma_vm_pte_funcs = {
 	.copy_pte = si_dma_vm_copy_pte,
 	.write_pte = si_dma_vm_write_pte,
+
+	.set_max_nums_pte_pde = 0xffff8 >> 3,
+	.set_pte_pde_num_dw = 9,
 	.set_pte_pde = si_dma_vm_set_pte_pde,
 };
 
-- 
cgit v1.2.3


From e6d921974a51e607515b39baa8d1c3f1a27d008b Mon Sep 17 00:00:00 2001
From: Yong Zhao <yong.zhao@amd.com>
Date: Tue, 19 Sep 2017 12:58:15 -0400
Subject: drm/amdgpu: Add copy_pte_num_dw member in amdgpu_vm_pte_funcs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use it to replace the hard coded value in amdgpu_vm_bo_update_mapping().

Signed-off-by: Yong Zhao <yong.zhao@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h    | 4 ++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/cik_sdma.c  | 2 ++
 drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 2 ++
 drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 2 ++
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 2 ++
 drivers/gpu/drm/amd/amdgpu/si_dma.c    | 2 ++
 7 files changed, 15 insertions(+), 1 deletion(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 1bf42a8ef23e..7c43add4e444 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -294,10 +294,14 @@ struct amdgpu_buffer_funcs {
 
 /* provided by hw blocks that can write ptes, e.g., sdma */
 struct amdgpu_vm_pte_funcs {
+	/* number of dw to reserve per operation */
+	unsigned	copy_pte_num_dw;
+
 	/* copy pte entries from GART */
 	void (*copy_pte)(struct amdgpu_ib *ib,
 			 uint64_t pe, uint64_t src,
 			 unsigned count);
+
 	/* write pte one entry at a time with addr mapping */
 	void (*write_pte)(struct amdgpu_ib *ib, uint64_t pe,
 			  uint64_t value, unsigned count,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 28d16781377f..8fcc743dfa86 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1597,7 +1597,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 
 	if (pages_addr) {
 		/* copy commands needed */
-		ndw += ncmds * 7;
+		ndw += ncmds * adev->vm_manager.vm_pte_funcs->copy_pte_num_dw;
 
 		/* and also PTEs */
 		ndw += nptes * 2;
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index c64dcd1883b5..60cecd117705 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -1387,7 +1387,9 @@ static void cik_sdma_set_buffer_funcs(struct amdgpu_device *adev)
 }
 
 static const struct amdgpu_vm_pte_funcs cik_sdma_vm_pte_funcs = {
+	.copy_pte_num_dw = 7,
 	.copy_pte = cik_sdma_vm_copy_pte,
+
 	.write_pte = cik_sdma_vm_write_pte,
 
 	.set_max_nums_pte_pde = 0x1fffff >> 3,
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index c05eb74d3404..acdee3a4602c 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -1324,7 +1324,9 @@ static void sdma_v2_4_set_buffer_funcs(struct amdgpu_device *adev)
 }
 
 static const struct amdgpu_vm_pte_funcs sdma_v2_4_vm_pte_funcs = {
+	.copy_pte_num_dw = 7,
 	.copy_pte = sdma_v2_4_vm_copy_pte,
+
 	.write_pte = sdma_v2_4_vm_write_pte,
 
 	.set_max_nums_pte_pde = 0x1fffff >> 3,
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index 2079340656d2..72f31cc7df00 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -1748,7 +1748,9 @@ static void sdma_v3_0_set_buffer_funcs(struct amdgpu_device *adev)
 }
 
 static const struct amdgpu_vm_pte_funcs sdma_v3_0_vm_pte_funcs = {
+	.copy_pte_num_dw = 7,
 	.copy_pte = sdma_v3_0_vm_copy_pte,
+
 	.write_pte = sdma_v3_0_vm_write_pte,
 
 	/* not 0x3fffff due to HW limitation */
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 2605faf56dff..61572c5d19ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -1714,7 +1714,9 @@ static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev)
 }
 
 static const struct amdgpu_vm_pte_funcs sdma_v4_0_vm_pte_funcs = {
+	.copy_pte_num_dw = 7,
 	.copy_pte = sdma_v4_0_vm_copy_pte,
+
 	.write_pte = sdma_v4_0_vm_write_pte,
 
 	.set_max_nums_pte_pde = 0x400000 >> 3,
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c
index adb6ae7d63ef..3fa2fbf8c9a1 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
@@ -887,7 +887,9 @@ static void si_dma_set_buffer_funcs(struct amdgpu_device *adev)
 }
 
 static const struct amdgpu_vm_pte_funcs si_dma_vm_pte_funcs = {
+	.copy_pte_num_dw = 5,
 	.copy_pte = si_dma_vm_copy_pte,
+
 	.write_pte = si_dma_vm_write_pte,
 
 	.set_max_nums_pte_pde = 0xffff8 >> 3,
-- 
cgit v1.2.3


From c98171ccf6580407d07a3b5dc8188ce9e1f4f7ca Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Thu, 21 Sep 2017 16:26:41 -0400
Subject: drm/amdgpu: Handle GPUVM fault storms

When many wavefronts cause VM faults at the same time, it can
overwhelm the interrupt handler and cause IH ring overflows before
the driver can notify or kill the faulting application.

As a workaround I'm introducing limited per-VM fault credit. After
that number of VM faults have occurred, further VM faults are
filtered out at the prescreen stage of processing.

This depends on the PASID in the interrupt packet, so it currently
only works for KFD contexts.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 31 +++++++++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  7 ++++++-
 drivers/gpu/drm/amd/amdgpu/cik_ih.c     | 19 +++++++++++++++++--
 drivers/gpu/drm/amd/amdgpu/cz_ih.c      | 19 +++++++++++++++++--
 drivers/gpu/drm/amd/amdgpu/iceland_ih.c | 19 +++++++++++++++++--
 drivers/gpu/drm/amd/amdgpu/tonga_ih.c   | 19 +++++++++++++++++--
 drivers/gpu/drm/amd/amdgpu/vega10_ih.c  | 11 +++++++----
 7 files changed, 112 insertions(+), 13 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 8fcc743dfa86..c91d5c7a273d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2682,6 +2682,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	}
 
 	INIT_KFIFO(vm->faults);
+	vm->fault_credit = 16;
 
 	return 0;
 
@@ -2775,6 +2776,36 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 		amdgpu_vm_free_reserved_vmid(adev, vm, i);
 }
 
+/**
+ * amdgpu_vm_pasid_fault_credit - Check fault credit for given PASID
+ *
+ * @adev: amdgpu_device pointer
+ * @pasid: PASID do identify the VM
+ *
+ * This function is expected to be called in interrupt context. Returns
+ * true if there was fault credit, false otherwise
+ */
+bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
+				  unsigned int pasid)
+{
+	struct amdgpu_vm *vm;
+
+	spin_lock(&adev->vm_manager.pasid_lock);
+	vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
+	spin_unlock(&adev->vm_manager.pasid_lock);
+	if (!vm)
+		/* VM not found, can't track fault credit */
+		return true;
+
+	/* No lock needed. only accessed by IRQ handler */
+	if (!vm->fault_credit)
+		/* Too many faults in this VM */
+		return false;
+
+	vm->fault_credit--;
+	return true;
+}
+
 /**
  * amdgpu_vm_manager_init - init the VM manager
  *
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 447ed6e7e586..66efbc2e43af 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -165,8 +165,11 @@ struct amdgpu_vm {
 	/* Flag to indicate ATS support from PTE for GFX9 */
 	bool			pte_support_ats;
 
-	/* Up to 128 pending page faults */
+	/* Up to 128 pending retry page faults */
 	DECLARE_KFIFO(faults, u64, 128);
+
+	/* Limit non-retry fault storms */
+	unsigned int		fault_credit;
 };
 
 struct amdgpu_vm_id {
@@ -244,6 +247,8 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
 int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		   int vm_context, unsigned int pasid);
 void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
+				  unsigned int pasid);
 void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 			 struct list_head *validated,
 			 struct amdgpu_bo_list_entry *entry);
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
index 07d3d895da10..a870b354e3f7 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
@@ -237,8 +237,23 @@ static u32 cik_ih_get_wptr(struct amdgpu_device *adev)
  */
 static bool cik_ih_prescreen_iv(struct amdgpu_device *adev)
 {
-	/* Process all interrupts */
-	return true;
+	u32 ring_index = adev->irq.ih.rptr >> 2;
+	u16 pasid;
+
+	switch (le32_to_cpu(adev->irq.ih.ring[ring_index]) & 0xff) {
+	case 146:
+	case 147:
+		pasid = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]) >> 16;
+		if (!pasid || amdgpu_vm_pasid_fault_credit(adev, pasid))
+			return true;
+		break;
+	default:
+		/* Not a VM fault */
+		return true;
+	}
+
+	adev->irq.ih.rptr += 16;
+	return false;
 }
 
  /**
diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
index b6cdf4afaf46..fa61d649bb44 100644
--- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
@@ -216,8 +216,23 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev)
  */
 static bool cz_ih_prescreen_iv(struct amdgpu_device *adev)
 {
-	/* Process all interrupts */
-	return true;
+	u32 ring_index = adev->irq.ih.rptr >> 2;
+	u16 pasid;
+
+	switch (le32_to_cpu(adev->irq.ih.ring[ring_index]) & 0xff) {
+	case 146:
+	case 147:
+		pasid = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]) >> 16;
+		if (!pasid || amdgpu_vm_pasid_fault_credit(adev, pasid))
+			return true;
+		break;
+	default:
+		/* Not a VM fault */
+		return true;
+	}
+
+	adev->irq.ih.rptr += 16;
+	return false;
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
index 65ed6d3a8f05..bd592cb39f37 100644
--- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
@@ -216,8 +216,23 @@ static u32 iceland_ih_get_wptr(struct amdgpu_device *adev)
  */
 static bool iceland_ih_prescreen_iv(struct amdgpu_device *adev)
 {
-	/* Process all interrupts */
-	return true;
+	u32 ring_index = adev->irq.ih.rptr >> 2;
+	u16 pasid;
+
+	switch (le32_to_cpu(adev->irq.ih.ring[ring_index]) & 0xff) {
+	case 146:
+	case 147:
+		pasid = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]) >> 16;
+		if (!pasid || amdgpu_vm_pasid_fault_credit(adev, pasid))
+			return true;
+		break;
+	default:
+		/* Not a VM fault */
+		return true;
+	}
+
+	adev->irq.ih.rptr += 16;
+	return false;
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
index 5ed00692618e..aa4e320e31f8 100644
--- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
@@ -227,8 +227,23 @@ static u32 tonga_ih_get_wptr(struct amdgpu_device *adev)
  */
 static bool tonga_ih_prescreen_iv(struct amdgpu_device *adev)
 {
-	/* Process all interrupts */
-	return true;
+	u32 ring_index = adev->irq.ih.rptr >> 2;
+	u16 pasid;
+
+	switch (le32_to_cpu(adev->irq.ih.ring[ring_index]) & 0xff) {
+	case 146:
+	case 147:
+		pasid = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]) >> 16;
+		if (!pasid || amdgpu_vm_pasid_fault_credit(adev, pasid))
+			return true;
+		break;
+	default:
+		/* Not a VM fault */
+		return true;
+	}
+
+	adev->irq.ih.rptr += 16;
+	return false;
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
index a3b30d84dbb3..697325737ba8 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -260,15 +260,18 @@ static bool vega10_ih_prescreen_iv(struct amdgpu_device *adev)
 		return true;
 	}
 
-	/* Not a retry fault */
-	if (!(dw5 & 0x80))
-		return true;
-
 	pasid = dw3 & 0xffff;
 	/* No PASID, can't identify faulting process */
 	if (!pasid)
 		return true;
 
+	/* Not a retry fault, check fault credit */
+	if (!(dw5 & 0x80)) {
+		if (!amdgpu_vm_pasid_fault_credit(adev, pasid))
+			goto ignore_iv;
+		return true;
+	}
+
 	addr = ((u64)(dw5 & 0xf) << 44) | ((u64)dw4 << 12);
 	key = AMDGPU_VM_FAULT(pasid, addr);
 	r = amdgpu_ih_add_fault(adev, key);
-- 
cgit v1.2.3


From c38e0691107d14a8a39e9be7cb46c312bdfbd22d Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Mon, 18 Sep 2017 14:01:45 +0200
Subject: drm/amdgpu: minor coding style fix
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix two minor 80 char issues.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index c91d5c7a273d..bca9eebb6947 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2541,7 +2541,8 @@ static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size)
  * @adev: amdgpu_device pointer
  * @fragment_size_default: the default fragment size if it's set auto
  */
-void amdgpu_vm_set_fragment_size(struct amdgpu_device *adev, uint32_t fragment_size_default)
+void amdgpu_vm_set_fragment_size(struct amdgpu_device *adev,
+				 uint32_t fragment_size_default)
 {
 	if (amdgpu_vm_fragment_size == -1)
 		adev->vm_manager.fragment_size = fragment_size_default;
@@ -2555,7 +2556,8 @@ void amdgpu_vm_set_fragment_size(struct amdgpu_device *adev, uint32_t fragment_s
  * @adev: amdgpu_device pointer
  * @vm_size: the default vm size if it's set auto
  */
-void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size, uint32_t fragment_size_default)
+void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size,
+			   uint32_t fragment_size_default)
 {
 	/* adjust vm size firstly */
 	if (amdgpu_vm_size == -1)
-- 
cgit v1.2.3


From 6d16dac85c081825af58111023428c43d1da7e1a Mon Sep 17 00:00:00 2001
From: Yong Zhao <Yong.Zhao@amd.com>
Date: Thu, 31 Aug 2017 15:55:00 -0400
Subject: drm/amdgpu: Set the correct value for PDEs/PTEs of ATC memory on
 Raven
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Without the additional bits set in PDEs/PTEs, the ATC memory access
would have failed on Raven.

Signed-off-by: Yong Zhao <yong.zhao@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c |  9 ++++++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 10 ++++++++++
 2 files changed, 16 insertions(+), 3 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index fee0a32ac56f..b500bb6a8491 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -328,9 +328,10 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 				AMDGPU_GEM_CREATE_SHADOW);
 
 	if (vm->pte_support_ats) {
-		init_value = AMDGPU_PTE_SYSTEM;
+		init_value = AMDGPU_PTE_DEFAULT_ATC;
 		if (level != adev->vm_manager.num_level - 1)
 			init_value |= AMDGPU_PDE_PTE;
+
 	}
 
 	/* walk over the address space and allocate the page tables */
@@ -2017,7 +2018,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 		list_del(&mapping->list);
 
 		if (vm->pte_support_ats)
-			init_pte_value = AMDGPU_PTE_SYSTEM;
+			init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
 
 		r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm,
 						mapping->start, mapping->last,
@@ -2629,7 +2630,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 
 		if (adev->asic_type == CHIP_RAVEN) {
 			vm->pte_support_ats = true;
-			init_pde_value = AMDGPU_PTE_SYSTEM | AMDGPU_PDE_PTE;
+			init_pde_value = AMDGPU_PTE_DEFAULT_ATC
+					| AMDGPU_PDE_PTE;
+
 		}
 	} else
 		vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index d68f39b4e5e7..aa914256b4bc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -73,6 +73,16 @@ struct amdgpu_bo_list_entry;
 #define AMDGPU_PTE_MTYPE(a)    ((uint64_t)a << 57)
 #define AMDGPU_PTE_MTYPE_MASK	AMDGPU_PTE_MTYPE(3ULL)
 
+/* For Raven */
+#define AMDGPU_MTYPE_CC 2
+
+#define AMDGPU_PTE_DEFAULT_ATC  (AMDGPU_PTE_SYSTEM      \
+                                | AMDGPU_PTE_SNOOPED    \
+                                | AMDGPU_PTE_EXECUTABLE \
+                                | AMDGPU_PTE_READABLE   \
+                                | AMDGPU_PTE_WRITEABLE  \
+                                | AMDGPU_PTE_MTYPE(AMDGPU_MTYPE_CC))
+
 /* How to programm VM fault handling */
 #define AMDGPU_VM_FAULT_STOP_NEVER	0
 #define AMDGPU_VM_FAULT_STOP_FIRST	1
-- 
cgit v1.2.3


From 9fc8fc709b356c85034cbcb3b84c9d8b77865f52 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Mon, 18 Sep 2017 13:58:30 +0200
Subject: drm/amdgpu: add VM support for huge pages v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Convert GTT mappings into linear ones for huge page handling.

v2: use fragment size as minimum for linear conversion

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 25 ++++++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index b500bb6a8491..eb4a01c14eee 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1699,6 +1699,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
 				      struct drm_mm_node *nodes,
 				      struct dma_fence **fence)
 {
+	unsigned min_linear_pages = 1 << adev->vm_manager.fragment_size;
 	uint64_t pfn, start = mapping->start;
 	int r;
 
@@ -1733,6 +1734,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
 	}
 
 	do {
+		dma_addr_t *dma_addr = NULL;
 		uint64_t max_entries;
 		uint64_t addr, last;
 
@@ -1746,15 +1748,32 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
 		}
 
 		if (pages_addr) {
+			uint64_t count;
+
 			max_entries = min(max_entries, 16ull * 1024ull);
-			addr = 0;
+			for (count = 1; count < max_entries; ++count) {
+				uint64_t idx = pfn + count;
+
+				if (pages_addr[idx] !=
+				    (pages_addr[idx - 1] + PAGE_SIZE))
+					break;
+			}
+
+			if (count < min_linear_pages) {
+				addr = pfn << PAGE_SHIFT;
+				dma_addr = pages_addr;
+			} else {
+				addr = pages_addr[pfn];
+				max_entries = count;
+			}
+
 		} else if (flags & AMDGPU_PTE_VALID) {
 			addr += adev->vm_manager.vram_base_offset;
+			addr += pfn << PAGE_SHIFT;
 		}
-		addr += pfn << PAGE_SHIFT;
 
 		last = min((uint64_t)mapping->last, start + max_entries - 1);
-		r = amdgpu_vm_bo_update_mapping(adev, exclusive, pages_addr, vm,
+		r = amdgpu_vm_bo_update_mapping(adev, exclusive, dma_addr, vm,
 						start, last, flags, addr,
 						fence);
 		if (r)
-- 
cgit v1.2.3


From 177ae09b5d699a5ebd1cafcee78889db968abf54 Mon Sep 17 00:00:00 2001
From: Andres Rodriguez <andresx7@gmail.com>
Date: Fri, 15 Sep 2017 20:44:06 -0400
Subject: drm/amdgpu: introduce AMDGPU_GEM_CREATE_EXPLICIT_SYNC v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Introduce a flag to signal that access to a BO will be synchronized
through an external mechanism.

Currently all buffers shared between contexts are subject to implicit
synchronization. However, this is only required for protocols that
currently don't support an explicit synchronization mechanism (DRI2/3).

This patch introduces the AMDGPU_GEM_CREATE_EXPLICIT_SYNC, so that
users can specify when it is safe to disable implicit sync.

v2: only disable explicit sync in amdgpu_cs_ioctl

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c     | 3 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c    | 4 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 8 ++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c   | 7 +++++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h   | 3 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 5 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c     | 8 ++++----
 include/uapi/drm/amdgpu_drm.h              | 2 ++
 8 files changed, 29 insertions(+), 11 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index ab83dfcabb41..38027a00f8ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -705,7 +705,8 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
 
 	list_for_each_entry(e, &p->validated, tv.head) {
 		struct reservation_object *resv = e->robj->tbo.resv;
-		r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp);
+		r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp,
+				     amdgpu_bo_explicit_sync(e->robj));
 
 		if (r)
 			return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index b0d45c8e6bb3..21e99366cab3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -212,7 +212,9 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
 		      AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
 		      AMDGPU_GEM_CREATE_CPU_GTT_USWC |
 		      AMDGPU_GEM_CREATE_VRAM_CLEARED |
-		      AMDGPU_GEM_CREATE_VM_ALWAYS_VALID))
+		      AMDGPU_GEM_CREATE_VM_ALWAYS_VALID |
+		      AMDGPU_GEM_CREATE_EXPLICIT_SYNC))
+
 		return -EINVAL;
 
 	/* reject invalid gem domains */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index c26ef53604af..428aae048f4b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -193,6 +193,14 @@ static inline bool amdgpu_bo_gpu_accessible(struct amdgpu_bo *bo)
 	}
 }
 
+/**
+ * amdgpu_bo_explicit_sync - return whether the bo is explicitly synced
+ */
+static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo)
+{
+	return bo->flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC;
+}
+
 int amdgpu_bo_create(struct amdgpu_device *adev,
 			    unsigned long size, int byte_align,
 			    bool kernel, u32 domain, u64 flags,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index c586f44312f9..a4bf21f8f1c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -169,14 +169,14 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
  *
  * @sync: sync object to add fences from reservation object to
  * @resv: reservation object with embedded fence
- * @shared: true if we should only sync to the exclusive fence
+ * @explicit_sync: true if we should only sync to the exclusive fence
  *
  * Sync to the fence
  */
 int amdgpu_sync_resv(struct amdgpu_device *adev,
 		     struct amdgpu_sync *sync,
 		     struct reservation_object *resv,
-		     void *owner)
+		     void *owner, bool explicit_sync)
 {
 	struct reservation_object_list *flist;
 	struct dma_fence *f;
@@ -191,6 +191,9 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
 	f = reservation_object_get_excl(resv);
 	r = amdgpu_sync_fence(adev, sync, f);
 
+	if (explicit_sync)
+		return r;
+
 	flist = reservation_object_get_list(resv);
 	if (!flist || r)
 		return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
index dc7687993317..70d7e3a279a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
@@ -45,7 +45,8 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
 int amdgpu_sync_resv(struct amdgpu_device *adev,
 		     struct amdgpu_sync *sync,
 		     struct reservation_object *resv,
-		     void *owner);
+		     void *owner,
+		     bool explicit_sync);
 struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
 				     struct amdgpu_ring *ring);
 struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 10952c3e5eb6..a2282bacf960 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1489,7 +1489,8 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
 	job->vm_needs_flush = vm_needs_flush;
 	if (resv) {
 		r = amdgpu_sync_resv(adev, &job->sync, resv,
-				     AMDGPU_FENCE_OWNER_UNDEFINED);
+				     AMDGPU_FENCE_OWNER_UNDEFINED,
+				     false);
 		if (r) {
 			DRM_ERROR("sync failed (%d).\n", r);
 			goto error_free;
@@ -1581,7 +1582,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 
 	if (resv) {
 		r = amdgpu_sync_resv(adev, &job->sync, resv,
-				     AMDGPU_FENCE_OWNER_UNDEFINED);
+				     AMDGPU_FENCE_OWNER_UNDEFINED, false);
 		if (r) {
 			DRM_ERROR("sync failed (%d).\n", r);
 			goto error_free;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index eb4a01c14eee..c559d76ff695 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1035,7 +1035,7 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	int r;
 
 	amdgpu_sync_create(&sync);
-	amdgpu_sync_resv(adev, &sync, vm->root.base.bo->tbo.resv, owner);
+	amdgpu_sync_resv(adev, &sync, vm->root.base.bo->tbo.resv, owner, false);
 	r = amdgpu_sync_wait(&sync, true);
 	amdgpu_sync_free(&sync);
 
@@ -1176,11 +1176,11 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 			amdgpu_ring_pad_ib(ring, params.ib);
 			amdgpu_sync_resv(adev, &job->sync,
 					 parent->base.bo->tbo.resv,
-					 AMDGPU_FENCE_OWNER_VM);
+					 AMDGPU_FENCE_OWNER_VM, false);
 			if (shadow)
 				amdgpu_sync_resv(adev, &job->sync,
 						 shadow->tbo.resv,
-						 AMDGPU_FENCE_OWNER_VM);
+						 AMDGPU_FENCE_OWNER_VM, false);
 
 			WARN_ON(params.ib->length_dw > ndw);
 			r = amdgpu_job_submit(job, ring, &vm->entity,
@@ -1644,7 +1644,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 		goto error_free;
 
 	r = amdgpu_sync_resv(adev, &job->sync, vm->root.base.bo->tbo.resv,
-			     owner);
+			     owner, false);
 	if (r)
 		goto error_free;
 
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 4c6e8c482ee4..b62484af8ccb 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -91,6 +91,8 @@ extern "C" {
 #define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS	(1 << 5)
 /* Flag that BO is always valid in this VM */
 #define AMDGPU_GEM_CREATE_VM_ALWAYS_VALID	(1 << 6)
+/* Flag that BO sharing will be explicitly synchronized */
+#define AMDGPU_GEM_CREATE_EXPLICIT_SYNC		(1 << 7)
 
 struct drm_amdgpu_gem_create_in  {
 	/** the requested memory size */
-- 
cgit v1.2.3


From 2642cf110d08a403f585a051e4cbf45a90b3adea Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Fri, 13 Oct 2017 17:24:31 +0200
Subject: drm/amdgpu: reserve root PD while releasing it
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Otherwise somebody could try to evict it at the same time and try to use
half torn down structures.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-and-Tested-by: Michel Dänzer <michel.daenzer@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index c559d76ff695..010d14195a5e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2759,8 +2759,9 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 {
 	struct amdgpu_bo_va_mapping *mapping, *tmp;
 	bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt;
+	struct amdgpu_bo *root;
 	u64 fault;
-	int i;
+	int i, r;
 
 	/* Clear pending page faults from IH when the VM is destroyed */
 	while (kfifo_get(&vm->faults, &fault))
@@ -2795,7 +2796,15 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 		amdgpu_vm_free_mapping(adev, vm, mapping, NULL);
 	}
 
-	amdgpu_vm_free_levels(&vm->root);
+	root = amdgpu_bo_ref(vm->root.base.bo);
+	r = amdgpu_bo_reserve(root, true);
+	if (r) {
+		dev_err(adev->dev, "Leaking page tables because BO reservation failed\n");
+	} else {
+		amdgpu_vm_free_levels(&vm->root);
+		amdgpu_bo_unreserve(root);
+	}
+	amdgpu_bo_unref(&root);
 	dma_fence_put(vm->last_update);
 	for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
 		amdgpu_vm_free_reserved_vmid(adev, vm, i);
-- 
cgit v1.2.3


From 78aa02c713fcf19e9bc8511ab61a5fd6c877cc01 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Sat, 30 Sep 2017 11:14:13 +0300
Subject: drm/amdgpu: Potential uninitialized variable in
 amdgpu_vm_update_directories()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

After commit ea09729c9302 ("drm/amdgpu: rework page directory filling
v2") then it becomes a lot harder to verify that "r" is initialized.  My
static checker complains and so I've reviewed the code.  It does look
like it might be buggy... Anyway, it doesn't hurt to set "r" to zero
at the start.

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 010d14195a5e..c8c26f21993c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1244,7 +1244,7 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_vm *vm,
 int amdgpu_vm_update_directories(struct amdgpu_device *adev,
 				 struct amdgpu_vm *vm)
 {
-	int r;
+	int r = 0;
 
 	spin_lock(&vm->status_lock);
 	while (!list_empty(&vm->relocated)) {
-- 
cgit v1.2.3