AMD-GFX Archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/2] drm/amdgpu: Unmap only clear the page table leaves
@ 2024-02-01 16:50 Philip Yang
  2024-02-01 16:50 ` [PATCH 2/2] drm/amdgpu: Improve huge page mapping update Philip Yang
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Philip Yang @ 2024-02-01 16:50 UTC (permalink / raw
  To: amd-gfx; +Cc: Felix.Kuehling, christian.koenig, Philip Yang

SVM migration unmap pages from GPU and then update mapping to GPU to
recover page fault. Currently unmap clears the PDE entry for range
length >= huge page and free PTB bo, update mapping to alloc new PT bo.
There is race bug that the freed entry bo maybe still on the pt_free
list, reused when updating mapping and then freed, leave invalid PDE
entry and cause GPU page fault.

By setting the update to clear only one PDE entry or clear PTB, to
avoid unmap to free PTE bo. This fixes the race bug and improve the
unmap and map to GPU performance. Update mapping to huge page will
still free the PTB bo.

With this change, the vm->pt_freed list and work is not needed. Add
WARN_ON(unlocked) in amdgpu_vm_pt_free_dfs to catch if unmap to free the
PTB.

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c    |  4 ---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h    |  4 ---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 43 ++++++-----------------
 3 files changed, 10 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 82e5fd66a10d..3bde77dfc63f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2256,8 +2256,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	spin_lock_init(&vm->status_lock);
 	INIT_LIST_HEAD(&vm->freed);
 	INIT_LIST_HEAD(&vm->done);
-	INIT_LIST_HEAD(&vm->pt_freed);
-	INIT_WORK(&vm->pt_free_work, amdgpu_vm_pt_free_work);
 	INIT_KFIFO(vm->faults);
 
 	r = amdgpu_vm_init_entities(adev, vm);
@@ -2446,8 +2444,6 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 
 	amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm);
 
-	flush_work(&vm->pt_free_work);
-
 	root = amdgpu_bo_ref(vm->root.bo);
 	amdgpu_bo_reserve(root, true);
 	amdgpu_vm_set_pasid(adev, vm, 0);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index cdb61f1e7c35..74fe211b9ecd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -316,10 +316,6 @@ struct amdgpu_vm {
 	/* BOs which are invalidated, has been updated in the PTs */
 	struct list_head        done;
 
-	/* PT BOs scheduled to free and fill with zero if vm_resv is not hold */
-	struct list_head	pt_freed;
-	struct work_struct	pt_free_work;
-
 	/* contains the page directory */
 	struct amdgpu_vm_bo_base     root;
 	struct dma_fence	*last_update;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index a160265ddc07..a3d609655ce3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -657,27 +657,6 @@ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
 	amdgpu_bo_unref(&entry->bo);
 }
 
-void amdgpu_vm_pt_free_work(struct work_struct *work)
-{
-	struct amdgpu_vm_bo_base *entry, *next;
-	struct amdgpu_vm *vm;
-	LIST_HEAD(pt_freed);
-
-	vm = container_of(work, struct amdgpu_vm, pt_free_work);
-
-	spin_lock(&vm->status_lock);
-	list_splice_init(&vm->pt_freed, &pt_freed);
-	spin_unlock(&vm->status_lock);
-
-	/* flush_work in amdgpu_vm_fini ensure vm->root.bo is valid. */
-	amdgpu_bo_reserve(vm->root.bo, true);
-
-	list_for_each_entry_safe(entry, next, &pt_freed, vm_status)
-		amdgpu_vm_pt_free(entry);
-
-	amdgpu_bo_unreserve(vm->root.bo);
-}
-
 /**
  * amdgpu_vm_pt_free_dfs - free PD/PT levels
  *
@@ -696,17 +675,7 @@ static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev,
 	struct amdgpu_vm_pt_cursor cursor;
 	struct amdgpu_vm_bo_base *entry;
 
-	if (unlocked) {
-		spin_lock(&vm->status_lock);
-		for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
-			list_move(&entry->vm_status, &vm->pt_freed);
-
-		if (start)
-			list_move(&start->entry->vm_status, &vm->pt_freed);
-		spin_unlock(&vm->status_lock);
-		schedule_work(&vm->pt_free_work);
-		return;
-	}
+	WARN_ON(unlocked);
 
 	for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
 		amdgpu_vm_pt_free(entry);
@@ -1009,7 +978,15 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
 		incr = (uint64_t)AMDGPU_GPU_PAGE_SIZE << shift;
 		mask = amdgpu_vm_pt_entries_mask(adev, cursor.level);
 		pe_start = ((cursor.pfn >> shift) & mask) * 8;
-		entry_end = ((uint64_t)mask + 1) << shift;
+
+		if (cursor.level < AMDGPU_VM_PTB && params->unlocked)
+			/*
+			 * Unmap to clear one PDE entry, to avoid unmap to free
+			 * PTB using pt_free work which has race condition.
+			 */
+			entry_end = 1ULL << shift;
+		else
+			entry_end = ((uint64_t)mask + 1) << shift;
 		entry_end += cursor.pfn & ~(entry_end - 1);
 		entry_end = min(entry_end, end);
 
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 2/2] drm/amdgpu: Improve huge page mapping update
  2024-02-01 16:50 [PATCH 1/2] drm/amdgpu: Unmap only clear the page table leaves Philip Yang
@ 2024-02-01 16:50 ` Philip Yang
  2024-02-02 23:50 ` [PATCH 1/2] drm/amdgpu: Unmap only clear the page table leaves Felix Kuehling
  2024-02-07 14:47 ` Christian König
  2 siblings, 0 replies; 4+ messages in thread
From: Philip Yang @ 2024-02-01 16:50 UTC (permalink / raw
  To: amd-gfx; +Cc: Felix.Kuehling, christian.koenig, Philip Yang

Update huge page mapping, ex 2MB address and size aligned, we alloc PTB
bo, and then free the PTB bo after updating PDE0 as PTE.

If fragment size >= parent_shift, don't alloc PT bo, because we will
update PDE entry, this will improve the huge page mapping update
by removing the extra PTB bo alloc and free.

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index a3d609655ce3..ef3ef03e50ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -916,7 +916,11 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
 		uint64_t incr, entry_end, pe_start;
 		struct amdgpu_bo *pt;
 
-		if (!params->unlocked) {
+		shift = amdgpu_vm_pt_level_shift(adev, cursor.level);
+		parent_shift = amdgpu_vm_pt_level_shift(adev, cursor.level - 1);
+
+		if (!params->unlocked &&
+		    (adev->asic_type < CHIP_VEGA10 || frag < parent_shift)) {
 			/* make sure that the page tables covering the
 			 * address range are actually allocated
 			 */
@@ -926,8 +930,6 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
 				return r;
 		}
 
-		shift = amdgpu_vm_pt_level_shift(adev, cursor.level);
-		parent_shift = amdgpu_vm_pt_level_shift(adev, cursor.level - 1);
 		if (params->unlocked) {
 			/* Unlocked updates are only allowed on the leaves */
 			if (amdgpu_vm_pt_descendant(adev, &cursor))
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH 1/2] drm/amdgpu: Unmap only clear the page table leaves
  2024-02-01 16:50 [PATCH 1/2] drm/amdgpu: Unmap only clear the page table leaves Philip Yang
  2024-02-01 16:50 ` [PATCH 2/2] drm/amdgpu: Improve huge page mapping update Philip Yang
@ 2024-02-02 23:50 ` Felix Kuehling
  2024-02-07 14:47 ` Christian König
  2 siblings, 0 replies; 4+ messages in thread
From: Felix Kuehling @ 2024-02-02 23:50 UTC (permalink / raw
  To: Philip Yang, amd-gfx; +Cc: christian.koenig


On 2024-02-01 11:50, Philip Yang wrote:
> SVM migration unmap pages from GPU and then update mapping to GPU to
> recover page fault. Currently unmap clears the PDE entry for range
> length >= huge page and free PTB bo, update mapping to alloc new PT bo.
> There is race bug that the freed entry bo maybe still on the pt_free
> list, reused when updating mapping and then freed, leave invalid PDE
> entry and cause GPU page fault.
>
> By setting the update to clear only one PDE entry or clear PTB, to
> avoid unmap to free PTE bo. This fixes the race bug and improve the
> unmap and map to GPU performance. Update mapping to huge page will
> still free the PTB bo.
>
> With this change, the vm->pt_freed list and work is not needed. Add
> WARN_ON(unlocked) in amdgpu_vm_pt_free_dfs to catch if unmap to free the
> PTB.
>
> Signed-off-by: Philip Yang <Philip.Yang@amd.com>

As we discussed offline, I think this is the wrong approach. This can 
lead to resource leaks when lots of virtual address space is released 
bug the page tables remain allocated indefinitely.

I think we need some solution that either

* prevents reuse of page tables that are about to be free
* prevents reused page tables from being freed by the worker (e.g. a ref 
count or cancelling the work)

Regards,
   Felix


> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c    |  4 ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h    |  4 ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 43 ++++++-----------------
>   3 files changed, 10 insertions(+), 41 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 82e5fd66a10d..3bde77dfc63f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -2256,8 +2256,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   	spin_lock_init(&vm->status_lock);
>   	INIT_LIST_HEAD(&vm->freed);
>   	INIT_LIST_HEAD(&vm->done);
> -	INIT_LIST_HEAD(&vm->pt_freed);
> -	INIT_WORK(&vm->pt_free_work, amdgpu_vm_pt_free_work);
>   	INIT_KFIFO(vm->faults);
>   
>   	r = amdgpu_vm_init_entities(adev, vm);
> @@ -2446,8 +2444,6 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>   
>   	amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm);
>   
> -	flush_work(&vm->pt_free_work);
> -
>   	root = amdgpu_bo_ref(vm->root.bo);
>   	amdgpu_bo_reserve(root, true);
>   	amdgpu_vm_set_pasid(adev, vm, 0);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index cdb61f1e7c35..74fe211b9ecd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -316,10 +316,6 @@ struct amdgpu_vm {
>   	/* BOs which are invalidated, has been updated in the PTs */
>   	struct list_head        done;
>   
> -	/* PT BOs scheduled to free and fill with zero if vm_resv is not hold */
> -	struct list_head	pt_freed;
> -	struct work_struct	pt_free_work;
> -
>   	/* contains the page directory */
>   	struct amdgpu_vm_bo_base     root;
>   	struct dma_fence	*last_update;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
> index a160265ddc07..a3d609655ce3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
> @@ -657,27 +657,6 @@ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
>   	amdgpu_bo_unref(&entry->bo);
>   }
>   
> -void amdgpu_vm_pt_free_work(struct work_struct *work)
> -{
> -	struct amdgpu_vm_bo_base *entry, *next;
> -	struct amdgpu_vm *vm;
> -	LIST_HEAD(pt_freed);
> -
> -	vm = container_of(work, struct amdgpu_vm, pt_free_work);
> -
> -	spin_lock(&vm->status_lock);
> -	list_splice_init(&vm->pt_freed, &pt_freed);
> -	spin_unlock(&vm->status_lock);
> -
> -	/* flush_work in amdgpu_vm_fini ensure vm->root.bo is valid. */
> -	amdgpu_bo_reserve(vm->root.bo, true);
> -
> -	list_for_each_entry_safe(entry, next, &pt_freed, vm_status)
> -		amdgpu_vm_pt_free(entry);
> -
> -	amdgpu_bo_unreserve(vm->root.bo);
> -}
> -
>   /**
>    * amdgpu_vm_pt_free_dfs - free PD/PT levels
>    *
> @@ -696,17 +675,7 @@ static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev,
>   	struct amdgpu_vm_pt_cursor cursor;
>   	struct amdgpu_vm_bo_base *entry;
>   
> -	if (unlocked) {
> -		spin_lock(&vm->status_lock);
> -		for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
> -			list_move(&entry->vm_status, &vm->pt_freed);
> -
> -		if (start)
> -			list_move(&start->entry->vm_status, &vm->pt_freed);
> -		spin_unlock(&vm->status_lock);
> -		schedule_work(&vm->pt_free_work);
> -		return;
> -	}
> +	WARN_ON(unlocked);
>   
>   	for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
>   		amdgpu_vm_pt_free(entry);
> @@ -1009,7 +978,15 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
>   		incr = (uint64_t)AMDGPU_GPU_PAGE_SIZE << shift;
>   		mask = amdgpu_vm_pt_entries_mask(adev, cursor.level);
>   		pe_start = ((cursor.pfn >> shift) & mask) * 8;
> -		entry_end = ((uint64_t)mask + 1) << shift;
> +
> +		if (cursor.level < AMDGPU_VM_PTB && params->unlocked)
> +			/*
> +			 * Unmap to clear one PDE entry, to avoid unmap to free
> +			 * PTB using pt_free work which has race condition.
> +			 */
> +			entry_end = 1ULL << shift;
> +		else
> +			entry_end = ((uint64_t)mask + 1) << shift;
>   		entry_end += cursor.pfn & ~(entry_end - 1);
>   		entry_end = min(entry_end, end);
>   

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH 1/2] drm/amdgpu: Unmap only clear the page table leaves
  2024-02-01 16:50 [PATCH 1/2] drm/amdgpu: Unmap only clear the page table leaves Philip Yang
  2024-02-01 16:50 ` [PATCH 2/2] drm/amdgpu: Improve huge page mapping update Philip Yang
  2024-02-02 23:50 ` [PATCH 1/2] drm/amdgpu: Unmap only clear the page table leaves Felix Kuehling
@ 2024-02-07 14:47 ` Christian König
  2 siblings, 0 replies; 4+ messages in thread
From: Christian König @ 2024-02-07 14:47 UTC (permalink / raw
  To: Philip Yang, amd-gfx; +Cc: Felix.Kuehling, christian.koenig

Am 01.02.24 um 17:50 schrieb Philip Yang:
> SVM migration unmap pages from GPU and then update mapping to GPU to
> recover page fault. Currently unmap clears the PDE entry for range
> length >= huge page and free PTB bo, update mapping to alloc new PT bo.
> There is race bug that the freed entry bo maybe still on the pt_free
> list, reused when updating mapping and then freed, leave invalid PDE
> entry and cause GPU page fault.

Well that race here is not clear to me. Can you explain more what's 
going on and why we should change the VM code to avoid this situation?

>
> By setting the update to clear only one PDE entry or clear PTB, to
> avoid unmap to free PTE bo. This fixes the race bug and improve the
> unmap and map to GPU performance. Update mapping to huge page will
> still free the PTB bo.

I would only do this if we don't have any other way to clean this up 
since that is a really ugly workaround for this issue.

Regards,
Christian.

>
> With this change, the vm->pt_freed list and work is not needed. Add
> WARN_ON(unlocked) in amdgpu_vm_pt_free_dfs to catch if unmap to free the
> PTB.
>
> Signed-off-by: Philip Yang <Philip.Yang@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c    |  4 ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h    |  4 ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 43 ++++++-----------------
>   3 files changed, 10 insertions(+), 41 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 82e5fd66a10d..3bde77dfc63f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -2256,8 +2256,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   	spin_lock_init(&vm->status_lock);
>   	INIT_LIST_HEAD(&vm->freed);
>   	INIT_LIST_HEAD(&vm->done);
> -	INIT_LIST_HEAD(&vm->pt_freed);
> -	INIT_WORK(&vm->pt_free_work, amdgpu_vm_pt_free_work);
>   	INIT_KFIFO(vm->faults);
>   
>   	r = amdgpu_vm_init_entities(adev, vm);
> @@ -2446,8 +2444,6 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>   
>   	amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm);
>   
> -	flush_work(&vm->pt_free_work);
> -
>   	root = amdgpu_bo_ref(vm->root.bo);
>   	amdgpu_bo_reserve(root, true);
>   	amdgpu_vm_set_pasid(adev, vm, 0);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index cdb61f1e7c35..74fe211b9ecd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -316,10 +316,6 @@ struct amdgpu_vm {
>   	/* BOs which are invalidated, has been updated in the PTs */
>   	struct list_head        done;
>   
> -	/* PT BOs scheduled to free and fill with zero if vm_resv is not hold */
> -	struct list_head	pt_freed;
> -	struct work_struct	pt_free_work;
> -
>   	/* contains the page directory */
>   	struct amdgpu_vm_bo_base     root;
>   	struct dma_fence	*last_update;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
> index a160265ddc07..a3d609655ce3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
> @@ -657,27 +657,6 @@ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
>   	amdgpu_bo_unref(&entry->bo);
>   }
>   
> -void amdgpu_vm_pt_free_work(struct work_struct *work)
> -{
> -	struct amdgpu_vm_bo_base *entry, *next;
> -	struct amdgpu_vm *vm;
> -	LIST_HEAD(pt_freed);
> -
> -	vm = container_of(work, struct amdgpu_vm, pt_free_work);
> -
> -	spin_lock(&vm->status_lock);
> -	list_splice_init(&vm->pt_freed, &pt_freed);
> -	spin_unlock(&vm->status_lock);
> -
> -	/* flush_work in amdgpu_vm_fini ensure vm->root.bo is valid. */
> -	amdgpu_bo_reserve(vm->root.bo, true);
> -
> -	list_for_each_entry_safe(entry, next, &pt_freed, vm_status)
> -		amdgpu_vm_pt_free(entry);
> -
> -	amdgpu_bo_unreserve(vm->root.bo);
> -}
> -
>   /**
>    * amdgpu_vm_pt_free_dfs - free PD/PT levels
>    *
> @@ -696,17 +675,7 @@ static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev,
>   	struct amdgpu_vm_pt_cursor cursor;
>   	struct amdgpu_vm_bo_base *entry;
>   
> -	if (unlocked) {
> -		spin_lock(&vm->status_lock);
> -		for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
> -			list_move(&entry->vm_status, &vm->pt_freed);
> -
> -		if (start)
> -			list_move(&start->entry->vm_status, &vm->pt_freed);
> -		spin_unlock(&vm->status_lock);
> -		schedule_work(&vm->pt_free_work);
> -		return;
> -	}
> +	WARN_ON(unlocked);
>   
>   	for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
>   		amdgpu_vm_pt_free(entry);
> @@ -1009,7 +978,15 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
>   		incr = (uint64_t)AMDGPU_GPU_PAGE_SIZE << shift;
>   		mask = amdgpu_vm_pt_entries_mask(adev, cursor.level);
>   		pe_start = ((cursor.pfn >> shift) & mask) * 8;
> -		entry_end = ((uint64_t)mask + 1) << shift;
> +
> +		if (cursor.level < AMDGPU_VM_PTB && params->unlocked)
> +			/*
> +			 * Unmap to clear one PDE entry, to avoid unmap to free
> +			 * PTB using pt_free work which has race condition.
> +			 */
> +			entry_end = 1ULL << shift;
> +		else
> +			entry_end = ((uint64_t)mask + 1) << shift;
>   		entry_end += cursor.pfn & ~(entry_end - 1);
>   		entry_end = min(entry_end, end);
>   


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2024-02-07 14:47 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-02-01 16:50 [PATCH 1/2] drm/amdgpu: Unmap only clear the page table leaves Philip Yang
2024-02-01 16:50 ` [PATCH 2/2] drm/amdgpu: Improve huge page mapping update Philip Yang
2024-02-02 23:50 ` [PATCH 1/2] drm/amdgpu: Unmap only clear the page table leaves Felix Kuehling
2024-02-07 14:47 ` Christian König

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).