From: Oak Zeng <oak.zeng@intel.com>
To: dri-devel@lists.freedesktop.org, intel-xe@lists.freedesktop.org
Cc: matthew.brost@intel.com, Thomas.Hellstrom@linux.intel.com,
	brian.welty@intel.com, himal.prasad.ghimiray@intel.com,
	krishnaiah.bommu@intel.com, niranjana.vishwanathapura@intel.com
Subject: [PATCH 13/23] drm/xe/svm: Handle CPU page fault
Date: Wed, 17 Jan 2024 17:12:13 -0500	[thread overview]
Message-ID: <20240117221223.18540-14-oak.zeng@intel.com> (raw)
In-Reply-To: <20240117221223.18540-1-oak.zeng@intel.com>

Under SVM, the CPU and the GPU program share one virtual address
space. The backing store of this virtual address space can be
either system memory or device memory. Since GPU device memory is
remapped as DEVICE_PRIVATE, the CPU can't access it: any CPU
access to device memory causes a page fault. Implement a page
fault handler that migrates the memory back to system memory and
maps it into the CPU page table so the CPU program can proceed.

Also unbind the faulting pages from the GPU side and free the
original GPU device pages.

Signed-off-by: Oak Zeng <oak.zeng@intel.com>
Co-developed-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@intel.com>
Cc: Brian Welty <brian.welty@intel.com>
---
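
For readers looking at this patch in isolation: the fault handler added
below is only reached because the tile's vram pages were published to the
core mm as DEVICE_PRIVATE memory with a migrate_to_ram callback, which an
earlier patch in this series sets up. A rough sketch of that wiring through
the generic dev_pagemap API follows; the function name is made up for
illustration and details may differ from the actual xe code (only the
xe_mem_region fields already used by the helpers in this patch, pagemap and
hpa_base, plus an assumed usable_size field, are referenced):

#include <linux/device.h>
#include <linux/err.h>
#include <linux/ioport.h>
#include <linux/memremap.h>
#include "xe_device_types.h"
#include "xe_svm.h"

static const struct dev_pagemap_ops xe_devm_pagemap_ops = {
	/* core mm calls back here when the CPU touches a device page */
	.migrate_to_ram = xe_devm_migrate_to_ram,
	/* called when the last reference to a device page is dropped */
	.page_free = xe_devm_page_free,
};

/* Illustrative sketch only, not part of this patch. */
static int xe_devm_register_pagemap(struct device *dev,
				    struct xe_mem_region *mr)
{
	struct resource *res;
	void *addr;

	/* Reserve a span of host "physical" address space whose struct
	 * pages will stand in for the tile's vram.
	 */
	res = devm_request_free_mem_region(dev, &iomem_resource,
					   mr->usable_size);
	if (IS_ERR(res))
		return PTR_ERR(res);

	mr->pagemap.type = MEMORY_DEVICE_PRIVATE;
	mr->pagemap.range.start = res->start;
	mr->pagemap.range.end = res->end;
	mr->pagemap.nr_range = 1;
	mr->pagemap.ops = &xe_devm_pagemap_ops;
	mr->pagemap.owner = dev;	/* must match migrate_vma.pgmap_owner */

	addr = devm_memremap_pages(dev, &mr->pagemap);
	if (IS_ERR(addr))
		return PTR_ERR(addr);

	/* vram_pfn_to_dpa() translates back using this base */
	mr->hpa_base = res->start;

	return 0;
}
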
 drivers/gpu/drm/xe/xe_device_types.h |  12 ++
 drivers/gpu/drm/xe/xe_svm.h          |   8 +-
 drivers/gpu/drm/xe/xe_svm_devmem.c   |  10 +-
 drivers/gpu/drm/xe/xe_svm_migrate.c  | 230 +++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_svm_range.c    |  27 ++++
 5 files changed, 280 insertions(+), 7 deletions(-)
 create mode 100644 drivers/gpu/drm/xe/xe_svm_migrate.c

diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 6dba5b0ab481..c08e41cb3229 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -573,4 +573,16 @@ struct xe_file {
 	struct xe_drm_client *client;
 };
 
+static inline struct xe_tile *mem_region_to_tile(struct xe_mem_region *mr)
+{
+	return container_of(mr, struct xe_tile, mem.vram);
+}
+
+static inline u64 vram_pfn_to_dpa(struct xe_mem_region *mr, u64 pfn)
+{
+	u64 dpa;
+	u64 offset = (pfn << PAGE_SHIFT) - mr->hpa_base;
+	dpa = mr->dpa_base + offset;
+	return dpa;
+}
 #endif
diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
index 8551df2b9780..6b93055934f8 100644
--- a/drivers/gpu/drm/xe/xe_svm.h
+++ b/drivers/gpu/drm/xe/xe_svm.h
@@ -12,8 +12,10 @@
 #include <linux/rbtree_types.h>
 #include <linux/interval_tree.h>
 #include <linux/hashtable.h>
+#include <linux/mm_types.h>
 #include <linux/types.h>
 #include <linux/hmm.h>
+#include <linux/mm.h>
 #include "xe_device_types.h"
 
 struct xe_vm;
@@ -66,16 +68,20 @@ struct xe_svm_range {
 	struct interval_tree_node inode;
 };
 
+vm_fault_t xe_devm_migrate_to_ram(struct vm_fault *vmf);
 void xe_destroy_svm(struct xe_svm *svm);
 struct xe_svm *xe_create_svm(struct xe_vm *vm);
 struct xe_svm *xe_lookup_svm_by_mm(struct mm_struct *mm);
 struct xe_svm_range *xe_svm_range_from_addr(struct xe_svm *svm,
 								unsigned long addr);
+bool xe_svm_range_belongs_to_vma(struct mm_struct *mm,
+								struct xe_svm_range *range,
+								struct vm_area_struct *vma);
+
 int xe_svm_build_sg(struct hmm_range *range, struct sg_table *st);
 int xe_svm_devm_add(struct xe_tile *tile, struct xe_mem_region *mem);
 void xe_svm_devm_remove(struct xe_device *xe, struct xe_mem_region *mem);
 
-
 int xe_devm_alloc_pages(struct xe_tile *tile,
 						unsigned long npages,
 						struct list_head *blocks,
diff --git a/drivers/gpu/drm/xe/xe_svm_devmem.c b/drivers/gpu/drm/xe/xe_svm_devmem.c
index 5cd54dde4a9d..01f8385ebb5b 100644
--- a/drivers/gpu/drm/xe/xe_svm_devmem.c
+++ b/drivers/gpu/drm/xe/xe_svm_devmem.c
@@ -11,13 +11,16 @@
 #include <linux/dma-fence.h>
 #include <linux/bitops.h>
 #include <linux/bitmap.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
 #include <drm/drm_buddy.h>
-
 #include "xe_device_types.h"
 #include "xe_trace.h"
 #include "xe_migrate.h"
 #include "xe_ttm_vram_mgr_types.h"
 #include "xe_assert.h"
+#include "xe_pt.h"
+#include "xe_svm.h"
 
 /**
  * struct xe_svm_block_meta - svm uses this data structure to manage each
@@ -137,11 +140,6 @@ void xe_devm_free_blocks(struct list_head *blocks)
 		free_block(block);
 }
 
-static vm_fault_t xe_devm_migrate_to_ram(struct vm_fault *vmf)
-{
-	return 0;
-}
-
 void xe_devm_page_free(struct page *page)
 {
 	struct drm_buddy_block *block =
diff --git a/drivers/gpu/drm/xe/xe_svm_migrate.c b/drivers/gpu/drm/xe/xe_svm_migrate.c
new file mode 100644
index 000000000000..3be26da33aa3
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_svm_migrate.c
@@ -0,0 +1,230 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <linux/gfp.h>
+#include <linux/migrate.h>
+#include <linux/dma-mapping.h>
+#include <linux/dma-fence.h>
+#include <linux/bitops.h>
+#include <linux/bitmap.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <drm/drm_buddy.h>
+#include "xe_device_types.h"
+#include "xe_trace.h"
+#include "xe_migrate.h"
+#include "xe_ttm_vram_mgr_types.h"
+#include "xe_assert.h"
+#include "xe_pt.h"
+#include "xe_svm.h"
+
+
+/**
+ * alloc_host_page() - allocate one host page for the fault vma
+ *
+ * @dev: (GPU) device that will access the allocated page
+ * @vma: the fault vma that we need to allocate a page for
+ * @addr: the fault address. The allocated page is for this address
+ * @dma_addr: used to output the dma address of the allocated page.
+ * This dma address will be used by the GPU to access this page. The
+ * GPU accesses host pages through dma mapped addresses.
+ * @pfn: used to output the pfn of the allocated page.
+ *
+ * This function allocates one host page for the specified vma. It
+ * also does some preparation work for the GPU to access this page,
+ * such as mapping it through the iommu (by calling dma_map_page).
+ *
+ * When this function returns, the page is locked.
+ *
+ * Return: struct page pointer on success,
+ * NULL otherwise
+ */
+static struct page *alloc_host_page(struct device *dev,
+							 struct vm_area_struct *vma,
+							 unsigned long addr,
+							 dma_addr_t *dma_addr,
+							 unsigned long *pfn)
+{
+	struct page *page;
+
+	page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
+	if (unlikely(!page))
+		return NULL;
+
+	/* Lock the page as required by hmm, see hmm.rst */
+	lock_page(page);
+	*dma_addr = dma_map_page(dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);
+	if (unlikely(dma_mapping_error(dev, *dma_addr))) {
+		unlock_page(page);
+		__free_page(page);
+		return NULL;
+	}
+
+	*pfn = migrate_pfn(page_to_pfn(page));
+	return page;
+}
+
+static void free_host_page(struct page *page)
+{
+	unlock_page(page);
+	put_page(page);
+}
+
+static inline struct xe_mem_region *page_to_mem_region(struct page *page)
+{
+	return container_of(page->pgmap, struct xe_mem_region, pagemap);
+}
+
+/**
+ * migrate_page_vram_to_ram() - migrate one page from vram to ram
+ *
+ * @vma: The vma that the page is mapped to
+ * @addr: The virtual address that the page is mapped to
+ * @src_pfn: src page's page frame number
+ * @dst_pfn: used to return the destination page's (in system ram) pfn
+ *
+ * Allocate one page in system ram and copy memory from device memory
+ * to system ram.
+ *
+ * Return: 0 if this page is already in sram (no need to migrate),
+ * 1 if the page was successfully migrated from vram to sram,
+ * negative error code otherwise
+ */
+static int migrate_page_vram_to_ram(struct vm_area_struct *vma, unsigned long addr,
+						unsigned long src_pfn, unsigned long *dst_pfn)
+{
+	struct xe_mem_region *mr;
+	struct xe_tile *tile;
+	struct xe_device *xe;
+	struct device *dev;
+	dma_addr_t dma_addr = 0;
+	struct dma_fence *fence;
+	struct page *host_page;
+	struct page *src_page;
+	u64 src_dpa;
+
+	src_page = migrate_pfn_to_page(src_pfn);
+	if (unlikely(!src_page || !(src_pfn & MIGRATE_PFN_MIGRATE)))
+		return 0;
+
+	mr = page_to_mem_region(src_page);
+	tile = mem_region_to_tile(mr);
+	xe = tile_to_xe(tile);
+	dev = xe->drm.dev;
+
+	src_dpa = vram_pfn_to_dpa(mr, page_to_pfn(src_page));
+	host_page = alloc_host_page(dev, vma, addr, &dma_addr, dst_pfn);
+	if (!host_page)
+		return -ENOMEM;
+
+	fence = xe_migrate_svm(tile->migrate, src_dpa, true,
+						dma_addr, false, PAGE_SIZE);
+	if (IS_ERR(fence)) {
+		dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_FROM_DEVICE);
+		free_host_page(host_page);
+		return PTR_ERR(fence);
+	}
+
+	dma_fence_wait(fence, false);
+	dma_fence_put(fence);
+	dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_FROM_DEVICE);
+	return 1;
+}
+
+/**
+ * xe_devm_migrate_to_ram() - Migrate memory back to sram on CPU page fault
+ *
+ * @vmf: cpu vm fault structure, contains fault information such as the vma
+ *
+ * Note: this runs in the CPU's vm fault handler; the caller holds the mmap read lock.
+ * FIXME: revisit the locking design here. Is there any deadlock?
+ *
+ * This function migrates the svm range containing the fault address to sram.
+ * We try to maintain a 1:1 mapping between the vma and the svm_range (i.e.,
+ * create one svm range for one vma initially and try not to split it), so this
+ * scheme ends up migrating at vma granularity. This might not be the most
+ * performant scheme when the GPU is in the picture.
+ *
+ * This can be tuned with a migration granularity for performance, for example,
+ * migrating 2M for each CPU page fault, or letting the user specify how much to
+ * migrate. But this is more complicated as it requires vma and svm_range splitting.
+ *
+ * This function should also update the GPU page table, so that the faulting
+ * virtual address points to the same sram location from the GPU side. This is TBD.
+ *
+ * Return:
+ * 0 on success
+ * VM_FAULT_SIGBUS: failed to migrate the page to system memory; the application
+ * will be signaled a SIGBUS
+ */
+vm_fault_t xe_devm_migrate_to_ram(struct vm_fault *vmf)
+{
+	struct xe_mem_region *mr = page_to_mem_region(vmf->page);
+	struct xe_tile *tile = mem_region_to_tile(mr);
+	struct xe_device *xe = tile_to_xe(tile);
+	struct vm_area_struct *vma = vmf->vma;
+	struct mm_struct *mm = vma->vm_mm;
+	struct xe_svm *svm = xe_lookup_svm_by_mm(mm);
+	struct xe_svm_range *range = xe_svm_range_from_addr(svm, vmf->address);
+	struct xe_vm *vm = svm->vm;
+	u64 npages = (range->end - range->start) >> PAGE_SHIFT;
+	unsigned long addr = range->start;
+	vm_fault_t ret = 0;
+	void *buf;
+	int i, r;
+
+	struct migrate_vma migrate_vma = {
+		.vma		= vmf->vma,
+		.start		= range->start,
+		.end		= range->end,
+		.pgmap_owner	= xe->drm.dev,
+		.flags		= MIGRATE_VMA_SELECT_DEVICE_PRIVATE,
+		.fault_page = vmf->page,
+	};
+
+	xe_assert(xe, IS_ALIGNED(vmf->address, PAGE_SIZE));
+	xe_assert(xe, IS_ALIGNED(range->start, PAGE_SIZE));
+	xe_assert(xe, IS_ALIGNED(range->end, PAGE_SIZE));
+	/* FIXME: in case of a vma split, the svm range might not belong to one vma */
+	xe_assert(xe, xe_svm_range_belongs_to_vma(mm, range, vma));
+
+	buf = kvcalloc(npages, 2 * sizeof(*migrate_vma.src), GFP_KERNEL);
+	migrate_vma.src = buf;
+	migrate_vma.dst = buf + npages;
+	if (migrate_vma_setup(&migrate_vma) < 0) {
+		ret = VM_FAULT_SIGBUS;
+		goto free_buf;
+	}
+
+	if (!migrate_vma.cpages)
+		goto free_buf;
+
+	for (i = 0; i < npages; i++) {
+		r = migrate_page_vram_to_ram(vma, addr, migrate_vma.src[i],
+							migrate_vma.dst + i);
+		if (r < 0) {
+			ret = VM_FAULT_SIGBUS;
+			break;
+		}
+
+		/* Migration was successful: unbind the src page from the gpu
+		 * and free the source page
+		 */
+		if (r == 1) {
+			struct page *src_page = migrate_pfn_to_page(migrate_vma.src[i]);
+
+			xe_invalidate_svm_range(vm, addr, PAGE_SIZE);
+			xe_devm_page_free(src_page);
+		}
+
+		addr += PAGE_SIZE;
+	}
+
+	migrate_vma_pages(&migrate_vma);
+	migrate_vma_finalize(&migrate_vma);
+free_buf:
+	kvfree(buf);
+	return ret;
+}
diff --git a/drivers/gpu/drm/xe/xe_svm_range.c b/drivers/gpu/drm/xe/xe_svm_range.c
index d8251d38f65e..b32c32f60315 100644
--- a/drivers/gpu/drm/xe/xe_svm_range.c
+++ b/drivers/gpu/drm/xe/xe_svm_range.c
@@ -5,7 +5,9 @@
 
 #include <linux/interval_tree.h>
 #include <linux/container_of.h>
+#include <linux/mm_types.h>
 #include <linux/mutex.h>
+#include <linux/mm.h>
 #include "xe_svm.h"
 
 /**
@@ -30,3 +32,28 @@ struct xe_svm_range *xe_svm_range_from_addr(struct xe_svm *svm,
 
 	return container_of(node, struct xe_svm_range, inode);
 }
+
+/**
+ * xe_svm_range_belongs_to_vma() - determine whether a virtual address
+ * range belongs to a vma
+ *
+ * @mm: the mm of the virtual address range
+ * @range: the svm virtual address range
+ * @vma: the vma to check the range against
+ *
+ * Return: true if the range belongs to the vma,
+ * false otherwise
+ */
+bool xe_svm_range_belongs_to_vma(struct mm_struct *mm,
+								struct xe_svm_range *range,
+								struct vm_area_struct *vma)
+{
+	struct vm_area_struct *vma1, *vma2;
+	unsigned long start = range->start;
+	unsigned long end = range->end;
+
+	vma1  = find_vma_intersection(mm, start, start + 4);
+	vma2  = find_vma_intersection(mm, end - 4, end);
+
+	return (vma1 == vma) && (vma2 == vma);
+}
-- 
2.26.3
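
As an aside on the vram_pfn_to_dpa() helper added to xe_device_types.h
above: it translates the host-side pfn of a DEVICE_PRIVATE vram page back
into a device physical address that xe_migrate_svm() can use as the copy
source. A tiny worked example with made-up base addresses (assuming 4KiB
pages, PAGE_SHIFT = 12):

/*
 * Purely illustrative numbers, not real xe values.
 *
 *   hpa_base = 0x400000000   start of the host physical range handed to
 *                            the vram pagemap
 *   dpa_base = 0x0           start of the tile's vram in device space
 *
 * For a device page with pfn = 0x400010:
 *
 *   host physical = pfn << PAGE_SHIFT        = 0x400010000
 *   offset        = host physical - hpa_base = 0x10000
 *   dpa           = dpa_base + offset        = 0x10000
 *
 * i.e. the faulting page sits 64KiB into the tile's vram, and that dpa is
 * what migrate_page_vram_to_ram() hands to xe_migrate_svm() as the copy
 * source.
 */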



Thread overview: 198+ messages
2024-01-17 22:12 [PATCH 00/23] XeKmd basic SVM support Oak Zeng
2024-01-17 22:12 ` Oak Zeng
2024-01-17 22:12 ` [PATCH 01/23] drm/xe/svm: Add SVM document Oak Zeng
2024-01-17 22:12   ` Oak Zeng
2024-01-17 22:12 ` [PATCH 02/23] drm/xe/svm: Add svm key data structures Oak Zeng
2024-01-17 22:12   ` Oak Zeng
2024-01-17 22:12 ` [PATCH 03/23] drm/xe/svm: create xe svm during vm creation Oak Zeng
2024-01-17 22:12   ` Oak Zeng
2024-01-17 22:12 ` [PATCH 04/23] drm/xe/svm: Trace svm creation Oak Zeng
2024-01-17 22:12   ` Oak Zeng
2024-01-17 22:12 ` [PATCH 05/23] drm/xe/svm: add helper to retrieve svm range from address Oak Zeng
2024-01-17 22:12   ` Oak Zeng
2024-01-17 22:12 ` [PATCH 06/23] drm/xe/svm: Introduce a helper to build sg table from hmm range Oak Zeng
2024-01-17 22:12   ` Oak Zeng
2024-04-05  0:39   ` Jason Gunthorpe
2024-04-05  3:33     ` Zeng, Oak
2024-04-05 12:37       ` Jason Gunthorpe
2024-04-05 16:42         ` Zeng, Oak
2024-04-05 18:02           ` Jason Gunthorpe
2024-04-09 16:45             ` Zeng, Oak
2024-04-09 17:24               ` Jason Gunthorpe
2024-04-23 21:17                 ` Zeng, Oak
2024-04-24  2:31                   ` Matthew Brost
2024-04-24 13:57                     ` Jason Gunthorpe
2024-04-24 16:35                       ` Matthew Brost
2024-04-24 16:44                         ` Jason Gunthorpe
2024-04-24 16:56                           ` Matthew Brost
2024-04-24 17:48                             ` Jason Gunthorpe
2024-04-24 13:48                   ` Jason Gunthorpe
2024-04-24 23:59                     ` Zeng, Oak
2024-04-25  1:05                       ` Jason Gunthorpe
2024-04-26  9:55                         ` Thomas Hellström
2024-04-26 12:00                           ` Jason Gunthorpe
2024-04-26 14:49                             ` Thomas Hellström
2024-04-26 16:35                               ` Jason Gunthorpe
2024-04-29  8:25                                 ` Thomas Hellström
2024-04-30 17:30                                   ` Jason Gunthorpe
2024-04-30 18:57                                     ` Daniel Vetter
2024-05-01  0:09                                       ` Jason Gunthorpe
2024-05-02  8:04                                         ` Daniel Vetter
2024-05-02  9:11                                           ` Thomas Hellström
2024-05-02 12:46                                             ` Jason Gunthorpe
2024-05-02 15:01                                               ` Thomas Hellström
2024-05-02 19:25                                                 ` Zeng, Oak
2024-05-03 13:37                                                   ` Jason Gunthorpe
2024-05-03 14:43                                                     ` Zeng, Oak
2024-05-03 16:28                                                       ` Jason Gunthorpe
2024-05-03 20:29                                                         ` Zeng, Oak
2024-05-04  1:03                                                           ` Dave Airlie
2024-05-06 13:04                                                             ` Daniel Vetter
2024-05-06 23:50                                                               ` Matthew Brost
2024-05-07 11:56                                                                 ` Jason Gunthorpe
2024-05-06 13:33                                                           ` Jason Gunthorpe
2024-04-09 17:33               ` Matthew Brost
2024-01-17 22:12 ` [PATCH 07/23] drm/xe/svm: Add helper for binding hmm range to gpu Oak Zeng
2024-01-17 22:12   ` Oak Zeng
2024-01-17 22:12 ` [PATCH 08/23] drm/xe/svm: Add helper to invalidate svm range from GPU Oak Zeng
2024-01-17 22:12   ` Oak Zeng
2024-01-17 22:12 ` [PATCH 09/23] drm/xe/svm: Remap and provide memmap backing for GPU vram Oak Zeng
2024-01-17 22:12   ` Oak Zeng
2024-01-17 22:12 ` [PATCH 10/23] drm/xe/svm: Introduce svm migration function Oak Zeng
2024-01-17 22:12   ` Oak Zeng
2024-01-17 22:12 ` [PATCH 11/23] drm/xe/svm: implement functions to allocate and free device memory Oak Zeng
2024-01-17 22:12   ` Oak Zeng
2024-01-17 22:12 ` [PATCH 12/23] drm/xe/svm: Trace buddy block allocation and free Oak Zeng
2024-01-17 22:12   ` Oak Zeng
2024-01-17 22:12 ` Oak Zeng [this message]
2024-01-17 22:12   ` [PATCH 13/23] drm/xe/svm: Handle CPU page fault Oak Zeng
2024-01-17 22:12 ` [PATCH 14/23] drm/xe/svm: trace svm range migration Oak Zeng
2024-01-17 22:12   ` Oak Zeng
2024-01-17 22:12 ` [PATCH 15/23] drm/xe/svm: Implement functions to register and unregister mmu notifier Oak Zeng
2024-01-17 22:12   ` Oak Zeng
2024-01-17 22:12 ` [PATCH 16/23] drm/xe/svm: Implement the mmu notifier range invalidate callback Oak Zeng
2024-01-17 22:12   ` Oak Zeng
2024-01-17 22:12 ` [PATCH 17/23] drm/xe/svm: clean up svm range during process exit Oak Zeng
2024-01-17 22:12   ` Oak Zeng
2024-01-17 22:12 ` [PATCH 18/23] drm/xe/svm: Move a few structures to xe_gt.h Oak Zeng
2024-01-17 22:12   ` Oak Zeng
2024-01-17 22:12 ` [PATCH 19/23] drm/xe/svm: migrate svm range to vram Oak Zeng
2024-01-17 22:12   ` Oak Zeng
2024-01-17 22:12 ` [PATCH 20/23] drm/xe/svm: Populate svm range Oak Zeng
2024-01-17 22:12   ` Oak Zeng
2024-01-17 22:12 ` [PATCH 21/23] drm/xe/svm: GPU page fault support Oak Zeng
2024-01-17 22:12   ` Oak Zeng
2024-01-23  2:06   ` Welty, Brian
2024-01-23  2:06     ` Welty, Brian
2024-01-23  3:09     ` Zeng, Oak
2024-01-23  3:09       ` Zeng, Oak
2024-01-23  3:21       ` Making drm_gpuvm work across gpu devices Zeng, Oak
2024-01-23  3:21         ` Zeng, Oak
2024-01-23 11:13         ` Christian König
2024-01-23 11:13           ` Christian König
2024-01-23 19:37           ` Zeng, Oak
2024-01-23 19:37             ` Zeng, Oak
2024-01-23 20:17             ` Felix Kuehling
2024-01-23 20:17               ` Felix Kuehling
2024-01-25  1:39               ` Zeng, Oak
2024-01-25  1:39                 ` Zeng, Oak
2024-01-23 23:56             ` Danilo Krummrich
2024-01-23 23:56               ` Danilo Krummrich
2024-01-24  3:57               ` Zeng, Oak
2024-01-24  3:57                 ` Zeng, Oak
2024-01-24  4:14                 ` Zeng, Oak
2024-01-24  4:14                   ` Zeng, Oak
2024-01-24  6:48                   ` Christian König
2024-01-24  6:48                     ` Christian König
2024-01-25 22:13                 ` Danilo Krummrich
2024-01-25 22:13                   ` Danilo Krummrich
2024-01-24  8:33             ` Christian König
2024-01-24  8:33               ` Christian König
2024-01-25  1:17               ` Zeng, Oak
2024-01-25  1:17                 ` Zeng, Oak
2024-01-25  1:25                 ` David Airlie
2024-01-25  1:25                   ` David Airlie
2024-01-25  5:25                   ` Zeng, Oak
2024-01-25  5:25                     ` Zeng, Oak
2024-01-26 10:09                     ` Christian König
2024-01-26 10:09                       ` Christian König
2024-01-26 20:13                       ` Zeng, Oak
2024-01-26 20:13                         ` Zeng, Oak
2024-01-29 10:10                         ` Christian König
2024-01-29 10:10                           ` Christian König
2024-01-29 20:09                           ` Zeng, Oak
2024-01-29 20:09                             ` Zeng, Oak
2024-01-25 11:00                 ` Re: Making " 周春明(日月)
2024-01-25 11:00                   ` 周春明(日月)
2024-01-25 17:00                   ` Zeng, Oak
2024-01-25 17:00                     ` Zeng, Oak
2024-01-25 17:15                 ` Making " Felix Kuehling
2024-01-25 17:15                   ` Felix Kuehling
2024-01-25 18:37                   ` Zeng, Oak
2024-01-25 18:37                     ` Zeng, Oak
2024-01-26 13:23                     ` Christian König
2024-01-26 13:23                       ` Christian König
2024-01-25 16:42               ` Zeng, Oak
2024-01-25 16:42                 ` Zeng, Oak
2024-01-25 18:32               ` Daniel Vetter
2024-01-25 18:32                 ` Daniel Vetter
2024-01-25 21:02                 ` Zeng, Oak
2024-01-25 21:02                   ` Zeng, Oak
2024-01-26  8:21                 ` Thomas Hellström
2024-01-26  8:21                   ` Thomas Hellström
2024-01-26 12:52                   ` Christian König
2024-01-26 12:52                     ` Christian König
2024-01-27  2:21                     ` Zeng, Oak
2024-01-27  2:21                       ` Zeng, Oak
2024-01-29 10:19                       ` Christian König
2024-01-29 10:19                         ` Christian König
2024-01-30  0:21                         ` Zeng, Oak
2024-01-30  0:21                           ` Zeng, Oak
2024-01-30  8:39                           ` Christian König
2024-01-30  8:39                             ` Christian König
2024-01-30 22:29                             ` Zeng, Oak
2024-01-30 22:29                               ` Zeng, Oak
2024-01-30 23:12                               ` David Airlie
2024-01-30 23:12                                 ` David Airlie
2024-01-31  9:15                                 ` Daniel Vetter
2024-01-31  9:15                                   ` Daniel Vetter
2024-01-31 20:17                                   ` Zeng, Oak
2024-01-31 20:17                                     ` Zeng, Oak
2024-01-31 20:59                                     ` Zeng, Oak
2024-01-31 20:59                                       ` Zeng, Oak
2024-02-01  8:52                                     ` Christian König
2024-02-01  8:52                                       ` Christian König
2024-02-29 18:22                                       ` Zeng, Oak
2024-03-08  4:43                                         ` Zeng, Oak
2024-03-08 10:07                                           ` Christian König
2024-01-30  8:43                           ` Thomas Hellström
2024-01-30  8:43                             ` Thomas Hellström
2024-01-29 15:03                 ` Felix Kuehling
2024-01-29 15:03                   ` Felix Kuehling
2024-01-29 15:33                   ` Christian König
2024-01-29 15:33                     ` Christian König
2024-01-29 16:24                     ` Felix Kuehling
2024-01-29 16:24                       ` Felix Kuehling
2024-01-29 16:28                       ` Christian König
2024-01-29 16:28                         ` Christian König
2024-01-29 17:52                         ` Felix Kuehling
2024-01-29 17:52                           ` Felix Kuehling
2024-01-29 19:03                           ` Christian König
2024-01-29 19:03                             ` Christian König
2024-01-29 20:24                             ` Felix Kuehling
2024-01-29 20:24                               ` Felix Kuehling
2024-02-23 20:12               ` Zeng, Oak
2024-02-27  6:54                 ` Christian König
2024-02-27 15:58                   ` Zeng, Oak
2024-02-28 19:51                     ` Zeng, Oak
2024-02-29  9:41                       ` Christian König
2024-02-29 16:05                         ` Zeng, Oak
2024-02-29 17:12                         ` Thomas Hellström
2024-03-01  7:01                           ` Christian König
2024-01-17 22:12 ` [PATCH 22/23] drm/xe/svm: Add DRM_XE_SVM kernel config entry Oak Zeng
2024-01-17 22:12   ` Oak Zeng
2024-01-17 22:12 ` [PATCH 23/23] drm/xe/svm: Add svm memory hints interface Oak Zeng
2024-01-17 22:12   ` Oak Zeng
2024-01-18  2:45 ` ✓ CI.Patch_applied: success for XeKmd basic SVM support Patchwork
2024-01-18  2:46 ` ✗ CI.checkpatch: warning " Patchwork
2024-01-18  2:46 ` ✗ CI.KUnit: failure " Patchwork
