
drm/xe: Add GPUSVM device memory copy vfunc functions

Add GPUSVM device memory copy vfunc functions and connect them to the
migration layer. Used for device memory migration.

v2:
 - Allow NULL device pages in xe_svm_copy
 - Use new drm_gpusvm_devmem_ops
v3:
 - Prefix defines with XE_ (Thomas)
 - Change copy chunk size to 8M
 - Add a bunch of comments to xe_svm_copy to clarify behavior (Thomas)
 - Better commit message (Thomas)
v5:
 - s/xe_mem_region/xe_vram_region (Rebase)

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20250306012657.3505757-25-matthew.brost@intel.com
Matthew Brost committed Mar 6, 2025
1 parent 11bbe0d commit c5b3eb5
1 changed file: drivers/gpu/drm/xe/xe_svm.c (152 additions, 0 deletions)
@@ -4,6 +4,7 @@
*/

#include "xe_gt_tlb_invalidation.h"
#include "xe_migrate.h"
#include "xe_pt.h"
#include "xe_svm.h"
#include "xe_vm.h"
@@ -315,6 +316,157 @@ static u64 xe_vram_region_page_to_dpa(struct xe_vram_region *vr,
	return dpa;
}

enum xe_svm_copy_dir {
	XE_SVM_COPY_TO_VRAM,
	XE_SVM_COPY_TO_SRAM,
};

static int xe_svm_copy(struct page **pages, dma_addr_t *dma_addr,
		       unsigned long npages, const enum xe_svm_copy_dir dir)
{
	struct xe_vram_region *vr = NULL;
	struct xe_tile *tile;
	struct dma_fence *fence = NULL;
	unsigned long i;
#define XE_VRAM_ADDR_INVALID ~0x0ull
	u64 vram_addr = XE_VRAM_ADDR_INVALID;
	int err = 0, pos = 0;
	bool sram = dir == XE_SVM_COPY_TO_SRAM;
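	/*
	 * vram_addr holds the device physical address of the first page in
	 * the physically contiguous run currently being accumulated; pos is
	 * that page's index into the pages / dma_addr arrays.
	 */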

	/*
	 * This flow is complex: it locates physically contiguous device pages,
	 * derives the starting physical address, and performs a single GPU copy
	 * for every 8M chunk in the DMA address array. Both device pages and
	 * DMA addresses may be sparsely populated. If either is NULL, a copy is
	 * triggered based on the current search state. The last GPU copy is
	 * waited on to ensure all copies are complete.
	 */
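	/*
	 * For example, with 4 KiB pages an 8M chunk is 2048 pages: a run of
	 * 2048 physically contiguous device pages with valid DMA addresses is
	 * flushed as a single GPU copy when the chunk limit is reached, a
	 * break in contiguity or a NULL entry flushes the current run early,
	 * and any remaining partial run is flushed on the last page.
	 */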

	for (i = 0; i < npages; ++i) {
		struct page *spage = pages[i];
		struct dma_fence *__fence;
		u64 __vram_addr;
		bool match = false, chunk, last;

#define XE_MIGRATE_CHUNK_SIZE SZ_8M
		chunk = (i - pos) == (XE_MIGRATE_CHUNK_SIZE / PAGE_SIZE);
		last = (i + 1) == npages;

		/* No CPU page and no device pages queued to copy */
		if (!dma_addr[i] && vram_addr == XE_VRAM_ADDR_INVALID)
			continue;

		if (!vr && spage) {
			vr = page_to_vr(spage);
			tile = vr_to_tile(vr);
		}
		XE_WARN_ON(spage && page_to_vr(spage) != vr);

		/*
		 * CPU page and device page valid: capture the physical address
		 * on the first device page, then check that subsequent device
		 * pages are physically contiguous.
		 */
		if (dma_addr[i] && spage) {
			__vram_addr = xe_vram_region_page_to_dpa(vr, spage);
			if (vram_addr == XE_VRAM_ADDR_INVALID) {
				vram_addr = __vram_addr;
				pos = i;
			}

			match = vram_addr + PAGE_SIZE * (i - pos) == __vram_addr;
		}

		/*
		 * Mismatched physical address, 8M copy chunk, or last page -
		 * trigger a copy.
		 */
		if (!match || chunk || last) {
			/*
			 * Extra page for first copy if last page and matching
			 * physical address.
			 */
			int incr = (match && last) ? 1 : 0;

			if (vram_addr != XE_VRAM_ADDR_INVALID) {
				if (sram)
					__fence = xe_migrate_from_vram(tile->migrate,
								       i - pos + incr,
								       vram_addr,
								       dma_addr + pos);
				else
					__fence = xe_migrate_to_vram(tile->migrate,
								     i - pos + incr,
								     dma_addr + pos,
								     vram_addr);
				if (IS_ERR(__fence)) {
					err = PTR_ERR(__fence);
					goto err_out;
				}

				dma_fence_put(fence);
				fence = __fence;
			}

			/* Set up the physical address of the next device page */
			if (dma_addr[i] && spage) {
				vram_addr = __vram_addr;
				pos = i;
			} else {
				vram_addr = XE_VRAM_ADDR_INVALID;
			}

			/* Extra mismatched device page, copy it */
			if (!match && last && vram_addr != XE_VRAM_ADDR_INVALID) {
				if (sram)
					__fence = xe_migrate_from_vram(tile->migrate, 1,
								       vram_addr,
								       dma_addr + pos);
				else
					__fence = xe_migrate_to_vram(tile->migrate, 1,
								     dma_addr + pos,
								     vram_addr);
				if (IS_ERR(__fence)) {
					err = PTR_ERR(__fence);
					goto err_out;
				}

				dma_fence_put(fence);
				fence = __fence;
			}
		}
	}

err_out:
	/* Wait for all copies to complete */
	if (fence) {
		dma_fence_wait(fence, false);
		dma_fence_put(fence);
	}

	return err;
#undef XE_MIGRATE_CHUNK_SIZE
#undef XE_VRAM_ADDR_INVALID
}

static int xe_svm_copy_to_devmem(struct page **pages, dma_addr_t *dma_addr,
				 unsigned long npages)
{
	return xe_svm_copy(pages, dma_addr, npages, XE_SVM_COPY_TO_VRAM);
}

static int xe_svm_copy_to_ram(struct page **pages, dma_addr_t *dma_addr,
			      unsigned long npages)
{
	return xe_svm_copy(pages, dma_addr, npages, XE_SVM_COPY_TO_SRAM);
}

__maybe_unused
static const struct drm_gpusvm_devmem_ops gpusvm_devmem_ops = {
	.copy_to_devmem = xe_svm_copy_to_devmem,
	.copy_to_ram = xe_svm_copy_to_ram,
};

static const struct drm_gpusvm_ops gpusvm_ops = {
	.range_alloc = xe_svm_range_alloc,
	.range_free = xe_svm_range_free,
