Skip to content

Commit

Permalink
drm/xe: Implement VM snapshot support for BO's and userptr
Browse files Browse the repository at this point in the history
Since we cannot immediately capture the contents of the BOs and userptrs,
perform the capture in 2 stages. The immediate stage takes a reference to
each BO and userptr, while a delayed worker captures the contents and then
drops the references.

This is required because in signaling context, no locks can be taken, no
memory can be allocated, and no waits on userspace can be performed.

With the delayed worker, all of this can be performed very easily,
without having to resort to hacks.

Changes since v1:
- Fix crash on NULL captured vm.
- Use ascii85_encode to capture BO contents and save some space.
- Add length to coredump output for each captured area.
Changes since v2:
- Dump each mapping on its own line, to simplify tooling.
- Fix null pointer deref in xe_vm_snapshot_free.
Changes since v3:
- Don't add uninitialized value to snap->ofs. (Souza)
- Use kernel types for u32 and u64.
- Move snap_mutex destruction to final vm destruction. (Souza)
Changes since v4:
- Remove extra memset. (Souza)

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240221133024.898315-6-maarten.lankhorst@linux.intel.com
  • Loading branch information
Maarten Lankhorst committed Feb 21, 2024
1 parent 0cd9904 commit 0eb2a18
Show file tree
Hide file tree
Showing 4 changed files with 211 additions and 4 deletions.
32 changes: 30 additions & 2 deletions drivers/gpu/drm/xe/xe_devcoredump.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "xe_guc_submit.h"
#include "xe_hw_engine.h"
#include "xe_sched_job.h"
#include "xe_vm.h"

/**
* DOC: Xe device coredump
Expand Down Expand Up @@ -59,12 +60,22 @@ static struct xe_guc *exec_queue_to_guc(struct xe_exec_queue *q)
return &q->gt->uc.guc;
}

static void xe_devcoredump_deferred_snap_work(struct work_struct *work)
{
struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work);

xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL);
if (ss->vm)
xe_vm_snapshot_capture_delayed(ss->vm);
xe_force_wake_put(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL);
}

static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
size_t count, void *data, size_t datalen)
{
struct xe_devcoredump *coredump = data;
struct xe_device *xe = coredump_to_xe(coredump);
struct xe_devcoredump_snapshot *ss;
struct xe_devcoredump_snapshot *ss = &coredump->snapshot;
struct drm_printer p;
struct drm_print_iterator iter;
struct timespec64 ts;
Expand All @@ -74,12 +85,14 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
if (!data || !coredump_to_xe(coredump))
return -ENODEV;

/* Ensure delayed work is captured before continuing */
flush_work(&ss->work);

iter.data = buffer;
iter.offset = 0;
iter.start = offset;
iter.remain = count;

ss = &coredump->snapshot;
p = drm_coredump_printer(&iter);

drm_printf(&p, "**** Xe Device Coredump ****\n");
Expand All @@ -104,6 +117,10 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
if (coredump->snapshot.hwe[i])
xe_hw_engine_snapshot_print(coredump->snapshot.hwe[i],
&p);
if (coredump->snapshot.vm) {
drm_printf(&p, "\n**** VM state ****\n");
xe_vm_snapshot_print(coredump->snapshot.vm, &p);
}

return count - iter.remain;
}
Expand All @@ -117,12 +134,15 @@ static void xe_devcoredump_free(void *data)
if (!data || !coredump_to_xe(coredump))
return;

cancel_work_sync(&coredump->snapshot.work);

xe_guc_ct_snapshot_free(coredump->snapshot.ct);
xe_guc_exec_queue_snapshot_free(coredump->snapshot.ge);
xe_sched_job_snapshot_free(coredump->snapshot.job);
for (i = 0; i < XE_NUM_HW_ENGINES; i++)
if (coredump->snapshot.hwe[i])
xe_hw_engine_snapshot_free(coredump->snapshot.hwe[i]);
xe_vm_snapshot_free(coredump->snapshot.vm);

/* To prevent stale data on next snapshot, clear everything */
memset(&coredump->snapshot, 0, sizeof(coredump->snapshot));
Expand All @@ -147,6 +167,9 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
ss->snapshot_time = ktime_get_real();
ss->boot_time = ktime_get_boottime();

ss->gt = q->gt;
INIT_WORK(&ss->work, xe_devcoredump_deferred_snap_work);

cookie = dma_fence_begin_signalling();
for (i = 0; q->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) {
if (adj_logical_mask & BIT(i)) {
Expand All @@ -162,6 +185,7 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
coredump->snapshot.ct = xe_guc_ct_snapshot_capture(&guc->ct, true);
coredump->snapshot.ge = xe_guc_exec_queue_snapshot_capture(job);
coredump->snapshot.job = xe_sched_job_snapshot_capture(job);
coredump->snapshot.vm = xe_vm_snapshot_capture(q->vm);

for_each_hw_engine(hwe, q->gt, id) {
if (hwe->class != q->hwe->class ||
Expand All @@ -172,6 +196,9 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
coredump->snapshot.hwe[id] = xe_hw_engine_snapshot_capture(hwe);
}

if (ss->vm)
queue_work(system_unbound_wq, &ss->work);

xe_force_wake_put(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
dma_fence_end_signalling(cookie);
}
Expand Down Expand Up @@ -205,3 +232,4 @@ void xe_devcoredump(struct xe_sched_job *job)
xe_devcoredump_read, xe_devcoredump_free);
}
#endif

8 changes: 8 additions & 0 deletions drivers/gpu/drm/xe/xe_devcoredump_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "xe_hw_engine_types.h"

struct xe_device;
struct xe_gt;

/**
* struct xe_devcoredump_snapshot - Crash snapshot
Expand All @@ -26,6 +27,11 @@ struct xe_devcoredump_snapshot {
/** @boot_time: Relative boot time so the uptime can be calculated. */
ktime_t boot_time;

/** @gt: Affected GT, used by forcewake for delayed capture */
struct xe_gt *gt;
/** @work: Work item for deferred capture outside of signaling context */
struct work_struct work;

/* GuC snapshots */
/** @ct: GuC CT snapshot */
struct xe_guc_ct_snapshot *ct;
Expand All @@ -36,6 +42,8 @@ struct xe_devcoredump_snapshot {
struct xe_hw_engine_snapshot *hwe[XE_NUM_HW_ENGINES];
/** @job: Snapshot of job state */
struct xe_sched_job_snapshot *job;
/** @vm: Snapshot of VM state */
struct xe_vm_snapshot *vm;
};

/**
Expand Down
170 changes: 168 additions & 2 deletions drivers/gpu/drm/xe/xe_vm.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <drm/ttm/ttm_execbuf_util.h>
#include <drm/ttm/ttm_tt.h>
#include <drm/xe_drm.h>
#include <linux/ascii85.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/mm.h>
Expand Down Expand Up @@ -1523,8 +1524,6 @@ void xe_vm_close_and_put(struct xe_vm *vm)

up_write(&vm->lock);

mutex_destroy(&vm->snap_mutex);

mutex_lock(&xe->usm.lock);
if (vm->flags & XE_VM_FLAG_FAULT_MODE)
xe->usm.num_vm_in_fault_mode--;
Expand All @@ -1550,6 +1549,8 @@ static void vm_destroy_work_func(struct work_struct *w)
/* xe_vm_close_and_put was not called? */
xe_assert(xe, !vm->size);

mutex_destroy(&vm->snap_mutex);

if (!(vm->flags & XE_VM_FLAG_MIGRATION)) {
xe_device_mem_access_put(xe);

Expand Down Expand Up @@ -3269,3 +3270,168 @@ int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id)

return 0;
}

/*
 * Snapshot of the dumpable mappings of a VM.
 *
 * Filled in two stages: xe_vm_snapshot_capture() records the ranges and takes
 * references, xe_vm_snapshot_capture_delayed() copies the actual contents.
 */
struct xe_vm_snapshot {
	/* Number of entries in snap[] */
	unsigned long num_snaps;
	struct {
		/* Start of the mapping, as returned by xe_vma_start() */
		u64 ofs;
		/* Offset into bo, or the userptr address when there is no BO */
		u64 bo_ofs;
		/* Size of the mapping, as returned by xe_vma_size() */
		unsigned long len;
		/* Referenced backing BO, NULL for userptr or once released */
		struct xe_bo *bo;
		/* Captured contents, or an ERR_PTR() if capture is not possible */
		void *data;
		/* Referenced mm of a userptr mapping, NULL otherwise */
		struct mm_struct *mm;
	} snap[];
};

struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm)
{
unsigned long num_snaps = 0, i;
struct xe_vm_snapshot *snap = NULL;
struct drm_gpuva *gpuva;

if (!vm)
return NULL;

mutex_lock(&vm->snap_mutex);
drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
if (gpuva->flags & XE_VMA_DUMPABLE)
num_snaps++;
}

if (num_snaps)
snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT);
if (!snap)
goto out_unlock;

snap->num_snaps = num_snaps;
i = 0;
drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
struct xe_vma *vma = gpuva_to_vma(gpuva);
struct xe_bo *bo = vma->gpuva.gem.obj ?
gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;

if (!(gpuva->flags & XE_VMA_DUMPABLE))
continue;

snap->snap[i].ofs = xe_vma_start(vma);
snap->snap[i].len = xe_vma_size(vma);
if (bo) {
snap->snap[i].bo = xe_bo_get(bo);
snap->snap[i].bo_ofs = xe_vma_bo_offset(vma);
} else if (xe_vma_is_userptr(vma)) {
struct mm_struct *mm =
to_userptr_vma(vma)->userptr.notifier.mm;

if (mmget_not_zero(mm))
snap->snap[i].mm = mm;
else
snap->snap[i].data = ERR_PTR(-EFAULT);

snap->snap[i].bo_ofs = xe_vma_userptr(vma);
} else {
snap->snap[i].data = ERR_PTR(-ENOENT);
}
i++;
}

out_unlock:
mutex_unlock(&vm->snap_mutex);
return snap;
}

/**
 * xe_vm_snapshot_capture_delayed() - Second stage of a VM snapshot
 * @snap: Snapshot from xe_vm_snapshot_capture(), may be NULL.
 *
 * For every entry that was not already marked as failed, allocate a buffer
 * and copy the mapping contents into it, either from the referenced BO or,
 * for a userptr, from user memory through the referenced mm. On failure the
 * entry's data is replaced by an ERR_PTR() carrying the error.
 *
 * The BO reference of each processed entry is dropped here. The mm reference
 * is dropped once the userptr copy has been attempted; if the buffer
 * allocation fails first, it stays in the entry and is released by
 * xe_vm_snapshot_free().
 *
 * This allocates memory, takes the BO reservation lock and reads user
 * memory, so it belongs in the deferred worker rather than in the
 * signaling path.
 */
void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
{
	/* Same type as num_snaps, matching the other snapshot helpers */
	unsigned long i;

	/* xe_vm_snapshot_capture() may legitimately have returned NULL */
	if (!snap)
		return;

	for (i = 0; i < snap->num_snaps; i++) {
		struct xe_bo *bo = snap->snap[i].bo;
		struct iosys_map src;
		int err;

		/* Already marked uncapturable by the first stage */
		if (IS_ERR(snap->snap[i].data))
			continue;

		snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER);
		if (!snap->snap[i].data) {
			snap->snap[i].data = ERR_PTR(-ENOMEM);
			goto cleanup_bo;
		}

		if (bo) {
			dma_resv_lock(bo->ttm.base.resv, NULL);
			err = ttm_bo_vmap(&bo->ttm, &src);
			if (!err) {
				xe_map_memcpy_from(xe_bo_device(bo),
						   snap->snap[i].data,
						   &src, snap->snap[i].bo_ofs,
						   snap->snap[i].len);
				ttm_bo_vunmap(&bo->ttm, &src);
			}
			dma_resv_unlock(bo->ttm.base.resv);
		} else {
			/* For userptr entries bo_ofs holds the user address */
			void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs;

			/* Borrow the owner's mm so copy_from_user() resolves it */
			kthread_use_mm(snap->snap[i].mm);
			if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len))
				err = 0;
			else
				err = -EFAULT;
			kthread_unuse_mm(snap->snap[i].mm);

			mmput(snap->snap[i].mm);
			snap->snap[i].mm = NULL;
		}

		if (err) {
			kvfree(snap->snap[i].data);
			snap->snap[i].data = ERR_PTR(err);
		}

cleanup_bo:
		xe_bo_put(bo);
		snap->snap[i].bo = NULL;
	}
}

void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p)
{
unsigned long i, j;

for (i = 0; i < snap->num_snaps; i++) {
if (IS_ERR(snap->snap[i].data))
goto uncaptured;

drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len);
drm_printf(p, "[%llx].data: ",
snap->snap[i].ofs);

for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) {
u32 *val = snap->snap[i].data + j;
char dumped[ASCII85_BUFSZ];

drm_puts(p, ascii85_encode(*val, dumped));
}

drm_puts(p, "\n");
continue;

uncaptured:
drm_printf(p, "Unable to capture range [%llx-%llx]: %li\n",
snap->snap[i].ofs, snap->snap[i].ofs + snap->snap[i].len - 1,
PTR_ERR(snap->snap[i].data));
}
}

void xe_vm_snapshot_free(struct xe_vm_snapshot *snap)
{
unsigned long i;

if (!snap)
return;

for (i = 0; i < snap->num_snaps; i++) {
if (!IS_ERR(snap->snap[i].data))
kvfree(snap->snap[i].data);
xe_bo_put(snap->snap[i].bo);
if (snap->snap[i].mm)
mmput(snap->snap[i].mm);
}
kvfree(snap);
}
5 changes: 5 additions & 0 deletions drivers/gpu/drm/xe/xe_vm.h
Original file line number Diff line number Diff line change
Expand Up @@ -271,3 +271,8 @@ static inline void vm_dbg(const struct drm_device *dev,
{ /* noop */ }
#endif
#endif

/*
 * VM snapshot API, used by the device coredump code.
 *
 * NOTE(review): in this hunk these declarations follow the last visible
 * #endif - confirm whether they were meant to sit inside the include guard.
 */

/* Stage 1: record dumpable mappings and take references; may return NULL. */
struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm);
/* Stage 2: copy the mapping contents; called from the deferred worker. */
void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap);
/* Print the captured mappings (or the capture errors) to @p. */
void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p);
/* Free a snapshot and any references it still holds; NULL is accepted. */
void xe_vm_snapshot_free(struct xe_vm_snapshot *snap);

0 comments on commit 0eb2a18

Please sign in to comment.