Skip to content

Commit

Permalink
drm/xe: Add vm snapshot mutex for easily taking a vm snapshot during …
Browse files Browse the repository at this point in the history
…devcoredump

The devcoredump is captured in fence signaling context. Because of this, we
cannot take any of the normal VM locks there, or we would invert the lock order:

Normal: Take vm->lock, dma_fence_wait()
Devcoredump: from dma_fence_wait() context, take vm->lock.

This doesn't work. Since we only care about the integrity of the snapshot, add
a dedicated mutex (vm->snap_mutex) and take it around insertions and removals
of VMAs.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240221133024.898315-5-maarten.lankhorst@linux.intel.com
  • Loading branch information
Maarten Lankhorst committed Feb 21, 2024
1 parent ffb7249 commit 0cd9904
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 0 deletions.
8 changes: 8 additions & 0 deletions drivers/gpu/drm/xe/xe_vm.c
Original file line number Diff line number Diff line change
Expand Up @@ -1055,7 +1055,9 @@ static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
xe_assert(vm->xe, xe_vma_vm(vma) == vm);
lockdep_assert_held(&vm->lock);

mutex_lock(&vm->snap_mutex);
err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva);
mutex_unlock(&vm->snap_mutex);
XE_WARN_ON(err); /* Shouldn't be possible */

return err;
Expand All @@ -1066,7 +1068,9 @@ static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
xe_assert(vm->xe, xe_vma_vm(vma) == vm);
lockdep_assert_held(&vm->lock);

mutex_lock(&vm->snap_mutex);
drm_gpuva_remove(&vma->gpuva);
mutex_unlock(&vm->snap_mutex);
if (vm->usm.last_fault_vma == vma)
vm->usm.last_fault_vma = NULL;
}
Expand Down Expand Up @@ -1293,6 +1297,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
vm->flags = flags;

init_rwsem(&vm->lock);
mutex_init(&vm->snap_mutex);

INIT_LIST_HEAD(&vm->rebind_list);

Expand Down Expand Up @@ -1418,6 +1423,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
return ERR_PTR(err);

err_no_resv:
mutex_destroy(&vm->snap_mutex);
for_each_tile(tile, xe, id)
xe_range_fence_tree_fini(&vm->rftree[id]);
kfree(vm);
Expand Down Expand Up @@ -1517,6 +1523,8 @@ void xe_vm_close_and_put(struct xe_vm *vm)

up_write(&vm->lock);

mutex_destroy(&vm->snap_mutex);

mutex_lock(&xe->usm.lock);
if (vm->flags & XE_VM_FLAG_FAULT_MODE)
xe->usm.num_vm_in_fault_mode--;
Expand Down
5 changes: 5 additions & 0 deletions drivers/gpu/drm/xe/xe_vm_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,11 @@ struct xe_vm {
* VM
*/
struct rw_semaphore lock;
/**
* @snap_mutex: Mutex guarding gpuva insertions and removals, so that
* devcoredump can safely take a snapshot of the VM.
*/
struct mutex snap_mutex;

/**
* @rebind_list: list of VMAs that need rebinding. Protected by the
Expand Down

0 comments on commit 0cd9904

Please sign in to comment.