Skip to content

Commit

Permalink
drm/i915/dgfx: Release mmap on rpm suspend
Browse files Browse the repository at this point in the history
Release the mmap mappings of all lmem objects that are associated
with a userfault, so that any access to those memory mappings while
the PCIe function is in D3hot will raise a userfault.

Runtime resume the dgpu on such a fault (when the gem object lies in lmem).
This transitions the dgpu graphics function to the D0
state if it was in D3, so that the mmap memory mappings
can be accessed.

v2:
- Squashes the patches. [Matt Auld]
- Add adequate locking for lmem_userfault_list addition. [Matt Auld]
- Reused obj->userfault_count to avoid double addition. [Matt Auld]
- Added i915_gem_object_lock to check
  i915_gem_object_is_lmem. [Matt Auld]

v3:
- Use i915_ttm_cpu_maps_iomem. [Matt Auld]
- Fix 'ret == 0 to ret == VM_FAULT_NOPAGE'. [Matt Auld]
- Reuse obj->userfault_count as a bool 0 or 1. [Matt Auld]
- Delete the mmaped obj from lmem_userfault_list in obj
  destruction path. [Matt Auld]
- Get a wakeref for object destruction path. [Matt Auld]
- Use intel_wakeref_auto to delay runtime PM. [Matt Auld]

v4:
- Avoid using mmo offset to get the vma_node. [Matt Auld]
- Added comment to use the lmem_userfault_lock. [Matt Auld]
- Get lmem_userfault_lock in i915_gem_object_release_mmap_offset.
  [Matt Auld]
- Fixed kernel test robot generated warning.

v5:
- Addressed the cosmetics comments. [Andi]
- Changed i915_gem_runtime_pm_object_release_mmap_offset() name to
  i915_gem_object_runtime_pm_release_mmap_offset() to be rhythmic.

PCIe Specs 5.3.1.4.1

Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/6331
Cc: Matthew Auld <matthew.auld@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Anshuman Gupta <anshuman.gupta@intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220913152714.16541-3-anshuman.gupta@intel.com
  • Loading branch information
Anshuman Gupta committed Sep 14, 2022
1 parent f5e92d2 commit ad74457
Show file tree
Hide file tree
Showing 8 changed files with 79 additions and 4 deletions.
21 changes: 21 additions & 0 deletions drivers/gpu/drm/i915/gem/i915_gem_mman.c
Original file line number Diff line number Diff line change
Expand Up @@ -550,6 +550,20 @@ void i915_gem_object_release_mmap_gtt(struct drm_i915_gem_object *obj)
intel_runtime_pm_put(&i915->runtime_pm, wakeref);
}

/**
 * i915_gem_object_runtime_pm_release_mmap_offset - drop an object's CPU mmaps
 * @obj: object whose backing TTM buffer object mappings are released
 *
 * Unmaps the vma node of the backing TTM buffer object from the device
 * address-space mapping. If the object is currently flagged through
 * userfault_count, it is additionally unlinked from the userfault list it
 * sits on and the flag is cleared.
 *
 * No lock is taken around the list removal in this function.
 */
void i915_gem_object_runtime_pm_release_mmap_offset(struct drm_i915_gem_object *obj)
{
	struct ttm_buffer_object *ttm_bo = i915_gem_to_ttm(obj);

	drm_vma_node_unmap(&ttm_bo->base.vma_node, ttm_bo->bdev->dev_mapping);

	/* Nothing is tracked on the userfault list for this object. */
	if (!obj->userfault_count)
		return;

	/* rpm wakeref provides exclusive access, so no list lock is taken */
	list_del(&obj->userfault_link);
	obj->userfault_count = 0;
}

void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj)
{
struct i915_mmap_offset *mmo, *mn;
Expand All @@ -573,6 +587,13 @@ void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj)
spin_lock(&obj->mmo.lock);
}
spin_unlock(&obj->mmo.lock);

if (obj->userfault_count) {
mutex_lock(&to_gt(to_i915(obj->base.dev))->lmem_userfault_lock);
list_del(&obj->userfault_link);
mutex_unlock(&to_gt(to_i915(obj->base.dev))->lmem_userfault_lock);
obj->userfault_count = 0;
}
}

static struct i915_mmap_offset *
Expand Down
1 change: 1 addition & 0 deletions drivers/gpu/drm/i915/gem/i915_gem_mman.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ int i915_gem_dumb_mmap_offset(struct drm_file *file_priv,
void __i915_gem_object_release_mmap_gtt(struct drm_i915_gem_object *obj);
void i915_gem_object_release_mmap_gtt(struct drm_i915_gem_object *obj);

/*
 * Runtime-suspend variant: unmaps the object's TTM vma node and drops the
 * object from the userfault list without taking lmem_userfault_lock.
 * Invoked from i915_gem_runtime_suspend() for every object on the GT's
 * lmem_userfault_list.
 */
void i915_gem_object_runtime_pm_release_mmap_offset(struct drm_i915_gem_object *obj);
/*
 * General release path. Among other things it removes the object from the
 * lmem userfault list under lmem_userfault_lock when userfault_count is set.
 * NOTE(review): presumably the caller is expected to hold a runtime pm
 * wakeref here - confirm against the callers.
 */
void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj);

#endif
2 changes: 1 addition & 1 deletion drivers/gpu/drm/i915/gem/i915_gem_object.c
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ static void __i915_gem_object_free_mmaps(struct drm_i915_gem_object *obj)
{
/* Skip serialisation and waking the device if known to be not used. */

if (obj->userfault_count)
if (obj->userfault_count && !IS_DGFX(to_i915(obj->base.dev)))
i915_gem_object_release_mmap_gtt(obj);

if (!RB_EMPTY_ROOT(&obj->mmo.offsets)) {
Expand Down
3 changes: 2 additions & 1 deletion drivers/gpu/drm/i915/gem/i915_gem_object_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,8 @@ struct drm_i915_gem_object {
};

/**
* Whether the object is currently in the GGTT mmap.
* Whether the object is currently in the GGTT or any other supported
* fake offset mmap backed by lmem.
*/
unsigned int userfault_count;
struct list_head userfault_link;
Expand Down
36 changes: 34 additions & 2 deletions drivers/gpu/drm/i915/gem/i915_gem_ttm.c
Original file line number Diff line number Diff line change
Expand Up @@ -548,9 +548,18 @@ static int i915_ttm_shrink(struct drm_i915_gem_object *obj, unsigned int flags)
/*
 * Notification hook run when the memory behind @bo is being deleted.
 *
 * If @bo has an associated GEM object, its pages are finalised and the
 * cached io rsgt is freed. When the current resource is CPU-mapped as
 * iomem, a runtime pm wakeref is held for the duration of the pages
 * teardown and released right afterwards.
 */
static void i915_ttm_delete_mem_notify(struct ttm_buffer_object *bo)
{
	struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
	intel_wakeref_t wakeref;

	if (unlikely(!obj))
		return;

	/* ttm_bo_release() already has dma_resv_lock */
	wakeref = i915_ttm_cpu_maps_iomem(bo->resource) ?
		  intel_runtime_pm_get(&to_i915(obj->base.dev)->runtime_pm) : 0;

	__i915_gem_object_pages_fini(obj);

	/* Only drop the wakeref if one was actually taken above. */
	if (wakeref)
		intel_runtime_pm_put(&to_i915(obj->base.dev)->runtime_pm, wakeref);

	i915_ttm_free_cached_io_rsgt(obj);
}
Expand Down Expand Up @@ -1020,6 +1029,7 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf)
struct ttm_buffer_object *bo = area->vm_private_data;
struct drm_device *dev = bo->base.dev;
struct drm_i915_gem_object *obj;
intel_wakeref_t wakeref = 0;
vm_fault_t ret;
int idx;

Expand All @@ -1041,6 +1051,9 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf)
return VM_FAULT_SIGBUS;
}

if (i915_ttm_cpu_maps_iomem(bo->resource))
wakeref = intel_runtime_pm_get(&to_i915(obj->base.dev)->runtime_pm);

if (!i915_ttm_resource_mappable(bo->resource)) {
int err = -ENODEV;
int i;
Expand All @@ -1062,7 +1075,8 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf)
if (err) {
drm_dbg(dev, "Unable to make resource CPU accessible\n");
dma_resv_unlock(bo->base.resv);
return VM_FAULT_SIGBUS;
ret = VM_FAULT_SIGBUS;
goto out_rpm;
}
}

Expand All @@ -1073,12 +1087,30 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf)
} else {
ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
}

if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
return ret;
goto out_rpm;

/* ttm_bo_vm_reserve() already has dma_resv_lock */
if (ret == VM_FAULT_NOPAGE && wakeref && !obj->userfault_count) {
obj->userfault_count = 1;
mutex_lock(&to_gt(to_i915(obj->base.dev))->lmem_userfault_lock);
list_add(&obj->userfault_link, &to_gt(to_i915(obj->base.dev))->lmem_userfault_list);
mutex_unlock(&to_gt(to_i915(obj->base.dev))->lmem_userfault_lock);
}

if (wakeref & CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)
intel_wakeref_auto(&to_gt(to_i915(obj->base.dev))->userfault_wakeref,
msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND));

i915_ttm_adjust_lru(obj);

dma_resv_unlock(bo->base.resv);

out_rpm:
if (wakeref)
intel_runtime_pm_put(&to_i915(obj->base.dev)->runtime_pm, wakeref);

return ret;
}

Expand Down
2 changes: 2 additions & 0 deletions drivers/gpu/drm/i915/gt/intel_gt.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ void intel_gt_common_init_early(struct intel_gt *gt)
{
spin_lock_init(gt->irq_lock);

INIT_LIST_HEAD(&gt->lmem_userfault_list);
mutex_init(&gt->lmem_userfault_lock);
INIT_LIST_HEAD(&gt->closed_vma);
spin_lock_init(&gt->closed_lock);

Expand Down
14 changes: 14 additions & 0 deletions drivers/gpu/drm/i915/gt/intel_gt_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,20 @@ struct intel_gt {
struct intel_wakeref wakeref;
atomic_t user_wakeref;

/**
* Protects access to the lmem userfault list.
* Outside of the runtime suspend path, access to @lmem_userfault_list
* always requires first grabbing the runtime pm wakeref, to ensure we
* can't race against runtime suspend.
* Once we have that, we also need to grab @lmem_userfault_lock,
* at which point we have exclusive access.
* The runtime suspend path is special since it doesn't really hold any locks,
* but instead has exclusive access by virtue of all other accesses requiring
* holding the runtime pm wakeref.
*/
struct mutex lmem_userfault_lock;
struct list_head lmem_userfault_list;

struct list_head closed_vma;
spinlock_t closed_lock; /* guards the list of closed_vma */

Expand Down
4 changes: 4 additions & 0 deletions drivers/gpu/drm/i915/i915_gem.c
Original file line number Diff line number Diff line change
Expand Up @@ -842,6 +842,10 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915)
&to_gt(i915)->ggtt->userfault_list, userfault_link)
__i915_gem_object_release_mmap_gtt(obj);

list_for_each_entry_safe(obj, on,
&to_gt(i915)->lmem_userfault_list, userfault_link)
i915_gem_object_runtime_pm_release_mmap_offset(obj);

/*
* The fence will be lost when the device powers down. If any were
* in use by hardware (i.e. they are pinned), we should not be powering
Expand Down

0 comments on commit ad74457

Please sign in to comment.