Skip to content

Commit

Permalink
Merge amd-staging-dkms-5.16 into amd-mainline-dkms-5.16
Browse files Browse the repository at this point in the history
Signed-off-by: Rui Teng <rui.teng@amd.com>
Change-Id: I8da68bd4793ff5667f7e2f805ee0fbdcadf07aff
  • Loading branch information
Rui Teng committed Aug 24, 2022
2 parents 18e196b + 2aa3a46 commit 69954a8
Show file tree
Hide file tree
Showing 85 changed files with 1,168 additions and 3,373 deletions.
57 changes: 57 additions & 0 deletions Documentation/gpu/amdgpu/thermal.rst
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,60 @@ gpu_metrics

.. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c
:doc: gpu_metrics

GFXOFF
======

GFXOFF is a feature found in most recent GPUs that saves power at runtime. The
card's RLC (RunList Controller) firmware powers off the gfx engine
dynamically when there is no workload on gfx or compute pipes. GFXOFF is on by
default on supported GPUs.

Userspace can interact with GFXOFF through a debugfs interface (all values in
`uint32_t`, unless otherwise noted):

``amdgpu_gfxoff``
-----------------

Use it to enable/disable GFXOFF, and to check if it's currently enabled/disabled::

$ xxd -l1 -p /sys/kernel/debug/dri/0/amdgpu_gfxoff
01

- Write 0 to disable it, and 1 to enable it.
- A read value of 0 means it's disabled; 1 means it's enabled.

If it's enabled, that means that the GPU is free to enter into GFXOFF mode as
needed. Disabled means that it will never enter GFXOFF mode.

``amdgpu_gfxoff_status``
------------------------

Read it to check the current GFXOFF status of a GPU::

$ xxd -l1 -p /sys/kernel/debug/dri/0/amdgpu_gfxoff_status
02

- 0: GPU is in GFXOFF state, the gfx engine is powered down.
- 1: Transition out of GFXOFF state
- 2: Not in GFXOFF state
- 3: Transition into GFXOFF state

If GFXOFF is enabled, the value will move among the states 0-3 listed above,
settling at 0 whenever possible. When it's disabled, it's always at 2. Returns
``-EINVAL`` if it's not supported.

``amdgpu_gfxoff_count``
-----------------------

Read it to get the total GFXOFF entry count at the time of query since system
power-up. The value is a `uint64_t`; however, due to firmware limitations,
it can currently overflow as a `uint32_t`. *Only supported in vangogh*

``amdgpu_gfxoff_residency``
---------------------------

Write 1 to amdgpu_gfxoff_residency to start logging, and 0 to stop. Read it to
get average GFXOFF residency % multiplied by 100 during the last logging
interval. E.g. a value of 7854 means 78.54% of the time in the last logging
interval the GPU was in GFXOFF mode. *Only supported in vangogh*
2 changes: 1 addition & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,7 @@ enum amdgpu_kiq_irq {
AMDGPU_CP_KIQ_IRQ_DRIVER0 = 0,
AMDGPU_CP_KIQ_IRQ_LAST
};

#define SRIOV_USEC_TIMEOUT 1200000 /* wait 12 * 100ms for SRIOV */
#define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */
#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */
#define MAX_KIQ_REG_TRY 1000
Expand Down
7 changes: 1 addition & 6 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
Original file line number Diff line number Diff line change
Expand Up @@ -164,11 +164,6 @@ int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev,
int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
uint16_t pasid, enum TLB_FLUSH_TYPE flush_type);

int amdgpu_amdkfd_copy_mem_to_mem(struct amdgpu_device *adev, struct kgd_mem *src_mem,
uint64_t src_offset, struct kgd_mem *dst_mem,
uint64_t dest_offset, uint64_t size, struct dma_fence **f,
uint64_t *actual_size);

bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);

int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev);
Expand Down Expand Up @@ -292,7 +287,7 @@ uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv);
size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev);
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
struct amdgpu_device *adev, uint64_t va, uint64_t size,
void *drm_priv, struct sg_table *sg, struct kgd_mem **mem,
void *drm_priv, struct kgd_mem **mem,
uint64_t *offset, uint32_t flags, bool criu_resume);
int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv,
Expand Down
84 changes: 2 additions & 82 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
Original file line number Diff line number Diff line change
Expand Up @@ -1668,11 +1668,12 @@ size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev)

int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
struct amdgpu_device *adev, uint64_t va, uint64_t size,
void *drm_priv, struct sg_table *sg, struct kgd_mem **mem,
void *drm_priv, struct kgd_mem **mem,
uint64_t *offset, uint32_t flags, bool criu_resume)
{
struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
enum ttm_bo_type bo_type = ttm_bo_type_device;
struct sg_table *sg = NULL;
uint64_t user_addr = 0;
struct amdgpu_bo *bo;
struct drm_gem_object *gobj = NULL;
Expand Down Expand Up @@ -1713,10 +1714,6 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
}
}

if (sg) {
alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
bo_type = ttm_bo_type_sg;
}
*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
if (!*mem) {
ret = -ENOMEM;
Expand Down Expand Up @@ -3250,83 +3247,6 @@ int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem)
return 0;
}

/**
 * amdgpu_amdkfd_copy_mem_to_mem - copy a range between two KFD memory objects
 * @adev: device passed through to amdgpu_ttm_copy_mem_to_mem()
 * @src_mem: source memory object; its BO is reserved and validated first
 * @src_offset: offset stored in the source copy descriptor
 * @dst_mem: destination memory object; its BO is reserved and validated first
 * @dst_offset: offset stored in the destination copy descriptor
 * @size: number of bytes requested from amdgpu_ttm_copy_mem_to_mem()
 * @f: optional; when non-NULL and the copy step was reached, receives a
 *     reference to the fence produced by the copy (may be NULL)
 * @actual_size: set to 0 on entry and to @size once the copy call reports
 *               no error
 *
 * Both BOs are reserved together, validated against the domain recorded in
 * their kgd_mem, and then handed to amdgpu_ttm_copy_mem_to_mem(). Any fence
 * produced by the copy is attached to both BOs. The reservation is dropped
 * before returning, whether or not validation or the copy succeeded.
 *
 * Note that @f is not written when the function fails before the copy step
 * (bad arguments, reservation failure, or validation failure).
 *
 * Return: -EINVAL if @adev, @src_mem, @dst_mem or @actual_size is NULL;
 * otherwise the error from the first failing step, or the result of
 * amdgpu_ttm_copy_mem_to_mem().
 */
int amdgpu_amdkfd_copy_mem_to_mem(struct amdgpu_device *adev, struct kgd_mem *src_mem,
		uint64_t src_offset, struct kgd_mem *dst_mem,
		uint64_t dst_offset, uint64_t size,
		struct dma_fence **f, uint64_t *actual_size)
{
	struct ttm_validate_buffer entries[2];
	struct list_head reserve_list, dup_list;
	struct amdgpu_copy_mem from, to;
	struct dma_fence *copy_fence = NULL;
	struct ww_acquire_ctx ctx;
	int ret;

	if (!(adev && src_mem && dst_mem && actual_size))
		return -EINVAL;

	*actual_size = 0;

	INIT_LIST_HEAD(&reserve_list);
	INIT_LIST_HEAD(&dup_list);

	/* Describe the source and destination for the TTM copy helper. */
	from.bo = &src_mem->bo->tbo;
	to.bo = &dst_mem->bo->tbo;
	from.mem = from.bo->resource;
	to.mem = to.bo->resource;
	from.offset = src_offset;
	to.offset = dst_offset;

	/* Queue both BOs, source first, for a single reservation pass. */
	entries[0].bo = from.bo;
	entries[1].bo = to.bo;

	entries[0].num_shared = 1;
	list_add_tail(&entries[0].head, &reserve_list);
	entries[1].num_shared = 1;
	list_add_tail(&entries[1].head, &reserve_list);

	ret = ttm_eu_reserve_buffers(&ctx, &reserve_list, false, &dup_list);
	if (ret) {
		pr_err("Copy buffer failed. Unable to reserve bo (%d)\n", ret);
		return ret;
	}

	/*
	 * The process owning the source and destination BOs could have been
	 * evicted and the BOs invalidated, so validate both before use.
	 */
	ret = amdgpu_amdkfd_bo_validate(src_mem->bo, src_mem->domain, false);
	if (ret) {
		pr_err("CMA fail: SRC BO validate failed %d\n", ret);
		goto unreserve;
	}

	ret = amdgpu_amdkfd_bo_validate(dst_mem->bo, dst_mem->domain, false);
	if (ret) {
		pr_err("CMA fail: DST BO validate failed %d\n", ret);
		goto unreserve;
	}

	ret = amdgpu_ttm_copy_mem_to_mem(adev, &from, &to, size, false, NULL,
					 &copy_fence);
	if (!ret)
		*actual_size = size;
	else
		pr_err("Copy buffer failed %d\n", ret);

	/* A fence may exist even when the copy reported an error. */
	if (copy_fence) {
		amdgpu_bo_fence(src_mem->bo, copy_fence, true);
		amdgpu_bo_fence(dst_mem->bo, copy_fence, true);
	}

	/* Hand the caller its own reference, then drop the local one. */
	if (f)
		*f = dma_fence_get(copy_fence);
	dma_fence_put(copy_fence);

unreserve:
	ttm_eu_backoff_reservation(&ctx, &reserve_list);
	return ret;
}

/* Returns GPU-specific tiling mode information */
int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev,
struct tile_config *config)
Expand Down
8 changes: 2 additions & 6 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
Original file line number Diff line number Diff line change
Expand Up @@ -957,16 +957,12 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
continue;

r = amdgpu_vm_bo_update(adev, bo_va, false);
if (r) {
mutex_unlock(&p->bo_list->bo_list_mutex);
if (r)
return r;
}

r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update);
if (r) {
mutex_unlock(&p->bo_list->bo_list_mutex);
if (r)
return r;
}
}

r = amdgpu_vm_handle_moved(adev, vm);
Expand Down
Loading

0 comments on commit 69954a8

Please sign in to comment.