Merge tag 'amd-drm-next-6.9-2024-03-08-1' of https://gitlab.freedesktop.org/agd5f/linux into drm-next

amd-drm-next-6.9-2024-03-08-1:

amdgpu:
- DCN 3.5.1 support
- Fixes for IOMMUv2 removal
- UAF fix
- Misc small fixes and cleanups
- SR-IOV fixes
- MCBP cleanup
- devcoredump update
- NBIF 6.3.1 support
- VPE 6.1.1 support

amdkfd:
- Misc fixes and cleanups
- GFX10.1 trap fixes

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240308170741.3691166-1-alexander.deucher@amd.com
Dave Airlie committed Mar 11, 2024
2 parents b9511c6 + 5eabf0c commit 119b225
Showing 74 changed files with 122,906 additions and 791 deletions.
2 changes: 1 addition & 1 deletion drivers/gpu/drm/amd/amdgpu/Makefile
@@ -98,7 +98,7 @@ amdgpu-y += \
vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o arct_reg_init.o mxgpu_nv.o \
nbio_v7_2.o hdp_v4_0.o hdp_v5_0.o aldebaran_reg_init.o aldebaran.o soc21.o \
sienna_cichlid.o smu_v13_0_10.o nbio_v4_3.o hdp_v6_0.o nbio_v7_7.o hdp_v5_2.o lsdma_v6_0.o \
nbio_v7_9.o aqua_vanjaram.o nbio_v7_11.o lsdma_v7_0.o hdp_v7_0.o
nbio_v7_9.o aqua_vanjaram.o nbio_v7_11.o lsdma_v7_0.o hdp_v7_0.o nbif_v6_3_1.o

# add DF block
amdgpu-y += \
9 changes: 7 additions & 2 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1782,9 +1782,14 @@ static int amdgpu_debugfs_vm_info_show(struct seq_file *m, void *unused)
list_for_each_entry(file, &dev->filelist, lhead) {
struct amdgpu_fpriv *fpriv = file->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm;
struct amdgpu_task_info *ti;

ti = amdgpu_vm_get_task_info_vm(vm);
if (ti) {
seq_printf(m, "pid:%d\tProcess:%s ----------\n", ti->pid, ti->process_name);
amdgpu_vm_put_task_info(ti);
}

seq_printf(m, "pid:%d\tProcess:%s ----------\n",
vm->task_info.pid, vm->task_info.process_name);
r = amdgpu_bo_reserve(vm->root.bo, true);
if (r)
break;
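
Note: this hunk moves the debugfs dump off the embedded vm->task_info and onto the reference-counted lookup helpers introduced in this series. A minimal sketch of the get/print/put pattern, assuming only what the hunk shows (amdgpu_vm_get_task_info_vm() returns a struct amdgpu_task_info * or NULL, released with amdgpu_vm_put_task_info()); the helper name example_dump_vm_owner is hypothetical:

/* Sketch: print the owning task of a VM, tolerating a missing task_info. */
static void example_dump_vm_owner(struct seq_file *m, struct amdgpu_vm *vm)
{
	struct amdgpu_task_info *ti;

	ti = amdgpu_vm_get_task_info_vm(vm);	/* may return NULL */
	if (!ti)
		return;

	seq_printf(m, "pid:%d\tProcess:%s ----------\n",
		   ti->pid, ti->process_name);
	amdgpu_vm_put_task_info(ti);		/* drop the reference taken above */
}
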
4 changes: 2 additions & 2 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4056,13 +4056,13 @@ int amdgpu_device_init(struct amdgpu_device *adev,
goto unmap_memory;
}

amdgpu_device_set_mcbp(adev);

/* early init functions */
r = amdgpu_device_ip_early_init(adev);
if (r)
goto unmap_memory;

amdgpu_device_set_mcbp(adev);

/* Get rid of things like offb */
r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
if (r)
16 changes: 16 additions & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -61,6 +61,7 @@
#include "nbio_v4_3.h"
#include "nbio_v7_2.h"
#include "nbio_v7_7.h"
#include "nbif_v6_3_1.h"
#include "hdp_v5_0.h"
#include "hdp_v5_2.h"
#include "hdp_v6_0.h"
@@ -1319,6 +1320,15 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
}
}

if (le16_to_cpu(ip->hw_id) == VPE_HWID) {
if (adev->vpe.num_instances < AMDGPU_MAX_VPE_INSTANCES)
adev->vpe.num_instances++;
else
dev_err(adev->dev, "Too many VPE instances: %d vs %d\n",
adev->vpe.num_instances + 1,
AMDGPU_MAX_VPE_INSTANCES);
}

if (le16_to_cpu(ip->hw_id) == UMC_HWID) {
adev->gmc.num_umc++;
adev->umc.node_inst_num++;
@@ -1936,6 +1946,7 @@ static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(3, 2, 0):
case IP_VERSION(3, 2, 1):
case IP_VERSION(3, 5, 0):
case IP_VERSION(3, 5, 1):
if (amdgpu_sriov_vf(adev))
amdgpu_discovery_set_sriov_display(adev);
else
@@ -2212,6 +2223,7 @@ static int amdgpu_discovery_set_vpe_ip_blocks(struct amdgpu_device *adev)
{
switch (amdgpu_ip_version(adev, VPE_HWIP, 0)) {
case IP_VERSION(6, 1, 0):
case IP_VERSION(6, 1, 1):
amdgpu_device_ip_block_add(adev, &vpe_v6_1_ip_block);
break;
default:
@@ -2558,6 +2570,10 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
adev->nbio.funcs = &nbio_v7_7_funcs;
adev->nbio.hdp_flush_reg = &nbio_v7_7_hdp_flush_reg;
break;
case IP_VERSION(6, 3, 1):
adev->nbio.funcs = &nbif_v6_3_1_funcs;
adev->nbio.hdp_flush_reg = &nbif_v6_3_1_hdp_flush_reg;
break;
default:
break;
}
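
Note: the discovery changes wire up the new IP versions (DCN 3.5.1, VPE 6.1.1, NBIF 6.3.1) by keying block selection off the versions read from the discovery table. A hedged sketch of how code could test for the new NBIF block; NBIO_HWIP is an assumption here, since the hunk shows only the IP_VERSION case labels, not the switch expression, and the helper name is hypothetical:

/* Sketch: does this ASIC expose the NBIF 6.3.1 block? */
static bool example_has_nbif_6_3_1(struct amdgpu_device *adev)
{
	return amdgpu_ip_version(adev, NBIO_HWIP, 0) == IP_VERSION(6, 3, 1);
}
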
24 changes: 12 additions & 12 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -2693,7 +2693,7 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
}

adev->in_runpm = true;
if (amdgpu_device_supports_px(drm_dev))
if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX)
drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;

/*
@@ -2703,7 +2703,7 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
* platforms.
* TODO: this may be also needed for PX capable platform.
*/
if (amdgpu_device_supports_boco(drm_dev))
if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO)
adev->mp1_state = PP_MP1_STATE_UNLOAD;

ret = amdgpu_device_prepare(drm_dev);
@@ -2712,15 +2712,15 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
ret = amdgpu_device_suspend(drm_dev, false);
if (ret) {
adev->in_runpm = false;
if (amdgpu_device_supports_boco(drm_dev))
if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO)
adev->mp1_state = PP_MP1_STATE_NONE;
return ret;
}

if (amdgpu_device_supports_boco(drm_dev))
if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO)
adev->mp1_state = PP_MP1_STATE_NONE;

if (amdgpu_device_supports_px(drm_dev)) {
if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX) {
/* Only need to handle PCI state in the driver for ATPX
* PCI core handles it for _PR3.
*/
@@ -2729,9 +2729,9 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
pci_ignore_hotplug(pdev);
pci_set_power_state(pdev, PCI_D3cold);
drm_dev->switch_power_state = DRM_SWITCH_POWER_DYNAMIC_OFF;
} else if (amdgpu_device_supports_boco(drm_dev)) {
} else if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO) {
/* nothing to do */
} else if (amdgpu_device_supports_baco(drm_dev)) {
} else if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) {
amdgpu_device_baco_enter(drm_dev);
}

@@ -2754,7 +2754,7 @@ static int amdgpu_pmops_runtime_resume(struct device *dev)
if (!pci_device_is_present(adev->pdev))
adev->no_hw_access = true;

if (amdgpu_device_supports_px(drm_dev)) {
if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX) {
drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;

/* Only need to handle PCI state in the driver for ATPX
@@ -2766,22 +2766,22 @@ static int amdgpu_pmops_runtime_resume(struct device *dev)
if (ret)
return ret;
pci_set_master(pdev);
} else if (amdgpu_device_supports_boco(drm_dev)) {
} else if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO) {
/* Only need to handle PCI state in the driver for ATPX
* PCI core handles it for _PR3.
*/
pci_set_master(pdev);
} else if (amdgpu_device_supports_baco(drm_dev)) {
} else if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) {
amdgpu_device_baco_exit(drm_dev);
}
ret = amdgpu_device_resume(drm_dev, false);
if (ret) {
if (amdgpu_device_supports_px(drm_dev))
if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX)
pci_disable_device(pdev);
return ret;
}

if (amdgpu_device_supports_px(drm_dev))
if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX)
drm_dev->switch_power_state = DRM_SWITCH_POWER_ON;
adev->in_runpm = false;
return 0;
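
Note: these hunks replace the amdgpu_device_supports_px()/_boco()/_baco() predicates with a direct check of the runtime-PM mode cached in adev->pm.rpm_mode. A minimal sketch of that dispatch, assuming the AMDGPU_RUNPM_* values used above (the helper name and any enum values beyond those shown are assumptions):

/* Sketch: act on the cached runtime-PM mode instead of re-deriving it. */
static void example_runtime_suspend_tail(struct amdgpu_device *adev,
					 struct drm_device *drm_dev)
{
	switch (adev->pm.rpm_mode) {
	case AMDGPU_RUNPM_PX:
		/* ATPX: the driver manages the PCI power state itself. */
		drm_dev->switch_power_state = DRM_SWITCH_POWER_DYNAMIC_OFF;
		break;
	case AMDGPU_RUNPM_BOCO:
		/* _PR3: the PCI core handles the power state, nothing to do. */
		break;
	case AMDGPU_RUNPM_BACO:
		amdgpu_device_baco_enter(drm_dev);
		break;
	default:
		break;
	}
}
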
12 changes: 9 additions & 3 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -208,9 +208,15 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj,
if (!WARN_ON(!vm->process_info->eviction_fence)) {
r = amdgpu_amdkfd_bo_validate_and_fence(abo, AMDGPU_GEM_DOMAIN_GTT,
&vm->process_info->eviction_fence->base);
if (r)
dev_warn(adev->dev, "%d: validate_and_fence failed: %d\n",
vm->task_info.pid, r);
if (r) {
struct amdgpu_task_info *ti = amdgpu_vm_get_task_info_vm(vm);

dev_warn(adev->dev, "validate_and_fence failed: %d\n", r);
if (ti) {
dev_warn(adev->dev, "pid %d\n", ti->pid);
amdgpu_vm_put_task_info(ti);
}
}
}
mutex_unlock(&vm->process_info->lock);

21 changes: 7 additions & 14 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -131,18 +131,18 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
struct amdgpu_ib *ib = &ibs[0];
struct dma_fence *tmp = NULL;
bool need_ctx_switch;
unsigned int patch_offset = ~0;
struct amdgpu_vm *vm;
uint64_t fence_ctx;
uint32_t status = 0, alloc_size;
unsigned int fence_flags = 0;
bool secure, init_shadow;
u64 shadow_va, csa_va, gds_va;
int vmid = AMDGPU_JOB_GET_VMID(job);
bool need_pipe_sync = false;
unsigned int cond_exec;

unsigned int i;
int r = 0;
bool need_pipe_sync = false;

if (num_ibs == 0)
return -EINVAL;
@@ -228,7 +228,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
init_shadow, vmid);

if (ring->funcs->init_cond_exec)
patch_offset = amdgpu_ring_init_cond_exec(ring);
cond_exec = amdgpu_ring_init_cond_exec(ring,
ring->cond_exe_gpu_addr);

amdgpu_device_flush_hdp(adev, ring);

@@ -278,16 +279,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
fence_flags | AMDGPU_FENCE_FLAG_64BIT);
}

if (ring->funcs->emit_gfx_shadow) {
if (ring->funcs->emit_gfx_shadow && ring->funcs->init_cond_exec) {
amdgpu_ring_emit_gfx_shadow(ring, 0, 0, 0, false, 0);

if (ring->funcs->init_cond_exec) {
unsigned int ce_offset = ~0;

ce_offset = amdgpu_ring_init_cond_exec(ring);
if (ce_offset != ~0 && ring->funcs->patch_cond_exec)
amdgpu_ring_patch_cond_exec(ring, ce_offset);
}
amdgpu_ring_init_cond_exec(ring, ring->cond_exe_gpu_addr);
}

r = amdgpu_fence_emit(ring, f, job, fence_flags);
@@ -302,8 +296,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
if (ring->funcs->insert_end)
ring->funcs->insert_end(ring);

if (patch_offset != ~0 && ring->funcs->patch_cond_exec)
amdgpu_ring_patch_cond_exec(ring, patch_offset);
amdgpu_ring_patch_cond_exec(ring, cond_exec);

ring->current_ctx = fence_ctx;
if (vm && ring->funcs->emit_switch_buffer)
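
Note: the amdgpu_ib.c hunks switch to a conditional-execution API that takes the patch address explicitly and returns an offset which is later patched without the old 'patch_offset != ~0' sentinel or the per-ring patch_cond_exec check. A sketch of the new flow using only the calls visible above; that the patch helper tolerates rings without cond-exec support is an inference from the unconditional call in the hunk:

/* Sketch: bracket packets that may be skipped (e.g. across preemption)
 * in a conditional-execution window, then patch its length afterwards.
 */
static void example_cond_exec_window(struct amdgpu_ring *ring)
{
	unsigned int cond_exec = 0;

	if (ring->funcs->init_cond_exec)
		cond_exec = amdgpu_ring_init_cond_exec(ring,
						       ring->cond_exe_gpu_addr);

	/* ... emit the packets that belong inside the window ... */

	/* Assumed to be a no-op when no window was opened. */
	amdgpu_ring_patch_cond_exec(ring, cond_exec);
}
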
18 changes: 11 additions & 7 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -35,7 +35,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
{
struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
struct amdgpu_job *job = to_amdgpu_job(s_job);
struct amdgpu_task_info ti;
struct amdgpu_task_info *ti;
struct amdgpu_device *adev = ring->adev;
int idx;
int r;
@@ -48,7 +48,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
return DRM_GPU_SCHED_STAT_ENODEV;
}

memset(&ti, 0, sizeof(struct amdgpu_task_info));

adev->job_hang = true;

if (amdgpu_gpu_recovery &&
@@ -58,12 +58,16 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
goto exit;
}

amdgpu_vm_get_task_info(ring->adev, job->pasid, &ti);
DRM_ERROR("ring %s timeout, signaled seq=%u, emitted seq=%u\n",
job->base.sched->name, atomic_read(&ring->fence_drv.last_seq),
ring->fence_drv.sync_seq);
DRM_ERROR("Process information: process %s pid %d thread %s pid %d\n",
ti.process_name, ti.tgid, ti.task_name, ti.pid);
job->base.sched->name, atomic_read(&ring->fence_drv.last_seq),
ring->fence_drv.sync_seq);

ti = amdgpu_vm_get_task_info_pasid(ring->adev, job->pasid);
if (ti) {
DRM_ERROR("Process information: process %s pid %d thread %s pid %d\n",
ti->process_name, ti->tgid, ti->task_name, ti->pid);
amdgpu_vm_put_task_info(ti);
}

dma_fence_set_error(&s_job->s_fence->finished, -ETIME);

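
Note: same reference-counted task-info contract as in the debugfs hunk, but keyed by the timed-out job's PASID rather than a VM pointer. A minimal sketch, assuming amdgpu_vm_get_task_info_pasid() as shown above (the helper name and the u32 pasid parameter are illustrative):

/* Sketch: best-effort process identification for a hung job. */
static void example_report_hang(struct amdgpu_device *adev, u32 pasid)
{
	struct amdgpu_task_info *ti;

	ti = amdgpu_vm_get_task_info_pasid(adev, pasid);
	if (!ti)
		return;	/* the process may already be gone */

	DRM_ERROR("Process information: process %s pid %d thread %s pid %d\n",
		  ti->process_name, ti->tgid, ti->task_name, ti->pid);
	amdgpu_vm_put_task_info(ti);
}
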
26 changes: 24 additions & 2 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
@@ -196,6 +196,13 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
coredump->reset_task_info.process_name,
coredump->reset_task_info.pid);

if (coredump->ring) {
drm_printf(&p, "\nRing timed out details\n");
drm_printf(&p, "IP Type: %d Ring Name: %s\n",
coredump->ring->funcs->type,
coredump->ring->name);
}

if (coredump->reset_vram_lost)
drm_printf(&p, "VRAM is lost due to GPU reset!\n");
if (coredump->adev->reset_info.num_regs) {
@@ -220,6 +227,8 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost,
{
struct amdgpu_coredump_info *coredump;
struct drm_device *dev = adev_to_drm(adev);
struct amdgpu_job *job = reset_context->job;
struct drm_sched_job *s_job;

coredump = kzalloc(sizeof(*coredump), GFP_NOWAIT);

@@ -230,8 +239,21 @@

coredump->reset_vram_lost = vram_lost;

if (reset_context->job && reset_context->job->vm)
coredump->reset_task_info = reset_context->job->vm->task_info;
if (reset_context->job && reset_context->job->vm) {
struct amdgpu_task_info *ti;
struct amdgpu_vm *vm = reset_context->job->vm;

ti = amdgpu_vm_get_task_info_vm(vm);
if (ti) {
coredump->reset_task_info = *ti;
amdgpu_vm_put_task_info(ti);
}
}

if (job) {
s_job = &job->base;
coredump->ring = to_amdgpu_ring(s_job->sched);
}

coredump->adev = adev;

1 change: 1 addition & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
@@ -97,6 +97,7 @@ struct amdgpu_coredump_info {
struct amdgpu_task_info reset_task_info;
struct timespec64 reset_time;
bool reset_vram_lost;
struct amdgpu_ring *ring;
};
#endif

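
Note: the devcoredump hunks snapshot the task info by value, so the reference-counted object can be released immediately, and record the ring associated with the hung job via the new ring field in struct amdgpu_coredump_info. A sketch of the capture step, using only the fields and calls shown in the amdgpu_reset.c/.h hunks above (the helper name is hypothetical):

/* Sketch: fill a coredump with an owned copy of the task info and the
 * ring derived from the hung job's scheduler.
 */
static void example_fill_coredump(struct amdgpu_coredump_info *coredump,
				  struct amdgpu_job *job)
{
	struct amdgpu_task_info *ti;

	if (job && job->vm) {
		ti = amdgpu_vm_get_task_info_vm(job->vm);
		if (ti) {
			coredump->reset_task_info = *ti;	/* copy, then drop the ref */
			amdgpu_vm_put_task_info(ti);
		}
	}

	if (job)
		coredump->ring = to_amdgpu_ring(job->base.sched);
}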