Skip to content

Commit

Permalink
Merge tag 'amd-drm-next-6.11-2024-07-12' of https://gitlab.freedeskto…
Browse files Browse the repository at this point in the history
…p.org/agd5f/linux into drm-next

amd-drm-next-6.11-2024-07-12:

amdgpu:
- RAS fixes
- SMU fixes
- GC 12 updates
- SR-IOV fixes
- IH 7 updates
- DCC fixes
- GC 11.5 fixes
- DP MST fixes
- GFX 9.4.4 fixes
- SMU 14 updates
- Documentation updates
- MAINTAINERS updates
- PSR SU fix
- Misc small fixes

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240712171637.2581787-1-alexander.deucher@amd.com
  • Loading branch information
Dave Airlie committed Jul 17, 2024
2 parents c58c391 + 1cff101 commit 478a527
Show file tree
Hide file tree
Showing 40 changed files with 1,039 additions and 134 deletions.
4 changes: 2 additions & 2 deletions Documentation/gpu/amdgpu/dgpu-asic-info-table.csv
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ Radeon (RX/Pro) 500 /540(X) /550 /640 /WX2100 /WX3100 /WX200 Series, POLARIS12,
Radeon (RX|TM) (PRO|WX) Vega /MI25 /V320 /V340L /8200 /9100 /SSG MxGPU, VEGA10, DCE 12, 9.0.1, VCE 4.0.0 / UVD 7.0.0, 4.0.0
AMD Radeon (Pro) VII /MI50 /MI60, VEGA20, DCE 12, 9.4.0, VCE 4.1.0 / UVD 7.2.0, 4.2.0
MI100, ARCTURUS, *, 9.4.1, VCN 2.5.0, 4.2.2
MI200, ALDEBARAN, *, 9.4.2, VCN 2.6.0, 4.4.0
MI300, AQUA_VANGARAM, *, 9.4.3, VCN 4.0.3, 4.4.2
MI200 Series, ALDEBARAN, *, 9.4.2, VCN 2.6.0, 4.4.0
MI300 Series, AQUA_VANJARAM, *, 9.4.3, VCN 4.0.3, 4.4.2
AMD Radeon (RX|Pro) 5600(M|XT) /5700 (M|XT|XTB) /W5700, NAVI10, DCN 2.0.0, 10.1.10, VCN 2.0.0, 5.0.0
AMD Radeon (Pro) 5300 /5500XTB/5500(XT|M) /W5500M /W5500, NAVI14, DCN 2.0.0, 10.1.1, VCN 2.0.2, 5.0.2
AMD Radeon RX 6800(XT) /6900(XT) /W6800, SIENNA_CICHLID, DCN 3.0.0, 10.3.0, VCN 3.0.0, 5.2.0
Expand Down
4 changes: 2 additions & 2 deletions MAINTAINERS
Original file line number Diff line number Diff line change
Expand Up @@ -1092,7 +1092,7 @@ F: Documentation/ABI/testing/sysfs-amd-pmf
F: drivers/platform/x86/amd/pmf/

AMD POWERPLAY AND SWSMU
M: Evan Quan <evan.quan@amd.com>
M: Kenneth Feng <kenneth.feng@amd.com>
L: amd-gfx@lists.freedesktop.org
S: Supported
T: git https://gitlab.freedesktop.org/agd5f/linux.git
Expand Down Expand Up @@ -18656,7 +18656,7 @@ F: drivers/net/wireless/quantenna
RADEON and AMDGPU DRM DRIVERS
M: Alex Deucher <alexander.deucher@amd.com>
M: Christian König <christian.koenig@amd.com>
M: Pan, Xinhui <Xinhui.Pan@amd.com>
M: Xinhui Pan <Xinhui.Pan@amd.com>
L: amd-gfx@lists.freedesktop.org
S: Supported
B: https://gitlab.freedesktop.org/drm/amd/-/issues
Expand Down
2 changes: 0 additions & 2 deletions drivers/gpu/drm/amd/amdgpu/aldebaran.c
Original file line number Diff line number Diff line change
Expand Up @@ -316,8 +316,6 @@ static int aldebaran_mode2_restore_ip(struct amdgpu_device *adev)
adev->ip_blocks[i].status.late_initialized = true;
}

amdgpu_ras_set_error_query_ready(adev, true);

amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);

Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ static struct aca_regs_dump {
static void aca_smu_bank_dump(struct amdgpu_device *adev, int idx, int total, struct aca_bank *bank,
struct ras_query_context *qctx)
{
u64 event_id = qctx ? qctx->event_id : 0ULL;
u64 event_id = qctx ? qctx->evid.event_id : RAS_EVENT_INVALID_ID;
int i;

RAS_EVENT_LOG(adev, event_id, HW_ERR "Accelerator Check Architecture events logged\n");
Expand Down
15 changes: 15 additions & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
Original file line number Diff line number Diff line change
Expand Up @@ -1093,6 +1093,21 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
unsigned int i;
int r;

/*
* We can't use gang submit on with reserved VMIDs when the VM changes
* can't be invalidated by more than one engine at the same time.
*/
if (p->gang_size > 1 && !p->adev->vm_manager.concurrent_flush) {
for (i = 0; i < p->gang_size; ++i) {
struct drm_sched_entity *entity = p->entities[i];
struct drm_gpu_scheduler *sched = entity->rq->sched;
struct amdgpu_ring *ring = to_amdgpu_ring(sched);

if (amdgpu_vmid_uses_reserved(vm, ring->vm_hub))
return -EINVAL;
}
}

r = amdgpu_vm_clear_freed(adev, vm, NULL);
if (r)
return r;
Expand Down
3 changes: 2 additions & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
Original file line number Diff line number Diff line change
Expand Up @@ -3142,7 +3142,8 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
return r;
}

amdgpu_ras_set_error_query_ready(adev, true);
if (!amdgpu_in_reset(adev))
amdgpu_ras_set_error_query_ready(adev, true);

amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
Expand Down
2 changes: 0 additions & 2 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
Original file line number Diff line number Diff line change
Expand Up @@ -2161,8 +2161,6 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev)
break;
case IP_VERSION(12, 0, 0):
case IP_VERSION(12, 0, 1):
if (!amdgpu_exp_hw_support)
return -EINVAL;
amdgpu_device_ip_block_add(adev, &gfx_v12_0_ip_block);
break;
default:
Expand Down
1 change: 1 addition & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,7 @@ struct amdgpu_gfx_funcs {
int (*switch_partition_mode)(struct amdgpu_device *adev,
int num_xccs_per_xcp);
int (*ih_node_to_logical_xcc)(struct amdgpu_device *adev, int ih_node);
int (*get_xccs_per_xcp)(struct amdgpu_device *adev);
};

struct sq_work {
Expand Down
15 changes: 14 additions & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
Original file line number Diff line number Diff line change
Expand Up @@ -424,7 +424,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
if (r || !idle)
goto error;

if (vm->reserved_vmid[vmhub] || (enforce_isolation && (vmhub == AMDGPU_GFXHUB(0)))) {
if (amdgpu_vmid_uses_reserved(vm, vmhub)) {
r = amdgpu_vmid_grab_reserved(vm, ring, job, &id, fence);
if (r || !id)
goto error;
Expand Down Expand Up @@ -474,6 +474,19 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
return r;
}

/*
* amdgpu_vmid_uses_reserved - check if a VM will use a reserved VMID
* @vm: the VM to check
* @vmhub: the VMHUB which will be used
*
* Returns: True if the VM will use a reserved VMID.
*/
bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub)
{
return vm->reserved_vmid[vmhub] ||
(enforce_isolation && (vmhub == AMDGPU_GFXHUB(0)));
}

int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
unsigned vmhub)
{
Expand Down
1 change: 1 addition & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv,

bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
struct amdgpu_vmid *id);
bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub);
int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
unsigned vmhub);
void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,
Expand Down
21 changes: 11 additions & 10 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
int r;

if (!drm_dev_enter(adev_to_drm(adev), &idx)) {
DRM_INFO("%s - device unplugged skipping recovery on scheduler:%s",
dev_info(adev->dev, "%s - device unplugged skipping recovery on scheduler:%s",
__func__, s_job->sched->name);

/* Effectively the job is aborted as the device is gone */
Expand All @@ -53,19 +53,20 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)

if (amdgpu_gpu_recovery &&
amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) {
DRM_ERROR("ring %s timeout, but soft recovered\n",
s_job->sched->name);
dev_err(adev->dev, "ring %s timeout, but soft recovered\n",
s_job->sched->name);
goto exit;
}

DRM_ERROR("ring %s timeout, signaled seq=%u, emitted seq=%u\n",
job->base.sched->name, atomic_read(&ring->fence_drv.last_seq),
ring->fence_drv.sync_seq);
dev_err(adev->dev, "ring %s timeout, signaled seq=%u, emitted seq=%u\n",
job->base.sched->name, atomic_read(&ring->fence_drv.last_seq),
ring->fence_drv.sync_seq);

ti = amdgpu_vm_get_task_info_pasid(ring->adev, job->pasid);
if (ti) {
DRM_ERROR("Process information: process %s pid %d thread %s pid %d\n",
ti->process_name, ti->tgid, ti->task_name, ti->pid);
dev_err(adev->dev,
"Process information: process %s pid %d thread %s pid %d\n",
ti->process_name, ti->tgid, ti->task_name, ti->pid);
amdgpu_vm_put_task_info(ti);
}

Expand All @@ -82,7 +83,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)

r = amdgpu_device_gpu_recover(ring->adev, job, &reset_context);
if (r)
DRM_ERROR("GPU Recovery Failed: %d\n", r);
dev_err(adev->dev, "GPU Recovery Failed: %d\n", r);
} else {
drm_sched_suspend_timeout(&ring->sched);
if (amdgpu_sriov_vf(adev))
Expand Down Expand Up @@ -274,7 +275,7 @@ amdgpu_job_prepare_job(struct drm_sched_job *sched_job,
while (!fence && job->vm && !job->vmid) {
r = amdgpu_vmid_grab(job->vm, ring, job, &fence);
if (r) {
DRM_ERROR("Error getting VM ID (%d)\n", r);
dev_err(ring->adev->dev, "Error getting VM ID (%d)\n", r);
goto error;
}
}
Expand Down
4 changes: 2 additions & 2 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ int amdgpu_mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable)
static void amdgpu_mca_smu_mca_bank_dump(struct amdgpu_device *adev, int idx, struct mca_bank_entry *entry,
struct ras_query_context *qctx)
{
u64 event_id = qctx->event_id;
u64 event_id = qctx ? qctx->evid.event_id : RAS_EVENT_INVALID_ID;

RAS_EVENT_LOG(adev, event_id, HW_ERR "Accelerator Check Architecture events logged\n");
RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].STATUS=0x%016llx\n",
Expand Down Expand Up @@ -543,7 +543,7 @@ static int mca_dump_show(struct seq_file *m, enum amdgpu_mca_error_type type)

amdgpu_mca_bank_set_init(&mca_set);

qctx.event_id = 0ULL;
qctx.evid.event_id = RAS_EVENT_INVALID_ID;
ret = amdgpu_mca_smu_get_mca_set(adev, type, &mca_set, &qctx);
if (ret)
goto err_free_mca_set;
Expand Down
Loading

0 comments on commit 478a527

Please sign in to comment.