Skip to content

Commit

Permalink
Merge tag 'amd-drm-next-6.15-2025-03-14' of https://gitlab.freedeskto…
Browse files Browse the repository at this point in the history
…p.org/agd5f/linux into drm-next

amd-drm-next-6.15-2025-03-14:

amdgpu:
- GC 12.x DCC fixes
- VCN 2.5 fix
- Replay/PSR fixes
- HPD fixes
- DMUB fixes
- Backlight fixes
- DM suspend/resume cleanup
- Misc DC fixes
- HDCP UAF fix
- Misc code cleanups
- VCE 2.x fix
- Wedged event support
- GC 12.x PTE fixes
- Misc multimedia cap fixes
- Enable unique id support for GC 12.x
- XGMI code cleanup
- GC 11.x and 12.x MQD cleanups
- SMU 13.x updates
- SMU 14.x fan speed reporting
- Enable VCN activity reporting for additional chips
- SR-IOV fixes
- RAS fixes
- MES fixes

amdkfd:
- Dequeue wait count API cleanups
- Queue eviction cleanup fixes
- Retry fault fixes
- Dequeue retry timeout adjustments
- GC 12.x trap handler fixes
- GC 9.5.x updates

radeon:
- VCE command parser fix

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20250314170618.3142042-1-alexander.deucher@amd.com
  • Loading branch information
Dave Airlie committed Mar 24, 2025
2 parents 0f04462 + eb6cdfb commit f72e21e
Show file tree
Hide file tree
Showing 109 changed files with 2,315 additions and 1,853 deletions.
2 changes: 1 addition & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = {
.set_address_watch = kgd_gfx_aldebaran_set_address_watch,
.clear_address_watch = kgd_gfx_v9_clear_address_watch,
.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
.build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
.build_dequeue_wait_counts_packet_info = kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
.hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
.hqd_reset = kgd_gfx_v9_hqd_reset,
Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
Original file line number Diff line number Diff line change
Expand Up @@ -415,7 +415,7 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = {
.set_address_watch = kgd_gfx_v9_set_address_watch,
.clear_address_watch = kgd_gfx_v9_clear_address_watch,
.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
.build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
.build_dequeue_wait_counts_packet_info = kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
.hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
Expand Down
4 changes: 2 additions & 2 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
Original file line number Diff line number Diff line change
Expand Up @@ -541,8 +541,8 @@ const struct kfd2kgd_calls gc_9_4_3_kfd2kgd = {
.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
.program_trap_handler_settings =
kgd_gfx_v9_program_trap_handler_settings,
.build_grace_period_packet_info =
kgd_gfx_v9_build_grace_period_packet_info,
.build_dequeue_wait_counts_packet_info =
kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
.enable_debug_trap = kgd_aldebaran_enable_debug_trap,
.disable_debug_trap = kgd_gfx_v9_4_3_disable_debug_trap,
Expand Down
28 changes: 14 additions & 14 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
Original file line number Diff line number Diff line change
Expand Up @@ -1021,25 +1021,25 @@ void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev,
*wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2));
}

void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
void kgd_gfx_v10_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev,
uint32_t wait_times,
uint32_t grace_period,
uint32_t sch_wave,
uint32_t que_sleep,
uint32_t *reg_offset,
uint32_t *reg_data)
{
*reg_data = wait_times;

/*
* The CP cannont handle a 0 grace period input and will result in
* an infinite grace period being set so set to 1 to prevent this.
*/
if (grace_period == 0)
grace_period = 1;

*reg_data = REG_SET_FIELD(*reg_data,
CP_IQ_WAIT_TIME2,
SCH_WAVE,
grace_period);
if (sch_wave)
*reg_data = REG_SET_FIELD(*reg_data,
CP_IQ_WAIT_TIME2,
SCH_WAVE,
sch_wave);
if (que_sleep)
*reg_data = REG_SET_FIELD(*reg_data,
CP_IQ_WAIT_TIME2,
QUE_SLEEP,
que_sleep);

*reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);
}
Expand Down Expand Up @@ -1115,7 +1115,7 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
.set_address_watch = kgd_gfx_v10_set_address_watch,
.clear_address_watch = kgd_gfx_v10_clear_address_watch,
.get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times,
.build_grace_period_packet_info = kgd_gfx_v10_build_grace_period_packet_info,
.build_dequeue_wait_counts_packet_info = kgd_gfx_v10_build_dequeue_wait_counts_packet_info,
.program_trap_handler_settings = program_trap_handler_settings,
.hqd_get_pq_addr = kgd_gfx_v10_hqd_get_pq_addr,
.hqd_reset = kgd_gfx_v10_hqd_reset,
Expand Down
5 changes: 3 additions & 2 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,10 @@ uint32_t kgd_gfx_v10_clear_address_watch(struct amdgpu_device *adev,
void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev,
uint32_t *wait_times,
uint32_t inst);
void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
void kgd_gfx_v10_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev,
uint32_t wait_times,
uint32_t grace_period,
uint32_t sch_wave,
uint32_t que_sleep,
uint32_t *reg_offset,
uint32_t *reg_data);
uint64_t kgd_gfx_v10_hqd_get_pq_addr(struct amdgpu_device *adev,
Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
Original file line number Diff line number Diff line change
Expand Up @@ -673,7 +673,7 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = {
.set_vm_context_page_table_base = set_vm_context_page_table_base_v10_3,
.program_trap_handler_settings = program_trap_handler_settings_v10_3,
.get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times,
.build_grace_period_packet_info = kgd_gfx_v10_build_grace_period_packet_info,
.build_dequeue_wait_counts_packet_info = kgd_gfx_v10_build_dequeue_wait_counts_packet_info,
.enable_debug_trap = kgd_gfx_v10_enable_debug_trap,
.disable_debug_trap = kgd_gfx_v10_disable_debug_trap,
.validate_trap_override_request = kgd_gfx_v10_validate_trap_override_request,
Expand Down
28 changes: 14 additions & 14 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
Original file line number Diff line number Diff line change
Expand Up @@ -1077,25 +1077,25 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev,
adev->gfx.cu_info.max_waves_per_simd;
}

void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
void kgd_gfx_v9_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev,
uint32_t wait_times,
uint32_t grace_period,
uint32_t sch_wave,
uint32_t que_sleep,
uint32_t *reg_offset,
uint32_t *reg_data)
{
*reg_data = wait_times;

/*
* The CP cannot handle a 0 grace period input and will result in
* an infinite grace period being set so set to 1 to prevent this.
*/
if (grace_period == 0)
grace_period = 1;

*reg_data = REG_SET_FIELD(*reg_data,
CP_IQ_WAIT_TIME2,
SCH_WAVE,
grace_period);
if (sch_wave)
*reg_data = REG_SET_FIELD(*reg_data,
CP_IQ_WAIT_TIME2,
SCH_WAVE,
sch_wave);
if (que_sleep)
*reg_data = REG_SET_FIELD(*reg_data,
CP_IQ_WAIT_TIME2,
QUE_SLEEP,
que_sleep);

*reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);
}
Expand Down Expand Up @@ -1255,7 +1255,7 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
.set_address_watch = kgd_gfx_v9_set_address_watch,
.clear_address_watch = kgd_gfx_v9_clear_address_watch,
.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
.build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
.build_dequeue_wait_counts_packet_info = kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
.hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
Expand Down
5 changes: 3 additions & 2 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,9 +97,10 @@ uint32_t kgd_gfx_v9_clear_address_watch(struct amdgpu_device *adev,
void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev,
uint32_t *wait_times,
uint32_t inst);
void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
void kgd_gfx_v9_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev,
uint32_t wait_times,
uint32_t grace_period,
uint32_t sch_wave,
uint32_t que_sleep,
uint32_t *reg_offset,
uint32_t *reg_data);
uint64_t kgd_gfx_v9_hqd_get_pq_addr(struct amdgpu_device *adev,
Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -1990,7 +1990,7 @@ static int amdgpu_debugfs_sclk_set(void *data, u64 val)
uint32_t max_freq, min_freq;
struct amdgpu_device *adev = (struct amdgpu_device *)data;

if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
if (amdgpu_sriov_multi_vf_mode(adev))
return -EINVAL;

ret = pm_runtime_get_sync(adev_to_drm(adev)->dev);
Expand Down
3 changes: 3 additions & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
Original file line number Diff line number Diff line change
Expand Up @@ -2757,6 +2757,9 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
if (!total)
return -ENODEV;

if (adev->gmc.xgmi.supported)
amdgpu_xgmi_early_init(adev);

ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
if (ip_block->status.valid != false)
amdgpu_amdkfd_device_probe(adev);
Expand Down
3 changes: 0 additions & 3 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
Original file line number Diff line number Diff line change
Expand Up @@ -2772,9 +2772,6 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
break;
}

if (amdgpu_ip_version(adev, XGMI_HWIP, 0) == IP_VERSION(4, 8, 0))
adev->gmc.xgmi.supported = true;

if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4))
adev->ip_versions[XGMI_HWIP][0] = IP_VERSION(6, 4, 0);
Expand Down
3 changes: 2 additions & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
Original file line number Diff line number Diff line change
Expand Up @@ -122,9 +122,10 @@
* - 3.60.0 - Add AMDGPU_TILING_GFX12_DCC_WRITE_COMPRESS_DISABLE (Vulkan requirement)
* - 3.61.0 - Contains fix for RV/PCO compute queues
* - 3.62.0 - Add AMDGPU_IDS_FLAGS_MODE_PF, AMDGPU_IDS_FLAGS_MODE_VF & AMDGPU_IDS_FLAGS_MODE_PT
* - 3.63.0 - GFX12 display DCC supports 256B max compressed block size
*/
#define KMS_DRIVER_MAJOR 3
#define KMS_DRIVER_MINOR 62
#define KMS_DRIVER_MINOR 63
#define KMS_DRIVER_PATCHLEVEL 0

/*
Expand Down
4 changes: 2 additions & 2 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
Original file line number Diff line number Diff line change
Expand Up @@ -2002,8 +2002,8 @@ void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work)
if (adev->kfd.init_complete) {
WARN_ON_ONCE(!adev->gfx.kfd_sch_inactive[idx]);
WARN_ON_ONCE(adev->gfx.kfd_sch_req_count[idx]);
amdgpu_amdkfd_start_sched(adev, idx);
adev->gfx.kfd_sch_inactive[idx] = false;
amdgpu_amdkfd_start_sched(adev, idx);
adev->gfx.kfd_sch_inactive[idx] = false;
}
}
mutex_unlock(&adev->enforce_isolation_mutex);
Expand Down
1 change: 1 addition & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
if (amdgpu_ring_sched_ready(ring))
drm_sched_start(&ring->sched, 0);
dev_err(adev->dev, "Ring %s reset succeeded\n", ring->sched.name);
drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE);
goto exit;
}
dev_err(adev->dev, "Ring %s reset failure\n", ring->sched.name);
Expand Down
6 changes: 3 additions & 3 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
Original file line number Diff line number Diff line change
Expand Up @@ -143,9 +143,9 @@ struct amdgpu_mes {
const struct amdgpu_mes_funcs *funcs;

/* mes resource_1 bo*/
struct amdgpu_bo *resource_1;
uint64_t resource_1_gpu_addr;
void *resource_1_addr;
struct amdgpu_bo *resource_1[AMDGPU_MAX_MES_PIPES];
uint64_t resource_1_gpu_addr[AMDGPU_MAX_MES_PIPES];
void *resource_1_addr[AMDGPU_MAX_MES_PIPES];

};

Expand Down
30 changes: 25 additions & 5 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
Original file line number Diff line number Diff line change
Expand Up @@ -2836,6 +2836,13 @@ static int __amdgpu_ras_convert_rec_array_from_rom(struct amdgpu_device *adev,

save_nps = (bps[0].retired_page >> UMC_NPS_SHIFT) & UMC_NPS_MASK;

/*old asics just have pa in eeprom*/
if (IP_VERSION_MAJ(amdgpu_ip_version(adev, UMC_HWIP, 0)) < 12) {
memcpy(err_data->err_addr, bps,
sizeof(struct eeprom_table_record) * adev->umc.retire_unit);
goto out;
}

for (i = 0; i < adev->umc.retire_unit; i++)
bps[i].retired_page &= ~(UMC_NPS_MASK << UMC_NPS_SHIFT);

Expand All @@ -2858,6 +2865,7 @@ static int __amdgpu_ras_convert_rec_array_from_rom(struct amdgpu_device *adev,
}
}

out:
return __amdgpu_ras_restore_bad_pages(adev, err_data->err_addr, adev->umc.retire_unit);
}

Expand Down Expand Up @@ -2981,14 +2989,24 @@ int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev,

/* only new entries are saved */
if (save_count > 0) {
for (i = 0; i < unit_num; i++) {
/*old asics only save pa to eeprom like before*/
if (IP_VERSION_MAJ(amdgpu_ip_version(adev, UMC_HWIP, 0)) < 12) {
if (amdgpu_ras_eeprom_append(control,
&data->bps[bad_page_num + i * adev->umc.retire_unit],
1)) {
&data->bps[bad_page_num], save_count)) {
dev_err(adev->dev, "Failed to save EEPROM table data!");
return -EIO;
}
} else {
for (i = 0; i < unit_num; i++) {
if (amdgpu_ras_eeprom_append(control,
&data->bps[bad_page_num +
i * adev->umc.retire_unit], 1)) {
dev_err(adev->dev, "Failed to save EEPROM table data!");
return -EIO;
}
}
}

dev_info(adev->dev, "Saved %d pages to EEPROM table.\n", save_count);
}

Expand Down Expand Up @@ -3767,9 +3785,11 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
adev->ras_enabled = amdgpu_ras_enable == 0 ? 0 :
adev->ras_hw_enabled & amdgpu_ras_mask;

/* aca is disabled by default except for psp v13_0_12 */
/* aca is disabled by default except for psp v13_0_6/v13_0_12/v13_0_14 */
adev->aca.is_enabled =
(amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12));
(amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) ||
amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) ||
amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14));

/* bad page feature is not applicable to specific app platform */
if (adev->gmc.is_app_apu &&
Expand Down
9 changes: 7 additions & 2 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
Original file line number Diff line number Diff line change
Expand Up @@ -727,9 +727,14 @@ amdgpu_ras_eeprom_append_table(struct amdgpu_ras_eeprom_control *control,
- control->ras_fri)
% control->ras_max_record_count;

control->ras_num_mca_recs += num;
control->ras_num_bad_pages += num * adev->umc.retire_unit;
/*old asics only save pa to eeprom like before*/
if (IP_VERSION_MAJ(amdgpu_ip_version(adev, UMC_HWIP, 0)) < 12)
control->ras_num_pa_recs += num;
else
control->ras_num_mca_recs += num;

control->ras_num_bad_pages = control->ras_num_pa_recs +
control->ras_num_mca_recs * adev->umc.retire_unit;
Out:
kfree(buf);
return res;
Expand Down
9 changes: 5 additions & 4 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
Original file line number Diff line number Diff line change
Expand Up @@ -614,10 +614,11 @@ static int amdgpu_virt_write_vf2pf_data(struct amdgpu_device *adev)
vf2pf_info->decode_usage = 0;

vf2pf_info->dummy_page_addr = (uint64_t)adev->dummy_page_addr;
vf2pf_info->mes_info_addr = (uint64_t)adev->mes.resource_1_gpu_addr;

if (adev->mes.resource_1) {
vf2pf_info->mes_info_size = adev->mes.resource_1->tbo.base.size;
if (amdgpu_sriov_is_mes_info_enable(adev)) {
vf2pf_info->mes_info_addr =
(uint64_t)(adev->mes.resource_1_gpu_addr[0] + AMDGPU_GPU_PAGE_SIZE);
vf2pf_info->mes_info_size =
adev->mes.resource_1[0]->tbo.base.size - AMDGPU_GPU_PAGE_SIZE;
}
vf2pf_info->checksum =
amd_sriov_msg_checksum(
Expand Down
2 changes: 2 additions & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,8 @@ static inline bool is_virtual_machine(void)

#define amdgpu_sriov_is_pp_one_vf(adev) \
((adev)->virt.gim_feature & AMDGIM_FEATURE_PP_ONE_VF)
#define amdgpu_sriov_multi_vf_mode(adev) \
(amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
#define amdgpu_sriov_is_debug(adev) \
((!amdgpu_in_reset(adev)) && adev->virt.tdr_debug)
#define amdgpu_sriov_is_normal(adev) \
Expand Down
4 changes: 3 additions & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
Original file line number Diff line number Diff line change
Expand Up @@ -844,7 +844,9 @@ int amdgpu_xgmi_get_bandwidth(struct amdgpu_device *adev, struct amdgpu_device *
{
bool peer_mode = bw_mode == AMDGPU_XGMI_BW_MODE_PER_PEER;
int unit_scale = bw_unit == AMDGPU_XGMI_BW_UNIT_MBYTES ? 1000 : 1;
int speed = 25, num_lanes = 16, num_links = !peer_mode ? 1 : -1;
int num_lanes = adev->gmc.xgmi.max_width;
int speed = adev->gmc.xgmi.max_speed;
int num_links = !peer_mode ? 1 : -1;

if (!(min_bw && max_bw))
return -EINVAL;
Expand Down
Loading

0 comments on commit f72e21e

Please sign in to comment.