Skip to content

Commit

Permalink
drm/amdgpu: implement smu send rma reason for smu v13.0.6
Browse files Browse the repository at this point in the history
Implement the SMU "send RMA reason" function for SMU v13.0.6.

Signed-off-by: Yang Wang <kevinyang.wang@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
  • Loading branch information
Yang Wang authored and Alex Deucher committed Feb 12, 2024
1 parent 53edf77 commit e3bfb8d
Show file tree
Hide file tree
Showing 7 changed files with 56 additions and 2 deletions.
15 changes: 15 additions & 0 deletions drivers/gpu/drm/amd/pm/amdgpu_dpm.c
Original file line number Diff line number Diff line change
Expand Up @@ -693,6 +693,21 @@ int amdgpu_dpm_send_hbm_bad_channel_flag(struct amdgpu_device *adev, uint32_t si
return ret;
}

/**
 * amdgpu_dpm_send_rma_reason - ask the SMU layer to send the RMA reason
 * @adev: amdgpu device; its powerplay.pp_handle is used as the SMU context
 *
 * The request is forwarded to smu_send_rma_reason() while adev->pm.mutex
 * is held.
 *
 * Return: -EOPNOTSUPP when is_support_sw_smu() reports false for @adev,
 * otherwise the value returned by smu_send_rma_reason().
 */
int amdgpu_dpm_send_rma_reason(struct amdgpu_device *adev)
{
	struct smu_context *smu_ctx = adev->powerplay.pp_handle;
	int status = -EOPNOTSUPP;

	if (is_support_sw_smu(adev)) {
		/* Call into the SMU layer with the pm mutex held. */
		mutex_lock(&adev->pm.mutex);
		status = smu_send_rma_reason(smu_ctx);
		mutex_unlock(&adev->pm.mutex);
	}

	return status;
}

int amdgpu_dpm_get_dpm_freq_range(struct amdgpu_device *adev,
enum pp_clock_type type,
uint32_t *min,
Expand Down
1 change: 1 addition & 0 deletions drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
Original file line number Diff line number Diff line change
Expand Up @@ -450,6 +450,7 @@ int amdgpu_pm_load_smu_firmware(struct amdgpu_device *adev, uint32_t *smu_versio
int amdgpu_dpm_handle_passthrough_sbr(struct amdgpu_device *adev, bool enable);
int amdgpu_dpm_send_hbm_bad_pages_num(struct amdgpu_device *adev, uint32_t size);
int amdgpu_dpm_send_hbm_bad_channel_flag(struct amdgpu_device *adev, uint32_t size);
int amdgpu_dpm_send_rma_reason(struct amdgpu_device *adev);
int amdgpu_dpm_get_dpm_freq_range(struct amdgpu_device *adev,
enum pp_clock_type type,
uint32_t *min,
Expand Down
10 changes: 10 additions & 0 deletions drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
Original file line number Diff line number Diff line change
Expand Up @@ -3679,3 +3679,13 @@ int smu_send_hbm_bad_channel_flag(struct smu_context *smu, uint32_t size)

return ret;
}

/**
 * smu_send_rma_reason - dispatch the RMA reason message to the ASIC callback
 * @smu: SMU context whose ppt_funcs table is consulted
 *
 * Return: 0 when @smu has no ppt_funcs table or the table has no
 * send_rma_reason callback; otherwise the callback's return value.
 */
int smu_send_rma_reason(struct smu_context *smu)
{
	/* A missing function table is treated as success, not an error. */
	if (!smu->ppt_funcs)
		return 0;

	/* Likewise for an ASIC that does not implement the callback. */
	if (!smu->ppt_funcs->send_rma_reason)
		return 0;

	return smu->ppt_funcs->send_rma_reason(smu);
}
6 changes: 6 additions & 0 deletions drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
Original file line number Diff line number Diff line change
Expand Up @@ -1341,6 +1341,11 @@ struct pptable_funcs {
*/
int (*send_hbm_bad_pages_num)(struct smu_context *smu, uint32_t size);

/**
* @send_rma_reason: message rma reason event to SMU.
*/
int (*send_rma_reason)(struct smu_context *smu);

/**
* @get_ecc_table: message SMU to get ECC INFO table.
*/
Expand Down Expand Up @@ -1588,5 +1593,6 @@ int smu_stb_collect_info(struct smu_context *smu, void *buff, uint32_t size);
void amdgpu_smu_stb_debug_fs_init(struct amdgpu_device *adev);
int smu_send_hbm_bad_pages_num(struct smu_context *smu, uint32_t size);
int smu_send_hbm_bad_channel_flag(struct smu_context *smu, uint32_t size);
int smu_send_rma_reason(struct smu_context *smu);
#endif
#endif
3 changes: 2 additions & 1 deletion drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,8 @@
#define PPSMC_MSG_QueryValidMcaCeCount 0x3A
#define PPSMC_MSG_McaBankCeDumpDW 0x3B
#define PPSMC_MSG_SelectPLPDMode 0x40
#define PPSMC_Message_Count 0x41
#define PPSMC_MSG_RmaDueToBadPageThreshold 0x43
#define PPSMC_Message_Count 0x44

//PPSMC Reset Types for driver msg argument
#define PPSMC_RESET_TYPE_DRIVER_MODE_1_RESET 0x1
Expand Down
3 changes: 2 additions & 1 deletion drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,8 @@
__SMU_DUMMY_MAP(SetSoftMaxVpe), \
__SMU_DUMMY_MAP(SetSoftMinVpe), \
__SMU_DUMMY_MAP(GetMetricsVersion), \
__SMU_DUMMY_MAP(EnableUCLKShadow),
__SMU_DUMMY_MAP(EnableUCLKShadow), \
__SMU_DUMMY_MAP(RmaDueToBadPageThreshold),

#undef __SMU_DUMMY_MAP
#define __SMU_DUMMY_MAP(type) SMU_MSG_##type
Expand Down
20 changes: 20 additions & 0 deletions drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COU
MSG_MAP(McaBankDumpDW, PPSMC_MSG_McaBankDumpDW, 0),
MSG_MAP(McaBankCeDumpDW, PPSMC_MSG_McaBankCeDumpDW, 0),
MSG_MAP(SelectPLPDMode, PPSMC_MSG_SelectPLPDMode, 0),
MSG_MAP(RmaDueToBadPageThreshold, PPSMC_MSG_RmaDueToBadPageThreshold, 0),
};

// clang-format on
Expand Down Expand Up @@ -2381,6 +2382,24 @@ static int smu_v13_0_6_smu_send_hbm_bad_page_num(struct smu_context *smu,
return ret;
}

/*
 * Send the RmaDueToBadPageThreshold message to the SMU.
 *
 * Returns 0 without sending anything on APUs or when the SMC firmware
 * version is below 0x00555a00; otherwise returns the result of
 * smu_cmn_send_smc_msg(), logging an error when it is non-zero.
 */
static int smu_v13_0_6_send_rma_reason(struct smu_context *smu)
{
	struct amdgpu_device *adev = smu->adev;
	int err;

	/* NOTE: the message is only valid on dGPU ... */
	if (adev->flags & AMD_IS_APU)
		return 0;

	/* ... with pmfw 85.90.0 and above */
	if (smu->smc_fw_version < 0x00555a00)
		return 0;

	err = smu_cmn_send_smc_msg(smu, SMU_MSG_RmaDueToBadPageThreshold, NULL);
	if (!err)
		return 0;

	dev_err(adev->dev,
		"[%s] failed to send BadPageThreshold event to SMU\n",
		__func__);

	return err;
}

static int mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable)
{
struct smu_context *smu = adev->powerplay.pp_handle;
Expand Down Expand Up @@ -3095,6 +3114,7 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs = {
.i2c_init = smu_v13_0_6_i2c_control_init,
.i2c_fini = smu_v13_0_6_i2c_control_fini,
.send_hbm_bad_pages_num = smu_v13_0_6_smu_send_hbm_bad_page_num,
.send_rma_reason = smu_v13_0_6_send_rma_reason,
};

void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu)
Expand Down

0 comments on commit e3bfb8d

Please sign in to comment.