Skip to content

Commit

Permalink
drm/amd/pm: add send bad channel info function
Browse files Browse the repository at this point in the history
Support the SMU message that updates the HBM bad channel info
in the OOB table.

Change-Id: I1e50ed8118f4c1aaefb04c040e59ae4918cdc295
Signed-off-by: Stanley.Yang <Stanley.Yang@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
  • Loading branch information
Stanley.Yang authored and Stanley.Yang committed Mar 21, 2022
1 parent 3201cb8 commit 9338d13
Showing 7 changed files with 77 additions and 2 deletions.
12 changes: 12 additions & 0 deletions drivers/gpu/drm/amd/pm/amdgpu_dpm.c
Original file line number Diff line number Diff line change
@@ -507,6 +507,18 @@ int amdgpu_dpm_send_hbm_bad_pages_num(struct amdgpu_device *adev, uint32_t size)
return ret;
}

/*
 * Hand the HBM bad-channel flag value down to the SMU layer.
 *
 * @adev: device whose powerplay handle is used as the SMU context
 * @size: value forwarded unchanged to smu_send_hbm_bad_channel_flag()
 *
 * The SMU call is made while holding adev->pm.mutex. Returns whatever
 * smu_send_hbm_bad_channel_flag() returns.
 */
int amdgpu_dpm_send_hbm_bad_channel_flag(struct amdgpu_device *adev, uint32_t size)
{
	struct smu_context *smu_ctx = adev->powerplay.pp_handle;
	int status;

	mutex_lock(&adev->pm.mutex);
	status = smu_send_hbm_bad_channel_flag(smu_ctx, size);
	mutex_unlock(&adev->pm.mutex);

	return status;
}

int amdgpu_dpm_get_dpm_freq_range(struct amdgpu_device *adev,
enum pp_clock_type type,
uint32_t *min,
1 change: 1 addition & 0 deletions drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
Original file line number Diff line number Diff line change
@@ -412,6 +412,7 @@ void amdgpu_dpm_enable_jpeg(struct amdgpu_device *adev, bool enable);
int amdgpu_pm_load_smu_firmware(struct amdgpu_device *adev, uint32_t *smu_version);
int amdgpu_dpm_handle_passthrough_sbr(struct amdgpu_device *adev, bool enable);
int amdgpu_dpm_send_hbm_bad_pages_num(struct amdgpu_device *adev, uint32_t size);
int amdgpu_dpm_send_hbm_bad_channel_flag(struct amdgpu_device *adev, uint32_t size);
int amdgpu_dpm_get_dpm_freq_range(struct amdgpu_device *adev,
enum pp_clock_type type,
uint32_t *min,
10 changes: 10 additions & 0 deletions drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
Original file line number Diff line number Diff line change
@@ -3048,3 +3048,13 @@ int smu_send_hbm_bad_pages_num(struct smu_context *smu, uint32_t size)

return ret;
}

/*
 * Dispatch the HBM bad-channel flag update to the ASIC-specific handler.
 *
 * @smu:  SMU context whose ppt_funcs table is consulted
 * @size: value passed through to the handler
 *
 * Returns 0 when no ppt_funcs table or no send_hbm_bad_channel_flag
 * callback is installed; otherwise returns the callback's result.
 */
int smu_send_hbm_bad_channel_flag(struct smu_context *smu, uint32_t size)
{
	/* Nothing to call: treat a missing handler as success. */
	if (!smu->ppt_funcs || !smu->ppt_funcs->send_hbm_bad_channel_flag)
		return 0;

	return smu->ppt_funcs->send_hbm_bad_channel_flag(smu, size);
}
7 changes: 7 additions & 0 deletions drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
Original file line number Diff line number Diff line change
@@ -1292,6 +1292,12 @@ struct pptable_funcs {
* @set_config_table: Apply the input DriverSmuConfig table settings.
*/
int (*set_config_table)(struct smu_context *smu, struct config_table_setting *table);

/**
* @send_hbm_bad_channel_flag: message SMU to update bad channel info
* of SMUBUS table.
*/
int (*send_hbm_bad_channel_flag)(struct smu_context *smu, uint32_t size);
};

typedef enum {
@@ -1428,5 +1434,6 @@ int smu_get_ecc_info(struct smu_context *smu, void *umc_ecc);
int smu_stb_collect_info(struct smu_context *smu, void *buff, uint32_t size);
void amdgpu_smu_stb_debug_fs_init(struct amdgpu_device *adev);
int smu_send_hbm_bad_pages_num(struct smu_context *smu, uint32_t size);
int smu_send_hbm_bad_channel_flag(struct smu_context *smu, uint32_t size);
#endif
#endif
3 changes: 2 additions & 1 deletion drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/aldebaran_ppsmc.h
Original file line number Diff line number Diff line change
@@ -103,7 +103,8 @@
#define PPSMC_MSG_GfxDriverResetRecovery 0x42
#define PPSMC_MSG_BoardPowerCalibration 0x43
#define PPSMC_MSG_HeavySBR 0x45
#define PPSMC_Message_Count 0x46
#define PPSMC_MSG_SetBadHBMPagesRetiredFlagsPerChannel 0x46
#define PPSMC_Message_Count 0x47


//PPSMC Reset Types
3 changes: 2 additions & 1 deletion drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
Original file line number Diff line number Diff line change
@@ -230,7 +230,8 @@
__SMU_DUMMY_MAP(RequestGfxclk), \
__SMU_DUMMY_MAP(ForceGfxVid), \
__SMU_DUMMY_MAP(UnforceGfxVid), \
__SMU_DUMMY_MAP(HeavySBR),
__SMU_DUMMY_MAP(HeavySBR), \
__SMU_DUMMY_MAP(SetBadHBMPagesRetiredFlagsPerChannel),

#undef __SMU_DUMMY_MAP
#define __SMU_DUMMY_MAP(type) SMU_MSG_##type
43 changes: 43 additions & 0 deletions drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
Original file line number Diff line number Diff line change
@@ -82,6 +82,12 @@
*/
#define SUPPORT_ECCTABLE_SMU_VERSION 0x00442a00

/*
* SMU supports the bad channel info message since version 68.51.00;
* use this to check whether the message is supported.
*/
#define SUPPORT_BAD_CHANNEL_INFO_MSG_VERSION 0x00443300

static const struct smu_temperature_range smu13_thermal_policy[] =
{
{-273150, 99000, 99000, -273150, 99000, 99000, -273150, 99000, 99000},
@@ -140,6 +146,7 @@ static const struct cmn2asic_msg_mapping aldebaran_message_map[SMU_MSG_MAX_COUNT
MSG_MAP(GfxDriverResetRecovery, PPSMC_MSG_GfxDriverResetRecovery, 0),
MSG_MAP(BoardPowerCalibration, PPSMC_MSG_BoardPowerCalibration, 0),
MSG_MAP(HeavySBR, PPSMC_MSG_HeavySBR, 0),
MSG_MAP(SetBadHBMPagesRetiredFlagsPerChannel, PPSMC_MSG_SetBadHBMPagesRetiredFlagsPerChannel, 0),
};

static const struct cmn2asic_mapping aldebaran_clk_map[SMU_CLK_COUNT] = {
@@ -1997,6 +2004,41 @@ static int aldebaran_smu_send_hbm_bad_page_num(struct smu_context *smu,
return ret;
}

/*
 * Check whether the running SMU firmware accepts the bad channel info
 * message.
 *
 * Returns 0 when the firmware version could be read and is at least
 * SUPPORT_BAD_CHANNEL_INFO_MSG_VERSION, and -EOPNOTSUPP otherwise
 * (including when the version query itself fails).
 */
static int aldebaran_check_bad_channel_info_support(struct smu_context *smu)
{
	uint32_t if_version = 0xff, smu_version = 0xff;

	/* An unreadable firmware version is reported as "not supported". */
	if (smu_cmn_get_smc_version(smu, &if_version, &smu_version))
		return -EOPNOTSUPP;

	return (smu_version < SUPPORT_BAD_CHANNEL_INFO_MSG_VERSION) ? -EOPNOTSUPP : 0;
}

/*
 * Send SMU_MSG_SetBadHBMPagesRetiredFlagsPerChannel to the SMU.
 *
 * @smu:  SMU context
 * @size: value sent as the message parameter
 *
 * Returns the result of aldebaran_check_bad_channel_info_support() if
 * that check fails; otherwise returns the result of sending the message,
 * logging an error when the send fails.
 */
static int aldebaran_send_hbm_bad_channel_flag(struct smu_context *smu,
		uint32_t size)
{
	int err;

	/* Bail out before touching the SMU if the firmware lacks the message. */
	err = aldebaran_check_bad_channel_info_support(smu);
	if (err)
		return err;

	/* Ask the SMU to refresh its bad channel info. */
	err = smu_cmn_send_smc_msg_with_param(smu,
			SMU_MSG_SetBadHBMPagesRetiredFlagsPerChannel,
			size, NULL);
	if (!err)
		return 0;

	dev_err(smu->adev->dev, "[%s] failed to message SMU to update HBM bad channel info\n",
		__func__);
	return err;
}

static const struct pptable_funcs aldebaran_ppt_funcs = {
/* init dpm */
.get_allowed_feature_mask = aldebaran_get_allowed_feature_mask,
@@ -2062,6 +2104,7 @@ static const struct pptable_funcs aldebaran_ppt_funcs = {
.i2c_fini = aldebaran_i2c_control_fini,
.send_hbm_bad_pages_num = aldebaran_smu_send_hbm_bad_page_num,
.get_ecc_info = aldebaran_get_ecc_info,
.send_hbm_bad_channel_flag = aldebaran_send_hbm_bad_channel_flag,
};

void aldebaran_set_ppt_funcs(struct smu_context *smu)

0 comments on commit 9338d13

Please sign in to comment.