Skip to content

Commit

Permalink
drm/amdgpu: Optimize the enablement of GECC
Browse files Browse the repository at this point in the history
Enable GECC only when memory ECC is enabled by default or
the module parameter amdgpu_ras_enable is set to 1.

v2: Add kernel messages reminding users to explicitly set
    amdgpu_ras_enable=1 before loading the driver to enable GECC,
    or to set amdgpu_ras_enable=0 to disable GECC when it is
    currently enabled, if needed.

Signed-off-by: Candice Li <candice.li@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
  • Loading branch information
Candice Li authored and Alex Deucher committed Feb 17, 2025
1 parent 92d5d2a commit 76b1f8b
Showing 3 changed files with 52 additions and 32 deletions.
1 change: 1 addition & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu.h
Original file line number Diff line number Diff line change
@@ -1154,6 +1154,7 @@ struct amdgpu_device {
struct ratelimit_state throttling_logging_rs;
uint32_t ras_hw_enabled;
uint32_t ras_enabled;
bool ras_default_ecc_enabled;

bool no_hw_access;
struct pci_saved_state *pci_state;
18 changes: 12 additions & 6 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
Original file line number Diff line number Diff line change
@@ -549,9 +549,10 @@ bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev)
u16 data_offset, size;
union umc_info *umc_info;
u8 frev, crev;
bool ecc_default_enabled = false;
bool mem_ecc_enabled = false;
u8 umc_config;
u32 umc_config1;
adev->ras_default_ecc_enabled = false;

index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
umc_info);
@@ -563,20 +564,22 @@ bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev)
switch (crev) {
case 1:
umc_config = le32_to_cpu(umc_info->v31.umc_config);
ecc_default_enabled =
mem_ecc_enabled =
(umc_config & UMC_CONFIG__DEFAULT_MEM_ECC_ENABLE) ? true : false;
break;
case 2:
umc_config = le32_to_cpu(umc_info->v32.umc_config);
ecc_default_enabled =
mem_ecc_enabled =
(umc_config & UMC_CONFIG__DEFAULT_MEM_ECC_ENABLE) ? true : false;
break;
case 3:
umc_config = le32_to_cpu(umc_info->v33.umc_config);
umc_config1 = le32_to_cpu(umc_info->v33.umc_config1);
ecc_default_enabled =
mem_ecc_enabled =
((umc_config & UMC_CONFIG__DEFAULT_MEM_ECC_ENABLE) ||
(umc_config1 & UMC_CONFIG1__ENABLE_ECC_CAPABLE)) ? true : false;
adev->ras_default_ecc_enabled =
(umc_config & UMC_CONFIG__DEFAULT_MEM_ECC_ENABLE) ? true : false;
break;
default:
/* unsupported crev */
@@ -585,9 +588,12 @@ bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev)
} else if (frev == 4) {
switch (crev) {
case 0:
umc_config = le32_to_cpu(umc_info->v40.umc_config);
umc_config1 = le32_to_cpu(umc_info->v40.umc_config1);
ecc_default_enabled =
mem_ecc_enabled =
(umc_config1 & UMC_CONFIG1__ENABLE_ECC_CAPABLE) ? true : false;
adev->ras_default_ecc_enabled =
(umc_config & UMC_CONFIG__DEFAULT_MEM_ECC_ENABLE) ? true : false;
break;
default:
/* unsupported crev */
@@ -599,7 +605,7 @@ bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev)
}
}

return ecc_default_enabled;
return mem_ecc_enabled;
}

/*
65 changes: 39 additions & 26 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
Original file line number Diff line number Diff line change
@@ -1794,34 +1794,47 @@ int psp_ras_initialize(struct psp_context *psp)
if (ret)
dev_warn(adev->dev, "PSP get boot config failed\n");

if (!amdgpu_ras_is_supported(psp->adev, AMDGPU_RAS_BLOCK__UMC)) {
if (!boot_cfg) {
dev_info(adev->dev, "GECC is disabled\n");
} else {
/* disable GECC in next boot cycle if ras is
* disabled by module parameter amdgpu_ras_enable
* and/or amdgpu_ras_mask, or boot_config_get call
* is failed
*/
ret = psp_boot_config_set(adev, 0);
if (ret)
dev_warn(adev->dev, "PSP set boot config failed\n");
else
dev_warn(adev->dev, "GECC will be disabled in next boot cycle if set amdgpu_ras_enable and/or amdgpu_ras_mask to 0x0\n");
}
if (boot_cfg == 1 && !adev->ras_default_ecc_enabled &&
amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC)) {
dev_warn(adev->dev, "GECC is currently enabled, which may affect performance\n");
dev_warn(adev->dev,
"To disable GECC, please reboot the system and load the amdgpu driver with the parameter amdgpu_ras_enable=0\n");
} else {
if (boot_cfg == 1) {
dev_info(adev->dev, "GECC is enabled\n");
if ((adev->ras_default_ecc_enabled || amdgpu_ras_enable == 1) &&
amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC)) {
if (boot_cfg == 1) {
dev_info(adev->dev, "GECC is enabled\n");
} else {
/* enable GECC in next boot cycle if it is disabled
* in boot config, or force enable GECC if failed to
* get boot configuration
*/
ret = psp_boot_config_set(adev, BOOT_CONFIG_GECC);
if (ret)
dev_warn(adev->dev, "PSP set boot config failed\n");
else
dev_warn(adev->dev, "GECC will be enabled in next boot cycle\n");
}
} else {
/* enable GECC in next boot cycle if it is disabled
* in boot config, or force enable GECC if failed to
* get boot configuration
*/
ret = psp_boot_config_set(adev, BOOT_CONFIG_GECC);
if (ret)
dev_warn(adev->dev, "PSP set boot config failed\n");
else
dev_warn(adev->dev, "GECC will be enabled in next boot cycle\n");
if (!boot_cfg) {
if (!adev->ras_default_ecc_enabled &&
amdgpu_ras_enable != 1 &&
amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC))
dev_warn(adev->dev, "GECC is disabled, set amdgpu_ras_enable=1 to enable GECC in next boot cycle if needed\n");
else
dev_info(adev->dev, "GECC is disabled\n");
} else {
/* disable GECC in next boot cycle if ras is
* disabled by module parameter amdgpu_ras_enable
* and/or amdgpu_ras_mask, or boot_config_get call
* is failed
*/
ret = psp_boot_config_set(adev, 0);
if (ret)
dev_warn(adev->dev, "PSP set boot config failed\n");
else
dev_warn(adev->dev, "GECC will be disabled in next boot cycle if set amdgpu_ras_enable and/or amdgpu_ras_mask to 0x0\n");
}
}
}
}

0 comments on commit 76b1f8b

Please sign in to comment.