Skip to content

Commit

Permalink
drm/amdgpu: Parse all deferred errors with UMC aca handle
Browse files Browse the repository at this point in the history
We should only increase the deferred error count in the UMC block, so banks reporting deferred errors are parsed exclusively by the UMC ACA handle.

Signed-off-by: Xiang Liu <xiang.liu@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
  • Loading branch information
Xiang Liu authored and Alex Deucher committed Mar 26, 2025
1 parent cc11dff commit aedc92b
Show file tree
Hide file tree
Showing 9 changed files with 14 additions and 19 deletions.
4 changes: 4 additions & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,10 @@ static bool aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank,
{
const struct aca_bank_ops *bank_ops = handle->bank_ops;

/* Parse all deferred errors with UMC aca handle */
if (ACA_BANK_ERR_IS_DEFFERED(bank))
return handle->hwip == ACA_HWIP_TYPE_UMC;

if (!aca_bank_hwip_is_matched(bank, handle->hwip))
return false;

Expand Down
8 changes: 0 additions & 8 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,14 +80,6 @@ struct ras_query_context;
(ACA_REG__STATUS__POISON((bank)->regs[ACA_REG_IDX_STATUS]) || \
ACA_REG__STATUS__DEFERRED((bank)->regs[ACA_REG_IDX_STATUS]))

#define ACA_BANK_ERR_CE_DE_DECODE(bank) \
(ACA_BANK_ERR_IS_DEFFERED(bank) ? ACA_ERROR_TYPE_DEFERRED : \
ACA_ERROR_TYPE_CE)

#define ACA_BANK_ERR_UE_DE_DECODE(bank) \
(ACA_BANK_ERR_IS_DEFFERED(bank) ? ACA_ERROR_TYPE_DEFERRED : \
ACA_ERROR_TYPE_UE)

enum aca_reg_idx {
ACA_REG_IDX_CTL = 0,
ACA_REG_IDX_STATUS = 1,
Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
Original file line number Diff line number Diff line change
Expand Up @@ -1172,7 +1172,7 @@ static int xgmi_v6_4_0_aca_bank_parser(struct aca_handle *handle, struct aca_ban
break;
case ACA_SMU_TYPE_CE:
count = ext_error_code == 6 ? count : 0ULL;
bank->aca_err_type = ACA_BANK_ERR_CE_DE_DECODE(bank);
bank->aca_err_type = ACA_ERROR_TYPE_CE;
ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type, count);
break;
default:
Expand Down
8 changes: 3 additions & 5 deletions drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
Original file line number Diff line number Diff line change
Expand Up @@ -867,15 +867,13 @@ static int gfx_v9_4_3_aca_bank_parser(struct aca_handle *handle,

switch (type) {
case ACA_SMU_TYPE_UE:
bank->aca_err_type = ACA_BANK_ERR_UE_DE_DECODE(bank);
bank->aca_err_type = ACA_ERROR_TYPE_UE;
ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type, 1ULL);
break;
case ACA_SMU_TYPE_CE:
bank->aca_err_type = ACA_BANK_ERR_CE_DE_DECODE(bank);
bank->aca_err_type = ACA_ERROR_TYPE_CE;
ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type,
(bank->aca_err_type == ACA_ERROR_TYPE_CE) ?
ACA_REG__MISC0__ERRCNT(misc0) :
1);
ACA_REG__MISC0__ERRCNT(misc0));
break;
default:
return -EINVAL;
Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
Original file line number Diff line number Diff line change
Expand Up @@ -1328,7 +1328,7 @@ static int jpeg_v4_0_3_aca_bank_parser(struct aca_handle *handle, struct aca_ban
1ULL);
break;
case ACA_SMU_TYPE_CE:
bank->aca_err_type = ACA_BANK_ERR_CE_DE_DECODE(bank);
bank->aca_err_type = ACA_ERROR_TYPE_CE;
ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type,
ACA_REG__MISC0__ERRCNT(misc0));
break;
Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
Original file line number Diff line number Diff line change
Expand Up @@ -751,7 +751,7 @@ static int mmhub_v1_8_aca_bank_parser(struct aca_handle *handle, struct aca_bank
1ULL);
break;
case ACA_SMU_TYPE_CE:
bank->aca_err_type = ACA_BANK_ERR_CE_DE_DECODE(bank);
bank->aca_err_type = ACA_ERROR_TYPE_CE;
ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type,
ACA_REG__MISC0__ERRCNT(misc0));
break;
Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
Original file line number Diff line number Diff line change
Expand Up @@ -2595,7 +2595,7 @@ static int sdma_v4_4_2_aca_bank_parser(struct aca_handle *handle, struct aca_ban
1ULL);
break;
case ACA_SMU_TYPE_CE:
bank->aca_err_type = ACA_BANK_ERR_CE_DE_DECODE(bank);
bank->aca_err_type = ACA_ERROR_TYPE_CE;
ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type,
ACA_REG__MISC0__ERRCNT(misc0));
break;
Expand Down
3 changes: 2 additions & 1 deletion drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,8 @@ bool umc_v12_0_is_deferred_error(struct amdgpu_device *adev, uint64_t mc_umc_sta

return (amdgpu_ras_is_poison_mode_supported(adev) &&
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1));
((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1) ||
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Poison) == 1)));
}

bool umc_v12_0_is_uncorrectable_error(struct amdgpu_device *adev, uint64_t mc_umc_status)
Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
Original file line number Diff line number Diff line change
Expand Up @@ -1965,7 +1965,7 @@ static int vcn_v4_0_3_aca_bank_parser(struct aca_handle *handle, struct aca_bank
1ULL);
break;
case ACA_SMU_TYPE_CE:
bank->aca_err_type = ACA_BANK_ERR_CE_DE_DECODE(bank);
bank->aca_err_type = ACA_ERROR_TYPE_CE;
ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type,
ACA_REG__MISC0__ERRCNT(misc0));
break;
Expand Down

0 comments on commit aedc92b

Please sign in to comment.