From 56316ee91bcefaf535dbe8ba601cf5f14051d09a Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Sun, 26 Jan 2025 16:32:57 +0800 Subject: [PATCH] drm/amdgpu: Include ACA error type in aca bank ACA error types managed by driver a direct 1:1 correspondence with those managed by firmware. To address this, for each ACA bank, include both the ACA error type and the ACA SMU type. This addition is useful for creating CPER records. Signed-off-by: Hawking Zhang Reviewed-by: Yang Wang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h | 4 +++- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 2 ++ drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 2 ++ drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 2 ++ drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c | 2 ++ drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 2 ++ drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | 1 + drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 2 ++ 9 files changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c index 9d6345146495f..1a26b8ad14cb4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c @@ -168,7 +168,7 @@ static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_smu_ if (ret) return ret; - bank.type = type; + bank.smu_err_type = type; aca_smu_bank_dump(adev, i, count, &bank, qctx); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h index f3289d2899130..3cd0115b02448 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h @@ -108,13 +108,15 @@ enum aca_error_type { }; enum aca_smu_type { + ACA_SMU_TYPE_INVALID = -1, ACA_SMU_TYPE_UE = 0, ACA_SMU_TYPE_CE, ACA_SMU_TYPE_COUNT, }; struct aca_bank { - enum aca_smu_type type; + enum aca_error_type aca_err_type; + enum aca_smu_type smu_err_type; u64 regs[ACA_MAX_REGS_COUNT]; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 74b4349e345a6..c98b6b35cfdfd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -1123,10 +1123,12 @@ static int xgmi_v6_4_0_aca_bank_parser(struct aca_handle *handle, struct aca_ban if (ext_error_code != 0 && ext_error_code != 9) count = 0ULL; + bank->aca_err_type = ACA_ERROR_TYPE_UE; ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE, count); break; case ACA_SMU_TYPE_CE: count = ext_error_code == 6 ? count : 0ULL; + bank->aca_err_type = ACA_ERROR_TYPE_CE; ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE, count); break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index f4635fc8a7ca4..f5b1d2edf8054 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -891,10 +891,12 @@ static int gfx_v9_4_3_aca_bank_parser(struct aca_handle *handle, switch (type) { case ACA_SMU_TYPE_UE: + bank->aca_err_type = ACA_ERROR_TYPE_UE; ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE, 1ULL); break; case ACA_SMU_TYPE_CE: + bank->aca_err_type = ACA_ERROR_TYPE_CE; ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE, ACA_REG__MISC0__ERRCNT(misc0)); break; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index 574083c7378e8..c67ba961de91a 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -1291,10 +1291,12 @@ static int jpeg_v4_0_3_aca_bank_parser(struct aca_handle *handle, struct aca_ban misc0 = bank->regs[ACA_REG_IDX_MISC0]; switch (type) { case ACA_SMU_TYPE_UE: + bank->aca_err_type = ACA_ERROR_TYPE_UE; ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE, 1ULL); break; case ACA_SMU_TYPE_CE: + bank->aca_err_type = ACA_ERROR_TYPE_CE; ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE, ACA_REG__MISC0__ERRCNT(misc0)); break; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c index ce013a715b864..58d22f0d5a68f 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c @@ -746,10 +746,12 @@ static int mmhub_v1_8_aca_bank_parser(struct aca_handle *handle, struct aca_bank misc0 = bank->regs[ACA_REG_IDX_MISC0]; switch (type) { case ACA_SMU_TYPE_UE: + bank->aca_err_type = ACA_ERROR_TYPE_UE; ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE, 1ULL); break; case ACA_SMU_TYPE_CE: + bank->aca_err_type = ACA_ERROR_TYPE_CE; ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE, ACA_REG__MISC0__ERRCNT(misc0)); break; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index 5e0066cd6c515..3dc0ffa81484c 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -2392,10 +2392,12 @@ static int sdma_v4_4_2_aca_bank_parser(struct aca_handle *handle, struct aca_ban misc0 = bank->regs[ACA_REG_IDX_MISC0]; switch (type) { case ACA_SMU_TYPE_UE: + bank->aca_err_type = ACA_ERROR_TYPE_UE; ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE, 1ULL); break; case ACA_SMU_TYPE_CE: + bank->aca_err_type = ACA_ERROR_TYPE_CE; ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE, ACA_REG__MISC0__ERRCNT(misc0)); break; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index a7b9c358a2d4c..74f57b2d30a52 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -415,6 +415,7 @@ static int umc_v12_0_aca_bank_parser(struct aca_handle *handle, struct aca_bank err_type = ACA_ERROR_TYPE_CE; else return 0; + bank->aca_err_type = err_type; ret = aca_bank_info_decode(bank, &info); if (ret) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index f1f07badb9b7f..75211366f8f66 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -1925,10 +1925,12 @@ static int vcn_v4_0_3_aca_bank_parser(struct aca_handle *handle, struct aca_bank misc0 = bank->regs[ACA_REG_IDX_MISC0]; switch (type) { case ACA_SMU_TYPE_UE: + bank->aca_err_type = ACA_ERROR_TYPE_UE; ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE, 1ULL); break; case ACA_SMU_TYPE_CE: + bank->aca_err_type = ACA_ERROR_TYPE_CE; ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE, ACA_REG__MISC0__ERRCNT(misc0)); break;