Skip to content

Commit

Permalink
drm/amdgpu: Include ACA error type in aca bank
Browse files Browse the repository at this point in the history
ACA error types managed by the driver do not have a direct 1:1
correspondence with those managed by firmware.

To address this, for each ACA bank, include
both the ACA error type and the ACA SMU type.

This addition is useful for creating CPER records.

Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Yang Wang <kevinyang.wang@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
  • Loading branch information
Hawking Zhang authored and Alex Deucher committed Feb 17, 2025
1 parent 76b1f8b commit 56316ee
Show file tree
Hide file tree
Showing 9 changed files with 17 additions and 2 deletions.
2 changes: 1 addition & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_smu_
if (ret)
return ret;

bank.type = type;
bank.smu_err_type = type;

aca_smu_bank_dump(adev, i, count, &bank, qctx);

Expand Down
4 changes: 3 additions & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,13 +108,15 @@ enum aca_error_type {
};

enum aca_smu_type {
ACA_SMU_TYPE_INVALID = -1,
ACA_SMU_TYPE_UE = 0,
ACA_SMU_TYPE_CE,
ACA_SMU_TYPE_COUNT,
};

/*
 * One ACA bank: its error classification plus the bank register values.
 *
 * NOTE(review): this excerpt comes from a diff view that shows removed and
 * added lines together. `type` appears to be the field this patch removes
 * (its only visible writer is changed to assign smu_err_type instead), with
 * aca_err_type and smu_err_type being the additions - confirm against the
 * tree before relying on this layout.
 */
struct aca_bank {
/* Pre-patch SMU type field; see the review note above. */
enum aca_smu_type type;
/* Driver-side error type; assigned by the per-IP aca_bank_parser callbacks. */
enum aca_error_type aca_err_type;
/* SMU-side type; assigned in aca_smu_get_valid_aca_banks() from the type being queried. */
enum aca_smu_type smu_err_type;
/* Bank register values, indexed by ACA_REG_IDX_* (e.g. ACA_REG_IDX_MISC0). */
u64 regs[ACA_MAX_REGS_COUNT];
};

Expand Down
2 changes: 2 additions & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
Original file line number Diff line number Diff line change
Expand Up @@ -1123,10 +1123,12 @@ static int xgmi_v6_4_0_aca_bank_parser(struct aca_handle *handle, struct aca_ban
if (ext_error_code != 0 && ext_error_code != 9)
count = 0ULL;

bank->aca_err_type = ACA_ERROR_TYPE_UE;
ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE, count);
break;
case ACA_SMU_TYPE_CE:
count = ext_error_code == 6 ? count : 0ULL;
bank->aca_err_type = ACA_ERROR_TYPE_CE;
ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE, count);
break;
default:
Expand Down
2 changes: 2 additions & 0 deletions drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
Original file line number Diff line number Diff line change
Expand Up @@ -891,10 +891,12 @@ static int gfx_v9_4_3_aca_bank_parser(struct aca_handle *handle,

switch (type) {
case ACA_SMU_TYPE_UE:
bank->aca_err_type = ACA_ERROR_TYPE_UE;
ret = aca_error_cache_log_bank_error(handle, &info,
ACA_ERROR_TYPE_UE, 1ULL);
break;
case ACA_SMU_TYPE_CE:
bank->aca_err_type = ACA_ERROR_TYPE_CE;
ret = aca_error_cache_log_bank_error(handle, &info,
ACA_ERROR_TYPE_CE, ACA_REG__MISC0__ERRCNT(misc0));
break;
Expand Down
2 changes: 2 additions & 0 deletions drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
Original file line number Diff line number Diff line change
Expand Up @@ -1291,10 +1291,12 @@ static int jpeg_v4_0_3_aca_bank_parser(struct aca_handle *handle, struct aca_ban
misc0 = bank->regs[ACA_REG_IDX_MISC0];
switch (type) {
case ACA_SMU_TYPE_UE:
bank->aca_err_type = ACA_ERROR_TYPE_UE;
ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE,
1ULL);
break;
case ACA_SMU_TYPE_CE:
bank->aca_err_type = ACA_ERROR_TYPE_CE;
ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE,
ACA_REG__MISC0__ERRCNT(misc0));
break;
Expand Down
2 changes: 2 additions & 0 deletions drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
Original file line number Diff line number Diff line change
Expand Up @@ -746,10 +746,12 @@ static int mmhub_v1_8_aca_bank_parser(struct aca_handle *handle, struct aca_bank
misc0 = bank->regs[ACA_REG_IDX_MISC0];
switch (type) {
case ACA_SMU_TYPE_UE:
bank->aca_err_type = ACA_ERROR_TYPE_UE;
ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE,
1ULL);
break;
case ACA_SMU_TYPE_CE:
bank->aca_err_type = ACA_ERROR_TYPE_CE;
ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE,
ACA_REG__MISC0__ERRCNT(misc0));
break;
Expand Down
2 changes: 2 additions & 0 deletions drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
Original file line number Diff line number Diff line change
Expand Up @@ -2392,10 +2392,12 @@ static int sdma_v4_4_2_aca_bank_parser(struct aca_handle *handle, struct aca_ban
misc0 = bank->regs[ACA_REG_IDX_MISC0];
switch (type) {
case ACA_SMU_TYPE_UE:
bank->aca_err_type = ACA_ERROR_TYPE_UE;
ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE,
1ULL);
break;
case ACA_SMU_TYPE_CE:
bank->aca_err_type = ACA_ERROR_TYPE_CE;
ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE,
ACA_REG__MISC0__ERRCNT(misc0));
break;
Expand Down
1 change: 1 addition & 0 deletions drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,7 @@ static int umc_v12_0_aca_bank_parser(struct aca_handle *handle, struct aca_bank
err_type = ACA_ERROR_TYPE_CE;
else
return 0;
bank->aca_err_type = err_type;

ret = aca_bank_info_decode(bank, &info);
if (ret)
Expand Down
2 changes: 2 additions & 0 deletions drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
Original file line number Diff line number Diff line change
Expand Up @@ -1925,10 +1925,12 @@ static int vcn_v4_0_3_aca_bank_parser(struct aca_handle *handle, struct aca_bank
misc0 = bank->regs[ACA_REG_IDX_MISC0];
switch (type) {
case ACA_SMU_TYPE_UE:
bank->aca_err_type = ACA_ERROR_TYPE_UE;
ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE,
1ULL);
break;
case ACA_SMU_TYPE_CE:
bank->aca_err_type = ACA_ERROR_TYPE_CE;
ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE,
ACA_REG__MISC0__ERRCNT(misc0));
break;
Expand Down

0 comments on commit 56316ee

Please sign in to comment.