Skip to content

Commit

Permalink
drm/amdgpu: add function to clear MMEA error status for aldebaran
Browse files Browse the repository at this point in the history
For aldebaran, hardware will not clear error status automatically when
reading error status register, insteadly driver should set clear bit of
the error status register explicitly to clear error status.

Signed-off-by: Dennis Li <Dennis.Li@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
  • Loading branch information
Dennis Li authored and Alex Deucher committed May 10, 2021
1 parent 4f64f1c commit 7780f50
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 0 deletions.
1 change: 1 addition & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ struct amdgpu_mmhub_ras_funcs {
void *ras_error_status);
void (*query_ras_error_status)(struct amdgpu_device *adev);
void (*reset_ras_error_count)(struct amdgpu_device *adev);
void (*reset_ras_error_status)(struct amdgpu_device *adev);
};

struct amdgpu_mmhub_funcs {
Expand Down
4 changes: 4 additions & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
Original file line number Diff line number Diff line change
Expand Up @@ -938,6 +938,10 @@ int amdgpu_ras_reset_error_status(struct amdgpu_device *adev,
if (adev->mmhub.ras_funcs &&
adev->mmhub.ras_funcs->reset_ras_error_count)
adev->mmhub.ras_funcs->reset_ras_error_count(adev);

if (adev->mmhub.ras_funcs &&
adev->mmhub.ras_funcs->reset_ras_error_status)
adev->mmhub.ras_funcs->reset_ras_error_status(adev);
break;
case AMDGPU_RAS_BLOCK__SDMA:
if (adev->sdma.funcs->reset_ras_error_count)
Expand Down
19 changes: 19 additions & 0 deletions drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
Original file line number Diff line number Diff line change
Expand Up @@ -1315,12 +1315,31 @@ static void mmhub_v1_7_query_ras_error_status(struct amdgpu_device *adev)
}
}

static void mmhub_v1_7_reset_ras_error_status(struct amdgpu_device *adev)
{
int i;
uint32_t reg_value;

if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB))
return;

for (i = 0; i < ARRAY_SIZE(mmhub_v1_7_ea_err_status_regs); i++) {
reg_value = RREG32(SOC15_REG_ENTRY_OFFSET(
mmhub_v1_7_ea_err_status_regs[i]));
reg_value = REG_SET_FIELD(reg_value, MMEA0_ERR_STATUS,
CLEAR_ERROR_STATUS, 0x01);
WREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v1_7_ea_err_status_regs[i]),
reg_value);
}
}

const struct amdgpu_mmhub_ras_funcs mmhub_v1_7_ras_funcs = {
.ras_late_init = amdgpu_mmhub_ras_late_init,
.ras_fini = amdgpu_mmhub_ras_fini,
.query_ras_error_count = mmhub_v1_7_query_ras_error_count,
.reset_ras_error_count = mmhub_v1_7_reset_ras_error_count,
.query_ras_error_status = mmhub_v1_7_query_ras_error_status,
.reset_ras_error_status = mmhub_v1_7_reset_ras_error_status,
};

const struct amdgpu_mmhub_funcs mmhub_v1_7_funcs = {
Expand Down

0 comments on commit 7780f50

Please sign in to comment.