Skip to content

Commit

Permalink
drm/amdgpu: add RAS poison consumption handler for SRIOV
Browse files Browse the repository at this point in the history
Send a message to the PF if a VF receives a RAS poison consumption interrupt.

Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
  • Loading branch information
Tao Zhou authored and Alex Deucher committed Dec 15, 2022
1 parent ae844dd commit e643823
Showing 1 changed file with 26 additions and 18 deletions.
44 changes: 26 additions & 18 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
Original file line number Diff line number Diff line change
Expand Up @@ -169,25 +169,33 @@ int amdgpu_umc_poison_handler(struct amdgpu_device *adev, bool reset)
{
int ret = AMDGPU_RAS_SUCCESS;

if (!adev->gmc.xgmi.connected_to_cpu) {
struct ras_err_data err_data = {0, 0, 0, NULL};
struct ras_common_if head = {
.block = AMDGPU_RAS_BLOCK__UMC,
};
struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head);

ret = amdgpu_umc_do_page_retirement(adev, &err_data, NULL, reset);

if (ret == AMDGPU_RAS_SUCCESS && obj) {
obj->err_data.ue_count += err_data.ue_count;
obj->err_data.ce_count += err_data.ce_count;
if (!amdgpu_sriov_vf(adev)) {
if (!adev->gmc.xgmi.connected_to_cpu) {
struct ras_err_data err_data = {0, 0, 0, NULL};
struct ras_common_if head = {
.block = AMDGPU_RAS_BLOCK__UMC,
};
struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head);

ret = amdgpu_umc_do_page_retirement(adev, &err_data, NULL, reset);

if (ret == AMDGPU_RAS_SUCCESS && obj) {
obj->err_data.ue_count += err_data.ue_count;
obj->err_data.ce_count += err_data.ce_count;
}
} else if (reset) {
/* MCA poison handler is only responsible for GPU reset,
* let MCA notifier do page retirement.
*/
kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
amdgpu_ras_reset_gpu(adev);
}
} else if (reset) {
/* MCA poison handler is only responsible for GPU reset,
* let MCA notifier do page retirement.
*/
kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
amdgpu_ras_reset_gpu(adev);
} else {
if (adev->virt.ops && adev->virt.ops->ras_poison_handler)
adev->virt.ops->ras_poison_handler(adev);
else
dev_warn(adev->dev,
"No ras_poison_handler interface in SRIOV!\n");
}

return ret;
Expand Down

0 comments on commit e643823

Please sign in to comment.