Skip to content

Commit

Permalink
drm/amdgpu: bypass some cleanup work after err_event_athub (v2)
Browse files Browse the repository at this point in the history
PSP lost connection when err_event_athub occurs. These cleanup work can be
skipped in BACO reset.

v2: squash in missing include (Alex)

Signed-off-by: Le Ma <le.ma@amd.com>
Reviewed-by: Hawking Zhang <hawking.zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
  • Loading branch information
Le Ma authored and Alex Deucher committed Oct 30, 2019
1 parent 8baaadb commit bff77e8
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 11 deletions.
6 changes: 6 additions & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
Original file line number Diff line number Diff line change
Expand Up @@ -2271,6 +2271,12 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
/* displays are handled in phase1 */
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
continue;
/* PSP lost connection when err_event_athub occurs */
if (amdgpu_ras_intr_triggered() &&
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
adev->ip_blocks[i].status.hw = false;
continue;
}
/* XXX handle errors */
r = adev->ip_blocks[i].version->funcs->suspend(adev);
/* XXX handle errors */
Expand Down
9 changes: 9 additions & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@
#include "psp_v11_0.h"
#include "psp_v12_0.h"

#include "amdgpu_ras.h"

static void psp_set_funcs(struct amdgpu_device *adev);

static int psp_early_init(void *handle)
Expand Down Expand Up @@ -167,6 +169,13 @@ psp_cmd_submit_buf(struct psp_context *psp,
while (*((unsigned int *)psp->fence_buf) != index) {
if (--timeout == 0)
break;
/*
* Shouldn't wait for timeout when err_event_athub occurs,
* because gpu reset thread triggered and lock resource should
* be released for psp resume sequence.
*/
if (amdgpu_ras_intr_triggered())
break;
msleep(1);
amdgpu_asic_invalidate_hdp(psp->adev, NULL);
}
Expand Down
20 changes: 11 additions & 9 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
Original file line number Diff line number Diff line change
Expand Up @@ -558,15 +558,17 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
if (!(!!enable ^ !!amdgpu_ras_is_feature_enabled(adev, head)))
return 0;

ret = psp_ras_enable_features(&adev->psp, &info, enable);
if (ret) {
DRM_ERROR("RAS ERROR: %s %s feature failed ret %d\n",
enable ? "enable":"disable",
ras_block_str(head->block),
ret);
if (ret == TA_RAS_STATUS__RESET_NEEDED)
return -EAGAIN;
return -EINVAL;
if (!amdgpu_ras_intr_triggered()) {
ret = psp_ras_enable_features(&adev->psp, &info, enable);
if (ret) {
DRM_ERROR("RAS ERROR: %s %s feature failed ret %d\n",
enable ? "enable":"disable",
ras_block_str(head->block),
ret);
if (ret == TA_RAS_STATUS__RESET_NEEDED)
return -EAGAIN;
return -EINVAL;
}
}

/* setup the obj */
Expand Down
6 changes: 4 additions & 2 deletions drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
Original file line number Diff line number Diff line change
Expand Up @@ -3736,8 +3736,10 @@ static int gfx_v9_0_hw_fini(void *handle)
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);

/* disable KCQ to avoid CPC touch memory not valid anymore */
gfx_v9_0_kcq_disable(adev);
/* DF freeze and kcq disable will fail */
if (!amdgpu_ras_intr_triggered())
/* disable KCQ to avoid CPC touch memory not valid anymore */
gfx_v9_0_kcq_disable(adev);

if (amdgpu_sriov_vf(adev)) {
gfx_v9_0_cp_gfx_enable(adev, false);
Expand Down

0 comments on commit bff77e8

Please sign in to comment.