Skip to content

Commit

Permalink
accel/ivpu: Fix DevTLB errors on suspend/resume and recovery
Browse files Browse the repository at this point in the history
Issue IP reset before shutdown in order to
complete all upstream requests to the SOC.
Without this DevTLB is complaining about
incomplete transactions and NPU cannot resume from
suspend.
This problem is only happening on recent IFWI
releases.

IP reset in rare corner cases can mess up PCI
configuration, so save it before the reset.
After this happens it is also impossible to
issue PLL requests and D0->D3->D0 cycle is needed
to recover the NPU. Add WP 0 request on power up,
so the PUNIT is always notified about NPU reset.

Use D0/D3 cycle for recovery as it can recover
from failed IP reset and FLR cannot.

Fixes: 3f7c063 ("accel/ivpu/37xx: Fix hangs related to MMIO reset")
Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240207102446.3126981-1-jacek.lawrynowicz@linux.intel.com
  • Loading branch information
Jacek Lawrynowicz committed Feb 12, 2024
1 parent 5f8408a commit 28083ff
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 29 deletions.
44 changes: 32 additions & 12 deletions drivers/accel/ivpu/ivpu_hw_37xx.c
Original file line number Diff line number Diff line change
Expand Up @@ -510,16 +510,6 @@ static int ivpu_boot_pwr_domain_enable(struct ivpu_device *vdev)
return ret;
}

/*
 * Run the power-domain disable sequence for @vdev.
 *
 * The four drive helpers are called in a fixed order (DPU active off,
 * island isolation on, trickle off, island off) and then the power island
 * status is waited on until it reads 0x0.
 *
 * Return: whatever ivpu_boot_wait_for_pwr_island_status() reports.
 */
static int ivpu_boot_pwr_domain_disable(struct ivpu_device *vdev)
{
	int status;

	/* Keep this ordering: each step is issued before the next one. */
	ivpu_boot_dpu_active_drive(vdev, false);
	ivpu_boot_pwr_island_isolation_drive(vdev, true);
	ivpu_boot_pwr_island_trickle_drive(vdev, false);
	ivpu_boot_pwr_island_drive(vdev, false);

	status = ivpu_boot_wait_for_pwr_island_status(vdev, 0x0);

	return status;
}

static void ivpu_boot_no_snoop_enable(struct ivpu_device *vdev)
{
u32 val = REGV_RD32(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES);
Expand Down Expand Up @@ -616,12 +606,37 @@ static int ivpu_hw_37xx_info_init(struct ivpu_device *vdev)
return 0;
}

/*
 * Request an IP reset through the VPU_37XX_BUTTRESS_VPU_IP_RESET register
 * and wait for the TRIGGER field to read back as 0 again.
 *
 * Nothing is done (and 0 is returned) when IVPU_WA(punit_disabled) is set.
 *
 * Return: 0 on success, otherwise the error code from the failed poll.
 */
static int ivpu_hw_37xx_ip_reset(struct ivpu_device *vdev)
{
	u32 reset_reg;
	int err;

	if (IVPU_WA(punit_disabled))
		return 0;

	/* TRIGGER has to read 0 before a new reset request is written. */
	err = REGB_POLL_FLD(VPU_37XX_BUTTRESS_VPU_IP_RESET, TRIGGER, 0, TIMEOUT_US);
	if (err) {
		ivpu_err(vdev, "Timed out waiting for TRIGGER bit\n");
		return err;
	}

	/* Read-modify-write: set only the TRIGGER field, keep the other bits. */
	reset_reg = REGB_RD32(VPU_37XX_BUTTRESS_VPU_IP_RESET);
	reset_reg = REG_SET_FLD(VPU_37XX_BUTTRESS_VPU_IP_RESET, TRIGGER, reset_reg);
	REGB_WR32(VPU_37XX_BUTTRESS_VPU_IP_RESET, reset_reg);

	/* Poll until TRIGGER reads 0 again, or the timeout expires. */
	err = REGB_POLL_FLD(VPU_37XX_BUTTRESS_VPU_IP_RESET, TRIGGER, 0, TIMEOUT_US);
	if (!err)
		return 0;

	ivpu_err(vdev, "Timed out waiting for RESET completion\n");
	return err;
}

static int ivpu_hw_37xx_reset(struct ivpu_device *vdev)
{
int ret = 0;

if (ivpu_boot_pwr_domain_disable(vdev)) {
ivpu_err(vdev, "Failed to disable power domain\n");
if (ivpu_hw_37xx_ip_reset(vdev)) {
ivpu_err(vdev, "Failed to reset NPU\n");
ret = -EIO;
}

Expand Down Expand Up @@ -661,6 +676,11 @@ static int ivpu_hw_37xx_power_up(struct ivpu_device *vdev)
{
int ret;

/* PLL requests may fail when powering down, so issue WP 0 here */
ret = ivpu_pll_disable(vdev);
if (ret)
ivpu_warn(vdev, "Failed to disable PLL: %d\n", ret);

ret = ivpu_hw_37xx_d0i3_disable(vdev);
if (ret)
ivpu_warn(vdev, "Failed to disable D0I3: %d\n", ret);
Expand Down
39 changes: 22 additions & 17 deletions drivers/accel/ivpu/ivpu_pm.c
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,14 @@ static int ivpu_suspend(struct ivpu_device *vdev)
{
int ret;

/* Save PCI state before powering down as it sometimes gets corrupted if NPU hangs */
pci_save_state(to_pci_dev(vdev->drm.dev));

ret = ivpu_shutdown(vdev);
if (ret) {
if (ret)
ivpu_err(vdev, "Failed to shutdown VPU: %d\n", ret);
return ret;
}

pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot);

return ret;
}
Expand All @@ -71,6 +74,9 @@ static int ivpu_resume(struct ivpu_device *vdev)
{
int ret;

pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D0);
pci_restore_state(to_pci_dev(vdev->drm.dev));

retry:
ret = ivpu_hw_power_up(vdev);
if (ret) {
Expand Down Expand Up @@ -120,15 +126,20 @@ static void ivpu_pm_recovery_work(struct work_struct *work)

ivpu_fw_log_dump(vdev);

retry:
ret = pci_try_reset_function(to_pci_dev(vdev->drm.dev));
if (ret == -EAGAIN && !drm_dev_is_unplugged(&vdev->drm)) {
cond_resched();
goto retry;
}
atomic_inc(&vdev->pm->reset_counter);
atomic_set(&vdev->pm->reset_pending, 1);
down_write(&vdev->pm->reset_lock);

ivpu_suspend(vdev);
ivpu_pm_prepare_cold_boot(vdev);
ivpu_jobs_abort_all(vdev);

ret = ivpu_resume(vdev);
if (ret)
ivpu_err(vdev, "Failed to resume NPU: %d\n", ret);

if (ret && ret != -EAGAIN)
ivpu_err(vdev, "Failed to reset VPU: %d\n", ret);
up_write(&vdev->pm->reset_lock);
atomic_set(&vdev->pm->reset_pending, 0);

kobject_uevent_env(&vdev->drm.dev->kobj, KOBJ_CHANGE, evt);
pm_runtime_mark_last_busy(vdev->drm.dev);
Expand Down Expand Up @@ -200,9 +211,6 @@ int ivpu_pm_suspend_cb(struct device *dev)
ivpu_suspend(vdev);
ivpu_pm_prepare_warm_boot(vdev);

pci_save_state(to_pci_dev(dev));
pci_set_power_state(to_pci_dev(dev), PCI_D3hot);

ivpu_dbg(vdev, PM, "Suspend done.\n");

return 0;
Expand All @@ -216,9 +224,6 @@ int ivpu_pm_resume_cb(struct device *dev)

ivpu_dbg(vdev, PM, "Resume..\n");

pci_set_power_state(to_pci_dev(dev), PCI_D0);
pci_restore_state(to_pci_dev(dev));

ret = ivpu_resume(vdev);
if (ret)
ivpu_err(vdev, "Failed to resume: %d\n", ret);
Expand Down

0 comments on commit 28083ff

Please sign in to comment.