diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c index f88069ab71abd..e73d4440c5e82 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c @@ -166,6 +166,29 @@ int intel_gsc_uc_fw_upload(struct intel_gsc_uc *gsc) if (err) goto fail; + /* + * GSC is only killed by an FLR, so we need to trigger one on unload to + * make sure we stop it. This is because we assign a chunk of memory to + * the GSC as part of the FW load , so we need to make sure it stops + * using it when we release it to the system on driver unload. Note that + * this is not a problem of the unload per-se, because the GSC will not + * touch that memory unless there are requests for it coming from the + * driver; therefore, no accesses will happen while i915 is not loaded, + * but if we re-load the driver then the GSC might wake up and try to + * access that old memory location again. + * Given that an FLR is a very disruptive action (see the FLR function + * for details), we want to do it as the last action before releasing + * the access to the MMIO bar, which means we need to do it as part of + * the primary uncore cleanup. + * An alternative approach to the FLR would be to use a memory location + * that survives driver unload, like e.g. stolen memory, and keep the + * GSC loaded across reloads. However, this requires us to make sure we + * preserve that memory location on unload and then determine and + * reserve its offset on each subsequent load, which is not trivial, so + * it is easier to just kill everything and start fresh. + */ + intel_uncore_set_flr_on_fini(>->i915->uncore); + err = gsc_fw_load(gsc); if (err) goto fail; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 8db5293c38738..358036e3fc6e3 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -186,6 +186,9 @@ #define GU_CNTL _MMIO(0x101010) #define LMEM_INIT REG_BIT(7) +#define DRIVERFLR REG_BIT(31) +#define GU_DEBUG _MMIO(0x101018) +#define DRIVERFLR_STATUS REG_BIT(31) #define GEN6_STOLEN_RESERVED _MMIO(0x1082C0) #define GEN6_STOLEN_RESERVED_ADDR_MASK (0xFFF << 20) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 6c25c9e7090ab..6a4006d259492 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -2702,6 +2702,59 @@ void intel_uncore_prune_engine_fw_domains(struct intel_uncore *uncore, } } +/* + * The driver-initiated FLR is the highest level of reset that we can trigger + * from within the driver. It is different from the PCI FLR in that it doesn't + * fully reset the SGUnit and doesn't modify the PCI config space and therefore + * it doesn't require a re-enumeration of the PCI BARs. However, the + * driver-initiated FLR does still cause a reset of both GT and display and a + * memory wipe of local and stolen memory, so recovery would require a full HW + * re-init and saving/restoring (or re-populating) the wiped memory. Since we + * perform the FLR as the very last action before releasing access to the HW + * during the driver release flow, we don't attempt recovery at all, because + * if/when a new instance of i915 is bound to the device it will do a full + * re-init anyway. + */ +static void driver_initiated_flr(struct intel_uncore *uncore) +{ + struct drm_i915_private *i915 = uncore->i915; + const unsigned int flr_timeout_ms = 3000; /* specs recommend a 3s wait */ + int ret; + + drm_dbg(&i915->drm, "Triggering Driver-FLR\n"); + + /* + * Make sure any pending FLR requests have cleared by waiting for the + * FLR trigger bit to go to zero. Also clear GU_DEBUG's DRIVERFLR_STATUS + * to make sure it's not still set from a prior attempt (it's a write to + * clear bit). + * Note that we should never be in a situation where a previous attempt + * is still pending (unless the HW is totally dead), but better to be + * safe in case something unexpected happens + */ + ret = intel_wait_for_register_fw(uncore, GU_CNTL, DRIVERFLR, 0, flr_timeout_ms); + if (ret) { + drm_err(&i915->drm, + "Failed to wait for Driver-FLR bit to clear! %d\n", + ret); + return; + } + intel_uncore_write_fw(uncore, GU_DEBUG, DRIVERFLR_STATUS); + + /* Trigger the actual Driver-FLR */ + intel_uncore_rmw_fw(uncore, GU_CNTL, 0, DRIVERFLR); + + ret = intel_wait_for_register_fw(uncore, GU_DEBUG, + DRIVERFLR_STATUS, DRIVERFLR_STATUS, + flr_timeout_ms); + if (ret) { + drm_err(&i915->drm, "wait for Driver-FLR completion failed! %d\n", ret); + return; + } + + intel_uncore_write_fw(uncore, GU_DEBUG, DRIVERFLR_STATUS); +} + /* Called via drm-managed action */ void intel_uncore_fini_mmio(struct drm_device *dev, void *data) { @@ -2715,6 +2768,9 @@ void intel_uncore_fini_mmio(struct drm_device *dev, void *data) intel_uncore_fw_domains_fini(uncore); iosf_mbi_punit_release(); } + + if (intel_uncore_needs_flr_on_fini(uncore)) + driver_initiated_flr(uncore); } /** diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h index e9e38490815d0..9ea1f4864a3a4 100644 --- a/drivers/gpu/drm/i915/intel_uncore.h +++ b/drivers/gpu/drm/i915/intel_uncore.h @@ -153,6 +153,7 @@ struct intel_uncore { #define UNCORE_HAS_FPGA_DBG_UNCLAIMED BIT(1) #define UNCORE_HAS_DBG_UNCLAIMED BIT(2) #define UNCORE_HAS_FIFO BIT(3) +#define UNCORE_NEEDS_FLR_ON_FINI BIT(4) const struct intel_forcewake_range *fw_domains_table; unsigned int fw_domains_table_entries; @@ -223,6 +224,18 @@ intel_uncore_has_fifo(const struct intel_uncore *uncore) return uncore->flags & UNCORE_HAS_FIFO; } +static inline bool +intel_uncore_needs_flr_on_fini(const struct intel_uncore *uncore) +{ + return uncore->flags & UNCORE_NEEDS_FLR_ON_FINI; +} + +static inline bool +intel_uncore_set_flr_on_fini(struct intel_uncore *uncore) +{ + return uncore->flags |= UNCORE_NEEDS_FLR_ON_FINI; +} + void intel_uncore_mmio_debug_init_early(struct drm_i915_private *i915); void intel_uncore_init_early(struct intel_uncore *uncore, struct intel_gt *gt);