diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 816781f209d66..f8d13ee2d5382 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2166,6 +2166,8 @@ struct drm_i915_private { struct intel_uncore uncore; + struct mutex tlb_invalidate_lock; + struct i915_virtual_gpu vgpu; struct intel_gvt *gvt; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 9263b65720bc6..08d31744e2d92 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2220,6 +2220,76 @@ static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj) rcu_read_unlock(); } +struct reg_and_bit { + i915_reg_t reg; + u32 bit; +}; + +static struct reg_and_bit +get_reg_and_bit(const struct intel_engine_cs *engine, + const i915_reg_t *regs, const unsigned int num) +{ + const unsigned int class = engine->class; + struct reg_and_bit rb = { .bit = 1 }; + + if (WARN_ON_ONCE(class >= num || !regs[class].reg)) + return rb; + + rb.reg = regs[class]; + if (class == VIDEO_DECODE_CLASS) + rb.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */ + + return rb; +} + +static void invalidate_tlbs(struct drm_i915_private *dev_priv) +{ + static const i915_reg_t gen8_regs[] = { + [RENDER_CLASS] = GEN8_RTCR, + [VIDEO_DECODE_CLASS] = GEN8_M1TCR, /* , GEN8_M2TCR */ + [VIDEO_ENHANCEMENT_CLASS] = GEN8_VTCR, + [COPY_ENGINE_CLASS] = GEN8_BTCR, + }; + const unsigned int num = ARRAY_SIZE(gen8_regs); + const i915_reg_t *regs = gen8_regs; + struct intel_engine_cs *engine; + enum intel_engine_id id; + + if (INTEL_GEN(dev_priv) < 8) + return; + + assert_rpm_wakelock_held(dev_priv); + + mutex_lock(&dev_priv->tlb_invalidate_lock); + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); + + for_each_engine(engine, dev_priv, id) { + /* + * HW architecture suggest typical invalidation time at 40us, + * with pessimistic cases up to 100us and a recommendation to + * cap at 1ms. We go a bit higher just in case. + */ + const unsigned int timeout_us = 100; + const unsigned int timeout_ms = 4; + struct reg_and_bit rb; + + rb = get_reg_and_bit(engine, regs, num); + if (!i915_mmio_reg_offset(rb.reg)) + continue; + + I915_WRITE_FW(rb.reg, rb.bit); + if (__intel_wait_for_register_fw(dev_priv, + rb.reg, rb.bit, 0, + timeout_us, timeout_ms, + NULL)) + DRM_ERROR_RATELIMITED("%s TLB invalidation did not complete in %ums!\n", + engine->name, timeout_ms); + } + + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); + mutex_unlock(&dev_priv->tlb_invalidate_lock); +} + void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, enum i915_mm_subclass subclass) { @@ -2257,8 +2327,18 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, __i915_gem_object_reset_page_iter(obj); - if (!IS_ERR(pages)) + if (!IS_ERR(pages)) { + if (test_and_clear_bit(I915_BO_WAS_BOUND_BIT, &obj->flags)) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); + + if (intel_runtime_pm_get_if_in_use(i915)) { + invalidate_tlbs(i915); + intel_runtime_pm_put(i915); + } + } + obj->ops->put_pages(obj, pages); + } unlock: mutex_unlock(&obj->mm.lock); @@ -4972,6 +5052,8 @@ i915_gem_load_init(struct drm_i915_private *dev_priv) spin_lock_init(&dev_priv->fb_tracking.lock); + mutex_init(&dev_priv->tlb_invalidate_lock); + return 0; err_priorities: diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index 39cfe04dcdb8b..180a8bf247918 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -135,6 +135,7 @@ struct drm_i915_gem_object { * activity? */ #define I915_BO_ACTIVE_REF 0 +#define I915_BO_WAS_BOUND_BIT 1 /* * Is the object to be mapped as read-only to the GPU diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 1db70350af0bf..333e943817896 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2380,6 +2380,12 @@ enum i915_power_well_id { #define GAMT_CHKN_BIT_REG _MMIO(0x4ab8) #define GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING (1<<28) +#define GEN8_RTCR _MMIO(0x4260) +#define GEN8_M1TCR _MMIO(0x4264) +#define GEN8_M2TCR _MMIO(0x4268) +#define GEN8_BTCR _MMIO(0x426c) +#define GEN8_VTCR _MMIO(0x4270) + #if 0 #define PRB0_TAIL _MMIO(0x2030) #define PRB0_HEAD _MMIO(0x2034) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 76eed1fdac096..5653e7bac914b 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -272,6 +272,10 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, return ret; vma->flags |= bind_flags; + + if (vma->obj) + set_bit(I915_BO_WAS_BOUND_BIT, &vma->obj->flags); + return 0; }