Skip to content

Commit

Permalink
drm/i915/gt: Ignore TLB invalidations on idle engines
Browse files Browse the repository at this point in the history
Check if the device is powered down prior to any engine activity,
as, in such cases, all the TLBs were already invalidated, so an
explicit TLB invalidation is not needed, thus reducing the
performance regression impact due to it.

This becomes more significant with GuC, as it can only do so when
the connection to the GuC is awake.

Cc: stable@vger.kernel.org
Fixes: 7938d61 ("drm/i915: Flush TLBs before releasing backing store")
Signed-off-by: Chris Wilson <chris.p.wilson@intel.com>
Cc: Fei Yang <fei.yang@intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Acked-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Acked-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/278a57a672edac75683f0818b292e95da583a5fe.1658924372.git.mchehab@kernel.org
  • Loading branch information
Chris Wilson authored and Andi Shyti committed Jul 28, 2022
1 parent 353819d commit 4bedcea
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 11 deletions.
10 changes: 6 additions & 4 deletions drivers/gpu/drm/i915/gem/i915_gem_pages.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,15 @@

#include <drm/drm_cache.h>

#include "gt/intel_gt.h"
#include "gt/intel_gt_pm.h"

#include "i915_drv.h"
#include "i915_gem_object.h"
#include "i915_scatterlist.h"
#include "i915_gem_lmem.h"
#include "i915_gem_mman.h"

#include "gt/intel_gt.h"

void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
struct sg_table *pages,
unsigned int sg_page_sizes)
Expand Down Expand Up @@ -217,10 +218,11 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)

if (test_and_clear_bit(I915_BO_WAS_BOUND_BIT, &obj->flags)) {
struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct intel_gt *gt = to_gt(i915);
intel_wakeref_t wakeref;

with_intel_runtime_pm_if_active(&i915->runtime_pm, wakeref)
intel_gt_invalidate_tlbs(to_gt(i915));
with_intel_gt_pm_if_awake(gt, wakeref)
intel_gt_invalidate_tlbs(gt);
}

return pages;
Expand Down
17 changes: 10 additions & 7 deletions drivers/gpu/drm/i915/gt/intel_gt.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

#include "i915_drv.h"
#include "intel_context.h"
#include "intel_engine_pm.h"
#include "intel_engine_regs.h"
#include "intel_ggtt_gmch.h"
#include "intel_gt.h"
Expand Down Expand Up @@ -924,6 +925,7 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
struct drm_i915_private *i915 = gt->i915;
struct intel_uncore *uncore = gt->uncore;
struct intel_engine_cs *engine;
intel_engine_mask_t awake, tmp;
enum intel_engine_id id;
const i915_reg_t *regs;
unsigned int num = 0;
Expand All @@ -947,39 +949,40 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)

GEM_TRACE("\n");

assert_rpm_wakelock_held(&i915->runtime_pm);

mutex_lock(&gt->tlb_invalidate_lock);
intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);

spin_lock_irq(&uncore->lock); /* serialise invalidate with GT reset */

awake = 0;
for_each_engine(engine, gt, id) {
struct reg_and_bit rb;

if (!intel_engine_pm_is_awake(engine))
continue;

rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
if (!i915_mmio_reg_offset(rb.reg))
continue;

intel_uncore_write_fw(uncore, rb.reg, rb.bit);
awake |= engine->mask;
}

spin_unlock_irq(&uncore->lock);

for_each_engine(engine, gt, id) {
for_each_engine_masked(engine, gt, awake, tmp) {
struct reg_and_bit rb;

/*
* HW architecture suggest typical invalidation time at 40us,
* with pessimistic cases up to 100us and a recommendation to
* cap at 1ms. We go a bit higher just in case.
*/
const unsigned int timeout_us = 100;
const unsigned int timeout_ms = 4;
struct reg_and_bit rb;

rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
if (!i915_mmio_reg_offset(rb.reg))
continue;

if (__intel_wait_for_register_fw(uncore,
rb.reg, rb.bit, 0,
timeout_us, timeout_ms,
Expand Down
3 changes: 3 additions & 0 deletions drivers/gpu/drm/i915/gt/intel_gt_pm.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,9 @@ static inline void intel_gt_pm_might_put(struct intel_gt *gt)
for (tmp = 1, intel_gt_pm_get(gt); tmp; \
intel_gt_pm_put(gt), tmp = 0)

/*
 * with_intel_gt_pm_if_awake - conditionally run a statement under a GT PM ref
 * @gt: passed to intel_gt_pm_get_if_awake() and intel_gt_pm_put_async()
 * @wf: caller-provided lvalue that stores the result of
 *      intel_gt_pm_get_if_awake() and serves as the loop condition
 *
 * Expands to a for-statement header, so the statement (or block) following
 * the macro is its body. The body runs at most once, and only when
 * intel_gt_pm_get_if_awake(gt) yields a non-zero value; otherwise it is
 * skipped entirely. After the body completes, intel_gt_pm_put_async(gt) is
 * called and @wf is reset to 0, which terminates the loop.
 *
 * NOTE(review): because the release lives in the for-loop increment
 * expression, leaving the body via break/return/goto skips
 * intel_gt_pm_put_async() - callers should let the body run to completion.
 */
#define with_intel_gt_pm_if_awake(gt, wf) \
for (wf = intel_gt_pm_get_if_awake(gt); wf; intel_gt_pm_put_async(gt), wf = 0)

static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt)
{
return intel_wakeref_wait_for_idle(&gt->wakeref);
Expand Down

0 comments on commit 4bedcea

Please sign in to comment.