Skip to content

Commit

Permalink
drm/i915: Use RPM as the barrier for controlling user mmap access
Browse files Browse the repository at this point in the history
We can remove the false coupling between RPM and struct mutex by the
observation that we can use the RPM wakeref as the barrier around user
mmap access. That is, because we tear down the user's PTEs atomically from
within RPM suspend, and faulting in new PTEs requires the RPM wakeref,
no user access is possible through those PTEs without
RPM being awake. Having made that observation, we can then remove the
presumption of having to take the RPM wakeref outside of struct_mutex and so
allow fine-grained acquisition of a wakeref around hw access rather than
having to remember to acquire the wakeref early on.

v2: Rejig placement of the new intel_runtime_pm_get() to be as tight
as possible around the GTT pread/pwrite.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Imre Deak <imre.deak@intel.com>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: Daniel Vetter <daniel@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20161024124218.18252-2-chris@chris-wilson.co.uk
  • Loading branch information
Chris Wilson committed Oct 24, 2016
1 parent 275f039 commit 9c870d0
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 69 deletions.
56 changes: 33 additions & 23 deletions drivers/gpu/drm/i915/i915_debugfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -743,17 +743,32 @@ static int i915_interrupt_info(struct seq_file *m, void *data)
I915_READ(VLV_IIR_RW));
seq_printf(m, "Display IMR:\t%08x\n",
I915_READ(VLV_IMR));
for_each_pipe(dev_priv, pipe)
for_each_pipe(dev_priv, pipe) {
enum intel_display_power_domain power_domain;

power_domain = POWER_DOMAIN_PIPE(pipe);
if (!intel_display_power_get_if_enabled(dev_priv,
power_domain)) {
seq_printf(m, "Pipe %c power disabled\n",
pipe_name(pipe));
continue;
}

seq_printf(m, "Pipe %c stat:\t%08x\n",
pipe_name(pipe),
I915_READ(PIPESTAT(pipe)));

intel_display_power_put(dev_priv, power_domain);
}

intel_display_power_get(dev_priv, POWER_DOMAIN_INIT);
seq_printf(m, "Port hotplug:\t%08x\n",
I915_READ(PORT_HOTPLUG_EN));
seq_printf(m, "DPFLIPSTAT:\t%08x\n",
I915_READ(VLV_DPFLIPSTAT));
seq_printf(m, "DPINVGTT:\t%08x\n",
I915_READ(DPINVGTT));
intel_display_power_put(dev_priv, POWER_DOMAIN_INIT);

for (i = 0; i < 4; i++) {
seq_printf(m, "GT Interrupt IMR %d:\t%08x\n",
Expand Down Expand Up @@ -1396,22 +1411,16 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
static int ironlake_drpc_info(struct seq_file *m)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
struct drm_device *dev = &dev_priv->drm;
u32 rgvmodectl, rstdbyctl;
u16 crstandvid;
int ret;

ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
return ret;
intel_runtime_pm_get(dev_priv);

rgvmodectl = I915_READ(MEMMODECTL);
rstdbyctl = I915_READ(RSTDBYCTL);
crstandvid = I915_READ16(CRSTANDVID);

intel_runtime_pm_put(dev_priv);
mutex_unlock(&dev->struct_mutex);

seq_printf(m, "HD boost: %s\n", yesno(rgvmodectl & MEMMODE_BOOST_EN));
seq_printf(m, "Boost freq: %d\n",
Expand Down Expand Up @@ -1757,6 +1766,7 @@ static int i915_sr_status(struct seq_file *m, void *unused)
bool sr_enabled = false;

intel_runtime_pm_get(dev_priv);
intel_display_power_get(dev_priv, POWER_DOMAIN_INIT);

if (HAS_PCH_SPLIT(dev_priv))
sr_enabled = I915_READ(WM1_LP_ILK) & WM1_LP_SR_EN;
Expand All @@ -1770,6 +1780,7 @@ static int i915_sr_status(struct seq_file *m, void *unused)
else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
sr_enabled = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN;

intel_display_power_put(dev_priv, POWER_DOMAIN_INIT);
intel_runtime_pm_put(dev_priv);

seq_printf(m, "self-refresh: %s\n",
Expand Down Expand Up @@ -2091,12 +2102,7 @@ static const char *swizzle_string(unsigned swizzle)
static int i915_swizzle_info(struct seq_file *m, void *data)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
struct drm_device *dev = &dev_priv->drm;
int ret;

ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
return ret;
intel_runtime_pm_get(dev_priv);

seq_printf(m, "bit6 swizzle for X-tiling = %s\n",
Expand Down Expand Up @@ -2136,7 +2142,6 @@ static int i915_swizzle_info(struct seq_file *m, void *data)
seq_puts(m, "L-shaped memory detected\n");

intel_runtime_pm_put(dev_priv);
mutex_unlock(&dev->struct_mutex);

return 0;
}
Expand Down Expand Up @@ -2542,11 +2547,22 @@ static int i915_edp_psr_status(struct seq_file *m, void *data)
enabled = I915_READ(EDP_PSR_CTL) & EDP_PSR_ENABLE;
else {
for_each_pipe(dev_priv, pipe) {
enum transcoder cpu_transcoder =
intel_pipe_to_cpu_transcoder(dev_priv, pipe);
enum intel_display_power_domain power_domain;

power_domain = POWER_DOMAIN_TRANSCODER(cpu_transcoder);
if (!intel_display_power_get_if_enabled(dev_priv,
power_domain))
continue;

stat[pipe] = I915_READ(VLV_PSRSTAT(pipe)) &
VLV_EDP_PSR_CURR_STATE_MASK;
if ((stat[pipe] == VLV_EDP_PSR_ACTIVE_NORFB_UP) ||
(stat[pipe] == VLV_EDP_PSR_ACTIVE_SF_UPDATE))
enabled = true;

intel_display_power_put(dev_priv, power_domain);
}
}

Expand Down Expand Up @@ -3094,6 +3110,8 @@ static int i915_engine_info(struct seq_file *m, void *unused)
struct intel_engine_cs *engine;
enum intel_engine_id id;

intel_runtime_pm_get(dev_priv);

for_each_engine(engine, dev_priv, id) {
struct intel_breadcrumbs *b = &engine->breadcrumbs;
struct drm_i915_gem_request *rq;
Expand Down Expand Up @@ -3213,6 +3231,8 @@ static int i915_engine_info(struct seq_file *m, void *unused)
seq_puts(m, "\n");
}

intel_runtime_pm_put(dev_priv);

return 0;
}

Expand Down Expand Up @@ -4799,13 +4819,9 @@ i915_wedged_set(void *data, u64 val)
if (i915_reset_in_progress(&dev_priv->gpu_error))
return -EAGAIN;

intel_runtime_pm_get(dev_priv);

i915_handle_error(dev_priv, val,
"Manually setting wedged to %llu", val);

intel_runtime_pm_put(dev_priv);

return 0;
}

Expand Down Expand Up @@ -5040,22 +5056,16 @@ static int
i915_cache_sharing_get(void *data, u64 *val)
{
struct drm_i915_private *dev_priv = data;
struct drm_device *dev = &dev_priv->drm;
u32 snpcr;
int ret;

if (!(IS_GEN6(dev_priv) || IS_GEN7(dev_priv)))
return -ENODEV;

ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
return ret;
intel_runtime_pm_get(dev_priv);

snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);

intel_runtime_pm_put(dev_priv);
mutex_unlock(&dev->struct_mutex);

*val = (snpcr & GEN6_MBC_SNPCR_MASK) >> GEN6_MBC_SNPCR_SHIFT;

Expand Down
19 changes: 0 additions & 19 deletions drivers/gpu/drm/i915/i915_drv.c
Original file line number Diff line number Diff line change
Expand Up @@ -2301,32 +2301,13 @@ static int intel_runtime_suspend(struct device *kdev)

DRM_DEBUG_KMS("Suspending device\n");

/*
* We could deadlock here in case another thread holding struct_mutex
* calls RPM suspend concurrently, since the RPM suspend will wait
* first for this RPM suspend to finish. In this case the concurrent
* RPM resume will be followed by its RPM suspend counterpart. Still
* for consistency return -EAGAIN, which will reschedule this suspend.
*/
if (!mutex_trylock(&dev->struct_mutex)) {
DRM_DEBUG_KMS("device lock contention, deffering suspend\n");
/*
* Bump the expiration timestamp, otherwise the suspend won't
* be rescheduled.
*/
pm_runtime_mark_last_busy(kdev);

return -EAGAIN;
}

disable_rpm_wakeref_asserts(dev_priv);

/*
* We are safe here against re-faults, since the fault handler takes
* an RPM reference.
*/
i915_gem_release_all_mmaps(dev_priv);
mutex_unlock(&dev->struct_mutex);

intel_guc_suspend(dev);

Expand Down
42 changes: 23 additions & 19 deletions drivers/gpu/drm/i915/i915_gem.c
Original file line number Diff line number Diff line change
Expand Up @@ -826,6 +826,7 @@ i915_gem_gtt_pread(struct drm_device *dev,
uint64_t offset;
int ret;

intel_runtime_pm_get(to_i915(dev));
vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
if (!IS_ERR(vma)) {
node.start = i915_ggtt_offset(vma);
Expand Down Expand Up @@ -926,6 +927,7 @@ i915_gem_gtt_pread(struct drm_device *dev,
i915_vma_unpin(vma);
}
out:
intel_runtime_pm_put(to_i915(dev));
return ret;
}

Expand Down Expand Up @@ -1060,12 +1062,9 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
ret = i915_gem_shmem_pread(dev, obj, args, file);

/* pread for non shmem backed objects */
if (ret == -EFAULT || ret == -ENODEV) {
intel_runtime_pm_get(to_i915(dev));
if (ret == -EFAULT || ret == -ENODEV)
ret = i915_gem_gtt_pread(dev, obj, args->size,
args->offset, args->data_ptr);
intel_runtime_pm_put(to_i915(dev));
}

i915_gem_object_put(obj);
mutex_unlock(&dev->struct_mutex);
Expand Down Expand Up @@ -1126,6 +1125,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
if (i915_gem_object_is_tiled(obj))
return -EFAULT;

intel_runtime_pm_get(i915);
vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
PIN_MAPPABLE | PIN_NONBLOCK);
if (!IS_ERR(vma)) {
Expand Down Expand Up @@ -1234,6 +1234,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
i915_vma_unpin(vma);
}
out:
intel_runtime_pm_put(i915);
return ret;
}

Expand Down Expand Up @@ -1466,12 +1467,12 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
* perspective, requiring manual detiling by the client.
*/
if (!i915_gem_object_has_struct_page(obj) ||
cpu_write_needs_clflush(obj)) {
ret = i915_gem_gtt_pwrite_fast(dev_priv, obj, args, file);
cpu_write_needs_clflush(obj))
/* Note that the gtt paths might fail with non-page-backed user
* pointers (e.g. gtt mappings when moving data between
* textures). Fallback to the shmem path in that case. */
}
* textures). Fallback to the shmem path in that case.
*/
ret = i915_gem_gtt_pwrite_fast(dev_priv, obj, args, file);

if (ret == -EFAULT || ret == -ENOSPC) {
if (obj->phys_handle)
Expand Down Expand Up @@ -1840,6 +1841,7 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
goto err_unpin;

/* Mark as being mmapped into userspace for later revocation */
assert_rpm_wakelock_held(dev_priv);
spin_lock(&dev_priv->mm.userfault_lock);
if (list_empty(&obj->userfault_link))
list_add(&obj->userfault_link, &dev_priv->mm.userfault_list);
Expand Down Expand Up @@ -1925,8 +1927,13 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)
/* Serialisation between user GTT access and our code depends upon
* revoking the CPU's PTE whilst the mutex is held. The next user
* pagefault then has to wait until we release the mutex.
*
* Note that RPM complicates somewhat by adding an additional
* requirement that operations to the GGTT be made holding the RPM
* wakeref.
*/
lockdep_assert_held(&i915->drm.struct_mutex);
intel_runtime_pm_get(i915);

spin_lock(&i915->mm.userfault_lock);
if (!list_empty(&obj->userfault_link)) {
Expand All @@ -1935,7 +1942,7 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)
}
spin_unlock(&i915->mm.userfault_lock);
if (!zap)
return;
goto out;

drm_vma_node_unmap(&obj->base.vma_node,
obj->base.dev->anon_inode->i_mapping);
Expand All @@ -1948,6 +1955,9 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)
* memory writes before touching registers / GSM.
*/
wmb();

out:
intel_runtime_pm_put(i915);
}

void
Expand Down Expand Up @@ -3476,7 +3486,7 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
{
struct drm_i915_private *dev_priv = to_i915(dev);
struct drm_i915_private *i915 = to_i915(dev);
struct drm_i915_gem_caching *args = data;
struct drm_i915_gem_object *obj;
enum i915_cache_level level;
Expand All @@ -3493,23 +3503,21 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
* cacheline, whereas normally such cachelines would get
* invalidated.
*/
if (!HAS_LLC(dev) && !HAS_SNOOP(dev))
if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
return -ENODEV;

level = I915_CACHE_LLC;
break;
case I915_CACHING_DISPLAY:
level = HAS_WT(dev_priv) ? I915_CACHE_WT : I915_CACHE_NONE;
level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
break;
default:
return -EINVAL;
}

intel_runtime_pm_get(dev_priv);

ret = i915_mutex_lock_interruptible(dev);
if (ret)
goto rpm_put;
return ret;

obj = i915_gem_object_lookup(file, args->handle);
if (!obj) {
Expand All @@ -3518,13 +3526,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
}

ret = i915_gem_object_set_cache_level(obj, level);

i915_gem_object_put(obj);
unlock:
mutex_unlock(&dev->struct_mutex);
rpm_put:
intel_runtime_pm_put(dev_priv);

return ret;
}

Expand Down
Loading

0 comments on commit 9c870d0

Please sign in to comment.