Skip to content

Commit

Permalink
drm/i915: Store a direct lookup from object handle to vma
Browse files Browse the repository at this point in the history
The advent of full-ppgtt lead to an extra indirection between the object
and its binding. That extra indirection has a noticeable impact on how
fast we can convert from the user handles to our internal vma for
execbuffer. In order to bypass the extra indirection, we use a
resizable hashtable to jump from the object to the per-ctx vma.
rhashtable was considered but we don't need the online resizing feature
and the extra complexity proved to undermine its usefulness. Instead, we
simply reallocate the hastable on demand in a background task and
serialize it before iterating.

In non-full-ppgtt modes, multiple files and multiple contexts can share
the same vma. This leads to having multiple possible handle->vma links,
so we only use the first to establish the fast path. The majority of
buffers are not shared and so we should still be able to realise
speedups with multiple clients.

v2: Prettier names, more magic.
v3: Many style tweaks, most notably hiding the misuse of execobj[].rsvd2

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
  • Loading branch information
Chris Wilson committed Jun 16, 2017
1 parent 4c9c0d0 commit 4ff4b44
Show file tree
Hide file tree
Showing 11 changed files with 322 additions and 112 deletions.
6 changes: 6 additions & 0 deletions drivers/gpu/drm/i915/i915_debugfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -1998,6 +1998,12 @@ static int i915_context_status(struct seq_file *m, void *unused)
seq_putc(m, '\n');
}

seq_printf(m,
"\tvma hashtable size=%u (actual %lu), count=%u\n",
ctx->vma_lut.ht_size,
BIT(ctx->vma_lut.ht_bits),
ctx->vma_lut.ht_count);

seq_putc(m, '\n');
}

Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/i915/i915_drv.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
#include <linux/i2c.h>
#include <linux/i2c-algo-bit.h>
#include <linux/backlight.h>
#include <linux/hashtable.h>
#include <linux/hash.h>
#include <linux/intel-iommu.h>
#include <linux/kref.h>
#include <linux/pm_qos.h>
Expand Down
5 changes: 4 additions & 1 deletion drivers/gpu/drm/i915/i915_gem.c
Original file line number Diff line number Diff line change
Expand Up @@ -3261,6 +3261,10 @@ void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
if (vma->vm->file == fpriv)
i915_vma_close(vma);

vma = obj->vma_hashed;
if (vma && vma->ctx->file_priv == fpriv)
i915_vma_unlink_ctx(vma);

if (i915_gem_object_is_active(obj) &&
!i915_gem_object_has_active_reference(obj)) {
i915_gem_object_set_active_reference(obj);
Expand Down Expand Up @@ -4254,7 +4258,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,

INIT_LIST_HEAD(&obj->global_link);
INIT_LIST_HEAD(&obj->userfault_link);
INIT_LIST_HEAD(&obj->obj_exec_link);
INIT_LIST_HEAD(&obj->vma_list);
INIT_LIST_HEAD(&obj->batch_pool_link);

Expand Down
82 changes: 81 additions & 1 deletion drivers/gpu/drm/i915/i915_gem_context.c
Original file line number Diff line number Diff line change
Expand Up @@ -85,13 +85,78 @@
*
*/

#include <linux/log2.h>
#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_trace.h"

#define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1

/* Initial size (as log2) to preallocate the handle->object hashtable */
#define VMA_HT_BITS 2u /* 4 x 2 pointers, 64 bytes minimum */

static void resize_vma_ht(struct work_struct *work)
{
struct i915_gem_context_vma_lut *lut =
container_of(work, typeof(*lut), resize);
unsigned int bits, new_bits, size, i;
struct hlist_head *new_ht;

GEM_BUG_ON(!(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS));

bits = 1 + ilog2(4*lut->ht_count/3 + 1);
new_bits = min_t(unsigned int,
max(bits, VMA_HT_BITS),
sizeof(unsigned int) * BITS_PER_BYTE - 1);
if (new_bits == lut->ht_bits)
goto out;

new_ht = kzalloc(sizeof(*new_ht)<<new_bits, GFP_KERNEL | __GFP_NOWARN);
if (!new_ht)
new_ht = vzalloc(sizeof(*new_ht)<<new_bits);
if (!new_ht)
/* Pretend resize succeeded and stop calling us for a bit! */
goto out;

size = BIT(lut->ht_bits);
for (i = 0; i < size; i++) {
struct i915_vma *vma;
struct hlist_node *tmp;

hlist_for_each_entry_safe(vma, tmp, &lut->ht[i], ctx_node)
hlist_add_head(&vma->ctx_node,
&new_ht[hash_32(vma->ctx_handle,
new_bits)]);
}
kvfree(lut->ht);
lut->ht = new_ht;
lut->ht_bits = new_bits;
out:
smp_store_release(&lut->ht_size, BIT(bits));
GEM_BUG_ON(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS);
}

static void vma_lut_free(struct i915_gem_context *ctx)
{
struct i915_gem_context_vma_lut *lut = &ctx->vma_lut;
unsigned int i, size;

if (lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS)
cancel_work_sync(&lut->resize);

size = BIT(lut->ht_bits);
for (i = 0; i < size; i++) {
struct i915_vma *vma;

hlist_for_each_entry(vma, &lut->ht[i], ctx_node) {
vma->obj->vma_hashed = NULL;
vma->ctx = NULL;
}
}
kvfree(lut->ht);
}

void i915_gem_context_free(struct kref *ctx_ref)
{
struct i915_gem_context *ctx = container_of(ctx_ref, typeof(*ctx), ref);
Expand All @@ -101,6 +166,7 @@ void i915_gem_context_free(struct kref *ctx_ref)
trace_i915_context_free(ctx);
GEM_BUG_ON(!i915_gem_context_is_closed(ctx));

vma_lut_free(ctx);
i915_ppgtt_put(ctx->ppgtt);

for (i = 0; i < I915_NUM_ENGINES; i++) {
Expand All @@ -118,6 +184,7 @@ void i915_gem_context_free(struct kref *ctx_ref)

kfree(ctx->name);
put_pid(ctx->pid);

list_del(&ctx->link);

ida_simple_remove(&ctx->i915->context_hw_ida, ctx->hw_id);
Expand Down Expand Up @@ -201,13 +268,24 @@ __create_hw_context(struct drm_i915_private *dev_priv,
ctx->i915 = dev_priv;
ctx->priority = I915_PRIORITY_NORMAL;

ctx->vma_lut.ht_bits = VMA_HT_BITS;
ctx->vma_lut.ht_size = BIT(VMA_HT_BITS);
BUILD_BUG_ON(BIT(VMA_HT_BITS) == I915_CTX_RESIZE_IN_PROGRESS);
ctx->vma_lut.ht = kcalloc(ctx->vma_lut.ht_size,
sizeof(*ctx->vma_lut.ht),
GFP_KERNEL);
if (!ctx->vma_lut.ht)
goto err_out;

INIT_WORK(&ctx->vma_lut.resize, resize_vma_ht);

/* Default context will never have a file_priv */
ret = DEFAULT_CONTEXT_HANDLE;
if (file_priv) {
ret = idr_alloc(&file_priv->context_idr, ctx,
DEFAULT_CONTEXT_HANDLE, 0, GFP_KERNEL);
if (ret < 0)
goto err_out;
goto err_lut;
}
ctx->user_handle = ret;

Expand Down Expand Up @@ -248,6 +326,8 @@ __create_hw_context(struct drm_i915_private *dev_priv,
err_pid:
put_pid(ctx->pid);
idr_remove(&file_priv->context_idr, ctx->user_handle);
err_lut:
kvfree(ctx->vma_lut.ht);
err_out:
context_close(ctx);
return ERR_PTR(ret);
Expand Down
26 changes: 26 additions & 0 deletions drivers/gpu/drm/i915/i915_gem_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,32 @@ struct i915_gem_context {
/** ggtt_offset_bias: placement restriction for context objects */
u32 ggtt_offset_bias;

struct i915_gem_context_vma_lut {
/** ht_size: last request size to allocate the hashtable for. */
unsigned int ht_size;
#define I915_CTX_RESIZE_IN_PROGRESS BIT(0)
/** ht_bits: real log2(size) of hashtable. */
unsigned int ht_bits;
/** ht_count: current number of entries inside the hashtable */
unsigned int ht_count;

/** ht: the array of buckets comprising the simple hashtable */
struct hlist_head *ht;

/**
* resize: After an execbuf completes, we check the load factor
* of the hashtable. If the hashtable is too full, or too empty,
* we schedule a task to resize the hashtable. During the
* resize, the entries are moved between different buckets and
* so we cannot simultaneously read the hashtable as it is
* being resized (unlike rhashtable). Therefore we treat the
* active work as a strong barrier, pausing a subsequent
* execbuf to wait for the resize worker to complete, if
* required.
*/
struct work_struct resize;
} vma_lut;

/** engine: per-engine logical HW state */
struct intel_context {
struct i915_vma *state;
Expand Down
Loading

0 comments on commit 4ff4b44

Please sign in to comment.