drm/i915: Make request allocation caches global
As kmem_caches share the same properties (size, allocation/free
behaviour) for all potential devices, we can use global caches. While
this potentially has worse fragmentation behaviour (one can argue that
different devices would have different activity lifetimes, but one can
also argue that activity is temporal across the system), it is the
default behaviour of the system at large to amalgamate matching caches.

The benefit for us is much reduced pointer dancing along the frequent
allocation paths.
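
To make the win concrete, a condensed before/after of the free path
(illustration only; the names are those used in the diff below):

    /* before: two dependent pointer loads to reach the device cache */
    kmem_cache_free(rq->i915->requests, rq);

    /* after: one file-scope global, no pointer chasing */
    static struct i915_global_request {
            struct kmem_cache *slab_requests;
            struct kmem_cache *slab_dependencies;
    } global;

    kmem_cache_free(global.slab_requests, rq);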

v2: Defer shrinking until after a global grace period, to future-proof
against multiple consumers of the slab caches, similar to the current
strategy for avoiding shrinking too early.
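
The deferral itself is a small epoch trick (condensed from the
i915_globals.c added below): parking queues the shrink behind an RCU
grace period via queue_rcu_work(), and any unpark bumps the epoch so
that a stale, already-queued shrink becomes a no-op:

    void i915_globals_park(void)
    {
            struct park_work *wrk;

            if (!atomic_dec_and_test(&active))
                    return;

            wrk = kmalloc(sizeof(*wrk), GFP_KERNEL);
            if (!wrk)
                    return;

            /* the shrink runs no sooner than one grace period from now */
            wrk->epoch = atomic_inc_return(&epoch);
            INIT_RCU_WORK(&wrk->work, __i915_globals_park);
            queue_rcu_work(system_wq, &wrk->work);
    }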

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190228102035.5857-1-chris@chris-wilson.co.uk
Chris Wilson committed Feb 28, 2019
1 parent bd2be14 commit 32eb6bc
Showing 19 changed files with 312 additions and 149 deletions.
1 change: 1 addition & 0 deletions drivers/gpu/drm/i915/Makefile
@@ -77,6 +77,7 @@ i915-y += \
i915_gem_tiling.o \
i915_gem_userptr.o \
i915_gemfs.o \
i915_globals.o \
i915_query.o \
i915_request.o \
i915_scheduler.o \
7 changes: 6 additions & 1 deletion drivers/gpu/drm/i915/i915_active.c
@@ -294,7 +294,12 @@ int __init i915_global_active_init(void)
return 0;
}

void __exit i915_global_active_exit(void)
void i915_global_active_shrink(void)
{
kmem_cache_shrink(global.slab_cache);
}

void i915_global_active_exit(void)
{
kmem_cache_destroy(global.slab_cache);
}
1 change: 1 addition & 0 deletions drivers/gpu/drm/i915/i915_active.h
@@ -420,6 +420,7 @@ static inline void i915_active_fini(struct i915_active *ref) { }
#endif

int i915_global_active_init(void);
void i915_global_active_shrink(void);
void i915_global_active_exit(void);

#endif /* _I915_ACTIVE_H_ */
3 changes: 0 additions & 3 deletions drivers/gpu/drm/i915/i915_drv.h
@@ -1473,9 +1473,6 @@ struct drm_i915_private {
struct kmem_cache *objects;
struct kmem_cache *vmas;
struct kmem_cache *luts;
struct kmem_cache *requests;
struct kmem_cache *dependencies;
struct kmem_cache *priorities;

const struct intel_device_info __info; /* Use INTEL_INFO() to access. */
struct intel_runtime_info __runtime; /* Use RUNTIME_INFO() to access. */
34 changes: 5 additions & 29 deletions drivers/gpu/drm/i915/i915_gem.c
@@ -42,6 +42,7 @@
#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_gemfs.h"
#include "i915_globals.h"
#include "i915_reset.h"
#include "i915_trace.h"
#include "i915_vgpu.h"
@@ -187,6 +188,8 @@ void i915_gem_unpark(struct drm_i915_private *i915)
if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */
i915->gt.epoch = 1;

i915_globals_unpark();

intel_enable_gt_powersave(i915);
i915_update_gfx_val(i915);
if (INTEL_GEN(i915) >= 6)
@@ -2892,12 +2895,11 @@ static void shrink_caches(struct drm_i915_private *i915)
* filled slabs to prioritise allocating from the mostly full slabs,
* with the aim of reducing fragmentation.
*/
kmem_cache_shrink(i915->priorities);
kmem_cache_shrink(i915->dependencies);
kmem_cache_shrink(i915->requests);
kmem_cache_shrink(i915->luts);
kmem_cache_shrink(i915->vmas);
kmem_cache_shrink(i915->objects);

i915_globals_park();
}

struct sleep_rcu_work {
@@ -5237,23 +5239,6 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)
if (!dev_priv->luts)
goto err_vmas;

dev_priv->requests = KMEM_CACHE(i915_request,
SLAB_HWCACHE_ALIGN |
SLAB_RECLAIM_ACCOUNT |
SLAB_TYPESAFE_BY_RCU);
if (!dev_priv->requests)
goto err_luts;

dev_priv->dependencies = KMEM_CACHE(i915_dependency,
SLAB_HWCACHE_ALIGN |
SLAB_RECLAIM_ACCOUNT);
if (!dev_priv->dependencies)
goto err_requests;

dev_priv->priorities = KMEM_CACHE(i915_priolist, SLAB_HWCACHE_ALIGN);
if (!dev_priv->priorities)
goto err_dependencies;

INIT_LIST_HEAD(&dev_priv->gt.active_rings);
INIT_LIST_HEAD(&dev_priv->gt.closed_vma);

@@ -5278,12 +5263,6 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)

return 0;

err_dependencies:
kmem_cache_destroy(dev_priv->dependencies);
err_requests:
kmem_cache_destroy(dev_priv->requests);
err_luts:
kmem_cache_destroy(dev_priv->luts);
err_vmas:
kmem_cache_destroy(dev_priv->vmas);
err_objects:
@@ -5301,9 +5280,6 @@ void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)

cleanup_srcu_struct(&dev_priv->gpu_error.reset_backoff_srcu);

kmem_cache_destroy(dev_priv->priorities);
kmem_cache_destroy(dev_priv->dependencies);
kmem_cache_destroy(dev_priv->requests);
kmem_cache_destroy(dev_priv->luts);
kmem_cache_destroy(dev_priv->vmas);
kmem_cache_destroy(dev_priv->objects);
113 changes: 113 additions & 0 deletions drivers/gpu/drm/i915/i915_globals.c
@@ -0,0 +1,113 @@
/*
* SPDX-License-Identifier: MIT
*
* Copyright © 2019 Intel Corporation
*/

#include <linux/slab.h>
#include <linux/workqueue.h>

#include "i915_active.h"
#include "i915_globals.h"
#include "i915_request.h"
#include "i915_scheduler.h"

int __init i915_globals_init(void)
{
int err;

err = i915_global_active_init();
if (err)
return err;

err = i915_global_request_init();
if (err)
goto err_active;

err = i915_global_scheduler_init();
if (err)
goto err_request;

return 0;

err_request:
i915_global_request_exit();
err_active:
i915_global_active_exit();
return err;
}

static void i915_globals_shrink(void)
{
/*
* kmem_cache_shrink() discards empty slabs and reorders partially
* filled slabs to prioritise allocating from the mostly full slabs,
* with the aim of reducing fragmentation.
*/
i915_global_active_shrink();
i915_global_request_shrink();
i915_global_scheduler_shrink();
}

static atomic_t active;
static atomic_t epoch;
struct park_work {
struct rcu_work work;
int epoch;
};

static void __i915_globals_park(struct work_struct *work)
{
struct park_work *wrk = container_of(work, typeof(*wrk), work.work);

/* Confirm nothing woke up in the last grace period */
if (wrk->epoch == atomic_read(&epoch))
i915_globals_shrink();

kfree(wrk);
}

void i915_globals_park(void)
{
struct park_work *wrk;

/*
* Defer shrinking the global slab caches (and other work) until
* after a RCU grace period has completed with no activity. This
* is to try and reduce the latency impact on the consumers caused
* by us shrinking the caches the same time as they are trying to
* allocate, with the assumption being that if we idle long enough
* for an RCU grace period to elapse since the last use, it is likely
* to be longer until we need the caches again.
*/
if (!atomic_dec_and_test(&active))
return;

wrk = kmalloc(sizeof(*wrk), GFP_KERNEL);
if (!wrk)
return;

wrk->epoch = atomic_inc_return(&epoch);
INIT_RCU_WORK(&wrk->work, __i915_globals_park);
queue_rcu_work(system_wq, &wrk->work);
}

void i915_globals_unpark(void)
{
atomic_inc(&epoch);
atomic_inc(&active);
}

void __exit i915_globals_exit(void)
{
/* Flush any residual park_work */
rcu_barrier();
flush_scheduled_work();

i915_global_scheduler_exit();
i915_global_request_exit();
i915_global_active_exit();

/* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */
rcu_barrier();
}
15 changes: 15 additions & 0 deletions drivers/gpu/drm/i915/i915_globals.h
@@ -0,0 +1,15 @@
/*
* SPDX-License-Identifier: MIT
*
* Copyright © 2019 Intel Corporation
*/

#ifndef _I915_GLOBALS_H_
#define _I915_GLOBALS_H_

int i915_globals_init(void);
void i915_globals_park(void);
void i915_globals_unpark(void);
void i915_globals_exit(void);

#endif /* _I915_GLOBALS_H_ */
8 changes: 5 additions & 3 deletions drivers/gpu/drm/i915/i915_pci.c
@@ -28,8 +28,8 @@

#include <drm/drm_drv.h>

#include "i915_active.h"
#include "i915_drv.h"
#include "i915_globals.h"
#include "i915_selftest.h"

#define PLATFORM(x) .platform = (x), .platform_mask = BIT(x)
@@ -802,7 +802,9 @@ static int __init i915_init(void)
bool use_kms = true;
int err;

i915_global_active_init();
err = i915_globals_init();
if (err)
return err;

err = i915_mock_selftests();
if (err)
@@ -835,7 +837,7 @@ static void __exit i915_exit(void)
return;

pci_unregister_driver(&i915_pci_driver);
i915_global_active_exit();
i915_globals_exit();
}

module_init(i915_init);
53 changes: 45 additions & 8 deletions drivers/gpu/drm/i915/i915_request.c
@@ -32,6 +32,11 @@
#include "i915_active.h"
#include "i915_reset.h"

static struct i915_global_request {
struct kmem_cache *slab_requests;
struct kmem_cache *slab_dependencies;
} global;

static const char *i915_fence_get_driver_name(struct dma_fence *fence)
{
return "i915";
@@ -86,7 +91,7 @@ static void i915_fence_release(struct dma_fence *fence)
*/
i915_sw_fence_fini(&rq->submit);

kmem_cache_free(rq->i915->requests, rq);
kmem_cache_free(global.slab_requests, rq);
}

const struct dma_fence_ops i915_fence_ops = {
@@ -292,7 +297,7 @@ static void i915_request_retire(struct i915_request *request)

unreserve_gt(request->i915);

i915_sched_node_fini(request->i915, &request->sched);
i915_sched_node_fini(&request->sched);
i915_request_put(request);
}

@@ -491,7 +496,7 @@ i915_request_alloc_slow(struct intel_context *ce)
ring_retire_requests(ring);

out:
return kmem_cache_alloc(ce->gem_context->i915->requests, GFP_KERNEL);
return kmem_cache_alloc(global.slab_requests, GFP_KERNEL);
}

static int add_timeline_barrier(struct i915_request *rq)
@@ -579,7 +584,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
*
* Do not use kmem_cache_zalloc() here!
*/
rq = kmem_cache_alloc(i915->requests,
rq = kmem_cache_alloc(global.slab_requests,
GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
if (unlikely(!rq)) {
rq = i915_request_alloc_slow(ce);
@@ -666,7 +671,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
GEM_BUG_ON(!list_empty(&rq->sched.signalers_list));
GEM_BUG_ON(!list_empty(&rq->sched.waiters_list));

kmem_cache_free(i915->requests, rq);
kmem_cache_free(global.slab_requests, rq);
err_unreserve:
unreserve_gt(i915);
intel_context_unpin(ce);
@@ -685,9 +690,7 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from)
return 0;

if (to->engine->schedule) {
ret = i915_sched_node_add_dependency(to->i915,
&to->sched,
&from->sched);
ret = i915_sched_node_add_dependency(&to->sched, &from->sched);
if (ret < 0)
return ret;
}
@@ -1175,3 +1178,37 @@ void i915_retire_requests(struct drm_i915_private *i915)
#include "selftests/mock_request.c"
#include "selftests/i915_request.c"
#endif

int __init i915_global_request_init(void)
{
global.slab_requests = KMEM_CACHE(i915_request,
SLAB_HWCACHE_ALIGN |
SLAB_RECLAIM_ACCOUNT |
SLAB_TYPESAFE_BY_RCU);
if (!global.slab_requests)
return -ENOMEM;

global.slab_dependencies = KMEM_CACHE(i915_dependency,
SLAB_HWCACHE_ALIGN |
SLAB_RECLAIM_ACCOUNT);
if (!global.slab_dependencies)
goto err_requests;

return 0;

err_requests:
kmem_cache_destroy(global.slab_requests);
return -ENOMEM;
}

void i915_global_request_shrink(void)
{
kmem_cache_shrink(global.slab_dependencies);
kmem_cache_shrink(global.slab_requests);
}

void i915_global_request_exit(void)
{
kmem_cache_destroy(global.slab_dependencies);
kmem_cache_destroy(global.slab_requests);
}