Skip to content

Commit

Permalink
drm/i915: Change GEM throttling to be 20ms like the comment says.
Browse files Browse the repository at this point in the history
keithp didn't like the original 20ms plan because a cooperative client could
be starved by an uncooperative client.  There may even have been problems
with cooperative clients versus cooperative clients.  So keithp changed
throttle to just wait for the second to last seqno emitted by that client.
It worked well, until we started getting more round-trips to the server
due to DRI2 -- the server throttles in BlockHandler, and so if you did more
than one round trip after finishing your frame, you'd end up unintentionally
syncing to the swap.

Fix this by keeping track of the client's requests, so the client can wait
when it has an outstanding request over 20ms old.  This should have
non-starving behavior, good behavior in the presence of restarts, and less
waiting.  Improves high-settings openarena performance on my GM45 by 50%.

Signed-off-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
  • Loading branch information
Eric Anholt committed Jun 4, 2009
1 parent 1fd1c62 commit b962442
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 19 deletions.
4 changes: 2 additions & 2 deletions drivers/gpu/drm/i915/i915_dma.c
Original file line number Diff line number Diff line change
Expand Up @@ -1273,8 +1273,7 @@ int i915_driver_open(struct drm_device *dev, struct drm_file *file_priv)

file_priv->driver_priv = i915_file_priv;

i915_file_priv->mm.last_gem_seqno = 0;
i915_file_priv->mm.last_gem_throttle_seqno = 0;
INIT_LIST_HEAD(&i915_file_priv->mm.request_list);

return 0;
}
Expand Down Expand Up @@ -1311,6 +1310,7 @@ void i915_driver_lastclose(struct drm_device * dev)
void i915_driver_preclose(struct drm_device * dev, struct drm_file *file_priv)
{
drm_i915_private_t *dev_priv = dev->dev_private;
i915_gem_release(dev, file_priv);
if (!drm_core_check_feature(dev, DRIVER_MODESET))
i915_mem_release(dev, file_priv, dev_priv->agp_heap);
}
Expand Down
7 changes: 5 additions & 2 deletions drivers/gpu/drm/i915/i915_drv.h
Original file line number Diff line number Diff line change
Expand Up @@ -498,13 +498,16 @@ struct drm_i915_gem_request {
/** Time at which this request was emitted, in jiffies. */
unsigned long emitted_jiffies;

/** global list entry for this request */
struct list_head list;

/** file_priv list entry for this request */
struct list_head client_list;
};

struct drm_i915_file_private {
struct {
uint32_t last_gem_seqno;
uint32_t last_gem_throttle_seqno;
struct list_head request_list;
} mm;
};

Expand Down
69 changes: 54 additions & 15 deletions drivers/gpu/drm/i915/i915_gem.c
Original file line number Diff line number Diff line change
Expand Up @@ -1481,14 +1481,19 @@ i915_gem_object_move_to_inactive(struct drm_gem_object *obj)
* Returned sequence numbers are nonzero on success.
*/
static uint32_t
i915_add_request(struct drm_device *dev, uint32_t flush_domains)
i915_add_request(struct drm_device *dev, struct drm_file *file_priv,
uint32_t flush_domains)
{
drm_i915_private_t *dev_priv = dev->dev_private;
struct drm_i915_file_private *i915_file_priv = NULL;
struct drm_i915_gem_request *request;
uint32_t seqno;
int was_empty;
RING_LOCALS;

if (file_priv != NULL)
i915_file_priv = file_priv->driver_priv;

request = drm_calloc(1, sizeof(*request), DRM_MEM_DRIVER);
if (request == NULL)
return 0;
Expand All @@ -1515,6 +1520,12 @@ i915_add_request(struct drm_device *dev, uint32_t flush_domains)
request->emitted_jiffies = jiffies;
was_empty = list_empty(&dev_priv->mm.request_list);
list_add_tail(&request->list, &dev_priv->mm.request_list);
if (i915_file_priv) {
list_add_tail(&request->client_list,
&i915_file_priv->mm.request_list);
} else {
INIT_LIST_HEAD(&request->client_list);
}

/* Associate any objects on the flushing list matching the write
* domain we're flushing with our flush.
Expand Down Expand Up @@ -1664,6 +1675,7 @@ i915_gem_retire_requests(struct drm_device *dev)
i915_gem_retire_request(dev, request);

list_del(&request->list);
list_del(&request->client_list);
drm_free(request, sizeof(*request), DRM_MEM_DRIVER);
} else
break;
Expand Down Expand Up @@ -1977,7 +1989,7 @@ i915_gem_evict_something(struct drm_device *dev)
i915_gem_flush(dev,
obj->write_domain,
obj->write_domain);
i915_add_request(dev, obj->write_domain);
i915_add_request(dev, NULL, obj->write_domain);

obj = NULL;
continue;
Expand Down Expand Up @@ -2248,7 +2260,7 @@ i915_gem_object_get_fence_reg(struct drm_gem_object *obj, bool write)
i915_gem_flush(dev,
I915_GEM_GPU_DOMAINS,
I915_GEM_GPU_DOMAINS);
seqno = i915_add_request(dev,
seqno = i915_add_request(dev, NULL,
I915_GEM_GPU_DOMAINS);
if (seqno == 0)
return -ENOMEM;
Expand Down Expand Up @@ -2452,7 +2464,7 @@ i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj)

/* Queue the GPU write cache flushing we need. */
i915_gem_flush(dev, 0, obj->write_domain);
seqno = i915_add_request(dev, obj->write_domain);
seqno = i915_add_request(dev, NULL, obj->write_domain);
obj->write_domain = 0;
i915_gem_object_move_to_active(obj, seqno);
}
Expand Down Expand Up @@ -3089,6 +3101,10 @@ i915_dispatch_gem_execbuffer(struct drm_device *dev,
/* Throttle our rendering by waiting until the ring has completed our requests
* emitted over 20 msec ago.
*
* Note that if we were to use the current jiffies each time around the loop,
* we wouldn't escape the function with any frames outstanding if the time to
* render a frame was over 20ms.
*
* This should get us reasonable parallelism between CPU and GPU but also
* relatively low latency when blocking on a particular request to finish.
*/
Expand All @@ -3097,15 +3113,25 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file_priv)
{
struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
int ret = 0;
uint32_t seqno;
unsigned long recent_enough = jiffies - msecs_to_jiffies(20);

mutex_lock(&dev->struct_mutex);
seqno = i915_file_priv->mm.last_gem_throttle_seqno;
i915_file_priv->mm.last_gem_throttle_seqno =
i915_file_priv->mm.last_gem_seqno;
if (seqno)
ret = i915_wait_request(dev, seqno);
while (!list_empty(&i915_file_priv->mm.request_list)) {
struct drm_i915_gem_request *request;

request = list_first_entry(&i915_file_priv->mm.request_list,
struct drm_i915_gem_request,
client_list);

if (time_after_eq(request->emitted_jiffies, recent_enough))
break;

ret = i915_wait_request(dev, request->seqno);
if (ret != 0)
break;
}
mutex_unlock(&dev->struct_mutex);

return ret;
}

Expand Down Expand Up @@ -3187,7 +3213,6 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
struct drm_file *file_priv)
{
drm_i915_private_t *dev_priv = dev->dev_private;
struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
struct drm_i915_gem_execbuffer *args = data;
struct drm_i915_gem_exec_object *exec_list = NULL;
struct drm_gem_object **object_list = NULL;
Expand Down Expand Up @@ -3363,7 +3388,8 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
dev->invalidate_domains,
dev->flush_domains);
if (dev->flush_domains)
(void)i915_add_request(dev, dev->flush_domains);
(void)i915_add_request(dev, file_priv,
dev->flush_domains);
}

for (i = 0; i < args->buffer_count; i++) {
Expand Down Expand Up @@ -3412,9 +3438,8 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
* *some* interrupts representing completion of buffers that we can
* wait on when trying to clear up gtt space).
*/
seqno = i915_add_request(dev, flush_domains);
seqno = i915_add_request(dev, file_priv, flush_domains);
BUG_ON(seqno == 0);
i915_file_priv->mm.last_gem_seqno = seqno;
for (i = 0; i < args->buffer_count; i++) {
struct drm_gem_object *obj = object_list[i];

Expand Down Expand Up @@ -3802,7 +3827,7 @@ i915_gem_idle(struct drm_device *dev)
*/
i915_gem_flush(dev, ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT),
~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT));
seqno = i915_add_request(dev, ~I915_GEM_DOMAIN_CPU);
seqno = i915_add_request(dev, NULL, ~I915_GEM_DOMAIN_CPU);

if (seqno == 0) {
mutex_unlock(&dev->struct_mutex);
Expand Down Expand Up @@ -4352,3 +4377,17 @@ i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
drm_agp_chipset_flush(dev);
return 0;
}

void i915_gem_release(struct drm_device * dev, struct drm_file *file_priv)
{
struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;

/* Clean up our request list when the client is going away, so that
* later retire_requests won't dereference our soon-to-be-gone
* file_priv.
*/
mutex_lock(&dev->struct_mutex);
while (!list_empty(&i915_file_priv->mm.request_list))
list_del_init(i915_file_priv->mm.request_list.next);
mutex_unlock(&dev->struct_mutex);
}

0 comments on commit b962442

Please sign in to comment.