---
r: 329255
b: refs/heads/master
c: 6ac42f4
h: refs/heads/master
i:
  329253: e7ca2a4
  329251: 0c1ea76
  329247: 048718b
v: v3
Daniel Vetter committed Jul 25, 2012
1 parent d81d86a commit cbb7ad0
Showing 2 changed files with 21 additions and 203 deletions.
2 changes: 1 addition & 1 deletion [refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: 26b9c4a57fc3ff0ae6032548870bebfa5cd0de3d
+refs/heads/master: 6ac42f4148bc27e5ffd18a9ab0eac57f58822af4
222 changes: 20 additions & 202 deletions trunk/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -34,180 +34,6 @@
 #include "intel_drv.h"
 #include <linux/dma_remapping.h>
 
-struct change_domains {
-	uint32_t invalidate_domains;
-	uint32_t flush_domains;
-	uint32_t flush_rings;
-	uint32_t flips;
-};
-
-/*
- * Set the next domain for the specified object. This
- * may not actually perform the necessary flushing/invalidating though,
- * as that may want to be batched with other set_domain operations
- *
- * This is (we hope) the only really tricky part of gem. The goal
- * is fairly simple -- track which caches hold bits of the object
- * and make sure they remain coherent. A few concrete examples may
- * help to explain how it works. For shorthand, we use the notation
- * (read_domains, write_domain), e.g. (CPU, CPU) to indicate
- * a pair of read and write domain masks.
- *
- * Case 1: the batch buffer
- *
- *	1. Allocated
- *	2. Written by CPU
- *	3. Mapped to GTT
- *	4. Read by GPU
- *	5. Unmapped from GTT
- *	6. Freed
- *
- *	Let's take these a step at a time
- *
- *	1. Allocated
- *		Pages allocated from the kernel may still have
- *		cache contents, so we set them to (CPU, CPU) always.
- *	2. Written by CPU (using pwrite)
- *		The pwrite function calls set_domain (CPU, CPU) and
- *		this function does nothing (as nothing changes)
- *	3. Mapped to GTT
- *		This function asserts that the object is not
- *		currently in any GPU-based read or write domains
- *	4. Read by GPU
- *		i915_gem_execbuffer calls set_domain (COMMAND, 0).
- *		As write_domain is zero, this function adds in the
- *		current read domains (CPU+COMMAND, 0).
- *		flush_domains is set to CPU.
- *		invalidate_domains is set to COMMAND
- *		clflush is run to get data out of the CPU caches
- *		then i915_dev_set_domain calls i915_gem_flush to
- *		emit an MI_FLUSH and drm_agp_chipset_flush
- *	5. Unmapped from GTT
- *		i915_gem_object_unbind calls set_domain (CPU, CPU)
- *		flush_domains and invalidate_domains end up both zero
- *		so no flushing/invalidating happens
- *	6. Freed
- *		yay, done
- *
- * Case 2: The shared render buffer
- *
- *	1. Allocated
- *	2. Mapped to GTT
- *	3. Read/written by GPU
- *	4. set_domain to (CPU,CPU)
- *	5. Read/written by CPU
- *	6. Read/written by GPU
- *
- *	1. Allocated
- *		Same as last example, (CPU, CPU)
- *	2. Mapped to GTT
- *		Nothing changes (assertions find that it is not in the GPU)
- *	3. Read/written by GPU
- *		execbuffer calls set_domain (RENDER, RENDER)
- *		flush_domains gets CPU
- *		invalidate_domains gets GPU
- *		clflush (obj)
- *		MI_FLUSH and drm_agp_chipset_flush
- *	4. set_domain (CPU, CPU)
- *		flush_domains gets GPU
- *		invalidate_domains gets CPU
- *		wait_rendering (obj) to make sure all drawing is complete.
- *		This will include an MI_FLUSH to get the data from GPU
- *		to memory
- *		clflush (obj) to invalidate the CPU cache
- *		Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
- *	5. Read/written by CPU
- *		cache lines are loaded and dirtied
- *	6. Read/written by GPU
- *		Same as last GPU access
- *
- * Case 3: The constant buffer
- *
- *	1. Allocated
- *	2. Written by CPU
- *	3. Read by GPU
- *	4. Updated (written) by CPU again
- *	5. Read by GPU
- *
- *	1. Allocated
- *		(CPU, CPU)
- *	2. Written by CPU
- *		(CPU, CPU)
- *	3. Read by GPU
- *		(CPU+RENDER, 0)
- *		flush_domains = CPU
- *		invalidate_domains = RENDER
- *		clflush (obj)
- *		MI_FLUSH
- *		drm_agp_chipset_flush
- *	4. Updated (written) by CPU again
- *		(CPU, CPU)
- *		flush_domains = 0 (no previous write domain)
- *		invalidate_domains = 0 (no new read domains)
- *	5. Read by GPU
- *		(CPU+RENDER, 0)
- *		flush_domains = CPU
- *		invalidate_domains = RENDER
- *		clflush (obj)
- *		MI_FLUSH
- *		drm_agp_chipset_flush
- */
-static void
-i915_gem_object_set_to_gpu_domain(struct drm_i915_gem_object *obj,
-				  struct intel_ring_buffer *ring,
-				  struct change_domains *cd)
-{
-	uint32_t invalidate_domains = 0, flush_domains = 0;
-
-	/*
-	 * If the object isn't moving to a new write domain,
-	 * let the object stay in multiple read domains
-	 */
-	if (obj->base.pending_write_domain == 0)
-		obj->base.pending_read_domains |= obj->base.read_domains;
-
-	/*
-	 * Flush the current write domain if
-	 * the new read domains don't match. Invalidate
-	 * any read domains which differ from the old
-	 * write domain
-	 */
-	if (obj->base.write_domain &&
-	    (((obj->base.write_domain != obj->base.pending_read_domains ||
-	       obj->ring != ring)) ||
-	     (obj->fenced_gpu_access && !obj->pending_fenced_gpu_access))) {
-		flush_domains |= obj->base.write_domain;
-		invalidate_domains |=
-			obj->base.pending_read_domains & ~obj->base.write_domain;
-	}
-	/*
-	 * Invalidate any read caches which may have
-	 * stale data. That is, any new read domains.
-	 */
-	invalidate_domains |= obj->base.pending_read_domains & ~obj->base.read_domains;
-	if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU)
-		i915_gem_clflush_object(obj);
-
-	if (obj->base.pending_write_domain)
-		cd->flips |= atomic_read(&obj->pending_flip);
-
-	/* The actual obj->write_domain will be updated with
-	 * pending_write_domain after we emit the accumulated flush for all
-	 * of our domain changes in execbuffers (which clears objects'
-	 * write_domains). So if we have a current write domain that we
-	 * aren't changing, set pending_write_domain to that.
-	 */
-	if (flush_domains == 0 && obj->base.pending_write_domain == 0)
-		obj->base.pending_write_domain = obj->base.write_domain;
-
-	cd->invalidate_domains |= invalidate_domains;
-	cd->flush_domains |= flush_domains;
-	if (flush_domains & I915_GEM_GPU_DOMAINS)
-		cd->flush_rings |= intel_ring_flag(obj->ring);
-	if (invalidate_domains & I915_GEM_GPU_DOMAINS)
-		cd->flush_rings |= intel_ring_flag(ring);
-}
-
 struct eb_objects {
 	int and;
 	struct hlist_head buckets[0];
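
The comment removed above specifies the bookkeeping rules precisely; a compact worked example makes them concrete. Below is a standalone C sketch — illustrative only: toy_obj, the DOM_* bits, and the simplified update rule are inventions for this note, not driver code — that replays Case 3, step 3 (a CPU-written buffer handed to the GPU) and reproduces flush_domains = CPU, invalidate_domains = RENDER from the walkthrough:

/* Standalone model of the removed domain bookkeeping; illustrative only. */
#include <stdint.h>
#include <stdio.h>

#define DOM_CPU    (1u << 0)	/* stands in for I915_GEM_DOMAIN_CPU */
#define DOM_RENDER (1u << 1)	/* stands in for I915_GEM_DOMAIN_RENDER */

struct toy_obj {
	uint32_t read_domains;		/* caches that may hold the object */
	uint32_t write_domain;		/* cache that may hold dirty data */
	uint32_t pending_read_domains;	/* domains requested by execbuffer */
	uint32_t pending_write_domain;
};

static void set_to_gpu_domain(struct toy_obj *obj,
			      uint32_t *invalidate, uint32_t *flush)
{
	/* No new write domain: the object may stay in all old read domains. */
	if (obj->pending_write_domain == 0)
		obj->pending_read_domains |= obj->read_domains;

	/* Flush a dirty write domain that the new readers don't match. */
	if (obj->write_domain &&
	    obj->write_domain != obj->pending_read_domains)
		*flush |= obj->write_domain;

	/* Invalidate any read cache the object newly enters. */
	*invalidate |= obj->pending_read_domains & ~obj->read_domains;
}

int main(void)
{
	/* Case 3, step 3: written by the CPU, now read by the render ring. */
	struct toy_obj obj = {
		.read_domains = DOM_CPU,
		.write_domain = DOM_CPU,
		.pending_read_domains = DOM_RENDER,
	};
	uint32_t invalidate = 0, flush = 0;

	set_to_gpu_domain(&obj, &invalidate, &flush);
	/* Prints flush=0x1 (CPU: clflush + chipset flush) and
	 * invalidate=0x2 (RENDER: MI_FLUSH), matching the comment. */
	printf("flush=%#x invalidate=%#x\n",
	       (unsigned)flush, (unsigned)invalidate);
	return 0;
}

The real i915_gem_object_set_to_gpu_domain additionally handled ring changes, fenced GPU access, and flip tracking; the sketch keeps only the domain-mask rule.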
@@ -810,18 +636,6 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
 	return ret;
 }
 
-static void
-i915_gem_execbuffer_flush(struct drm_device *dev,
-			  uint32_t invalidate_domains,
-			  uint32_t flush_domains)
-{
-	if (flush_domains & I915_GEM_DOMAIN_CPU)
-		intel_gtt_chipset_flush();
-
-	if (flush_domains & I915_GEM_DOMAIN_GTT)
-		wmb();
-}
-
 static int
 i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, u32 flips)
 {
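
The helper deleted in this hunk encoded one rule: objects dirty in the CPU domain need a chipset flush so the GPU sees the data through the GTT, while GTT-domain writes (through the write-combined mapping) only need the CPU's write buffers drained. A minimal userspace analogue — chipset_flush() and write_barrier() are stand-ins for intel_gtt_chipset_flush() and wmb(); everything else here is invented for illustration:

/* Userspace model of the removed flush helper; illustrative only. */
#include <stdint.h>

#define DOM_CPU (1u << 0)
#define DOM_GTT (1u << 1)

static void chipset_flush(void) { /* stand-in for intel_gtt_chipset_flush() */ }
static void write_barrier(void) { __sync_synchronize(); /* stand-in for wmb() */ }

static void flush_for_gpu(uint32_t flush_domains)
{
	/* Dirty CPU cachelines must reach memory before the GPU reads them. */
	if (flush_domains & DOM_CPU)
		chipset_flush();

	/* Write-combined GTT writes only need a write memory barrier. */
	if (flush_domains & DOM_GTT)
		write_barrier();
}

int main(void)
{
	flush_for_gpu(DOM_CPU | DOM_GTT);	/* exercise both cases */
	return 0;
}

Note that the deleted helper also took an invalidate_domains argument it never used, which is part of why the commit could fold it away.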
@@ -854,37 +668,41 @@ i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, u32 flips)
 	return 0;
 }
 
-
 static int
 i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
 				struct list_head *objects)
 {
 	struct drm_i915_gem_object *obj;
-	struct change_domains cd;
+	uint32_t flush_domains = 0;
+	uint32_t flips = 0;
 	int ret;
 
-	memset(&cd, 0, sizeof(cd));
-	list_for_each_entry(obj, objects, exec_list)
-		i915_gem_object_set_to_gpu_domain(obj, ring, &cd);
-
-	if (cd.invalidate_domains | cd.flush_domains) {
-		i915_gem_execbuffer_flush(ring->dev,
-					  cd.invalidate_domains,
-					  cd.flush_domains);
-	}
-
-	if (cd.flips) {
-		ret = i915_gem_execbuffer_wait_for_flips(ring, cd.flips);
+	list_for_each_entry(obj, objects, exec_list) {
+		ret = i915_gem_object_sync(obj, ring);
 		if (ret)
 			return ret;
+
+		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
+			i915_gem_clflush_object(obj);
+
+		if (obj->base.pending_write_domain)
+			flips |= atomic_read(&obj->pending_flip);
+
+		flush_domains |= obj->base.write_domain;
 	}
 
-	list_for_each_entry(obj, objects, exec_list) {
-		ret = i915_gem_object_sync(obj, ring);
+	if (flips) {
+		ret = i915_gem_execbuffer_wait_for_flips(ring, flips);
 		if (ret)
 			return ret;
 	}
 
+	if (flush_domains & I915_GEM_DOMAIN_CPU)
+		intel_gtt_chipset_flush();
+
+	if (flush_domains & I915_GEM_DOMAIN_GTT)
+		wmb();
+
 	/* Unconditionally invalidate gpu caches and ensure that we do flush
 	 * any residual writes from the previous batch.
 	 */
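
The hunk above is the heart of the commit: the precise per-object domain tracking collapses into one gather loop (clflush anything CPU-dirty, accumulate flush_domains and pending flips) followed by a single global flush, and the retained comment indicates the function then goes on to invalidate all GPU caches unconditionally. A standalone sketch of that two-phase shape — the toy types and printouts are illustrative only, not driver code:

/* Standalone model of the simplified move-to-GPU flow; illustrative only. */
#include <stdint.h>
#include <stdio.h>

#define DOM_CPU (1u << 0)
#define DOM_GTT (1u << 1)

struct toy_obj {
	uint32_t write_domain;	/* which cache holds dirty data, if any */
	uint32_t pending_flip;	/* outstanding pageflip bits */
};

int main(void)
{
	struct toy_obj objs[] = {
		{ .write_domain = DOM_CPU },	/* needs clflush */
		{ .write_domain = DOM_GTT },	/* needs wmb() */
		{ .pending_flip = 1 },		/* must wait for flip */
	};
	uint32_t flush_domains = 0, flips = 0;
	unsigned i;

	/* Phase 1: one pass gathers all the work. */
	for (i = 0; i < sizeof(objs) / sizeof(objs[0]); i++) {
		if (objs[i].write_domain & DOM_CPU)
			puts("clflush object");	/* per-object CPU flush */
		flips |= objs[i].pending_flip;
		flush_domains |= objs[i].write_domain;
	}

	/* Phase 2: global waits and flushes happen exactly once. */
	if (flips)
		puts("wait for pending flips");
	if (flush_domains & DOM_CPU)
		puts("chipset flush");
	if (flush_domains & DOM_GTT)
		puts("wmb()");
	puts("invalidate all GPU caches unconditionally");
	return 0;
}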
