diff options
author | Daniel Vetter <daniel.vetter@ffwll.ch> | 2011-02-25 23:40:27 +0100 |
---|---|---|
committer | Daniel Vetter <daniel.vetter@ffwll.ch> | 2011-02-26 21:58:17 +0100 |
commit | 5be73ac77859278d499070e9c20364b4ee61ce5f (patch) | |
tree | 7e527e156ac6c7bb74bc6901fd2f968fe993af3d | |
parent | bef5a135aac5495035e091fe5f7e451c4d103a54 (diff) |
i915g: implement cache flushing
With an extremely dumb strategy. But it's the same i915c employs.
Also improve the hw_atom code slightly by statically specifying the
required batch space. For extremely variable stuff (shaders, constants)
it would probably be better to add a new parameter to the hw_atom->update
function.
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
-rw-r--r-- | src/gallium/drivers/i915/i915_blit.c | 4 | ||||
-rw-r--r-- | src/gallium/drivers/i915/i915_context.c | 1 | ||||
-rw-r--r-- | src/gallium/drivers/i915/i915_context.h | 13 | ||||
-rw-r--r-- | src/gallium/drivers/i915/i915_flush.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/i915/i915_state_emit.c | 47 | ||||
-rw-r--r-- | src/gallium/drivers/i915/i915_state_static.c | 3 |
6 files changed, 62 insertions, 8 deletions
diff --git a/src/gallium/drivers/i915/i915_blit.c b/src/gallium/drivers/i915/i915_blit.c index 9a390e5134..f885417f8e 100644 --- a/src/gallium/drivers/i915/i915_blit.c +++ b/src/gallium/drivers/i915/i915_blit.c @@ -81,6 +81,8 @@ i915_fill_blit(struct i915_context *i915, OUT_BATCH(((y + h) << 16) | (x + w)); OUT_RELOC_FENCED(dst_buffer, I915_USAGE_2D_TARGET, dst_offset); OUT_BATCH(color); + + i915_set_flush_dirty(i915, I915_FLUSH_CACHE); } void @@ -153,4 +155,6 @@ i915_copy_blit(struct i915_context *i915, OUT_BATCH((src_y << 16) | src_x); OUT_BATCH(((int) src_pitch & 0xffff)); OUT_RELOC_FENCED(src_buffer, I915_USAGE_2D_SOURCE, src_offset); + + i915_set_flush_dirty(i915, I915_FLUSH_CACHE); } diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c index f970f8a700..cbf919754e 100644 --- a/src/gallium/drivers/i915/i915_context.c +++ b/src/gallium/drivers/i915/i915_context.c @@ -168,6 +168,7 @@ i915_create_context(struct pipe_screen *screen, void *priv) i915->hardware_dirty = ~0; i915->immediate_dirty = ~0; i915->dynamic_dirty = ~0; + i915->flush_dirty = 0; /* Batch stream debugging is a bit hacked up at the moment: */ diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h index e77641149b..a627b7ac86 100644 --- a/src/gallium/drivers/i915/i915_context.h +++ b/src/gallium/drivers/i915/i915_context.h @@ -237,6 +237,7 @@ struct i915_context { unsigned hardware_dirty; unsigned immediate_dirty; unsigned dynamic_dirty; + unsigned flush_dirty; struct i915_winsys_buffer *validation_buffers[2 + 1 + I915_TEX_UNITS]; int num_validation_buffers; @@ -280,6 +281,18 @@ struct i915_context { #define I915_HW_CONSTANTS (1<<I915_CACHE_CONSTANTS) #define I915_HW_IMMEDIATE (1<<(I915_MAX_CACHE+0)) #define I915_HW_INVARIANT (1<<(I915_MAX_CACHE+1)) +#define I915_HW_FLUSH (1<<(I915_MAX_CACHE+1)) + +/* hw flush handling */ +#define I915_FLUSH_CACHE 1 +#define I915_PIPELINE_FLUSH 2 + +static INLINE +void 
i915_set_flush_dirty(struct i915_context *i915, unsigned flush) +{ + i915->hardware_dirty |= I915_HW_FLUSH; + i915->flush_dirty |= flush; +} /*********************************************************************** diff --git a/src/gallium/drivers/i915/i915_flush.c b/src/gallium/drivers/i915/i915_flush.c index 911c051d1f..22a2c7b2cb 100644 --- a/src/gallium/drivers/i915/i915_flush.c +++ b/src/gallium/drivers/i915/i915_flush.c @@ -96,4 +96,6 @@ void i915_flush(struct i915_context *i915, struct pipe_fence_handle **fence) i915->hardware_dirty = ~0; i915->immediate_dirty = ~0; i915->dynamic_dirty = ~0; + /* kernel emits flushes in between batchbuffers */ + i915->flush_dirty = 0; } diff --git a/src/gallium/drivers/i915/i915_state_emit.c b/src/gallium/drivers/i915/i915_state_emit.c index 29001bc0b5..4fc0133198 100644 --- a/src/gallium/drivers/i915/i915_state_emit.c +++ b/src/gallium/drivers/i915/i915_state_emit.c @@ -42,7 +42,7 @@ struct i915_tracked_hw_state { const char *name; void (*update)(struct i915_context *); void (*emit)(struct i915_context *); - unsigned dirty; + unsigned dirty, batch_space; }; static unsigned translate_format( enum pipe_format format ) @@ -114,6 +114,21 @@ buf_3d_tiling_bits(enum i915_winsys_buffer_tile tiling) } static void +emit_flush(struct i915_context *i915) +{ + /* Cache handling is very cheap atm. State handling can request to flushes: + * - I915_FLUSH_CACHE which is a flush everything request and + * - I915_PIPELINE_FLUSH which is specifically for the draw_offset flush. + * Because the cache handling is so dumb, no explicit "invalidate map cache". + * Also, the first is a strict superset of the latter, so the following logic + * works. 
*/ + if (i915->flush_dirty & I915_FLUSH_CACHE) + OUT_BATCH(MI_FLUSH | FLUSH_MAP_CACHE); + else if (i915->flush_dirty & I915_PIPELINE_FLUSH) + OUT_BATCH(MI_FLUSH | INHIBIT_FLUSH_RENDER_CACHE); +} + +static void update_immediate(struct i915_context *i915) { if (i915->immediate_dirty & (1 << I915_IMMEDIATE_S0)) @@ -155,21 +170,25 @@ update_map(struct i915_context *i915) } const static struct i915_tracked_hw_state hw_atoms[] = { + { "flush", NULL, emit_flush, I915_HW_FLUSH, 1 }, { "immediate", update_immediate, NULL, I915_HW_IMMEDIATE }, { "static", update_static, NULL, I915_HW_STATIC }, { "map", update_map, NULL, I915_HW_MAP } }; static boolean -i915_validate_state(struct i915_context *i915) +i915_validate_state(struct i915_context *i915, unsigned *batch_space) { int i; i915->num_validation_buffers = 0; + *batch_space = 0; for (i = 0; i < Elements(hw_atoms); i++) - if ((i915->hardware_dirty & hw_atoms[i].dirty) && hw_atoms[i].update) + if ((i915->hardware_dirty & hw_atoms[i].dirty) && hw_atoms[i].update) { hw_atoms[i].update(i915); + *batch_space += hw_atoms[i].batch_space; + } if (i915->num_validation_buffers == 0) return TRUE; @@ -181,11 +200,22 @@ i915_validate_state(struct i915_context *i915) return TRUE; } +static void +emit_state(struct i915_context *i915) +{ + int i; + + for (i = 0; i < Elements(hw_atoms); i++) + if ((i915->hardware_dirty & hw_atoms[i].dirty) && hw_atoms[i].emit) + hw_atoms[i].emit(i915); +} + /* Push the state into the sarea and/or texture memory. 
*/ void i915_emit_hardware_state(struct i915_context *i915 ) { + unsigned batch_space; /* XXX: there must be an easier way */ const unsigned dwords = ( 14 + 7 + @@ -211,20 +241,21 @@ i915_emit_hardware_state(struct i915_context *i915 ) if (I915_DBG_ON(DBG_ATOMS)) i915_dump_hardware_dirty(i915, __FUNCTION__); - if (!i915_validate_state(i915)) { + if (!i915_validate_state(i915, &batch_space)) { FLUSH_BATCH(NULL); - assert(i915_validate_state(i915)); + assert(i915_validate_state(i915, &batch_space)); } - if(!BEGIN_BATCH(dwords, relocs)) { + if(!BEGIN_BATCH(batch_space + dwords, relocs)) { FLUSH_BATCH(NULL); - assert(i915_validate_state(i915)); - assert(BEGIN_BATCH(dwords, relocs)); + assert(i915_validate_state(i915, &batch_space)); + assert(BEGIN_BATCH(batch_space + dwords, relocs)); } save_ptr = (uintptr_t)i915->batch->ptr; save_relocs = i915->batch->relocs; + emit_state(i915); /* 14 dwords, 0 relocs */ if (i915->hardware_dirty & I915_HW_INVARIANT) { diff --git a/src/gallium/drivers/i915/i915_state_static.c b/src/gallium/drivers/i915/i915_state_static.c index dc9a4c1e2f..d858e01d89 100644 --- a/src/gallium/drivers/i915/i915_state_static.c +++ b/src/gallium/drivers/i915/i915_state_static.c @@ -38,6 +38,9 @@ static void update_framebuffer(struct i915_context *i915) /* HW emit currently references framebuffer state directly: */ i915->hardware_dirty |= I915_HW_STATIC; + + /* flush the cache in case we sample from the old renderbuffers */ + i915_set_flush_dirty(i915, I915_FLUSH_CACHE); } struct i915_tracked_state i915_hw_framebuffer = { |