diff options
author | Keith Whitwell <keith@tungstengraphics.com> | 2007-03-18 09:51:14 +0000 |
---|---|---|
committer | Keith Whitwell <keith@tungstengraphics.com> | 2007-03-18 09:51:14 +0000 |
commit | 7c8321dd2790f2ec54387b9bf727193988308108 (patch) | |
tree | 03ed7c6f5385e71a66bb5c558a4dd05aefcb14a8 | |
parent | 013b0c3138d9de0fc13b7348f9e8d68c95f81b3b (diff) |
Add a final state differencing step to minimise emitted state.
Rearrange packets so that there is just a single static indirect state.
Move stipple to dynamic state.
Move all actual state emit code to the differencer.
-rw-r--r-- | src/mesa/drivers/dri/i915tex/Makefile | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i915tex/i915_cache.c | 100 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i915tex/i915_cache.h | 11 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i915tex/i915_context.h | 67 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i915tex/i915_differencer.c | 283 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i915tex/i915_state.c | 30 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i915tex/i915_state.h | 18 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i915tex/i915_state_dynamic.c | 167 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i915tex/i915_state_immediate.c | 119 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i915tex/i915_state_static.c | 354 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i915tex/i915_vtbl.c | 8 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i915tex/intel_batchbuffer.c | 4 |
12 files changed, 643 insertions, 519 deletions
diff --git a/src/mesa/drivers/dri/i915tex/Makefile b/src/mesa/drivers/dri/i915tex/Makefile index 7c7ad84d51..90d0e1396c 100644 --- a/src/mesa/drivers/dri/i915tex/Makefile +++ b/src/mesa/drivers/dri/i915tex/Makefile @@ -9,6 +9,7 @@ MINIGLX_SOURCES = server/intel_dri.c DRIVER_SOURCES = \ i915_cache.c \ i915_context.c \ + i915_differencer.c \ i915_fpc.c \ i915_fpc_debug.c \ i915_fpc_emit.c \ diff --git a/src/mesa/drivers/dri/i915tex/i915_cache.c b/src/mesa/drivers/dri/i915tex/i915_cache.c index 448d8a4300..51d7b5eb73 100644 --- a/src/mesa/drivers/dri/i915tex/i915_cache.c +++ b/src/mesa/drivers/dri/i915tex/i915_cache.c @@ -58,8 +58,6 @@ struct i915_cache { GLuint state_type; struct i915_cache_item **items; GLuint size, n_items; - GLuint force_load_flag; - GLuint last_addr; }; struct i915_cache_context { @@ -68,22 +66,6 @@ struct i915_cache_context { }; -static void emit_load_indirect( struct intel_context *intel, - GLuint state_type, - GLuint force_load_flag, - GLuint offset, - GLuint dwords ) -{ - BEGIN_BATCH(3,0); - OUT_BATCH( _3DSTATE_LOAD_INDIRECT | state_type | (1<<14) | 1); - OUT_RELOC( intel->batch->buffer, - DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_EXE, - DRM_BO_MASK_MEM | DRM_BO_FLAG_EXE, - ( offset | force_load_flag | SIS0_BUFFER_VALID ) ); - OUT_BATCH( dwords - 1 ); - ADVANCE_BATCH(); -} - static GLuint emit_packet( struct intel_context *intel, const struct i915_cache_packet *packet ) @@ -113,7 +95,6 @@ static GLuint emit_packet( struct intel_context *intel, packet->dword[packet->reloc[i].dword].u ); } - return offset; } @@ -229,29 +210,7 @@ void i915_cache_emit(struct i915_cache_context *cctx, struct intel_context *intel = &cctx->i915->intel; GLuint size = packet_size( packet ); -#if 1 - GLuint hash = hash_packet( packet, size ); - struct i915_cache *cache = &cctx->cache[packet->cache_id]; - GLuint addr; - - addr = search_cache( cache, hash, packet->dword, size ); - if (addr == 0) - addr = upload_cache( cache, intel, hash, packet, size ); - - /* Always have to tell 
the hardware about it, unless this is the - * same as last time! - */ - if (addr != cache->last_addr) { - emit_load_indirect( intel, - cache->state_type, - cache->force_load_flag, - addr, - packet->nr_dwords ); - - cache->force_load_flag = 0; - cache->last_addr = addr; - } -#else +#ifdef I915_NO_INDIRECT_STATE GLuint i; BEGIN_BATCH(packet->nr_dwords, 0); @@ -268,6 +227,17 @@ void i915_cache_emit(struct i915_cache_context *cctx, OUT_BATCH(packet->dword[i].u); ADVANCE_BATCH(); +#else + GLuint hash = hash_packet( packet, size ); + struct i915_cache *cache = &cctx->cache[packet->cache_id]; + GLuint addr; + + addr = search_cache( cache, hash, packet->dword, size ); + if (addr == 0) + addr = upload_cache( cache, intel, hash, packet, size ); + + cctx->i915->current.offsets[packet->cache_id] = addr; + cctx->i915->current.sizes[packet->cache_id] = packet->nr_dwords; #endif } @@ -278,6 +248,9 @@ void i915_cache_emit(struct i915_cache_context *cctx, * to fixup by the memory manager as they contain absolute agp * offsets, so we need to ensure there is a fresh version of the * struct available to receive the fixup. + * + * Also, temporarily, need to do this for every cache because they are + * all being stored in the batchbuffer!! 
*/ static void clear_cache( struct i915_cache *cache ) { @@ -294,11 +267,6 @@ static void clear_cache( struct i915_cache *cache ) } cache->n_items = 0; - - /* Make sure hardware knows we've abandoned old data: - */ - cache->force_load_flag = SIS0_FORCE_LOAD; - cache->last_addr = ~0; } @@ -326,6 +294,16 @@ struct i915_cache_context *i915_create_caches( struct i915_context *i915 ) cctx->i915 = i915; init_cache( cctx, + "STATIC", + I915_CACHE_STATIC, + LI0_STATE_STATIC_INDIRECT ); + + init_cache( cctx, + "MAP", + I915_CACHE_MAP, + LI0_STATE_MAP ); + + init_cache( cctx, "SAMPLER", I915_CACHE_SAMPLER, LI0_STATE_SAMPLER ); @@ -340,34 +318,6 @@ struct i915_cache_context *i915_create_caches( struct i915_context *i915 ) I915_CACHE_CONSTANTS, LI0_STATE_CONSTANTS ); - init_cache( cctx, - "MAP", - I915_CACHE_MAP, - LI0_STATE_MAP ); - - /* These are all part of the LI0_STATIC_INDIRECT bucket. Not - * really sure how to handle these. - */ - init_cache( cctx, - "BUFFERS", - I915_CACHE_BUFFERS, - LI0_STATE_STATIC_INDIRECT ); - - init_cache( cctx, - "STIPPLE", - I915_CACHE_STIPPLE, - LI0_STATE_STATIC_INDIRECT ); - - init_cache( cctx, - "SCISSOR", - I915_CACHE_SCISSOR, - LI0_STATE_STATIC_INDIRECT ); - - init_cache( cctx, - "INVARIENT", - I915_CACHE_INVARIENT, - LI0_STATE_STATIC_INDIRECT ); - return cctx; } diff --git a/src/mesa/drivers/dri/i915tex/i915_cache.h b/src/mesa/drivers/dri/i915tex/i915_cache.h index ca48b2daa1..41ee2a162d 100644 --- a/src/mesa/drivers/dri/i915tex/i915_cache.h +++ b/src/mesa/drivers/dri/i915tex/i915_cache.h @@ -37,17 +37,6 @@ struct i915_cache_context; struct i915_cache_packet; struct i915_context; -enum { - I915_CACHE_SAMPLER, - I915_CACHE_PROGRAM, - I915_CACHE_CONSTANTS, - I915_CACHE_MAP, - I915_CACHE_BUFFERS, - I915_CACHE_STIPPLE, - I915_CACHE_SCISSOR, - I915_CACHE_INVARIENT, - I915_MAX_CACHE -}; struct i915_cache_context *i915_create_caches( struct i915_context *i915 ); diff --git a/src/mesa/drivers/dri/i915tex/i915_context.h 
b/src/mesa/drivers/dri/i915tex/i915_context.h index 9ae3ee97e9..ecb26ebc9a 100644 --- a/src/mesa/drivers/dri/i915tex/i915_context.h +++ b/src/mesa/drivers/dri/i915tex/i915_context.h @@ -38,7 +38,8 @@ #define I915_NEW_INPUT_SIZES (INTEL_NEW_DRIVER0<<0) #define I915_NEW_VERTEX_FORMAT (INTEL_NEW_DRIVER0<<1) #define I915_NEW_DYNAMIC_INDIRECT (INTEL_NEW_DRIVER0<<2) - +#define I915_NEW_CACHED_INDIRECT (INTEL_NEW_DRIVER0<<3) +#define I915_NEW_IMMEDIATE (INTEL_NEW_DRIVER0<<4) /* Hardware version of a parsed fragment program. "Derived" from the @@ -81,18 +82,60 @@ struct i915_fragment_program #define I915_TEX_UNITS 8 #define I915_DYNAMIC_MODES4 0 -#define I915_DYNAMIC_DEPTHSCALE_0 1 +#define I915_DYNAMIC_DEPTHSCALE_0 1 /* just the header */ #define I915_DYNAMIC_DEPTHSCALE_1 2 #define I915_DYNAMIC_IAB 3 -#define I915_DYNAMIC_BC_0 4 +#define I915_DYNAMIC_BC_0 4 /* just the header */ #define I915_DYNAMIC_BC_1 5 -#define I915_DYNAMIC_BFO_0 6 +#define I915_DYNAMIC_BFO_0 6 #define I915_DYNAMIC_BFO_1 7 -#define I915_DYNAMIC_SIZE 8 +#define I915_DYNAMIC_STP_0 8 +#define I915_DYNAMIC_STP_1 9 +#define I915_MAX_DYNAMIC 10 + + +#define I915_IMMEDIATE_S0 0 +#define I915_IMMEDIATE_S1 1 +#define I915_IMMEDIATE_S2 2 +#define I915_IMMEDIATE_S3 3 +#define I915_IMMEDIATE_S4 4 +#define I915_IMMEDIATE_S5 5 +#define I915_IMMEDIATE_S6 6 +#define I915_IMMEDIATE_S7 7 +#define I915_MAX_IMMEDIATE 8 + +/* These must mach the order of LI0_STATE_* bits, as they will be used + * to generate hardware packets: + */ +#define I915_CACHE_STATIC 0 +#define I915_CACHE_ZERO 1 /* placeholder */ +#define I915_CACHE_SAMPLER 2 +#define I915_CACHE_MAP 3 +#define I915_CACHE_PROGRAM 4 +#define I915_CACHE_CONSTANTS 5 +#define I915_MAX_CACHE 6 struct i915_cache_context; +/* Use to calculate differences between state emitted to hardware and + * current driver-calculated state. 
+ */ +struct i915_state +{ + GLuint immediate[I915_MAX_IMMEDIATE]; + GLuint dynamic[I915_MAX_DYNAMIC]; + + GLuint offsets[I915_MAX_CACHE]; + GLuint sizes[I915_MAX_CACHE]; + + /* Something for vbo: + */ + struct _DriBufferObject *vbo; + + GLuint id; /* track lost context events */ +}; + struct i915_context { @@ -119,15 +162,13 @@ struct i915_context GLuint LIS4; } vertex_format; - /* Used for short-circuiting packets. Won't work for packets - * containing relocations. This is zero'd out after lost_context - * events. + /* Used for short-circuiting state updates. Won't work for packets + * containing relocations, unless they are specifically invalidated + * after batchbuffer flushes - currently we zero out the whole + * state after lost_context events. */ - struct { - GLuint buf[I915_DYNAMIC_SIZE]; - GLboolean done_reset; - } dyn_indirect; - + struct i915_state current; + struct i915_state hardware; }; diff --git a/src/mesa/drivers/dri/i915tex/i915_differencer.c b/src/mesa/drivers/dri/i915tex/i915_differencer.c new file mode 100644 index 0000000000..15cb630a80 --- /dev/null +++ b/src/mesa/drivers/dri/i915tex/i915_differencer.c @@ -0,0 +1,283 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "i915_context.h" +#include "i915_state.h" +#include "i915_reg.h" +#include "intel_batchbuffer.h" + + +static GLuint count_bits( GLuint mask ) +{ + GLuint i, nr = 0; + + for (i = 1; mask >= i; i <<= 1) + if (mask & i) + nr++; + + return nr; +} + +static void emit_immediates( struct intel_context *intel, + const struct i915_state *from, + const struct i915_state *to ) +{ + GLuint dirty = 0; + GLuint i; + + /* Lost context? + */ + if (from->id != to->id) { + dirty = (1<<I915_MAX_IMMEDIATE) - 1; + } + else { + for (i = 0; i < I915_MAX_IMMEDIATE; i++) { + if (from->immediate[i] != to->immediate[i]) { + dirty |= 1<<i; + } + } + + if (from->vbo != to->vbo) + dirty |= 1<<I915_IMMEDIATE_S0; + } + + if (to->vbo == NULL) + dirty &= ~(1<<I915_IMMEDIATE_S0); + + if (dirty) { + GLuint nr = count_bits(dirty); + + BEGIN_BATCH( nr + 1, 0 ); + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | + (dirty << 4) | + (nr - 1)); + + if (dirty & (1<<0)) { + OUT_RELOC(to->vbo, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + DRM_BO_MASK_MEM | DRM_BO_FLAG_READ, + to->immediate[0]); + } + + for (i = 1; i < I915_MAX_IMMEDIATE; i++) { + if (dirty & (1<<i)) { + OUT_BATCH( to->immediate[i] ); + } + } + + ADVANCE_BATCH(); + } +} + + + + + +/* Macro to identify whole packets that differ in any of their dwords. 
+ */ +#define CHECK( idx, nr ) do { \ + for (i = idx; i < idx+nr; i++) { \ + if (to->dynamic[i] != from->dynamic[i]) { \ + dirty |= ((1<<nr)-1) << idx; \ + size += nr; \ + break; \ + } \ + } \ +} while (0) + + +static void emit_dynamic_indirect( struct intel_context *intel, + const struct i915_state *from, + const struct i915_state *to ) +{ + GLuint size, offset, pagetop, flags; + GLuint dirty = 0, flag = 0; + GLuint i; + + /* Lost context? + */ + if (from->id != to->id) { + dirty = (1 << I915_MAX_DYNAMIC) - 1; + flag = DIS0_BUFFER_RESET; + size = I915_MAX_DYNAMIC; + } + else { + /* Otherwise, compare the two states + */ + CHECK( I915_DYNAMIC_MODES4, 1 ); + CHECK( I915_DYNAMIC_DEPTHSCALE_0, 2 ); + CHECK( I915_DYNAMIC_IAB, 1 ); + CHECK( I915_DYNAMIC_BC_0, 2 ); + CHECK( I915_DYNAMIC_BFO_0, 2 ); + CHECK( I915_DYNAMIC_STP_0, 2 ); + } + + + offset = intel->batch->segment_finish_offset[SEGMENT_DYNAMIC_INDIRECT]; + pagetop = ALIGN(offset, 4096); + flags = DIS0_BUFFER_VALID; + + /* check if we cross a 4k boundary and if so pad to 4k and emit + * full state. 
+ */ + if (pagetop != ALIGN(offset + size * 4, 4096)) + { + dirty = (1<<I915_MAX_DYNAMIC)-1; + size = I915_MAX_DYNAMIC; + memset(intel->batch->map + offset, 0, pagetop - offset); + offset = pagetop; + } + + /* Emit: + */ + if (0) { + GLuint segment = SEGMENT_DYNAMIC_INDIRECT; + GLuint *ptr; + + /* Emit the "load state" command, + */ + BEGIN_BATCH(2,0); + OUT_BATCH( _3DSTATE_LOAD_INDIRECT | LI0_STATE_DYNAMIC_INDIRECT | (1<<14) | 0); + OUT_RELOC( intel->batch->buffer, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_EXE, + DRM_BO_MASK_MEM | DRM_BO_FLAG_EXE, + ((offset + size - 4) | DIS0_BUFFER_VALID | flag) ); + ADVANCE_BATCH(); + + /* XXX: + */ + assert( offset + size < intel->batch->segment_max_offset[segment]); + intel->batch->segment_finish_offset[segment] += size; + + ptr = (GLuint *)(intel->batch->map + offset); + + /* Finally emit the state: + */ + for (i = 0; i < I915_MAX_DYNAMIC; i++) { + if (dirty & (1<<i)) { + *ptr++ = to->dynamic[i]; + } + } + } +} + + + +static void emit_cached_indirect( struct intel_context *intel, + const struct i915_state *from, + const struct i915_state *to ) +{ + GLuint flag = 0; + GLuint dirty = 0; + GLuint i; + + if (from->id != to->id) { + dirty = (1<<I915_MAX_CACHE) - 1; + dirty &= ~(1<<I915_CACHE_ZERO); /* clear out placeholder */ + flag = SIS0_FORCE_LOAD; + } + else { + /* Checking the offsets is sufficient - no need to examine sizes as + * they don't change independently of offsets. + */ + for (i = 0; i < I915_MAX_CACHE; i++) { + if (from->offsets[i] != to->offsets[i]) { + dirty |= 1<<i; + } + } + } + + for (i = 0; i < I915_MAX_CACHE; i++) { + if (to->sizes[i] == 0) + dirty &= ~(1<<i); + } + + + + /* Emit the load indirect packet. The actual data has already been + * emitted to the caches. 
+ */ + if (dirty) { + GLuint nr = count_bits(dirty); + GLuint size = nr * 2 + 1; + BEGIN_BATCH(2,0); + OUT_BATCH( _3DSTATE_LOAD_INDIRECT | (dirty<<8) | (1<<14) | (size - 2)); + + for (i = 0; i < I915_MAX_CACHE; i++) { + if (dirty & (1<<i)) { + OUT_RELOC( intel->batch->buffer, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_EXE, + DRM_BO_MASK_MEM | DRM_BO_FLAG_EXE, + ( to->offsets[i] | flag | SIS0_BUFFER_VALID ) ); + + OUT_BATCH( to->sizes[i] ); + } + } + + ADVANCE_BATCH(); + } +} + + +static void state_differencer( struct intel_context *intel ) +{ + struct i915_context *i915 = i915_context( &intel->ctx ); + const struct i915_state *new = &i915->current; + struct i915_state *old = &i915->hardware; + GLuint flags = intel->state.dirty.intel; + + if (flags & I915_NEW_IMMEDIATE) + emit_immediates( intel, old, new ); + + if (flags & I915_NEW_DYNAMIC_INDIRECT) + emit_dynamic_indirect( intel, old, new ); + + if (flags & I915_NEW_CACHED_INDIRECT) + emit_cached_indirect( intel, old, new ); + + memcpy(old, new, sizeof(*new)); +} + +const struct intel_tracked_state i915_state_differencer = { + .dirty = { + .mesa = 0, + .intel = (I915_NEW_DYNAMIC_INDIRECT | + I915_NEW_CACHED_INDIRECT | + I915_NEW_IMMEDIATE | + INTEL_NEW_CONTEXT), + .extra = 0 + }, + .update = state_differencer +}; + + + diff --git a/src/mesa/drivers/dri/i915tex/i915_state.c b/src/mesa/drivers/dri/i915tex/i915_state.c index 237e30f816..69ef918e3b 100644 --- a/src/mesa/drivers/dri/i915tex/i915_state.c +++ b/src/mesa/drivers/dri/i915tex/i915_state.c @@ -63,7 +63,7 @@ const struct intel_tracked_state *atoms[] = */ &i915_vertex_format, - /* Immediate state. Don't make any effort to combine packets yet. + /* Immediate state. */ &i915_upload_S0S1, &i915_upload_S2S4, @@ -71,29 +71,30 @@ const struct intel_tracked_state *atoms[] = &i915_upload_S6, &i915_upload_S7, - /* Dynamic indirect. Packets are combined in a final step. + /* Dynamic indirect. 
*/ &i915_upload_BFO, &i915_upload_BLENDCOLOR, &i915_upload_DEPTHSCALE, -/* &i915_upload_FOGCOLOR, */ -/* &i915_upload_FOGMODE, */ &i915_upload_IAB, &i915_upload_MODES4, - &i915_upload_dynamic_indirect, - - /* Static indirect state. - */ - &i915_upload_invarient, - &i915_upload_buffers, - &i915_upload_scissor, - &i915_upload_stipple, + &i915_upload_STIPPLE, /* Other indirect state. Also includes program state, above. */ &i915_upload_maps, /* must do before samplers */ &i915_upload_samplers, - &i915_upload_constants /* will be patched out at runtime */ + &i915_upload_constants, /* will be patched out at runtime */ + &i915_upload_static, + + + /* Combine packets, diff against hardware state and emit a minimal + * set of changes: + * + * XXX: Could delay this and only make this final step at the point + * where the first triangle gets drawn. + */ + &i915_state_differencer, }; @@ -115,6 +116,9 @@ void i915_init_state( struct i915_context *i915 ) _mesa_memcpy(&i915->constants.tracked_state, &i915_upload_constants, sizeof(i915_upload_constants)); + + i915->hardware.id = 0; + i915->current.id = 1; } diff --git a/src/mesa/drivers/dri/i915tex/i915_state.h b/src/mesa/drivers/dri/i915tex/i915_state.h index ca625bf3ff..1d430f9a9c 100644 --- a/src/mesa/drivers/dri/i915tex/i915_state.h +++ b/src/mesa/drivers/dri/i915tex/i915_state.h @@ -61,21 +61,20 @@ const struct intel_tracked_state i915_upload_FOGCOLOR; const struct intel_tracked_state i915_upload_FOGMODE; const struct intel_tracked_state i915_upload_IAB; const struct intel_tracked_state i915_upload_MODES4; +const struct intel_tracked_state i915_upload_STIPPLE; const struct intel_tracked_state i915_upload_dynamic_indirect; -/* Static indirect: - */ -const struct intel_tracked_state i915_upload_invarient; -const struct intel_tracked_state i915_upload_buffers; -const struct intel_tracked_state i915_upload_scissor; -const struct intel_tracked_state i915_upload_stipple; - /* Other indirect: */ const struct intel_tracked_state 
i915_upload_constants; const struct intel_tracked_state i915_upload_program; const struct intel_tracked_state i915_upload_maps; const struct intel_tracked_state i915_upload_samplers; +const struct intel_tracked_state i915_upload_static; + +/* Perform state differencing and update hardware: + */ +const struct intel_tracked_state i915_state_differencer; static INLINE GLuint @@ -108,5 +107,10 @@ static INLINE GLint S_FIXED(GLfloat value, GLuint frac_bits) return value * (1<<frac_bits); } +static INLINE GLuint ALIGN(GLuint x, GLuint align) +{ + return (x + align - 1) & ~(align - 1); +} + #endif diff --git a/src/mesa/drivers/dri/i915tex/i915_state_dynamic.c b/src/mesa/drivers/dri/i915tex/i915_state_dynamic.c index e485184536..0ceace203d 100644 --- a/src/mesa/drivers/dri/i915tex/i915_state_dynamic.c +++ b/src/mesa/drivers/dri/i915tex/i915_state_dynamic.c @@ -58,22 +58,21 @@ * state every time. Next would be to diff against previous, but note */ -static void set_dynamic_indirect( struct intel_context *intel, - GLuint offset, - const GLuint *src, - GLuint size ) +static inline void set_dynamic_indirect( struct intel_context *intel, + GLuint offset, + const GLuint *src, + GLuint size ) { #if 1 struct i915_context *i915 = i915_context( &intel->ctx ); - GLuint *dest = i915->dyn_indirect.buf + offset; - GLuint i; + GLuint *dst = i915->current.dynamic + offset; + + if (memcmp(dst, src, size * 4) == 0) + return; + + intel->state.dirty.intel |= I915_NEW_DYNAMIC_INDIRECT; + memcpy(dst, src, size * 4); - for (i = 0; i < size; i++) { - if (dest[i] != src[i]) { - dest[i] = src[i]; - intel->state.dirty.intel |= I915_NEW_DYNAMIC_INDIRECT; - } - } #else GLuint i; BEGIN_BATCH(size, 0); @@ -92,7 +91,7 @@ static void upload_MODES4( struct intel_context *intel ) GLuint modes4 = 0; /* _NEW_STENCIL */ - if (intel->state.Stencil->Enabled) { + if (1 || intel->state.Stencil->Enabled) { GLint testmask = intel->state.Stencil->ValueMask[0] & 0xff; GLint writemask = 
intel->state.Stencil->WriteMask[0] & 0xff; @@ -104,7 +103,7 @@ static void upload_MODES4( struct intel_context *intel ) } /* _NEW_COLOR */ - if (intel->state.Color->_LogicOpEnabled) { + if (1 || intel->state.Color->_LogicOpEnabled) { modes4 |= (_3DSTATE_MODES_4_CMD | ENABLE_LOGIC_OP_FUNC | LOGIC_OP_FUNC(intel_translate_logic_op(intel->state.Color->LogicOp))); @@ -141,7 +140,7 @@ static void upload_BFO( struct intel_context *intel ) /* _NEW_STENCIL */ - if (intel->state.Stencil->Enabled) { + if (1 || intel->state.Stencil->Enabled) { if (intel->state.Stencil->TestTwoSide) { GLint test = intel_translate_compare_func(intel->state.Stencil->Function[1]); GLint fop = intel_translate_stencil_op(intel->state.Stencil->FailFunc[1]); @@ -210,7 +209,7 @@ static void upload_BLENDCOLOR( struct intel_context *intel ) /* _NEW_COLOR */ - if (intel->state.Color->BlendEnabled) { + if (1 || intel->state.Color->BlendEnabled) { const GLfloat *color = intel->state.Color->BlendColor; GLubyte r, g, b, a; @@ -221,7 +220,6 @@ static void upload_BLENDCOLOR( struct intel_context *intel ) bc[0] = (_3DSTATE_CONST_BLEND_COLOR_CMD); bc[1] = (a << 24) | (r << 16) | (g << 8) | b; - } set_dynamic_indirect( intel, @@ -247,7 +245,7 @@ static void upload_IAB( struct intel_context *intel ) { GLuint iab = 0; - if (intel->state.Color->BlendEnabled) { + if (1 || intel->state.Color->BlendEnabled) { GLuint eqRGB = intel->state.Color->BlendEquationRGB; GLuint eqA = intel->state.Color->BlendEquationA; GLuint srcRGB = intel->state.Color->BlendSrcRGB; @@ -312,7 +310,7 @@ static void upload_DEPTHSCALE( struct intel_context *intel ) memset( ds, 0, sizeof(ds) ); - if (intel->state.Polygon->OffsetFill) { + if (1 || intel->state.Polygon->OffsetFill) { ds[0].u = (_3DSTATE_DEPTH_OFFSET_SCALE); ds[1].f = 0; /* XXX */ @@ -334,87 +332,80 @@ const struct intel_tracked_state i915_upload_DEPTHSCALE = { .update = upload_DEPTHSCALE }; -/*********************************************************************** - * Do the group emit 
in a single packet. - */ - -#define CHECK( idx, nr ) do { \ - if (i915->dyn_indirect.buf[idx] != 0) { \ - GLint i; \ - for (i = 0; i < nr; i++) \ - buf[count++] = i915->dyn_indirect.buf[idx+i]; \ - } \ -} while (0) +/*********************************************************************** + * Polygon stipple + * + * The i915 supports a 4x4 stipple natively, GL wants 32x32. + * Fortunately stipple is usually a repeating pattern. + * + * XXX: does stipple pattern need to be adjusted according to + * the window position? + * + * XXX: possibly need workaround for conform paths test. + */ -static void emit_indirect( struct intel_context *intel ) +static void upload_stipple( struct intel_context *intel ) { - struct i915_context *i915 = i915_context( &intel->ctx ); - GLboolean active; - GLuint i; - - /* XXX: need to check if we wrap 4kb and if so pad. - */ -/* GLuint buf[I915_DYNAMIC_SIZE], count = 0; */ -/* CHECK( I915_DYNAMIC_MODES4, 1 ); */ -/* CHECK( I915_DYNAMIC_DEPTHSCALE_0, 2 ); */ -/* CHECK( I915_DYNAMIC_IAB, 1 ); */ -/* CHECK( I915_DYNAMIC_BC_0, 2 ); */ -/* CHECK( I915_DYNAMIC_BFO_0, 2 ); */ - + GLboolean hw_stipple_fallback = 0; + GLuint st[2]; - - /* Or just emit the whole lot, zeros and all (fix later...): - */ - for (active = 0, i = 0; i < I915_DYNAMIC_SIZE; i++) - if (i915->dyn_indirect.buf[i] != 0) { - active = 1; - break; - } - - - - /* Also - want to check that something has changed & we're not just - * re-emitting the same stuff. + st[0] = _3DSTATE_STIPPLE; + st[1] = 0; + + /* _NEW_POLYGON, INTEL_NEW_REDUCED_PRIMITIVE */ - if (active) { - GLuint size = I915_DYNAMIC_SIZE * 4; - GLuint flag = i915->dyn_indirect.done_reset ? 
0 : DIS0_BUFFER_RESET; - GLuint segment = SEGMENT_DYNAMIC_INDIRECT; - GLuint offset = intel->batch->segment_finish_offset[segment]; - - i915->dyn_indirect.done_reset = 1; - - BEGIN_BATCH(2,0); - OUT_BATCH( _3DSTATE_LOAD_INDIRECT | LI0_STATE_DYNAMIC_INDIRECT | (1<<14) | 0); - OUT_RELOC( intel->batch->buffer, - DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_EXE, - DRM_BO_MASK_MEM | DRM_BO_FLAG_EXE, - ((offset + size - 4) | DIS0_BUFFER_VALID | flag) ); - ADVANCE_BATCH(); - - /* XXX: - */ - assert( offset + size < intel->batch->segment_max_offset[segment]); - intel->batch->segment_finish_offset[segment] += size; + if (intel->state.Polygon->StippleFlag && + intel->reduced_primitive == GL_TRIANGLES) { - /* Just emit the original buffer, zeros and all as this will - * avoid wrapping issues. This is usually not emitted at all, - * so not urgent to fix: + /* _NEW_POLYGONSTIPPLE */ - memcpy(intel->batch->map + offset, i915->dyn_indirect.buf, size ); + const GLubyte *mask = (const GLubyte *)intel->state.PolygonStipple; + GLubyte p[4]; + GLint i, j, k; + + p[0] = mask[12] & 0xf; + p[0] |= p[0] << 4; + p[1] = mask[8] & 0xf; + p[1] |= p[1] << 4; + p[2] = mask[4] & 0xf; + p[2] |= p[2] << 4; + p[3] = mask[0] & 0xf; + p[3] |= p[3] << 4; + + st[1] |= ST1_ENABLE; + st[1] |= (((p[0] & 0xf) << 0) | + ((p[1] & 0xf) << 4) | + ((p[2] & 0xf) << 8) | + ((p[3] & 0xf) << 12)); + + for (k = 0; k < 8; k++) { + for (j = 3; j >= 0; j--) { + for (i = 0; i < 4; i++, mask++) { + if (*mask != p[j]) { + hw_stipple_fallback = 1; + st[1] = 0; + } + } + } + } } + + assert(!hw_stipple_fallback); /* TODO */ + + set_dynamic_indirect( intel, + I915_DYNAMIC_STP_0, + &st[0], + 2 ); } -const struct intel_tracked_state i915_upload_dynamic_indirect = { + +const struct intel_tracked_state i915_upload_stipple = { .dirty = { - .mesa = 0, - .intel = I915_NEW_DYNAMIC_INDIRECT, + .mesa = _NEW_POLYGONSTIPPLE, _NEW_POLYGON, + .intel = INTEL_NEW_REDUCED_PRIMITIVE, .extra = 0 }, - .update = emit_indirect + .update = upload_stipple }; - 
- - diff --git a/src/mesa/drivers/dri/i915tex/i915_state_immediate.c b/src/mesa/drivers/dri/i915tex/i915_state_immediate.c index 495f8fe903..d032a1cf7c 100644 --- a/src/mesa/drivers/dri/i915tex/i915_state_immediate.c +++ b/src/mesa/drivers/dri/i915tex/i915_state_immediate.c @@ -57,35 +57,34 @@ */ static void upload_S0S1( struct intel_context *intel ) { + struct i915_context *i915 = i915_context( &intel->ctx ); + GLuint S0, S1; - /* INTEL_NEW_VBO */ - if (intel->state.vbo) { - - BEGIN_BATCH(3, 0); - - OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | - I1_LOAD_S(0) | - I1_LOAD_S(1) | - 1); - - /* INTEL_NEW_VBO, INTEL_NEW_RELOC */ - OUT_RELOC(intel->state.vbo, - DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, - DRM_BO_MASK_MEM | DRM_BO_FLAG_READ, - intel->state.vbo_offset); + /* INTEL_NEW_VBO + */ + S0 = intel->state.vbo_offset; - /* INTEL_NEW_VERTEX_SIZE */ - OUT_BATCH((intel->vertex_size << 24) | - (intel->vertex_size << 16)); + /* INTEL_NEW_VERTEX_SIZE -- do this where the vertex size is calculated! + */ + S1 = ((intel->vertex_size << 24) | + (intel->vertex_size << 16)); - ADVANCE_BATCH(); + /* INTEL_NEW_VBO */ + if (i915->current.vbo != intel->state.vbo || + i915->current.immediate[I915_IMMEDIATE_S0] != S0 || + i915->current.immediate[I915_IMMEDIATE_S1] != S1) + { + i915->current.vbo = intel->state.vbo; + i915->current.immediate[I915_IMMEDIATE_S0] = S0; + i915->current.immediate[I915_IMMEDIATE_S1] = S1; + intel->state.dirty.intel |= I915_NEW_IMMEDIATE; } } const struct intel_tracked_state i915_upload_S0S1 = { .dirty = { .mesa = 0, - .intel = INTEL_NEW_VBO | INTEL_NEW_VERTEX_SIZE | INTEL_NEW_FENCE, + .intel = INTEL_NEW_VBO | INTEL_NEW_VERTEX_SIZE, .extra = 0 }, .update = upload_S0S1 @@ -159,16 +158,14 @@ static void upload_S2S4(struct intel_context *intel) S4_FLATSHADE_SPECULAR); } - - BEGIN_BATCH(3, 0); - - OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | - I1_LOAD_S(2) | - I1_LOAD_S(4) | - 1); - OUT_BATCH(LIS2); - OUT_BATCH(LIS4); - ADVANCE_BATCH(); + + if (LIS2 != 
i915->current.immediate[I915_IMMEDIATE_S2] || + LIS4 != i915->current.immediate[I915_IMMEDIATE_S4]) { + + i915->current.immediate[I915_IMMEDIATE_S2] = LIS2; + i915->current.immediate[I915_IMMEDIATE_S4] = LIS4; + intel->state.dirty.intel |= I915_NEW_IMMEDIATE; + } } @@ -193,6 +190,7 @@ const struct intel_tracked_state i915_upload_S2S4 = { */ static void upload_S5( struct intel_context *intel ) { + struct i915_context *i915 = i915_context( &intel->ctx ); GLuint LIS5 = 0; /* _NEW_STENCIL */ @@ -243,18 +241,15 @@ static void upload_S5( struct intel_context *intel ) LIS5 |= S5_WRITEDISABLE_ALPHA; } - BEGIN_BATCH(2, 0); - OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | - I1_LOAD_S(5) | - 0); - OUT_BATCH(LIS5); - ADVANCE_BATCH(); - + if (LIS5 != i915->current.immediate[I915_IMMEDIATE_S5]) { + i915->current.immediate[I915_IMMEDIATE_S5] = LIS5; + intel->state.dirty.intel |= I915_NEW_IMMEDIATE; + } } const struct intel_tracked_state i915_upload_S5 = { .dirty = { - .mesa = (_NEW_STENCIL | _NEW_COLOR), + .mesa = (_NEW_STENCIL | _NEW_COLOR | _NEW_POLYGON), .intel = 0, .extra = 0 }, @@ -266,19 +261,19 @@ const struct intel_tracked_state i915_upload_S5 = { */ static void upload_S6( struct intel_context *intel ) { + struct i915_context *i915 = i915_context( &intel->ctx ); GLuint LIS6 = (S6_COLOR_WRITE_ENABLE | (2 << S6_TRISTRIP_PV_SHIFT)); /* _NEW_COLOR */ - if (1) { + if (intel->state.Color->AlphaEnabled) { int test = intel_translate_compare_func(intel->state.Color->AlphaFunc); GLubyte refByte; CLAMPED_FLOAT_TO_UBYTE(refByte, intel->state.Color->AlphaRef); - if (intel->state.Color->AlphaEnabled) - LIS6 |= S6_ALPHA_TEST_ENABLE; + LIS6 |= S6_ALPHA_TEST_ENABLE; LIS6 |= ((test << S6_ALPHA_TEST_FUNC_SHIFT) | (((GLuint) refByte) << S6_ALPHA_REF_SHIFT)); @@ -286,8 +281,9 @@ static void upload_S6( struct intel_context *intel ) /* _NEW_COLOR */ - if (1) { - + if (intel->state.Color->BlendEnabled && + !STATE_LOGICOP_ENABLED(&intel->state)) + { GLuint eqRGB = 
intel->state.Color->BlendEquationRGB; GLuint srcRGB = intel->state.Color->BlendSrcRGB; GLuint dstRGB = intel->state.Color->BlendDstRGB; @@ -296,9 +292,7 @@ static void upload_S6( struct intel_context *intel ) srcRGB = dstRGB = GL_ONE; } - if (intel->state.Color->BlendEnabled && - !STATE_LOGICOP_ENABLED(&intel->state)) - LIS6 |= S6_CBUF_BLEND_ENABLE; + LIS6 |= S6_CBUF_BLEND_ENABLE; LIS6 |= (SRC_BLND_FACT(intel_translate_blend_factor(srcRGB)) | DST_BLND_FACT(intel_translate_blend_factor(dstRGB)) | @@ -307,25 +301,20 @@ static void upload_S6( struct intel_context *intel ) /* _NEW_DEPTH */ - if (1) { + if (intel->state.Depth->Test) { GLint func = intel_translate_compare_func(intel->state.Depth->Func); LIS6 |= func << S6_DEPTH_TEST_FUNC_SHIFT; - if (intel->state.Depth->Test) { - LIS6 |= S6_DEPTH_TEST_ENABLE; - if (intel->state.Depth->Mask) - LIS6 |= S6_DEPTH_WRITE_ENABLE; - } + LIS6 |= S6_DEPTH_TEST_ENABLE; + if (intel->state.Depth->Mask) + LIS6 |= S6_DEPTH_WRITE_ENABLE; } - BEGIN_BATCH(2, 0); - OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | - I1_LOAD_S(6) | - 0); - OUT_BATCH(LIS6); - ADVANCE_BATCH(); - + if (LIS6 != i915->current.immediate[I915_IMMEDIATE_S6]) { + i915->current.immediate[I915_IMMEDIATE_S6] = LIS6; + intel->state.dirty.intel |= I915_NEW_IMMEDIATE; + } } const struct intel_tracked_state i915_upload_S6 = { @@ -342,6 +331,7 @@ const struct intel_tracked_state i915_upload_S6 = { */ static void upload_S7( struct intel_context *intel ) { + struct i915_context *i915 = i915_context( &intel->ctx ); GLfloat LIS7; /* _NEW_POLYGON @@ -349,13 +339,10 @@ static void upload_S7( struct intel_context *intel ) /* LIS7 = intel->state.Polygon->OffsetUnits * DEPTH_SCALE; */ LIS7 = 0; - BEGIN_BATCH(2, 0); - OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | - I1_LOAD_S(7) | - 0); - OUT_BATCH_F(LIS7); - ADVANCE_BATCH(); - + if (LIS7 != i915->current.immediate[I915_IMMEDIATE_S7]) { + i915->current.immediate[I915_IMMEDIATE_S7] = LIS7; + intel->state.dirty.intel |= I915_NEW_IMMEDIATE; + } 
} const struct intel_tracked_state i915_upload_S7 = { diff --git a/src/mesa/drivers/dri/i915tex/i915_state_static.c b/src/mesa/drivers/dri/i915tex/i915_state_static.c index ebbcfd7b8b..32131b7296 100644 --- a/src/mesa/drivers/dri/i915tex/i915_state_static.c +++ b/src/mesa/drivers/dri/i915tex/i915_state_static.c @@ -56,32 +56,103 @@ */ -/*********************************************************************** - * Depthbuffer - currently constant, but rotation would change that. - */ +static GLuint invarient_state[] = { + + (_3DSTATE_AA_CMD | + AA_LINE_ECAAR_WIDTH_ENABLE | + AA_LINE_ECAAR_WIDTH_1_0 | + AA_LINE_REGION_WIDTH_ENABLE | + AA_LINE_REGION_WIDTH_1_0), + +#if 1 + /* Could use these to reduce the size of vertices when the + * incoming array is constant. For now these are don't care + * items - maybe don't bother about setting them?? + */ + (_3DSTATE_DFLT_DIFFUSE_CMD), + (0), + + (_3DSTATE_DFLT_SPEC_CMD), + (0), + + (_3DSTATE_DFLT_Z_CMD), + (0), +#endif + + /* We support texture crossbar via the fragment shader, rather than + * with this mechanism. + */ + (_3DSTATE_COORD_SET_BINDINGS | + CSB_TCB(0, 0) | + CSB_TCB(1, 1) | + CSB_TCB(2, 2) | + CSB_TCB(3, 3) | + CSB_TCB(4, 4) | + CSB_TCB(5, 5) | + CSB_TCB(6, 6) | + CSB_TCB(7, 7)), + + /* Setup OpenGL rasterization state: + */ + (_3DSTATE_RASTER_RULES_CMD | + ENABLE_POINT_RASTER_RULE | + OGL_POINT_RASTER_RULE | + ENABLE_LINE_STRIP_PROVOKE_VRTX | + ENABLE_TRI_FAN_PROVOKE_VRTX | + LINE_STRIP_PROVOKE_VRTX(1) | + TRI_FAN_PROVOKE_VRTX(2) | + ENABLE_TEXKILL_3D_4D | + TEXKILL_4D), + + + /* For private depth buffers but shared color buffers, eg + * front-buffer rendering with a private depthbuffer. We don't do + * this. 
+ */ + (_3DSTATE_DEPTH_SUBRECT_DISABLE), +}; + -static void upload_buffers(struct intel_context *intel) +/* + */ +static void upload_static(struct intel_context *intel) { struct i915_context *i915 = i915_context( &intel->ctx ); struct intel_region *color_region = intel->state.draw_region; struct intel_region *depth_region = intel->state.depth_region; + struct i915_cache_packet packet; + GLuint i; GLuint dwords = ((color_region ? 3 : 0) + (depth_region ? 3 : 0) + - 2); + 2 + /* DV */ + 4 + /* SCISSOR */ + Elements(invarient_state)); GLuint relocs = ((color_region ? 1 : 0) + (depth_region ? 1 : 0)); - struct i915_cache_packet packet; - packet_init( &packet, I915_CACHE_BUFFERS, dwords, relocs ); + packet_init( &packet, I915_CACHE_STATIC, dwords, relocs ); + + /*********************************************************************** + * Misc invarient state packets + */ + for (i = 0; i < Elements(invarient_state); i++) + packet_dword( &packet, invarient_state[i] ); + + + /*********************************************************************** + * Buffers + */ if (color_region) { packet_dword( &packet, _3DSTATE_BUF_INFO_CMD ); packet_dword( &packet, BUF_3D_ID_COLOR_BACK | BUF_3D_PITCH(color_region->pitch * color_region->cpp) | +/* BUF_3D_TILED_SURFACE | */ +/* BUF_3D_TILE_WALK_X | */ BUF_3D_USE_FENCE); packet_reloc( &packet, color_region->buffer, @@ -94,6 +165,8 @@ static void upload_buffers(struct intel_context *intel) packet_dword( &packet, _3DSTATE_BUF_INFO_CMD ); packet_dword( &packet, BUF_3D_ID_DEPTH | BUF_3D_PITCH(depth_region->pitch * depth_region->cpp) | +/* BUF_3D_TILED_SURFACE | */ +/* BUF_3D_TILE_WALK_X | */ BUF_3D_USE_FENCE ); packet_reloc( &packet, depth_region->buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, @@ -102,124 +175,32 @@ static void upload_buffers(struct intel_context *intel) } + /* This might become dynamic state if it turns out adjusting the + * bias values can cure our point-rendering woes. 
+ */ packet_dword( &packet,_3DSTATE_DST_BUF_VARS_CMD); - packet_dword( &packet, DSTORG_HORT_BIAS(0x8) | /* .5 */ - DSTORG_VERT_BIAS(0x8) | /* .5 */ - LOD_PRECLAMP_OGL | - TEX_DEFAULT_COLOR_OGL | - DITHER_FULL_ALWAYS | - (color_region && color_region->cpp == 4 - ? DV_PF_8888 - : DV_PF_565) | - (depth_region && depth_region->cpp == 4 - ? DEPTH_FRMT_24_FIXED_8_OTHER - : DEPTH_FRMT_16_FIXED) ); - - i915_cache_emit( i915->cctx, &packet ); -} - -const struct intel_tracked_state i915_upload_buffers = { - .dirty = { - .mesa = 0, - .intel = INTEL_NEW_CBUF | INTEL_NEW_ZBUF | INTEL_NEW_FENCE, - .extra = 0 - }, - .update = upload_buffers -}; - - - -/*********************************************************************** - * Polygon stipple - * - * The i915 supports a 4x4 stipple natively, GL wants 32x32. - * Fortunately stipple is usually a repeating pattern. - * - * XXX: does stipple pattern need to be adjusted according to - * the window position? - * - * XXX: possibly need workaround for conform paths test. - */ - -static void upload_stipple( struct intel_context *intel ) -{ - struct i915_context *i915 = i915_context( &intel->ctx ); - GLuint st0 = _3DSTATE_STIPPLE; - GLuint st1 = 0; - - GLboolean hw_stipple_fallback = 0; - - /* _NEW_POLYGON, INTEL_NEW_REDUCED_PRIMITIVE + packet_dword( &packet, (DSTORG_HORT_BIAS(0x8) | /* .5 */ + DSTORG_VERT_BIAS(0x8) | /* .5 */ + LOD_PRECLAMP_OGL | + TEX_DEFAULT_COLOR_OGL | + DITHER_FULL_ALWAYS | + (color_region && color_region->cpp == 4 + ? DV_PF_8888 + : DV_PF_565) | + (depth_region && depth_region->cpp == 4 + ? DEPTH_FRMT_24_FIXED_8_OTHER + : DEPTH_FRMT_16_FIXED)) ); + + + /*********************************************************************** + * Scissor. + * + * Is it static or dynamic??? It is not understood by the hardware + * binner, so if we ever implement HWZ, it would be static under that + * scheme, or somehow not handled, or perhaps we would have to + * manually clip primitives to the scissor region. 
For now, we call + * it static. */ - if (intel->state.Polygon->StippleFlag && - intel->reduced_primitive == GL_TRIANGLES) { - - /* _NEW_POLYGONSTIPPLE - */ - const GLubyte *mask = (const GLubyte *)intel->state.PolygonStipple; - GLubyte p[4]; - GLint i, j, k; - - p[0] = mask[12] & 0xf; - p[0] |= p[0] << 4; - p[1] = mask[8] & 0xf; - p[1] |= p[1] << 4; - p[2] = mask[4] & 0xf; - p[2] |= p[2] << 4; - p[3] = mask[0] & 0xf; - p[3] |= p[3] << 4; - - st1 |= ST1_ENABLE; - - for (k = 0; k < 8; k++) { - for (j = 3; j >= 0; j--) { - for (i = 0; i < 4; i++, mask++) { - if (*mask != p[j]) { - hw_stipple_fallback = 1; - st1 &= ~ST1_ENABLE; - } - } - } - } - - st1 |= (((p[0] & 0xf) << 0) | - ((p[1] & 0xf) << 4) | - ((p[2] & 0xf) << 8) | - ((p[3] & 0xf) << 12)); - } - - assert(!hw_stipple_fallback); /* TODO */ - - { - struct i915_cache_packet packet; - - packet_init( &packet, I915_CACHE_STIPPLE, 2, 0 ); - packet_dword( &packet,st0); - packet_dword( &packet,st1); - i915_cache_emit( i915->cctx, &packet ); - } -} - - -const struct intel_tracked_state i915_upload_stipple = { - .dirty = { - .mesa = _NEW_POLYGONSTIPPLE, _NEW_POLYGON, - .intel = INTEL_NEW_REDUCED_PRIMITIVE, - .extra = 0 - }, - .update = upload_stipple -}; - - - -/*********************************************************************** - * Scissor. 
- */ - -static void upload_scissor( struct intel_context *intel ) -{ - struct i915_context *i915 = i915_context( &intel->ctx ); - struct i915_cache_packet packet; /* _NEW_SCISSOR, _NEW_BUFFERS */ @@ -253,141 +234,26 @@ static void upload_scissor( struct intel_context *intel ) x2 = CLAMP(x2, 0, intel->state.DrawBuffer->Width - 1); y2 = CLAMP(y2, 0, intel->state.DrawBuffer->Height - 1); - packet_init( &packet, I915_CACHE_SCISSOR, 4, 0 ); packet_dword( &packet,_3DSTATE_SCISSOR_ENABLE_CMD | ENABLE_SCISSOR_RECT); packet_dword( &packet,_3DSTATE_SCISSOR_RECT_0_CMD); packet_dword( &packet,(y1 << 16) | (x1 & 0xffff)); packet_dword( &packet,(y2 << 16) | (x2 & 0xffff)); - i915_cache_emit( i915->cctx, &packet ); } else { - packet_init( &packet, I915_CACHE_SCISSOR, 1, 0 ); packet_dword( &packet,_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT); - i915_cache_emit( i915->cctx, &packet ); } -} - -const struct intel_tracked_state i915_upload_scissor = { - .dirty = { - .mesa = _NEW_SCISSOR | _NEW_BUFFERS, - .intel = 0, - .extra = 0 - }, - .update = upload_scissor -}; - - - - -/*********************************************************************** - * Misc invarient state packets - */ - -static void upload_invarient( struct intel_context *intel ) -{ - struct i915_context *i915 = i915_context( &intel->ctx ); - GLuint i; - - static GLuint invarient_state[] = { - - (_3DSTATE_AA_CMD | - AA_LINE_ECAAR_WIDTH_ENABLE | - AA_LINE_ECAAR_WIDTH_1_0 | - AA_LINE_REGION_WIDTH_ENABLE | - AA_LINE_REGION_WIDTH_1_0), - - /* Could use these to reduce the size of vertices when the incoming - * array is constant. - */ - (_3DSTATE_DFLT_DIFFUSE_CMD), - (0), - - (_3DSTATE_DFLT_SPEC_CMD), - (0), - - (_3DSTATE_DFLT_Z_CMD), - (0), - - /* We support texture crossbar via the fragment shader, rather than - * with this mechanism. 
- */ - (_3DSTATE_COORD_SET_BINDINGS | - CSB_TCB(0, 0) | - CSB_TCB(1, 1) | - CSB_TCB(2, 2) | - CSB_TCB(3, 3) | - CSB_TCB(4, 4) | - CSB_TCB(5, 5) | - CSB_TCB(6, 6) | - CSB_TCB(7, 7)), - - /* Setup OpenGL rasterization state: - */ - (_3DSTATE_RASTER_RULES_CMD | - ENABLE_POINT_RASTER_RULE | - OGL_POINT_RASTER_RULE | - ENABLE_LINE_STRIP_PROVOKE_VRTX | - ENABLE_TRI_FAN_PROVOKE_VRTX | - LINE_STRIP_PROVOKE_VRTX(1) | - TRI_FAN_PROVOKE_VRTX(2) | - ENABLE_TEXKILL_3D_4D | - TEXKILL_4D), - - /* Need to initialize this to zero. - */ - (_3DSTATE_LOAD_STATE_IMMEDIATE_1 | - I1_LOAD_S(3) | - (0)), - (0), - - (_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT), - (_3DSTATE_SCISSOR_RECT_0_CMD), - (0), - (0), - - - /* For private depth buffers but shared color buffers, eg - * front-buffer rendering with a private depthbuffer. We don't do - * this. - */ - (_3DSTATE_DEPTH_SUBRECT_DISABLE), - - (_3DSTATE_BACKFACE_STENCIL_OPS | BFO_ENABLE_STENCIL_TWO_SIDE | 0) - }; - - /* Disable indirect state for now. - */ -#if 0 - BEGIN_BATCH(2, 0); - OUT_BATCH(_3DSTATE_LOAD_INDIRECT | 0); - OUT_BATCH(0); - ADVANCE_BATCH(); -#endif - - /* Will be nice if this can be preserved over several frames. I - * guess logical contexts would do much the same thing. - */ - { - struct i915_cache_packet packet; - - packet_init( &packet, I915_CACHE_INVARIENT, sizeof(invarient_state)/4, 0); - for (i = 0; i < sizeof(invarient_state)/4; i++) - packet_dword( &packet, invarient_state[i] ); - - i915_cache_emit( i915->cctx, &packet ); - } + i915_cache_emit( i915->cctx, &packet ); } -const struct intel_tracked_state i915_upload_invarient = { + +const struct intel_tracked_state i915_upload_static = { .dirty = { - .mesa = 0, - .intel = INTEL_NEW_CONTEXT, /* or less frequently? 
*/ + .mesa = _NEW_SCISSOR | _NEW_BUFFERS, + .intel = INTEL_NEW_CBUF | INTEL_NEW_ZBUF | INTEL_NEW_FENCE, .extra = 0 }, - .update = upload_invarient + .update = upload_static }; - - diff --git a/src/mesa/drivers/dri/i915tex/i915_vtbl.c b/src/mesa/drivers/dri/i915tex/i915_vtbl.c index 0e0f926a31..ebc692a554 100644 --- a/src/mesa/drivers/dri/i915tex/i915_vtbl.c +++ b/src/mesa/drivers/dri/i915tex/i915_vtbl.c @@ -68,9 +68,15 @@ static void i915_lost_hardware( struct intel_context *intel ) { struct i915_context *i915 = i915_context( &intel->ctx ); + /* This is required currently as we use the batchbuffer to hold all + * the cached items: + */ i915_clear_caches( i915->cctx ); - memset(&i915->dyn_indirect, 0, sizeof(i915->dyn_indirect)); + /* Update the batchbuffer id so the context tracker knows there has + * been a discontinuity. + */ + i915->current.id++; } diff --git a/src/mesa/drivers/dri/i915tex/intel_batchbuffer.c b/src/mesa/drivers/dri/i915tex/intel_batchbuffer.c index 7a6285153f..3db3c93417 100644 --- a/src/mesa/drivers/dri/i915tex/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/i915tex/intel_batchbuffer.c @@ -242,12 +242,14 @@ do_flush_locked(struct intel_batchbuffer *batch, r->offset, driBOOffset(r->buf), r->delta); } - if (INTEL_DEBUG & DEBUG_BATCH) + if (INTEL_DEBUG & DEBUG_BATCH) intel_dump_batchbuffer(batch, (GLubyte *)ptr); + driBOUnmap(batch->buffer); batch->map = NULL; + /* Throw away non-effective packets. Won't work once we have * hardware contexts which would preserve statechanges beyond a * single buffer. |