summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKeith Whitwell <keith@tungstengraphics.com>2007-03-18 09:51:14 +0000
committerKeith Whitwell <keith@tungstengraphics.com>2007-03-18 09:51:14 +0000
commit7c8321dd2790f2ec54387b9bf727193988308108 (patch)
tree03ed7c6f5385e71a66bb5c558a4dd05aefcb14a8
parent013b0c3138d9de0fc13b7348f9e8d68c95f81b3b (diff)
Add a final state differencing step to minimise emitted state.
Rearrange packets so that there is just a single static indirect state. Move stipple to dynamic state. Move all actual state emit code to the differencer.
-rw-r--r--src/mesa/drivers/dri/i915tex/Makefile1
-rw-r--r--src/mesa/drivers/dri/i915tex/i915_cache.c100
-rw-r--r--src/mesa/drivers/dri/i915tex/i915_cache.h11
-rw-r--r--src/mesa/drivers/dri/i915tex/i915_context.h67
-rw-r--r--src/mesa/drivers/dri/i915tex/i915_differencer.c283
-rw-r--r--src/mesa/drivers/dri/i915tex/i915_state.c30
-rw-r--r--src/mesa/drivers/dri/i915tex/i915_state.h18
-rw-r--r--src/mesa/drivers/dri/i915tex/i915_state_dynamic.c167
-rw-r--r--src/mesa/drivers/dri/i915tex/i915_state_immediate.c119
-rw-r--r--src/mesa/drivers/dri/i915tex/i915_state_static.c354
-rw-r--r--src/mesa/drivers/dri/i915tex/i915_vtbl.c8
-rw-r--r--src/mesa/drivers/dri/i915tex/intel_batchbuffer.c4
12 files changed, 643 insertions, 519 deletions
diff --git a/src/mesa/drivers/dri/i915tex/Makefile b/src/mesa/drivers/dri/i915tex/Makefile
index 7c7ad84d51..90d0e1396c 100644
--- a/src/mesa/drivers/dri/i915tex/Makefile
+++ b/src/mesa/drivers/dri/i915tex/Makefile
@@ -9,6 +9,7 @@ MINIGLX_SOURCES = server/intel_dri.c
DRIVER_SOURCES = \
i915_cache.c \
i915_context.c \
+ i915_differencer.c \
i915_fpc.c \
i915_fpc_debug.c \
i915_fpc_emit.c \
diff --git a/src/mesa/drivers/dri/i915tex/i915_cache.c b/src/mesa/drivers/dri/i915tex/i915_cache.c
index 448d8a4300..51d7b5eb73 100644
--- a/src/mesa/drivers/dri/i915tex/i915_cache.c
+++ b/src/mesa/drivers/dri/i915tex/i915_cache.c
@@ -58,8 +58,6 @@ struct i915_cache {
GLuint state_type;
struct i915_cache_item **items;
GLuint size, n_items;
- GLuint force_load_flag;
- GLuint last_addr;
};
struct i915_cache_context {
@@ -68,22 +66,6 @@ struct i915_cache_context {
};
-static void emit_load_indirect( struct intel_context *intel,
- GLuint state_type,
- GLuint force_load_flag,
- GLuint offset,
- GLuint dwords )
-{
- BEGIN_BATCH(3,0);
- OUT_BATCH( _3DSTATE_LOAD_INDIRECT | state_type | (1<<14) | 1);
- OUT_RELOC( intel->batch->buffer,
- DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_EXE,
- DRM_BO_MASK_MEM | DRM_BO_FLAG_EXE,
- ( offset | force_load_flag | SIS0_BUFFER_VALID ) );
- OUT_BATCH( dwords - 1 );
- ADVANCE_BATCH();
-}
-
static GLuint emit_packet( struct intel_context *intel,
const struct i915_cache_packet *packet )
@@ -113,7 +95,6 @@ static GLuint emit_packet( struct intel_context *intel,
packet->dword[packet->reloc[i].dword].u );
}
-
return offset;
}
@@ -229,29 +210,7 @@ void i915_cache_emit(struct i915_cache_context *cctx,
struct intel_context *intel = &cctx->i915->intel;
GLuint size = packet_size( packet );
-#if 1
- GLuint hash = hash_packet( packet, size );
- struct i915_cache *cache = &cctx->cache[packet->cache_id];
- GLuint addr;
-
- addr = search_cache( cache, hash, packet->dword, size );
- if (addr == 0)
- addr = upload_cache( cache, intel, hash, packet, size );
-
- /* Always have to tell the hardware about it, unless this is the
- * same as last time!
- */
- if (addr != cache->last_addr) {
- emit_load_indirect( intel,
- cache->state_type,
- cache->force_load_flag,
- addr,
- packet->nr_dwords );
-
- cache->force_load_flag = 0;
- cache->last_addr = addr;
- }
-#else
+#ifdef I915_NO_INDIRECT_STATE
GLuint i;
BEGIN_BATCH(packet->nr_dwords, 0);
@@ -268,6 +227,17 @@ void i915_cache_emit(struct i915_cache_context *cctx,
OUT_BATCH(packet->dword[i].u);
ADVANCE_BATCH();
+#else
+ GLuint hash = hash_packet( packet, size );
+ struct i915_cache *cache = &cctx->cache[packet->cache_id];
+ GLuint addr;
+
+ addr = search_cache( cache, hash, packet->dword, size );
+ if (addr == 0)
+ addr = upload_cache( cache, intel, hash, packet, size );
+
+ cctx->i915->current.offsets[packet->cache_id] = addr;
+ cctx->i915->current.sizes[packet->cache_id] = packet->nr_dwords;
#endif
}
@@ -278,6 +248,9 @@ void i915_cache_emit(struct i915_cache_context *cctx,
* to fixup by the memory manager as they contain absolute agp
* offsets, so we need to ensure there is a fresh version of the
* struct available to receive the fixup.
+ *
+ * Also, temporarily, need to do this for every cache because they are
+ * all being stored in the batchbuffer!!
*/
static void clear_cache( struct i915_cache *cache )
{
@@ -294,11 +267,6 @@ static void clear_cache( struct i915_cache *cache )
}
cache->n_items = 0;
-
- /* Make sure hardware knows we've abandoned old data:
- */
- cache->force_load_flag = SIS0_FORCE_LOAD;
- cache->last_addr = ~0;
}
@@ -326,6 +294,16 @@ struct i915_cache_context *i915_create_caches( struct i915_context *i915 )
cctx->i915 = i915;
init_cache( cctx,
+ "STATIC",
+ I915_CACHE_STATIC,
+ LI0_STATE_STATIC_INDIRECT );
+
+ init_cache( cctx,
+ "MAP",
+ I915_CACHE_MAP,
+ LI0_STATE_MAP );
+
+ init_cache( cctx,
"SAMPLER",
I915_CACHE_SAMPLER,
LI0_STATE_SAMPLER );
@@ -340,34 +318,6 @@ struct i915_cache_context *i915_create_caches( struct i915_context *i915 )
I915_CACHE_CONSTANTS,
LI0_STATE_CONSTANTS );
- init_cache( cctx,
- "MAP",
- I915_CACHE_MAP,
- LI0_STATE_MAP );
-
- /* These are all part of the LI0_STATIC_INDIRECT bucket. Not
- * really sure how to handle these.
- */
- init_cache( cctx,
- "BUFFERS",
- I915_CACHE_BUFFERS,
- LI0_STATE_STATIC_INDIRECT );
-
- init_cache( cctx,
- "STIPPLE",
- I915_CACHE_STIPPLE,
- LI0_STATE_STATIC_INDIRECT );
-
- init_cache( cctx,
- "SCISSOR",
- I915_CACHE_SCISSOR,
- LI0_STATE_STATIC_INDIRECT );
-
- init_cache( cctx,
- "INVARIENT",
- I915_CACHE_INVARIENT,
- LI0_STATE_STATIC_INDIRECT );
-
return cctx;
}
diff --git a/src/mesa/drivers/dri/i915tex/i915_cache.h b/src/mesa/drivers/dri/i915tex/i915_cache.h
index ca48b2daa1..41ee2a162d 100644
--- a/src/mesa/drivers/dri/i915tex/i915_cache.h
+++ b/src/mesa/drivers/dri/i915tex/i915_cache.h
@@ -37,17 +37,6 @@ struct i915_cache_context;
struct i915_cache_packet;
struct i915_context;
-enum {
- I915_CACHE_SAMPLER,
- I915_CACHE_PROGRAM,
- I915_CACHE_CONSTANTS,
- I915_CACHE_MAP,
- I915_CACHE_BUFFERS,
- I915_CACHE_STIPPLE,
- I915_CACHE_SCISSOR,
- I915_CACHE_INVARIENT,
- I915_MAX_CACHE
-};
struct i915_cache_context *i915_create_caches( struct i915_context *i915 );
diff --git a/src/mesa/drivers/dri/i915tex/i915_context.h b/src/mesa/drivers/dri/i915tex/i915_context.h
index 9ae3ee97e9..ecb26ebc9a 100644
--- a/src/mesa/drivers/dri/i915tex/i915_context.h
+++ b/src/mesa/drivers/dri/i915tex/i915_context.h
@@ -38,7 +38,8 @@
#define I915_NEW_INPUT_SIZES (INTEL_NEW_DRIVER0<<0)
#define I915_NEW_VERTEX_FORMAT (INTEL_NEW_DRIVER0<<1)
#define I915_NEW_DYNAMIC_INDIRECT (INTEL_NEW_DRIVER0<<2)
-
+#define I915_NEW_CACHED_INDIRECT (INTEL_NEW_DRIVER0<<3)
+#define I915_NEW_IMMEDIATE (INTEL_NEW_DRIVER0<<4)
/* Hardware version of a parsed fragment program. "Derived" from the
@@ -81,18 +82,60 @@ struct i915_fragment_program
#define I915_TEX_UNITS 8
#define I915_DYNAMIC_MODES4 0
-#define I915_DYNAMIC_DEPTHSCALE_0 1
+#define I915_DYNAMIC_DEPTHSCALE_0 1 /* just the header */
#define I915_DYNAMIC_DEPTHSCALE_1 2
#define I915_DYNAMIC_IAB 3
-#define I915_DYNAMIC_BC_0 4
+#define I915_DYNAMIC_BC_0 4 /* just the header */
#define I915_DYNAMIC_BC_1 5
-#define I915_DYNAMIC_BFO_0 6
+#define I915_DYNAMIC_BFO_0 6
#define I915_DYNAMIC_BFO_1 7
-#define I915_DYNAMIC_SIZE 8
+#define I915_DYNAMIC_STP_0 8
+#define I915_DYNAMIC_STP_1 9
+#define I915_MAX_DYNAMIC 10
+
+
+#define I915_IMMEDIATE_S0 0
+#define I915_IMMEDIATE_S1 1
+#define I915_IMMEDIATE_S2 2
+#define I915_IMMEDIATE_S3 3
+#define I915_IMMEDIATE_S4 4
+#define I915_IMMEDIATE_S5 5
+#define I915_IMMEDIATE_S6 6
+#define I915_IMMEDIATE_S7 7
+#define I915_MAX_IMMEDIATE 8
+
+/* These must match the order of LI0_STATE_* bits, as they will be used
+ * to generate hardware packets:
+ */
+#define I915_CACHE_STATIC 0
+#define I915_CACHE_ZERO 1 /* placeholder */
+#define I915_CACHE_SAMPLER 2
+#define I915_CACHE_MAP 3
+#define I915_CACHE_PROGRAM 4
+#define I915_CACHE_CONSTANTS 5
+#define I915_MAX_CACHE 6
struct i915_cache_context;
+/* Use to calculate differences between state emitted to hardware and
+ * current driver-calculated state.
+ *
+ * struct i915_context holds two of these: 'current' (what the driver
+ * has calculated) and 'hardware' (what was last emitted).
+ */
+struct i915_state
+{
+ /* Immediate state dwords, indexed by I915_IMMEDIATE_S0..S7. */
+ GLuint immediate[I915_MAX_IMMEDIATE];
+ /* Dynamic indirect state dwords, indexed by I915_DYNAMIC_*. */
+ GLuint dynamic[I915_MAX_DYNAMIC];
+
+ /* Per-cache packet offset and size in dwords, indexed by
+ * I915_CACHE_*.
+ */
+ GLuint offsets[I915_MAX_CACHE];
+ GLuint sizes[I915_MAX_CACHE];
+
+ /* Something for vbo:
+ */
+ struct _DriBufferObject *vbo;
+
+ GLuint id; /* track lost context events */
+};
+
struct i915_context
{
@@ -119,15 +162,13 @@ struct i915_context
GLuint LIS4;
} vertex_format;
- /* Used for short-circuiting packets. Won't work for packets
- * containing relocations. This is zero'd out after lost_context
- * events.
+ /* Used for short-circuiting state updates. Won't work for packets
+ * containing relocations, unless they are specifically invalidated
+ * after batchbuffer flushes - currently we zero out the whole
+ * state after lost_context events.
*/
- struct {
- GLuint buf[I915_DYNAMIC_SIZE];
- GLboolean done_reset;
- } dyn_indirect;
-
+ struct i915_state current;
+ struct i915_state hardware;
};
diff --git a/src/mesa/drivers/dri/i915tex/i915_differencer.c b/src/mesa/drivers/dri/i915tex/i915_differencer.c
new file mode 100644
index 0000000000..15cb630a80
--- /dev/null
+++ b/src/mesa/drivers/dri/i915tex/i915_differencer.c
@@ -0,0 +1,283 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "i915_context.h"
+#include "i915_state.h"
+#include "i915_reg.h"
+#include "intel_batchbuffer.h"
+
+
+/* Return the number of set bits in mask.
+ *
+ * Each pass clears the lowest set bit, so the loop runs once per set
+ * bit and terminates for every input.  Walking a probe bit with
+ * "i <<= 1" while "mask >= i" does not: once the top bit of mask is
+ * set the probe wraps to zero and the condition stays true forever.
+ */
+static GLuint count_bits( GLuint mask )
+{
+   GLuint nr = 0;
+
+   for ( ; mask != 0; mask &= mask - 1)
+      nr++;
+
+   return nr;
+}
+
+/* Emit one LOAD_STATE_IMMEDIATE_1 packet holding every immediate
+ * dword (S0..S7) that differs between 'from' and 'to'.
+ *
+ * S0 is written as a relocation against to->vbo, so it is also marked
+ * when only the vbo pointer changed, and it is never emitted while
+ * to->vbo is NULL.  Differing state ids select all dwords.
+ */
+static void emit_immediates( struct intel_context *intel,
+                             const struct i915_state *from,
+                             const struct i915_state *to )
+{
+   GLuint changed = 0;
+   GLuint slot;
+
+   if (from->id == to->id) {
+      /* Same context: pick out the dwords whose values moved. */
+      for (slot = 0; slot < I915_MAX_IMMEDIATE; slot++) {
+         if (to->immediate[slot] != from->immediate[slot])
+            changed |= 1<<slot;
+      }
+
+      if (to->vbo != from->vbo)
+         changed |= 1<<I915_IMMEDIATE_S0;
+   }
+   else {
+      /* Lost context: everything has to go out again. */
+      changed = (1<<I915_MAX_IMMEDIATE) - 1;
+   }
+
+   /* Without a vbo there is nothing to relocate S0 against. */
+   if (!to->vbo)
+      changed &= ~(1<<I915_IMMEDIATE_S0);
+
+   if (changed != 0) {
+      GLuint count = count_bits(changed);
+
+      /* One header dword plus one dword per selected slot. */
+      BEGIN_BATCH( count + 1, 0 );
+      OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
+                (changed << 4) |
+                (count - 1));
+
+      if (changed & (1<<0)) {
+         OUT_RELOC(to->vbo,
+                   DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+                   DRM_BO_MASK_MEM | DRM_BO_FLAG_READ,
+                   to->immediate[0]);
+      }
+
+      for (slot = 1; slot < I915_MAX_IMMEDIATE; slot++) {
+         if (changed & (1<<slot)) {
+            OUT_BATCH( to->immediate[slot] );
+         }
+      }
+
+      ADVANCE_BATCH();
+   }
+}
+
+
+
+
+
+/* Macro to identify whole packets that differ in any of their dwords.
+ *
+ * idx is the packet's first slot in i915_state.dynamic[] and nr its
+ * length in dwords.  Expands in a scope that provides 'from', 'to',
+ * 'i', 'dirty' and 'size': on the first differing dword, nr bits
+ * starting at idx are set in 'dirty' and 'size' grows by nr.
+ *
+ * Arguments are parenthesized so that expressions can be passed
+ * safely.
+ */
+#define CHECK( idx, nr ) do {                        \
+   for (i = (idx); i < (idx)+(nr); i++) {            \
+      if (to->dynamic[i] != from->dynamic[i]) {      \
+         dirty |= ((1<<(nr))-1) << (idx);            \
+         size += (nr);                               \
+         break;                                      \
+      }                                              \
+   }                                                 \
+} while (0)
+
+
+/* Work out which dynamic-indirect packets differ between 'from' and
+ * 'to', and how many dwords of dynamic-segment space they need.
+ *
+ * Differing state ids (lost context) select every dword and request
+ * DIS0_BUFFER_RESET.  If the update would cross a 4k boundary in the
+ * dynamic segment, the rest of the current page is zero-filled and
+ * the full state is selected instead.
+ *
+ * The code that emits the LOAD_INDIRECT packet and copies the dwords
+ * is still disabled ("if (0)").
+ */
+static void emit_dynamic_indirect( struct intel_context *intel,
+                                   const struct i915_state *from,
+                                   const struct i915_state *to )
+{
+   GLuint offset, pagetop;
+   GLuint size = 0;            /* dwords selected; CHECK() adds to this */
+   GLuint dirty = 0, flag = 0;
+   GLuint i;
+
+   /* Lost context?
+    */
+   if (from->id != to->id) {
+      dirty = (1 << I915_MAX_DYNAMIC) - 1;
+      flag = DIS0_BUFFER_RESET;
+      size = I915_MAX_DYNAMIC;
+   }
+   else {
+      /* Otherwise, compare the two states
+       */
+      CHECK( I915_DYNAMIC_MODES4, 1 );
+      CHECK( I915_DYNAMIC_DEPTHSCALE_0, 2 );
+      CHECK( I915_DYNAMIC_IAB, 1 );
+      CHECK( I915_DYNAMIC_BC_0, 2 );
+      CHECK( I915_DYNAMIC_BFO_0, 2 );
+      CHECK( I915_DYNAMIC_STP_0, 2 );
+   }
+
+
+   offset = intel->batch->segment_finish_offset[SEGMENT_DYNAMIC_INDIRECT];
+   pagetop = ALIGN(offset, 4096);
+
+   /* check if we cross a 4k boundary and if so pad to 4k and emit
+    * full state.
+    */
+   if (pagetop != ALIGN(offset + size * 4, 4096))
+   {
+      dirty = (1<<I915_MAX_DYNAMIC)-1;
+      size = I915_MAX_DYNAMIC;
+      memset(intel->batch->map + offset, 0, pagetop - offset);
+      offset = pagetop;
+   }
+
+   /* Emit:
+    *
+    * NOTE(review): 'size' counts dwords above (the page check uses
+    * size * 4) but is added to byte offsets below - confirm the units
+    * before enabling this path.
+    */
+   if (0) {
+      GLuint segment = SEGMENT_DYNAMIC_INDIRECT;
+      GLuint *ptr;
+
+      /* Emit the "load state" command,
+       */
+      BEGIN_BATCH(2,0);
+      OUT_BATCH( _3DSTATE_LOAD_INDIRECT | LI0_STATE_DYNAMIC_INDIRECT | (1<<14) | 0);
+      OUT_RELOC( intel->batch->buffer,
+                 DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_EXE,
+                 DRM_BO_MASK_MEM | DRM_BO_FLAG_EXE,
+                 ((offset + size - 4) | DIS0_BUFFER_VALID | flag) );
+      ADVANCE_BATCH();
+
+      /* XXX:
+       */
+      assert( offset + size < intel->batch->segment_max_offset[segment]);
+      intel->batch->segment_finish_offset[segment] += size;
+
+      ptr = (GLuint *)(intel->batch->map + offset);
+
+      /* Finally emit the state:
+       */
+      for (i = 0; i < I915_MAX_DYNAMIC; i++) {
+         if (dirty & (1<<i)) {
+            *ptr++ = to->dynamic[i];
+         }
+      }
+   }
+}
+
+
+
+/* Emit one LOAD_INDIRECT packet pointing the hardware at every cached
+ * state packet whose offset differs between 'from' and 'to'.
+ *
+ * Differing state ids (lost context) select every cache except the
+ * I915_CACHE_ZERO placeholder and add SIS0_FORCE_LOAD.  Caches whose
+ * size in 'to' is zero are never emitted.
+ */
+static void emit_cached_indirect( struct intel_context *intel,
+                                  const struct i915_state *from,
+                                  const struct i915_state *to )
+{
+   GLuint flag = 0;
+   GLuint dirty = 0;
+   GLuint i;
+
+   if (from->id != to->id) {
+      dirty = (1<<I915_MAX_CACHE) - 1;
+      dirty &= ~(1<<I915_CACHE_ZERO); /* clear out placeholder */
+      flag = SIS0_FORCE_LOAD;
+   }
+   else {
+      /* Checking the offsets is sufficient - no need to examine sizes as
+       * they don't change independently of offsets.
+       */
+      for (i = 0; i < I915_MAX_CACHE; i++) {
+         if (from->offsets[i] != to->offsets[i]) {
+            dirty |= 1<<i;
+         }
+      }
+   }
+
+   /* Nothing has been uploaded for a cache with zero size. */
+   for (i = 0; i < I915_MAX_CACHE; i++) {
+      if (to->sizes[i] == 0)
+         dirty &= ~(1<<i);
+   }
+
+   /* Emit the load indirect packet.  The actual data has already been
+    * emitted to the caches.
+    */
+   if (dirty) {
+      GLuint nr = count_bits(dirty);
+      GLuint size = nr * 2 + 1;   /* header + (reloc, size) per cache */
+
+      /* Reserve room for the whole packet, not just two dwords. */
+      BEGIN_BATCH(size, 0);
+      OUT_BATCH( _3DSTATE_LOAD_INDIRECT | (dirty<<8) | (1<<14) | (size - 2));
+
+      for (i = 0; i < I915_MAX_CACHE; i++) {
+         if (dirty & (1<<i)) {
+            OUT_RELOC( intel->batch->buffer,
+                       DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_EXE,
+                       DRM_BO_MASK_MEM | DRM_BO_FLAG_EXE,
+                       ( to->offsets[i] | flag | SIS0_BUFFER_VALID ) );
+
+            /* NOTE(review): the per-cache emit this replaces wrote
+             * "dwords - 1" here - confirm which form is expected.
+             */
+            OUT_BATCH( to->sizes[i] );
+         }
+      }
+
+      ADVANCE_BATCH();
+   }
+}
+
+
+/* Tracked-state update hook.  For each state group flagged dirty,
+ * emit the difference between i915->hardware and i915->current, then
+ * record the current state as the hardware state.
+ */
+static void state_differencer( struct intel_context *intel )
+{
+   struct i915_context *i915 = i915_context( &intel->ctx );
+   struct i915_state *emitted = &i915->hardware;
+   const struct i915_state *wanted = &i915->current;
+   const GLuint pending = intel->state.dirty.intel;
+
+   if (pending & I915_NEW_IMMEDIATE) {
+      emit_immediates( intel, emitted, wanted );
+   }
+
+   if (pending & I915_NEW_DYNAMIC_INDIRECT) {
+      emit_dynamic_indirect( intel, emitted, wanted );
+   }
+
+   if (pending & I915_NEW_CACHED_INDIRECT) {
+      emit_cached_indirect( intel, emitted, wanted );
+   }
+
+   /* The whole snapshot is copied, including groups not emitted above. */
+   memcpy(emitted, wanted, sizeof(*wanted));
+}
+
+/* Tracked-state atom for the differencing step.  Its update hook is
+ * state_differencer; the dirty mask lists the three I915_NEW_* state
+ * groups the differencer emits, plus INTEL_NEW_CONTEXT.
+ */
+const struct intel_tracked_state i915_state_differencer = {
+ .dirty = {
+ .mesa = 0,
+ .intel = (I915_NEW_DYNAMIC_INDIRECT |
+ I915_NEW_CACHED_INDIRECT |
+ I915_NEW_IMMEDIATE |
+ INTEL_NEW_CONTEXT),
+ .extra = 0
+ },
+ .update = state_differencer
+};
+
+
+
diff --git a/src/mesa/drivers/dri/i915tex/i915_state.c b/src/mesa/drivers/dri/i915tex/i915_state.c
index 237e30f816..69ef918e3b 100644
--- a/src/mesa/drivers/dri/i915tex/i915_state.c
+++ b/src/mesa/drivers/dri/i915tex/i915_state.c
@@ -63,7 +63,7 @@ const struct intel_tracked_state *atoms[] =
*/
&i915_vertex_format,
- /* Immediate state. Don't make any effort to combine packets yet.
+ /* Immediate state.
*/
&i915_upload_S0S1,
&i915_upload_S2S4,
@@ -71,29 +71,30 @@ const struct intel_tracked_state *atoms[] =
&i915_upload_S6,
&i915_upload_S7,
- /* Dynamic indirect. Packets are combined in a final step.
+ /* Dynamic indirect.
*/
&i915_upload_BFO,
&i915_upload_BLENDCOLOR,
&i915_upload_DEPTHSCALE,
-/* &i915_upload_FOGCOLOR, */
-/* &i915_upload_FOGMODE, */
&i915_upload_IAB,
&i915_upload_MODES4,
- &i915_upload_dynamic_indirect,
-
- /* Static indirect state.
- */
- &i915_upload_invarient,
- &i915_upload_buffers,
- &i915_upload_scissor,
- &i915_upload_stipple,
+ &i915_upload_STIPPLE,
/* Other indirect state. Also includes program state, above.
*/
&i915_upload_maps, /* must do before samplers */
&i915_upload_samplers,
- &i915_upload_constants /* will be patched out at runtime */
+ &i915_upload_constants, /* will be patched out at runtime */
+ &i915_upload_static,
+
+
+ /* Combine packets, diff against hardware state and emit a minimal
+ * set of changes:
+ *
+ * XXX: Could delay this and only make this final step at the point
+ * where the first triangle gets drawn.
+ */
+ &i915_state_differencer,
};
@@ -115,6 +116,9 @@ void i915_init_state( struct i915_context *i915 )
_mesa_memcpy(&i915->constants.tracked_state,
&i915_upload_constants,
sizeof(i915_upload_constants));
+
+ i915->hardware.id = 0;
+ i915->current.id = 1;
}
diff --git a/src/mesa/drivers/dri/i915tex/i915_state.h b/src/mesa/drivers/dri/i915tex/i915_state.h
index ca625bf3ff..1d430f9a9c 100644
--- a/src/mesa/drivers/dri/i915tex/i915_state.h
+++ b/src/mesa/drivers/dri/i915tex/i915_state.h
@@ -61,21 +61,20 @@ const struct intel_tracked_state i915_upload_FOGCOLOR;
const struct intel_tracked_state i915_upload_FOGMODE;
const struct intel_tracked_state i915_upload_IAB;
const struct intel_tracked_state i915_upload_MODES4;
+const struct intel_tracked_state i915_upload_STIPPLE;
const struct intel_tracked_state i915_upload_dynamic_indirect;
-/* Static indirect:
- */
-const struct intel_tracked_state i915_upload_invarient;
-const struct intel_tracked_state i915_upload_buffers;
-const struct intel_tracked_state i915_upload_scissor;
-const struct intel_tracked_state i915_upload_stipple;
-
/* Other indirect:
*/
const struct intel_tracked_state i915_upload_constants;
const struct intel_tracked_state i915_upload_program;
const struct intel_tracked_state i915_upload_maps;
const struct intel_tracked_state i915_upload_samplers;
+const struct intel_tracked_state i915_upload_static;
+
+/* Perform state differencing and update hardware:
+ */
+const struct intel_tracked_state i915_state_differencer;
static INLINE GLuint
@@ -108,5 +107,10 @@ static INLINE GLint S_FIXED(GLfloat value, GLuint frac_bits)
return value * (1<<frac_bits);
}
+/* Round x up using the mask (align - 1).  The result is a multiple of
+ * align only when align is a power of two.
+ */
+static INLINE GLuint ALIGN(GLuint x, GLuint align)
+{
+   const GLuint mask = align - 1;
+
+   return (x + mask) & ~mask;
+}
+
#endif
diff --git a/src/mesa/drivers/dri/i915tex/i915_state_dynamic.c b/src/mesa/drivers/dri/i915tex/i915_state_dynamic.c
index e485184536..0ceace203d 100644
--- a/src/mesa/drivers/dri/i915tex/i915_state_dynamic.c
+++ b/src/mesa/drivers/dri/i915tex/i915_state_dynamic.c
@@ -58,22 +58,21 @@
* state every time. Next would be to diff against previous, but note
*/
-static void set_dynamic_indirect( struct intel_context *intel,
- GLuint offset,
- const GLuint *src,
- GLuint size )
+static inline void set_dynamic_indirect( struct intel_context *intel,
+ GLuint offset,
+ const GLuint *src,
+ GLuint size )
{
#if 1
struct i915_context *i915 = i915_context( &intel->ctx );
- GLuint *dest = i915->dyn_indirect.buf + offset;
- GLuint i;
+ GLuint *dst = i915->current.dynamic + offset;
+
+ if (memcmp(dst, src, size * 4) == 0)
+ return;
+
+ intel->state.dirty.intel |= I915_NEW_DYNAMIC_INDIRECT;
+ memcpy(dst, src, size * 4);
- for (i = 0; i < size; i++) {
- if (dest[i] != src[i]) {
- dest[i] = src[i];
- intel->state.dirty.intel |= I915_NEW_DYNAMIC_INDIRECT;
- }
- }
#else
GLuint i;
BEGIN_BATCH(size, 0);
@@ -92,7 +91,7 @@ static void upload_MODES4( struct intel_context *intel )
GLuint modes4 = 0;
/* _NEW_STENCIL */
- if (intel->state.Stencil->Enabled) {
+ if (1 || intel->state.Stencil->Enabled) {
GLint testmask = intel->state.Stencil->ValueMask[0] & 0xff;
GLint writemask = intel->state.Stencil->WriteMask[0] & 0xff;
@@ -104,7 +103,7 @@ static void upload_MODES4( struct intel_context *intel )
}
/* _NEW_COLOR */
- if (intel->state.Color->_LogicOpEnabled) {
+ if (1 || intel->state.Color->_LogicOpEnabled) {
modes4 |= (_3DSTATE_MODES_4_CMD |
ENABLE_LOGIC_OP_FUNC |
LOGIC_OP_FUNC(intel_translate_logic_op(intel->state.Color->LogicOp)));
@@ -141,7 +140,7 @@ static void upload_BFO( struct intel_context *intel )
/* _NEW_STENCIL
*/
- if (intel->state.Stencil->Enabled) {
+ if (1 || intel->state.Stencil->Enabled) {
if (intel->state.Stencil->TestTwoSide) {
GLint test = intel_translate_compare_func(intel->state.Stencil->Function[1]);
GLint fop = intel_translate_stencil_op(intel->state.Stencil->FailFunc[1]);
@@ -210,7 +209,7 @@ static void upload_BLENDCOLOR( struct intel_context *intel )
/* _NEW_COLOR
*/
- if (intel->state.Color->BlendEnabled) {
+ if (1 || intel->state.Color->BlendEnabled) {
const GLfloat *color = intel->state.Color->BlendColor;
GLubyte r, g, b, a;
@@ -221,7 +220,6 @@ static void upload_BLENDCOLOR( struct intel_context *intel )
bc[0] = (_3DSTATE_CONST_BLEND_COLOR_CMD);
bc[1] = (a << 24) | (r << 16) | (g << 8) | b;
-
}
set_dynamic_indirect( intel,
@@ -247,7 +245,7 @@ static void upload_IAB( struct intel_context *intel )
{
GLuint iab = 0;
- if (intel->state.Color->BlendEnabled) {
+ if (1 || intel->state.Color->BlendEnabled) {
GLuint eqRGB = intel->state.Color->BlendEquationRGB;
GLuint eqA = intel->state.Color->BlendEquationA;
GLuint srcRGB = intel->state.Color->BlendSrcRGB;
@@ -312,7 +310,7 @@ static void upload_DEPTHSCALE( struct intel_context *intel )
memset( ds, 0, sizeof(ds) );
- if (intel->state.Polygon->OffsetFill) {
+ if (1 || intel->state.Polygon->OffsetFill) {
ds[0].u = (_3DSTATE_DEPTH_OFFSET_SCALE);
ds[1].f = 0; /* XXX */
@@ -334,87 +332,80 @@ const struct intel_tracked_state i915_upload_DEPTHSCALE = {
.update = upload_DEPTHSCALE
};
-/***********************************************************************
- * Do the group emit in a single packet.
- */
-
-#define CHECK( idx, nr ) do { \
- if (i915->dyn_indirect.buf[idx] != 0) { \
- GLint i; \
- for (i = 0; i < nr; i++) \
- buf[count++] = i915->dyn_indirect.buf[idx+i]; \
- } \
-} while (0)
+/***********************************************************************
+ * Polygon stipple
+ *
+ * The i915 supports a 4x4 stipple natively, GL wants 32x32.
+ * Fortunately stipple is usually a repeating pattern.
+ *
+ * XXX: does stipple pattern need to be adjusted according to
+ * the window position?
+ *
+ * XXX: possibly need workaround for conform paths test.
+ */
-static void emit_indirect( struct intel_context *intel )
+static void upload_stipple( struct intel_context *intel )
{
- struct i915_context *i915 = i915_context( &intel->ctx );
- GLboolean active;
- GLuint i;
-
- /* XXX: need to check if we wrap 4kb and if so pad.
- */
-/* GLuint buf[I915_DYNAMIC_SIZE], count = 0; */
-/* CHECK( I915_DYNAMIC_MODES4, 1 ); */
-/* CHECK( I915_DYNAMIC_DEPTHSCALE_0, 2 ); */
-/* CHECK( I915_DYNAMIC_IAB, 1 ); */
-/* CHECK( I915_DYNAMIC_BC_0, 2 ); */
-/* CHECK( I915_DYNAMIC_BFO_0, 2 ); */
-
+ GLboolean hw_stipple_fallback = 0;
+ GLuint st[2];
-
- /* Or just emit the whole lot, zeros and all (fix later...):
- */
- for (active = 0, i = 0; i < I915_DYNAMIC_SIZE; i++)
- if (i915->dyn_indirect.buf[i] != 0) {
- active = 1;
- break;
- }
-
-
-
- /* Also - want to check that something has changed & we're not just
- * re-emitting the same stuff.
+ st[0] = _3DSTATE_STIPPLE;
+ st[1] = 0;
+
+ /* _NEW_POLYGON, INTEL_NEW_REDUCED_PRIMITIVE
*/
- if (active) {
- GLuint size = I915_DYNAMIC_SIZE * 4;
- GLuint flag = i915->dyn_indirect.done_reset ? 0 : DIS0_BUFFER_RESET;
- GLuint segment = SEGMENT_DYNAMIC_INDIRECT;
- GLuint offset = intel->batch->segment_finish_offset[segment];
-
- i915->dyn_indirect.done_reset = 1;
-
- BEGIN_BATCH(2,0);
- OUT_BATCH( _3DSTATE_LOAD_INDIRECT | LI0_STATE_DYNAMIC_INDIRECT | (1<<14) | 0);
- OUT_RELOC( intel->batch->buffer,
- DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_EXE,
- DRM_BO_MASK_MEM | DRM_BO_FLAG_EXE,
- ((offset + size - 4) | DIS0_BUFFER_VALID | flag) );
- ADVANCE_BATCH();
-
- /* XXX:
- */
- assert( offset + size < intel->batch->segment_max_offset[segment]);
- intel->batch->segment_finish_offset[segment] += size;
+ if (intel->state.Polygon->StippleFlag &&
+ intel->reduced_primitive == GL_TRIANGLES) {
- /* Just emit the original buffer, zeros and all as this will
- * avoid wrapping issues. This is usually not emitted at all,
- * so not urgent to fix:
+ /* _NEW_POLYGONSTIPPLE
*/
- memcpy(intel->batch->map + offset, i915->dyn_indirect.buf, size );
+ const GLubyte *mask = (const GLubyte *)intel->state.PolygonStipple;
+ GLubyte p[4];
+ GLint i, j, k;
+
+ p[0] = mask[12] & 0xf;
+ p[0] |= p[0] << 4;
+ p[1] = mask[8] & 0xf;
+ p[1] |= p[1] << 4;
+ p[2] = mask[4] & 0xf;
+ p[2] |= p[2] << 4;
+ p[3] = mask[0] & 0xf;
+ p[3] |= p[3] << 4;
+
+ st[1] |= ST1_ENABLE;
+ st[1] |= (((p[0] & 0xf) << 0) |
+ ((p[1] & 0xf) << 4) |
+ ((p[2] & 0xf) << 8) |
+ ((p[3] & 0xf) << 12));
+
+ for (k = 0; k < 8; k++) {
+ for (j = 3; j >= 0; j--) {
+ for (i = 0; i < 4; i++, mask++) {
+ if (*mask != p[j]) {
+ hw_stipple_fallback = 1;
+ st[1] = 0;
+ }
+ }
+ }
+ }
}
+
+ assert(!hw_stipple_fallback); /* TODO */
+
+ set_dynamic_indirect( intel,
+ I915_DYNAMIC_STP_0,
+ &st[0],
+ 2 );
}
-const struct intel_tracked_state i915_upload_dynamic_indirect = {
+
+/* Tracked-state atom for polygon stipple.  upload_stipple reads both
+ * the stipple pattern and Polygon->StippleFlag, so both Mesa dirty
+ * bits are OR'ed into the mask.  With a comma between them,
+ * _NEW_POLYGON would initialise the following member and then be
+ * overridden by the ".intel" designator.
+ *
+ * NOTE(review): i915_state.h and the atom list refer to
+ * i915_upload_STIPPLE - confirm which spelling is intended.
+ */
+const struct intel_tracked_state i915_upload_stipple = {
.dirty = {
- .mesa = 0,
- .intel = I915_NEW_DYNAMIC_INDIRECT,
+ .mesa = _NEW_POLYGONSTIPPLE | _NEW_POLYGON,
+ .intel = INTEL_NEW_REDUCED_PRIMITIVE,
.extra = 0
},
- .update = emit_indirect
+ .update = upload_stipple
};
-
-
-
diff --git a/src/mesa/drivers/dri/i915tex/i915_state_immediate.c b/src/mesa/drivers/dri/i915tex/i915_state_immediate.c
index 495f8fe903..d032a1cf7c 100644
--- a/src/mesa/drivers/dri/i915tex/i915_state_immediate.c
+++ b/src/mesa/drivers/dri/i915tex/i915_state_immediate.c
@@ -57,35 +57,34 @@
*/
static void upload_S0S1( struct intel_context *intel )
{
+ struct i915_context *i915 = i915_context( &intel->ctx );
+ GLuint S0, S1;
- /* INTEL_NEW_VBO */
- if (intel->state.vbo) {
-
- BEGIN_BATCH(3, 0);
-
- OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
- I1_LOAD_S(0) |
- I1_LOAD_S(1) |
- 1);
-
- /* INTEL_NEW_VBO, INTEL_NEW_RELOC */
- OUT_RELOC(intel->state.vbo,
- DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
- DRM_BO_MASK_MEM | DRM_BO_FLAG_READ,
- intel->state.vbo_offset);
+ /* INTEL_NEW_VBO
+ */
+ S0 = intel->state.vbo_offset;
- /* INTEL_NEW_VERTEX_SIZE */
- OUT_BATCH((intel->vertex_size << 24) |
- (intel->vertex_size << 16));
+ /* INTEL_NEW_VERTEX_SIZE -- do this where the vertex size is calculated!
+ */
+ S1 = ((intel->vertex_size << 24) |
+ (intel->vertex_size << 16));
- ADVANCE_BATCH();
+ /* INTEL_NEW_VBO */
+ if (i915->current.vbo != intel->state.vbo ||
+ i915->current.immediate[I915_IMMEDIATE_S0] != S0 ||
+ i915->current.immediate[I915_IMMEDIATE_S1] != S1)
+ {
+ i915->current.vbo = intel->state.vbo;
+ i915->current.immediate[I915_IMMEDIATE_S0] = S0;
+ i915->current.immediate[I915_IMMEDIATE_S1] = S1;
+ intel->state.dirty.intel |= I915_NEW_IMMEDIATE;
}
}
const struct intel_tracked_state i915_upload_S0S1 = {
.dirty = {
.mesa = 0,
- .intel = INTEL_NEW_VBO | INTEL_NEW_VERTEX_SIZE | INTEL_NEW_FENCE,
+ .intel = INTEL_NEW_VBO | INTEL_NEW_VERTEX_SIZE,
.extra = 0
},
.update = upload_S0S1
@@ -159,16 +158,14 @@ static void upload_S2S4(struct intel_context *intel)
S4_FLATSHADE_SPECULAR);
}
-
- BEGIN_BATCH(3, 0);
-
- OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
- I1_LOAD_S(2) |
- I1_LOAD_S(4) |
- 1);
- OUT_BATCH(LIS2);
- OUT_BATCH(LIS4);
- ADVANCE_BATCH();
+
+ if (LIS2 != i915->current.immediate[I915_IMMEDIATE_S2] ||
+ LIS4 != i915->current.immediate[I915_IMMEDIATE_S4]) {
+
+ i915->current.immediate[I915_IMMEDIATE_S2] = LIS2;
+ i915->current.immediate[I915_IMMEDIATE_S4] = LIS4;
+ intel->state.dirty.intel |= I915_NEW_IMMEDIATE;
+ }
}
@@ -193,6 +190,7 @@ const struct intel_tracked_state i915_upload_S2S4 = {
*/
static void upload_S5( struct intel_context *intel )
{
+ struct i915_context *i915 = i915_context( &intel->ctx );
GLuint LIS5 = 0;
/* _NEW_STENCIL */
@@ -243,18 +241,15 @@ static void upload_S5( struct intel_context *intel )
LIS5 |= S5_WRITEDISABLE_ALPHA;
}
- BEGIN_BATCH(2, 0);
- OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
- I1_LOAD_S(5) |
- 0);
- OUT_BATCH(LIS5);
- ADVANCE_BATCH();
-
+ if (LIS5 != i915->current.immediate[I915_IMMEDIATE_S5]) {
+ i915->current.immediate[I915_IMMEDIATE_S5] = LIS5;
+ intel->state.dirty.intel |= I915_NEW_IMMEDIATE;
+ }
}
const struct intel_tracked_state i915_upload_S5 = {
.dirty = {
- .mesa = (_NEW_STENCIL | _NEW_COLOR),
+ .mesa = (_NEW_STENCIL | _NEW_COLOR | _NEW_POLYGON),
.intel = 0,
.extra = 0
},
@@ -266,19 +261,19 @@ const struct intel_tracked_state i915_upload_S5 = {
*/
static void upload_S6( struct intel_context *intel )
{
+ struct i915_context *i915 = i915_context( &intel->ctx );
GLuint LIS6 = (S6_COLOR_WRITE_ENABLE |
(2 << S6_TRISTRIP_PV_SHIFT));
/* _NEW_COLOR
*/
- if (1) {
+ if (intel->state.Color->AlphaEnabled) {
int test = intel_translate_compare_func(intel->state.Color->AlphaFunc);
GLubyte refByte;
CLAMPED_FLOAT_TO_UBYTE(refByte, intel->state.Color->AlphaRef);
- if (intel->state.Color->AlphaEnabled)
- LIS6 |= S6_ALPHA_TEST_ENABLE;
+ LIS6 |= S6_ALPHA_TEST_ENABLE;
LIS6 |= ((test << S6_ALPHA_TEST_FUNC_SHIFT) |
(((GLuint) refByte) << S6_ALPHA_REF_SHIFT));
@@ -286,8 +281,9 @@ static void upload_S6( struct intel_context *intel )
/* _NEW_COLOR
*/
- if (1) {
-
+ if (intel->state.Color->BlendEnabled &&
+ !STATE_LOGICOP_ENABLED(&intel->state))
+ {
GLuint eqRGB = intel->state.Color->BlendEquationRGB;
GLuint srcRGB = intel->state.Color->BlendSrcRGB;
GLuint dstRGB = intel->state.Color->BlendDstRGB;
@@ -296,9 +292,7 @@ static void upload_S6( struct intel_context *intel )
srcRGB = dstRGB = GL_ONE;
}
- if (intel->state.Color->BlendEnabled &&
- !STATE_LOGICOP_ENABLED(&intel->state))
- LIS6 |= S6_CBUF_BLEND_ENABLE;
+ LIS6 |= S6_CBUF_BLEND_ENABLE;
LIS6 |= (SRC_BLND_FACT(intel_translate_blend_factor(srcRGB)) |
DST_BLND_FACT(intel_translate_blend_factor(dstRGB)) |
@@ -307,25 +301,20 @@ static void upload_S6( struct intel_context *intel )
/* _NEW_DEPTH
*/
- if (1) {
+ if (intel->state.Depth->Test) {
GLint func = intel_translate_compare_func(intel->state.Depth->Func);
LIS6 |= func << S6_DEPTH_TEST_FUNC_SHIFT;
- if (intel->state.Depth->Test) {
- LIS6 |= S6_DEPTH_TEST_ENABLE;
- if (intel->state.Depth->Mask)
- LIS6 |= S6_DEPTH_WRITE_ENABLE;
- }
+ LIS6 |= S6_DEPTH_TEST_ENABLE;
+ if (intel->state.Depth->Mask)
+ LIS6 |= S6_DEPTH_WRITE_ENABLE;
}
- BEGIN_BATCH(2, 0);
- OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
- I1_LOAD_S(6) |
- 0);
- OUT_BATCH(LIS6);
- ADVANCE_BATCH();
-
+ if (LIS6 != i915->current.immediate[I915_IMMEDIATE_S6]) {
+ i915->current.immediate[I915_IMMEDIATE_S6] = LIS6;
+ intel->state.dirty.intel |= I915_NEW_IMMEDIATE;
+ }
}
const struct intel_tracked_state i915_upload_S6 = {
@@ -342,6 +331,7 @@ const struct intel_tracked_state i915_upload_S6 = {
*/
static void upload_S7( struct intel_context *intel )
{
+ struct i915_context *i915 = i915_context( &intel->ctx );
GLfloat LIS7;
/* _NEW_POLYGON
@@ -349,13 +339,10 @@ static void upload_S7( struct intel_context *intel )
/* LIS7 = intel->state.Polygon->OffsetUnits * DEPTH_SCALE; */
LIS7 = 0;
- BEGIN_BATCH(2, 0);
- OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
- I1_LOAD_S(7) |
- 0);
- OUT_BATCH_F(LIS7);
- ADVANCE_BATCH();
-
+ if (LIS7 != i915->current.immediate[I915_IMMEDIATE_S7]) {
+ i915->current.immediate[I915_IMMEDIATE_S7] = LIS7;
+ intel->state.dirty.intel |= I915_NEW_IMMEDIATE;
+ }
}
const struct intel_tracked_state i915_upload_S7 = {
diff --git a/src/mesa/drivers/dri/i915tex/i915_state_static.c b/src/mesa/drivers/dri/i915tex/i915_state_static.c
index ebbcfd7b8b..32131b7296 100644
--- a/src/mesa/drivers/dri/i915tex/i915_state_static.c
+++ b/src/mesa/drivers/dri/i915tex/i915_state_static.c
@@ -56,32 +56,103 @@
*/
-/***********************************************************************
- * Depthbuffer - currently constant, but rotation would change that.
- */
+static GLuint invarient_state[] = {
+
+ (_3DSTATE_AA_CMD |
+ AA_LINE_ECAAR_WIDTH_ENABLE |
+ AA_LINE_ECAAR_WIDTH_1_0 |
+ AA_LINE_REGION_WIDTH_ENABLE |
+ AA_LINE_REGION_WIDTH_1_0),
+
+#if 1
+ /* Could use these to reduce the size of vertices when the
+ * incoming array is constant. For now these are don't care
+ * items - maybe don't bother about setting them??
+ */
+ (_3DSTATE_DFLT_DIFFUSE_CMD),
+ (0),
+
+ (_3DSTATE_DFLT_SPEC_CMD),
+ (0),
+
+ (_3DSTATE_DFLT_Z_CMD),
+ (0),
+#endif
+
+ /* We support texture crossbar via the fragment shader, rather than
+ * with this mechanism.
+ */
+ (_3DSTATE_COORD_SET_BINDINGS |
+ CSB_TCB(0, 0) |
+ CSB_TCB(1, 1) |
+ CSB_TCB(2, 2) |
+ CSB_TCB(3, 3) |
+ CSB_TCB(4, 4) |
+ CSB_TCB(5, 5) |
+ CSB_TCB(6, 6) |
+ CSB_TCB(7, 7)),
+
+ /* Setup OpenGL rasterization state:
+ */
+ (_3DSTATE_RASTER_RULES_CMD |
+ ENABLE_POINT_RASTER_RULE |
+ OGL_POINT_RASTER_RULE |
+ ENABLE_LINE_STRIP_PROVOKE_VRTX |
+ ENABLE_TRI_FAN_PROVOKE_VRTX |
+ LINE_STRIP_PROVOKE_VRTX(1) |
+ TRI_FAN_PROVOKE_VRTX(2) |
+ ENABLE_TEXKILL_3D_4D |
+ TEXKILL_4D),
+
+
+ /* For private depth buffers but shared color buffers, eg
+ * front-buffer rendering with a private depthbuffer. We don't do
+ * this.
+ */
+ (_3DSTATE_DEPTH_SUBRECT_DISABLE),
+};
+
-static void upload_buffers(struct intel_context *intel)
+/*
+ */
+static void upload_static(struct intel_context *intel)
{
struct i915_context *i915 = i915_context( &intel->ctx );
struct intel_region *color_region = intel->state.draw_region;
struct intel_region *depth_region = intel->state.depth_region;
+ struct i915_cache_packet packet;
+ GLuint i;
GLuint dwords = ((color_region ? 3 : 0) +
(depth_region ? 3 : 0) +
- 2);
+ 2 + /* DV */
+ 4 + /* SCISSOR */
+ Elements(invarient_state));
GLuint relocs = ((color_region ? 1 : 0) +
(depth_region ? 1 : 0));
- struct i915_cache_packet packet;
- packet_init( &packet, I915_CACHE_BUFFERS, dwords, relocs );
+ packet_init( &packet, I915_CACHE_STATIC, dwords, relocs );
+
+ /***********************************************************************
+ * Misc invarient state packets
+ */
+ for (i = 0; i < Elements(invarient_state); i++)
+ packet_dword( &packet, invarient_state[i] );
+
+
+ /***********************************************************************
+ * Buffers
+ */
if (color_region) {
packet_dword( &packet, _3DSTATE_BUF_INFO_CMD );
packet_dword( &packet, BUF_3D_ID_COLOR_BACK |
BUF_3D_PITCH(color_region->pitch * color_region->cpp) |
+/* BUF_3D_TILED_SURFACE | */
+/* BUF_3D_TILE_WALK_X | */
BUF_3D_USE_FENCE);
packet_reloc( &packet, color_region->buffer,
@@ -94,6 +165,8 @@ static void upload_buffers(struct intel_context *intel)
packet_dword( &packet, _3DSTATE_BUF_INFO_CMD );
packet_dword( &packet, BUF_3D_ID_DEPTH |
BUF_3D_PITCH(depth_region->pitch * depth_region->cpp) |
+/* BUF_3D_TILED_SURFACE | */
+/* BUF_3D_TILE_WALK_X | */
BUF_3D_USE_FENCE );
packet_reloc( &packet, depth_region->buffer,
DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
@@ -102,124 +175,32 @@ static void upload_buffers(struct intel_context *intel)
}
+ /* This might become dynamic state if it turns out adjusting the
+ * bias values can cure our point-rendering woes.
+ */
packet_dword( &packet,_3DSTATE_DST_BUF_VARS_CMD);
- packet_dword( &packet, DSTORG_HORT_BIAS(0x8) | /* .5 */
- DSTORG_VERT_BIAS(0x8) | /* .5 */
- LOD_PRECLAMP_OGL |
- TEX_DEFAULT_COLOR_OGL |
- DITHER_FULL_ALWAYS |
- (color_region && color_region->cpp == 4
- ? DV_PF_8888
- : DV_PF_565) |
- (depth_region && depth_region->cpp == 4
- ? DEPTH_FRMT_24_FIXED_8_OTHER
- : DEPTH_FRMT_16_FIXED) );
-
- i915_cache_emit( i915->cctx, &packet );
-}
-
-const struct intel_tracked_state i915_upload_buffers = {
- .dirty = {
- .mesa = 0,
- .intel = INTEL_NEW_CBUF | INTEL_NEW_ZBUF | INTEL_NEW_FENCE,
- .extra = 0
- },
- .update = upload_buffers
-};
-
-
-
-/***********************************************************************
- * Polygon stipple
- *
- * The i915 supports a 4x4 stipple natively, GL wants 32x32.
- * Fortunately stipple is usually a repeating pattern.
- *
- * XXX: does stipple pattern need to be adjusted according to
- * the window position?
- *
- * XXX: possibly need workaround for conform paths test.
- */
-
-static void upload_stipple( struct intel_context *intel )
-{
- struct i915_context *i915 = i915_context( &intel->ctx );
- GLuint st0 = _3DSTATE_STIPPLE;
- GLuint st1 = 0;
-
- GLboolean hw_stipple_fallback = 0;
-
- /* _NEW_POLYGON, INTEL_NEW_REDUCED_PRIMITIVE
+ packet_dword( &packet, (DSTORG_HORT_BIAS(0x8) | /* .5 */
+ DSTORG_VERT_BIAS(0x8) | /* .5 */
+ LOD_PRECLAMP_OGL |
+ TEX_DEFAULT_COLOR_OGL |
+ DITHER_FULL_ALWAYS |
+ (color_region && color_region->cpp == 4
+ ? DV_PF_8888
+ : DV_PF_565) |
+ (depth_region && depth_region->cpp == 4
+ ? DEPTH_FRMT_24_FIXED_8_OTHER
+ : DEPTH_FRMT_16_FIXED)) );
+
+
+ /***********************************************************************
+ * Scissor.
+ *
+ * Is it static or dynamic??? It is not understood by the hardware
+ * binner, so if we ever implement HWZ, it would be static under that
+ * scheme, or somehow not handled, or perhaps we would have to
+ * manually clip primitives to the scissor region. For now, we call
+ * it static.
*/
- if (intel->state.Polygon->StippleFlag &&
- intel->reduced_primitive == GL_TRIANGLES) {
-
- /* _NEW_POLYGONSTIPPLE
- */
- const GLubyte *mask = (const GLubyte *)intel->state.PolygonStipple;
- GLubyte p[4];
- GLint i, j, k;
-
- p[0] = mask[12] & 0xf;
- p[0] |= p[0] << 4;
- p[1] = mask[8] & 0xf;
- p[1] |= p[1] << 4;
- p[2] = mask[4] & 0xf;
- p[2] |= p[2] << 4;
- p[3] = mask[0] & 0xf;
- p[3] |= p[3] << 4;
-
- st1 |= ST1_ENABLE;
-
- for (k = 0; k < 8; k++) {
- for (j = 3; j >= 0; j--) {
- for (i = 0; i < 4; i++, mask++) {
- if (*mask != p[j]) {
- hw_stipple_fallback = 1;
- st1 &= ~ST1_ENABLE;
- }
- }
- }
- }
-
- st1 |= (((p[0] & 0xf) << 0) |
- ((p[1] & 0xf) << 4) |
- ((p[2] & 0xf) << 8) |
- ((p[3] & 0xf) << 12));
- }
-
- assert(!hw_stipple_fallback); /* TODO */
-
- {
- struct i915_cache_packet packet;
-
- packet_init( &packet, I915_CACHE_STIPPLE, 2, 0 );
- packet_dword( &packet,st0);
- packet_dword( &packet,st1);
- i915_cache_emit( i915->cctx, &packet );
- }
-}
-
-
-const struct intel_tracked_state i915_upload_stipple = {
- .dirty = {
- .mesa = _NEW_POLYGONSTIPPLE, _NEW_POLYGON,
- .intel = INTEL_NEW_REDUCED_PRIMITIVE,
- .extra = 0
- },
- .update = upload_stipple
-};
-
-
-
-/***********************************************************************
- * Scissor.
- */
-
-static void upload_scissor( struct intel_context *intel )
-{
- struct i915_context *i915 = i915_context( &intel->ctx );
- struct i915_cache_packet packet;
/* _NEW_SCISSOR, _NEW_BUFFERS
*/
@@ -253,141 +234,26 @@ static void upload_scissor( struct intel_context *intel )
x2 = CLAMP(x2, 0, intel->state.DrawBuffer->Width - 1);
y2 = CLAMP(y2, 0, intel->state.DrawBuffer->Height - 1);
- packet_init( &packet, I915_CACHE_SCISSOR, 4, 0 );
packet_dword( &packet,_3DSTATE_SCISSOR_ENABLE_CMD | ENABLE_SCISSOR_RECT);
packet_dword( &packet,_3DSTATE_SCISSOR_RECT_0_CMD);
packet_dword( &packet,(y1 << 16) | (x1 & 0xffff));
packet_dword( &packet,(y2 << 16) | (x2 & 0xffff));
- i915_cache_emit( i915->cctx, &packet );
}
else {
- packet_init( &packet, I915_CACHE_SCISSOR, 1, 0 );
packet_dword( &packet,_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);
- i915_cache_emit( i915->cctx, &packet );
}
-}
-
-const struct intel_tracked_state i915_upload_scissor = {
- .dirty = {
- .mesa = _NEW_SCISSOR | _NEW_BUFFERS,
- .intel = 0,
- .extra = 0
- },
- .update = upload_scissor
-};
-
-
-
-
-/***********************************************************************
- * Misc invarient state packets
- */
-
-static void upload_invarient( struct intel_context *intel )
-{
- struct i915_context *i915 = i915_context( &intel->ctx );
- GLuint i;
-
- static GLuint invarient_state[] = {
-
- (_3DSTATE_AA_CMD |
- AA_LINE_ECAAR_WIDTH_ENABLE |
- AA_LINE_ECAAR_WIDTH_1_0 |
- AA_LINE_REGION_WIDTH_ENABLE |
- AA_LINE_REGION_WIDTH_1_0),
-
- /* Could use these to reduce the size of vertices when the incoming
- * array is constant.
- */
- (_3DSTATE_DFLT_DIFFUSE_CMD),
- (0),
-
- (_3DSTATE_DFLT_SPEC_CMD),
- (0),
-
- (_3DSTATE_DFLT_Z_CMD),
- (0),
-
- /* We support texture crossbar via the fragment shader, rather than
- * with this mechanism.
- */
- (_3DSTATE_COORD_SET_BINDINGS |
- CSB_TCB(0, 0) |
- CSB_TCB(1, 1) |
- CSB_TCB(2, 2) |
- CSB_TCB(3, 3) |
- CSB_TCB(4, 4) |
- CSB_TCB(5, 5) |
- CSB_TCB(6, 6) |
- CSB_TCB(7, 7)),
-
- /* Setup OpenGL rasterization state:
- */
- (_3DSTATE_RASTER_RULES_CMD |
- ENABLE_POINT_RASTER_RULE |
- OGL_POINT_RASTER_RULE |
- ENABLE_LINE_STRIP_PROVOKE_VRTX |
- ENABLE_TRI_FAN_PROVOKE_VRTX |
- LINE_STRIP_PROVOKE_VRTX(1) |
- TRI_FAN_PROVOKE_VRTX(2) |
- ENABLE_TEXKILL_3D_4D |
- TEXKILL_4D),
-
- /* Need to initialize this to zero.
- */
- (_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
- I1_LOAD_S(3) |
- (0)),
- (0),
-
- (_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT),
- (_3DSTATE_SCISSOR_RECT_0_CMD),
- (0),
- (0),
-
-
- /* For private depth buffers but shared color buffers, eg
- * front-buffer rendering with a private depthbuffer. We don't do
- * this.
- */
- (_3DSTATE_DEPTH_SUBRECT_DISABLE),
-
- (_3DSTATE_BACKFACE_STENCIL_OPS | BFO_ENABLE_STENCIL_TWO_SIDE | 0)
- };
-
- /* Disable indirect state for now.
- */
-#if 0
- BEGIN_BATCH(2, 0);
- OUT_BATCH(_3DSTATE_LOAD_INDIRECT | 0);
- OUT_BATCH(0);
- ADVANCE_BATCH();
-#endif
-
- /* Will be nice if this can be preserved over several frames. I
- * guess logical contexts would do much the same thing.
- */
- {
- struct i915_cache_packet packet;
-
- packet_init( &packet, I915_CACHE_INVARIENT, sizeof(invarient_state)/4, 0);
- for (i = 0; i < sizeof(invarient_state)/4; i++)
- packet_dword( &packet, invarient_state[i] );
-
- i915_cache_emit( i915->cctx, &packet );
- }
+ i915_cache_emit( i915->cctx, &packet );
}
-const struct intel_tracked_state i915_upload_invarient = {
+
+const struct intel_tracked_state i915_upload_static = {
.dirty = {
- .mesa = 0,
- .intel = INTEL_NEW_CONTEXT, /* or less frequently? */
+ .mesa = _NEW_SCISSOR | _NEW_BUFFERS,
+ .intel = INTEL_NEW_CBUF | INTEL_NEW_ZBUF | INTEL_NEW_FENCE,
.extra = 0
},
- .update = upload_invarient
+ .update = upload_static
};
-
-
diff --git a/src/mesa/drivers/dri/i915tex/i915_vtbl.c b/src/mesa/drivers/dri/i915tex/i915_vtbl.c
index 0e0f926a31..ebc692a554 100644
--- a/src/mesa/drivers/dri/i915tex/i915_vtbl.c
+++ b/src/mesa/drivers/dri/i915tex/i915_vtbl.c
@@ -68,9 +68,15 @@ static void i915_lost_hardware( struct intel_context *intel )
{
struct i915_context *i915 = i915_context( &intel->ctx );
+ /* This is required currently as we use the batchbuffer to hold all
+ * the cached items:
+ */
i915_clear_caches( i915->cctx );
- memset(&i915->dyn_indirect, 0, sizeof(i915->dyn_indirect));
+ /* Update the batchbuffer id so the context tracker knows there has
+ * been a discontinuity.
+ */
+ i915->current.id++;
}
diff --git a/src/mesa/drivers/dri/i915tex/intel_batchbuffer.c b/src/mesa/drivers/dri/i915tex/intel_batchbuffer.c
index 7a6285153f..3db3c93417 100644
--- a/src/mesa/drivers/dri/i915tex/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i915tex/intel_batchbuffer.c
@@ -242,12 +242,14 @@ do_flush_locked(struct intel_batchbuffer *batch,
r->offset, driBOOffset(r->buf), r->delta);
}
- if (INTEL_DEBUG & DEBUG_BATCH)
+ if (INTEL_DEBUG & DEBUG_BATCH)
intel_dump_batchbuffer(batch, (GLubyte *)ptr);
+
driBOUnmap(batch->buffer);
batch->map = NULL;
+
/* Throw away non-effective packets. Won't work once we have
* hardware contexts which would preserve statechanges beyond a
* single buffer.