author     Luc Verhaegen <libv@skynet.be>    2010-03-16 20:19:03 +0100
committer  Luc Verhaegen <libv@skynet.be>    2010-03-16 20:19:03 +0100
commit     54a8e9cc3988d908b5b846a752679127cacefd3b (patch)
tree       ee289172f13f246903151b12db7808b7cb0651e8 /i965
parent     6b263d1e2c599ddcc0ce4c84425dbc1ac8de020c (diff)
Import i915 and i965 dri drivers from mesa 7.8-rc1.
Diffstat (limited to 'i965')
-rw-r--r--  i965/Makefile.am              |  14
-rw-r--r--  i965/brw_cc.c                 |   5
-rw-r--r--  i965/brw_clip.c               |  26
-rw-r--r--  i965/brw_clip.h               |   1
-rw-r--r--  i965/brw_clip_line.c          |  13
-rw-r--r--  i965/brw_clip_point.c         |   1
-rw-r--r--  i965/brw_clip_state.c         |   9
-rw-r--r--  i965/brw_clip_tri.c           |   9
-rw-r--r--  i965/brw_clip_unfilled.c      |   1
-rw-r--r--  i965/brw_clip_util.c          |  16
-rw-r--r--  i965/brw_context.c            |  49
-rw-r--r--  i965/brw_context.h            |  46
-rw-r--r--  i965/brw_curbe.c              |  68
-rw-r--r--  i965/brw_defines.h            | 260
-rw-r--r--  i965/brw_disasm.c             |   3
-rw-r--r--  i965/brw_draw.c               |  23
-rw-r--r--  i965/brw_draw_upload.c        | 144
-rw-r--r--  i965/brw_eu.c                 |   4
-rw-r--r--  i965/brw_eu_debug.c           |  14
-rw-r--r--  i965/brw_eu_emit.c            | 117
-rw-r--r--  i965/brw_fallback.c           |   8
-rw-r--r--  i965/brw_gs.c                 |  17
-rw-r--r--  i965/brw_gs.h                 |   1
-rw-r--r--  i965/brw_gs_emit.c            |  21
-rw-r--r--  i965/brw_gs_state.c           |   7
-rw-r--r--  i965/brw_misc_state.c         | 140
-rw-r--r--  i965/brw_program.c            |  11
-rw-r--r--  i965/brw_queryobj.c           |   8
-rw-r--r--  i965/brw_sf.c                 |  13
-rw-r--r--  i965/brw_sf_emit.c            |   6
-rw-r--r--  i965/brw_sf_state.c           |  19
-rw-r--r--  i965/brw_state.h              |  51
-rw-r--r--  i965/brw_state_batch.c        |  10
-rw-r--r--  i965/brw_state_cache.c        | 275
-rw-r--r--  i965/brw_state_dump.c         |   1
-rw-r--r--  i965/brw_state_upload.c       |  91
-rw-r--r--  i965/brw_structs.h            | 114
-rw-r--r--  i965/brw_tex_layout.c         |   3
-rw-r--r--  i965/brw_urb.c                |  18
-rw-r--r--  i965/brw_vs.c                 |  31
-rw-r--r--  i965/brw_vs.h                 |   3
-rw-r--r--  i965/brw_vs_emit.c            | 300
-rw-r--r--  i965/brw_vs_state.c           |  22
-rw-r--r--  i965/brw_vs_surface_state.c   |  16
-rw-r--r--  i965/brw_vtbl.c               |  32
-rw-r--r--  i965/brw_wm.c                 |  28
-rw-r--r--  i965/brw_wm.h                 |   3
-rw-r--r--  i965/brw_wm_debug.c           |  68
-rw-r--r--  i965/brw_wm_emit.c            |  74
-rw-r--r--  i965/brw_wm_fp.c              |  15
-rw-r--r--  i965/brw_wm_glsl.c            |  23
-rw-r--r--  i965/brw_wm_pass0.c           |   4
-rw-r--r--  i965/brw_wm_sampler_state.c   |  29
-rw-r--r--  i965/brw_wm_state.c           |  26
-rw-r--r--  i965/brw_wm_surface_state.c   | 198
-rw-r--r--  i965/gen6_cc.c                | 296
-rw-r--r--  i965/gen6_clip_state.c        |  75
-rw-r--r--  i965/gen6_depthstencil.c      | 165
-rw-r--r--  i965/gen6_gs_state.c          |  91
-rw-r--r--  i965/gen6_sampler_state.c     |  71
-rw-r--r--  i965/gen6_scissor_state.c     | 105
-rw-r--r--  i965/gen6_sf_state.c          | 187
-rw-r--r--  i965/gen6_urb.c               |  83
-rw-r--r--  i965/gen6_viewport_state.c    | 173
-rw-r--r--  i965/gen6_vs_state.c          | 119
-rw-r--r--  i965/gen6_wm_state.c          | 160
66 files changed, 3143 insertions, 891 deletions
diff --git a/i965/Makefile.am b/i965/Makefile.am
index 8a61dab..d9c82d5 100644
--- a/i965/Makefile.am
+++ b/i965/Makefile.am
@@ -27,7 +27,6 @@ i965_dri_la_SOURCES = \
../shared/intel_pixel_draw.c \
../shared/intel_pixel_read.c \
../shared/intel_state.c \
- ../shared/intel_swapbuffers.c \
../shared/intel_syncobj.c \
../shared/intel_tex.c \
../shared/intel_tex_copy.c \
@@ -88,4 +87,15 @@ i965_dri_la_SOURCES = \
brw_wm_pass2.c \
brw_wm_sampler_state.c \
brw_wm_state.c \
- brw_wm_surface_state.c
+ brw_wm_surface_state.c \
+ gen6_cc.c \
+ gen6_clip_state.c \
+ gen6_depthstencil.c \
+ gen6_gs_state.c \
+ gen6_sampler_state.c \
+ gen6_scissor_state.c \
+ gen6_sf_state.c \
+ gen6_urb.c \
+ gen6_viewport_state.c \
+ gen6_vs_state.c \
+ gen6_wm_state.c
diff --git a/i965/brw_cc.c b/i965/brw_cc.c
index bac1c3a..fa2d394 100644
--- a/i965/brw_cc.c
+++ b/i965/brw_cc.c
@@ -34,9 +34,7 @@
#include "brw_state.h"
#include "brw_defines.h"
#include "brw_util.h"
-#include "intel_fbo.h"
#include "main/macros.h"
-#include "main/enums.h"
static void prepare_cc_vp( struct brw_context *brw )
{
@@ -295,8 +293,7 @@ cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key)
bo = brw_upload_cache(&brw->cache, BRW_CC_UNIT,
key, sizeof(*key),
&brw->cc.vp_bo, 1,
- &cc, sizeof(cc),
- NULL, NULL);
+ &cc, sizeof(cc));
/* Emit CC viewport relocation */
dri_bo_emit_reloc(bo,
diff --git a/i965/brw_clip.c b/i965/brw_clip.c
index dbd10a5..d3275c7 100644
--- a/i965/brw_clip.c
+++ b/i965/brw_clip.c
@@ -50,6 +50,7 @@
static void compile_clip_prog( struct brw_context *brw,
struct brw_clip_prog_key *key )
{
+ struct intel_context *intel = &brw->intel;
struct brw_clip_compile c;
const GLuint *program;
GLuint program_size;
@@ -65,14 +66,13 @@ static void compile_clip_prog( struct brw_context *brw,
c.func.single_program_flow = 1;
c.key = *key;
- c.need_ff_sync = BRW_IS_IGDNG(brw);
/* Need to locate the two positions present in vertex + header.
* These are currently hardcoded:
*/
c.header_position_offset = ATTR_SIZE;
- if (BRW_IS_IGDNG(brw))
+ if (intel->is_ironlake)
delta = 3 * REG_SIZE;
else
delta = REG_SIZE;
@@ -85,7 +85,7 @@ static void compile_clip_prog( struct brw_context *brw,
c.nr_attrs = brw_count_bits(c.key.attrs);
- if (BRW_IS_IGDNG(brw))
+ if (intel->is_ironlake)
c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */
else
c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */
@@ -130,20 +130,22 @@ static void compile_clip_prog( struct brw_context *brw,
/* Upload
*/
dri_bo_unreference(brw->clip.prog_bo);
- brw->clip.prog_bo = brw_upload_cache( &brw->cache,
- BRW_CLIP_PROG,
- &c.key, sizeof(c.key),
- NULL, 0,
- program, program_size,
- &c.prog_data,
- &brw->clip.prog_data );
+ brw->clip.prog_bo = brw_upload_cache_with_auxdata(&brw->cache,
+ BRW_CLIP_PROG,
+ &c.key, sizeof(c.key),
+ NULL, 0,
+ program, program_size,
+ &c.prog_data,
+ sizeof(c.prog_data),
+ &brw->clip.prog_data);
}
/* Calculate interpolants for triangle and line rasterization.
*/
static void upload_clip_prog(struct brw_context *brw)
{
- GLcontext *ctx = &brw->intel.ctx;
+ struct intel_context *intel = &brw->intel;
+ GLcontext *ctx = &intel->ctx;
struct brw_clip_prog_key key;
memset(&key, 0, sizeof(key));
@@ -160,7 +162,7 @@ static void upload_clip_prog(struct brw_context *brw)
/* _NEW_TRANSFORM */
key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled);
- if (BRW_IS_IGDNG(brw))
+ if (intel->is_ironlake)
key.clip_mode = BRW_CLIPMODE_KERNEL_CLIP;
else
key.clip_mode = BRW_CLIPMODE_NORMAL;
diff --git a/i965/brw_clip.h b/i965/brw_clip.h
index 1c68255..d71bac7 100644
--- a/i965/brw_clip.h
+++ b/i965/brw_clip.h
@@ -118,7 +118,6 @@ struct brw_clip_compile {
GLuint header_position_offset;
GLuint offset[VERT_ATTRIB_MAX];
- GLboolean need_ff_sync;
};
#define ATTR_SIZE (4*4)
diff --git a/i965/brw_clip_line.c b/i965/brw_clip_line.c
index fa9648f..ceb62a3 100644
--- a/i965/brw_clip_line.c
+++ b/i965/brw_clip_line.c
@@ -39,13 +39,13 @@
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_eu.h"
-#include "brw_util.h"
#include "brw_clip.h"
static void brw_clip_line_alloc_regs( struct brw_clip_compile *c )
{
+ struct intel_context *intel = &c->func.brw->intel;
GLuint i = 0,j;
/* Register usage is static, precompute here:
@@ -85,7 +85,7 @@ static void brw_clip_line_alloc_regs( struct brw_clip_compile *c )
i++;
}
- if (c->need_ff_sync) {
+ if (intel->needs_ff_sync) {
c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD);
i++;
}
@@ -126,6 +126,7 @@ static void brw_clip_line_alloc_regs( struct brw_clip_compile *c )
static void clip_and_emit_line( struct brw_clip_compile *c )
{
struct brw_compile *p = &c->func;
+ struct brw_context *brw = p->brw;
struct brw_indirect vtx0 = brw_indirect(0, 0);
struct brw_indirect vtx1 = brw_indirect(1, 0);
struct brw_indirect newvtx0 = brw_indirect(2, 0);
@@ -152,7 +153,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
brw_clip_init_clipmask(c);
/* -ve rhw workaround */
- if (BRW_IS_965(p->brw)) {
+ if (brw->has_negative_rhw_bug) {
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2),
brw_imm_ud(1<<20));
@@ -189,7 +190,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
* Both can be negative on GM965/G965 due to RHW workaround
* if so, this object should be rejected.
*/
- if (BRW_IS_965(p->brw)) {
+ if (brw->has_negative_rhw_bug) {
brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE, c->reg.dp0, brw_imm_f(0.0));
is_neg2 = brw_IF(p, BRW_EXECUTE_1);
{
@@ -214,7 +215,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
/* If both are positive, do nothing */
/* Only on GM965/G965 */
- if (BRW_IS_965(p->brw)) {
+ if (brw->has_negative_rhw_bug) {
brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0));
is_neg2 = brw_IF(p, BRW_EXECUTE_1);
}
@@ -229,7 +230,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
}
- if (BRW_IS_965(p->brw)) {
+ if (brw->has_negative_rhw_bug) {
brw_ENDIF(p, is_neg2);
}
}
diff --git a/i965/brw_clip_point.c b/i965/brw_clip_point.c
index 8458f61..7f47634 100644
--- a/i965/brw_clip_point.c
+++ b/i965/brw_clip_point.c
@@ -39,7 +39,6 @@
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_eu.h"
-#include "brw_util.h"
#include "brw_clip.h"
diff --git a/i965/brw_clip_state.c b/i965/brw_clip_state.c
index 234b374..424c9a1 100644
--- a/i965/brw_clip_state.c
+++ b/i965/brw_clip_state.c
@@ -32,7 +32,6 @@
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
-#include "main/macros.h"
struct brw_clip_unit_key {
unsigned int total_grf;
@@ -74,6 +73,7 @@ static dri_bo *
clip_unit_create_from_key(struct brw_context *brw,
struct brw_clip_unit_key *key)
{
+ struct intel_context *intel = &brw->intel;
struct brw_clip_unit_state clip;
dri_bo *bo;
@@ -105,7 +105,7 @@ clip_unit_create_from_key(struct brw_context *brw,
/* Although up to 16 concurrent Clip threads are allowed on IGDNG,
* only 2 threads can output VUEs at a time.
*/
- if (BRW_IS_IGDNG(brw))
+ if (intel->is_ironlake)
clip.thread4.max_threads = 16 - 1;
else
clip.thread4.max_threads = 2 - 1;
@@ -130,7 +130,7 @@ clip_unit_create_from_key(struct brw_context *brw,
clip.clip5.api_mode = BRW_CLIP_API_OGL;
clip.clip5.clip_mode = key->clip_mode;
- if (BRW_IS_G4X(brw))
+ if (intel->is_g4x)
clip.clip5.negative_w_clip_test = 1;
clip.clip6.clipper_viewport_state_ptr = 0;
@@ -142,8 +142,7 @@ clip_unit_create_from_key(struct brw_context *brw,
bo = brw_upload_cache(&brw->cache, BRW_CLIP_UNIT,
key, sizeof(*key),
&brw->clip.prog_bo, 1,
- &clip, sizeof(clip),
- NULL, NULL);
+ &clip, sizeof(clip));
/* Emit clip program relocation */
assert(brw->clip.prog_bo);
diff --git a/i965/brw_clip_tri.c b/i965/brw_clip_tri.c
index cf79224..815211a 100644
--- a/i965/brw_clip_tri.c
+++ b/i965/brw_clip_tri.c
@@ -39,7 +39,6 @@
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_eu.h"
-#include "brw_util.h"
#include "brw_clip.h"
static void release_tmps( struct brw_clip_compile *c )
@@ -51,6 +50,7 @@ static void release_tmps( struct brw_clip_compile *c )
void brw_clip_tri_alloc_regs( struct brw_clip_compile *c,
GLuint nr_verts )
{
+ struct intel_context *intel = &c->func.brw->intel;
GLuint i = 0,j;
/* Register usage is static, precompute here:
@@ -78,7 +78,7 @@ void brw_clip_tri_alloc_regs( struct brw_clip_compile *c,
for (j = 0; j < 3; j++) {
GLuint delta = c->nr_attrs*16 + 32;
- if (BRW_IS_IGDNG(c->func.brw))
+ if (intel->is_ironlake)
delta = c->nr_attrs * 16 + 32 * 3;
brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0));
@@ -119,7 +119,7 @@ void brw_clip_tri_alloc_regs( struct brw_clip_compile *c,
i++;
}
- if (c->need_ff_sync) {
+ if (intel->needs_ff_sync) {
c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD);
i++;
}
@@ -571,6 +571,7 @@ void brw_emit_tri_clip( struct brw_clip_compile *c )
{
struct brw_instruction *neg_rhw;
struct brw_compile *p = &c->func;
+ struct brw_context *brw = p->brw;
brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6);
brw_clip_tri_init_vertices(c);
brw_clip_init_clipmask(c);
@@ -578,7 +579,7 @@ void brw_emit_tri_clip( struct brw_clip_compile *c )
/* if -ve rhw workaround bit is set,
do cliptest */
- if (BRW_IS_965(p->brw)) {
+ if (brw->has_negative_rhw_bug) {
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2),
brw_imm_ud(1<<20));
diff --git a/i965/brw_clip_unfilled.c b/i965/brw_clip_unfilled.c
index ad1bfa4..f36d22f 100644
--- a/i965/brw_clip_unfilled.c
+++ b/i965/brw_clip_unfilled.c
@@ -39,7 +39,6 @@
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_eu.h"
-#include "brw_util.h"
#include "brw_clip.h"
diff --git a/i965/brw_clip_util.c b/i965/brw_clip_util.c
index 5a73abd..14bc889 100644
--- a/i965/brw_clip_util.c
+++ b/i965/brw_clip_util.c
@@ -40,7 +40,6 @@
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_eu.h"
-#include "brw_util.h"
#include "brw_clip.h"
@@ -135,6 +134,7 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c,
GLboolean force_edgeflag)
{
struct brw_compile *p = &c->func;
+ struct intel_context *intel = &p->brw->intel;
struct brw_reg tmp = get_tmp(c);
GLuint i;
@@ -142,7 +142,7 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c,
*/
/*
* After CLIP stage, only first 256 bits of the VUE are read
- * back on IGDNG, so needn't change it
+ * back on Ironlake, so needn't change it
*/
brw_copy_indirect_to_indirect(p, dest_ptr, v0_ptr, 1);
@@ -151,7 +151,7 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c,
for (i = 0; i < c->nr_attrs; i++) {
GLuint delta = i*16 + 32;
- if (BRW_IS_IGDNG(p->brw))
+ if (intel->is_ironlake)
delta = i * 16 + 32 * 3;
if (delta == c->offset[VERT_RESULT_EDGE]) {
@@ -185,7 +185,7 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c,
if (i & 1) {
GLuint delta = i*16 + 32;
- if (BRW_IS_IGDNG(p->brw))
+ if (intel->is_ironlake)
delta = i * 16 + 32 * 3;
brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0));
@@ -359,7 +359,9 @@ void brw_clip_init_clipmask( struct brw_clip_compile *c )
void brw_clip_ff_sync(struct brw_clip_compile *c)
{
- if (c->need_ff_sync) {
+ struct intel_context *intel = &c->func.brw->intel;
+
+ if (intel->needs_ff_sync) {
struct brw_compile *p = &c->func;
struct brw_instruction *need_ff_sync;
@@ -388,7 +390,9 @@ void brw_clip_ff_sync(struct brw_clip_compile *c)
void brw_clip_init_ff_sync(struct brw_clip_compile *c)
{
- if (c->need_ff_sync) {
+ struct intel_context *intel = &c->func.brw->intel;
+
+ if (intel->needs_ff_sync) {
struct brw_compile *p = &c->func;
brw_MOV(p, c->reg.ff_sync, brw_imm_ud(0));
diff --git a/i965/brw_context.c b/i965/brw_context.c
index aaa2d80..a512896 100644
--- a/i965/brw_context.c
+++ b/i965/brw_context.c
@@ -33,7 +33,6 @@
#include "main/imports.h"
#include "main/api_noop.h"
#include "main/macros.h"
-/* #include "main/vtxfmt.h" */
#include "main/simple_list.h"
#include "shader/shader_api.h"
@@ -41,16 +40,9 @@
#include "brw_defines.h"
#include "brw_draw.h"
#include "brw_state.h"
-#include "brw_vs.h"
-#include "intel_tex.h"
-#include "intel_blit.h"
-#include "intel_batchbuffer.h"
-#include "intel_pixel.h"
#include "intel_span.h"
#include "tnl/t_pipeline.h"
-#include "utils.h"
-
/***************************************
* Mesa's Driver Functions
@@ -77,7 +69,7 @@ static void brwInitDriverFunctions( struct dd_function_table *functions )
}
GLboolean brwCreateContext( const __GLcontextModes *mesaVis,
- __DRIcontextPrivate *driContextPriv,
+ __DRIcontext *driContextPriv,
void *sharedContextPrivate)
{
struct dd_function_table functions;
@@ -86,7 +78,7 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis,
GLcontext *ctx = &intel->ctx;
if (!brw) {
- _mesa_printf("%s: failed to alloc context\n", __FUNCTION__);
+ printf("%s: failed to alloc context\n", __FUNCTION__);
return GL_FALSE;
}
@@ -95,7 +87,7 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis,
if (!intelInitContext( intel, mesaVis, driContextPriv,
sharedContextPrivate, &functions )) {
- _mesa_printf("%s: failed to init intel context\n", __FUNCTION__);
+ printf("%s: failed to init intel context\n", __FUNCTION__);
FREE(brw);
return GL_FALSE;
}
@@ -111,6 +103,9 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis,
ctx->Const.MaxTextureUnits = MIN2(ctx->Const.MaxTextureCoordUnits,
ctx->Const.MaxTextureImageUnits);
ctx->Const.MaxVertexTextureImageUnits = 0; /* no vertex shader textures */
+ ctx->Const.MaxCombinedTextureImageUnits =
+ ctx->Const.MaxVertexTextureImageUnits +
+ ctx->Const.MaxTextureImageUnits;
/* Mesa limits textures to 4kx4k; it would be nice to fix that someday
*/
@@ -155,6 +150,38 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis,
MIN2(ctx->Const.FragmentProgram.MaxNativeParameters,
ctx->Const.FragmentProgram.MaxEnvParams);
+ if (intel->is_ironlake || intel->is_g4x || intel->gen >= 6) {
+ brw->CMD_VF_STATISTICS = CMD_VF_STATISTICS_GM45;
+ brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_GM45;
+ brw->has_surface_tile_offset = GL_TRUE;
+ brw->has_compr4 = GL_TRUE;
+ brw->has_aa_line_parameters = GL_TRUE;
+ } else {
+ brw->CMD_VF_STATISTICS = CMD_VF_STATISTICS_965;
+ brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_965;
+ }
+
+ /* WM maximum threads is number of EUs times number of threads per EU. */
+ if (intel->is_ironlake) {
+ brw->urb.size = 1024;
+ brw->vs_max_threads = 72;
+ brw->wm_max_threads = 12 * 6;
+ } else if (intel->is_g4x) {
+ brw->urb.size = 384;
+ brw->vs_max_threads = 32;
+ brw->wm_max_threads = 10 * 5;
+ } else if (intel->gen < 6) {
+ brw->urb.size = 256;
+ brw->vs_max_threads = 16;
+ brw->wm_max_threads = 8 * 4;
+ brw->has_negative_rhw_bug = GL_TRUE;
+ }
+
+ if (INTEL_DEBUG & DEBUG_SINGLE_THREAD) {
+ brw->vs_max_threads = 1;
+ brw->wm_max_threads = 1;
+ }
+
brw_init_state( brw );
brw->state.dirty.mesa = ~0;
diff --git a/i965/brw_context.h b/i965/brw_context.h
index fded47a..d6fc37e 100644
--- a/i965/brw_context.h
+++ b/i965/brw_context.h
@@ -131,7 +131,6 @@ struct brw_context;
#define BRW_NEW_WM_INPUT_DIMENSIONS 0x100
#define BRW_NEW_PSP 0x800
#define BRW_NEW_WM_SURFACES 0x1000
-#define BRW_NEW_FENCE 0x2000
#define BRW_NEW_INDICES 0x4000
#define BRW_NEW_VERTICES 0x8000
/**
@@ -172,8 +171,8 @@ struct brw_fragment_program {
GLuint id; /**< serial no. to identify frag progs, never re-used */
GLboolean isGLSL; /**< really, any IF/LOOP/CONT/BREAK instructions */
- dri_bo *const_buffer; /** Program constant buffer/surface */
GLboolean use_const_buffer;
+ dri_bo *const_buffer; /** Program constant buffer/surface */
/** for debugging, which texture units are referenced */
GLbitfield tex_units_used;
@@ -283,6 +282,9 @@ struct brw_vs_ouput_sizes {
enum brw_cache_id {
+ BRW_BLEND_STATE,
+ BRW_DEPTH_STENCIL_STATE,
+ BRW_COLOR_CALC_STATE,
BRW_CC_VP,
BRW_CC_UNIT,
BRW_WM_PROG,
@@ -291,7 +293,7 @@ enum brw_cache_id {
BRW_WM_UNIT,
BRW_SF_PROG,
BRW_SF_VP,
- BRW_SF_UNIT,
+ BRW_SF_UNIT, /* scissor state on gen6 */
BRW_VS_UNIT,
BRW_VS_PROG,
BRW_GS_UNIT,
@@ -332,7 +334,6 @@ struct brw_cache {
struct brw_cache_item **items;
GLuint size, n_items;
- GLuint aux_size[BRW_MAX_CACHE];
char *name[BRW_MAX_CACHE];
/* Record of the last BOs chosen for each cache_id. Used to set
@@ -356,6 +357,9 @@ struct brw_tracked_state {
/* Flags for brw->state.cache.
*/
+#define CACHE_NEW_BLEND_STATE (1<<BRW_BLEND_STATE)
+#define CACHE_NEW_DEPTH_STENCIL_STATE (1<<BRW_DEPTH_STENCIL_STATE)
+#define CACHE_NEW_COLOR_CALC_STATE (1<<BRW_COLOR_CALC_STATE)
#define CACHE_NEW_CC_VP (1<<BRW_CC_VP)
#define CACHE_NEW_CC_UNIT (1<<BRW_CC_UNIT)
#define CACHE_NEW_WM_PROG (1<<BRW_WM_PROG)
@@ -438,8 +442,11 @@ struct brw_context
GLuint primitive;
GLboolean emit_state_always;
- GLboolean no_batch_wrap;
-
+ GLboolean has_surface_tile_offset;
+ GLboolean has_compr4;
+ GLboolean has_negative_rhw_bug;
+ GLboolean has_aa_line_parameters;
+;
struct {
struct brw_state_flags dirty;
@@ -515,6 +522,12 @@ struct brw_context
*/
GLuint next_free_page;
+ /* hw-dependent 3DSTATE_VF_STATISTICS opcode */
+ uint32_t CMD_VF_STATISTICS;
+ /* hw-dependent 3DSTATE_PIPELINE_SELECT opcode */
+ uint32_t CMD_PIPELINE_SELECT;
+ int vs_max_threads;
+ int wm_max_threads;
/* BRW_NEW_URB_ALLOCATIONS:
*/
@@ -531,7 +544,8 @@ struct brw_context
GLuint nr_sf_entries;
GLuint nr_cs_entries;
-/* GLuint vs_size; */
+ /* gen6 */
+ GLuint vs_size;
/* GLuint gs_size; */
/* GLuint clip_size; */
/* GLuint sf_size; */
@@ -542,6 +556,7 @@ struct brw_context
GLuint clip_start;
GLuint sf_start;
GLuint cs_start;
+ GLuint size; /* Hardware URB size, in KB. */
} urb;
@@ -564,15 +579,11 @@ struct brw_context
GLfloat *last_buf;
GLuint last_bufsz;
- /**
- * Whether we should create a new bo instead of reusing the old one
- * (if we just dispatch the batch pointing at the old one.
- */
- GLboolean need_new_bo;
} curbe;
struct {
struct brw_vs_prog_data *prog_data;
+ int8_t *constant_map; /* variable array following prog_data */
dri_bo *prog_bo;
dri_bo *state_bo;
@@ -639,9 +650,16 @@ struct brw_context
struct {
+ /* gen4 */
dri_bo *prog_bo;
- dri_bo *state_bo;
dri_bo *vp_bo;
+
+ /* gen6 */
+ dri_bo *blend_state_bo;
+ dri_bo *depth_stencil_state_bo;
+ dri_bo *color_calc_state_bo;
+
+ dri_bo *state_bo;
} cc;
struct {
@@ -669,7 +687,7 @@ void brwInitVtbl( struct brw_context *brw );
* brw_context.c
*/
GLboolean brwCreateContext( const __GLcontextModes *mesaVis,
- __DRIcontextPrivate *driContextPriv,
+ __DRIcontext *driContextPriv,
void *sharedContextPrivate);
/*======================================================================
diff --git a/i965/brw_curbe.c b/i965/brw_curbe.c
index aadcfbe..4e78b08 100644
--- a/i965/brw_curbe.c
+++ b/i965/brw_curbe.c
@@ -114,13 +114,13 @@ static void calculate_curbe_offsets( struct brw_context *brw )
brw->curbe.total_size = reg;
if (0)
- _mesa_printf("curbe wm %d+%d clip %d+%d vs %d+%d\n",
- brw->curbe.wm_start,
- brw->curbe.wm_size,
- brw->curbe.clip_start,
- brw->curbe.clip_size,
- brw->curbe.vs_start,
- brw->curbe.vs_size );
+ printf("curbe wm %d+%d clip %d+%d vs %d+%d\n",
+ brw->curbe.wm_start,
+ brw->curbe.wm_size,
+ brw->curbe.clip_start,
+ brw->curbe.clip_size,
+ brw->curbe.vs_start,
+ brw->curbe.vs_size );
brw->state.dirty.brw |= BRW_NEW_CURBE_OFFSETS;
}
@@ -198,7 +198,7 @@ static void prepare_constant_buffer(struct brw_context *brw)
return;
}
- buf = (GLfloat *) _mesa_calloc(bufsz);
+ buf = (GLfloat *) calloc(1, bufsz);
/* fragment shader constants */
if (brw->curbe.wm_size) {
@@ -256,25 +256,36 @@ static void prepare_constant_buffer(struct brw_context *brw)
*/
_mesa_load_state_parameters(ctx, vp->program.Base.Parameters);
- /* XXX just use a memcpy here */
- for (i = 0; i < nr; i++) {
- const GLfloat *value = vp->program.Base.Parameters->ParameterValues[i];
- buf[offset + i * 4 + 0] = value[0];
- buf[offset + i * 4 + 1] = value[1];
- buf[offset + i * 4 + 2] = value[2];
- buf[offset + i * 4 + 3] = value[3];
+ if (vp->use_const_buffer) {
+ /* Load the subset of push constants that will get used when
+ * we also have a pull constant buffer.
+ */
+ for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) {
+ if (brw->vs.constant_map[i] != -1) {
+ assert(brw->vs.constant_map[i] <= nr);
+ memcpy(buf + offset + brw->vs.constant_map[i] * 4,
+ vp->program.Base.Parameters->ParameterValues[i],
+ 4 * sizeof(float));
+ }
+ }
+ } else {
+ for (i = 0; i < nr; i++) {
+ memcpy(buf + offset + i * 4,
+ vp->program.Base.Parameters->ParameterValues[i],
+ 4 * sizeof(float));
+ }
}
}
if (0) {
for (i = 0; i < sz*16; i+=4)
- _mesa_printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4,
- buf[i+0], buf[i+1], buf[i+2], buf[i+3]);
+ printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4,
+ buf[i+0], buf[i+1], buf[i+2], buf[i+3]);
- _mesa_printf("last_buf %p buf %p sz %d/%d cmp %d\n",
- brw->curbe.last_buf, buf,
- bufsz, brw->curbe.last_bufsz,
- brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1);
+ printf("last_buf %p buf %p sz %d/%d cmp %d\n",
+ brw->curbe.last_buf, buf,
+ bufsz, brw->curbe.last_bufsz,
+ brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1);
}
if (brw->curbe.curbe_bo != NULL &&
@@ -282,20 +293,20 @@ static void prepare_constant_buffer(struct brw_context *brw)
bufsz == brw->curbe.last_bufsz &&
memcmp(buf, brw->curbe.last_buf, bufsz) == 0) {
/* constants have not changed */
- _mesa_free(buf);
+ free(buf);
}
else {
/* constants have changed */
if (brw->curbe.last_buf)
- _mesa_free(brw->curbe.last_buf);
+ free(brw->curbe.last_buf);
brw->curbe.last_buf = buf;
brw->curbe.last_bufsz = bufsz;
if (brw->curbe.curbe_bo != NULL &&
- (brw->curbe.need_new_bo ||
- brw->curbe.curbe_next_offset + bufsz > brw->curbe.curbe_bo->size))
+ brw->curbe.curbe_next_offset + bufsz > brw->curbe.curbe_bo->size)
{
+ drm_intel_gem_bo_unmap_gtt(brw->curbe.curbe_bo);
dri_bo_unreference(brw->curbe.curbe_bo);
brw->curbe.curbe_bo = NULL;
}
@@ -307,6 +318,7 @@ static void prepare_constant_buffer(struct brw_context *brw)
brw->curbe.curbe_bo = dri_bo_alloc(brw->intel.bufmgr, "CURBE",
4096, 1 << 6);
brw->curbe.curbe_next_offset = 0;
+ drm_intel_gem_bo_map_gtt(brw->curbe.curbe_bo);
}
brw->curbe.curbe_offset = brw->curbe.curbe_next_offset;
@@ -315,7 +327,9 @@ static void prepare_constant_buffer(struct brw_context *brw)
/* Copy data to the buffer:
*/
- dri_bo_subdata(brw->curbe.curbe_bo, brw->curbe.curbe_offset, bufsz, buf);
+ memcpy(brw->curbe.curbe_bo->virtual + brw->curbe.curbe_offset,
+ buf,
+ bufsz);
}
brw_add_validated_bo(brw, brw->curbe.curbe_bo);
@@ -340,7 +354,7 @@ static void emit_constant_buffer(struct brw_context *brw)
struct intel_context *intel = &brw->intel;
GLuint sz = brw->curbe.total_size;
- BEGIN_BATCH(2, IGNORE_CLIPRECTS);
+ BEGIN_BATCH(2);
if (sz == 0) {
OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2));
OUT_BATCH(0);
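/* (Sketch only, not part of the commit.)  The prepare_constant_buffer() hunk
 * above copies into the push-constant (CURBE) buffer only those parameters
 * that brw->vs.constant_map assigns a non-negative slot; entries of -1 are
 * left to the pull constant buffer.  A minimal stand-alone version of that
 * gather loop, with illustrative names (gather_push_constants, push_buf,
 * param_values), could look like this:
 */
#include <stdint.h>
#include <string.h>

static void
gather_push_constants(float *push_buf,                 /* CURBE destination */
                      const float (*param_values)[4],  /* all program parameters */
                      const int8_t *constant_map,      /* param index -> push slot, -1 = pull only */
                      int num_params)
{
   int i;

   for (i = 0; i < num_params; i++) {
      if (constant_map[i] != -1) {
         /* Copy one vec4 parameter into its assigned push slot. */
         memcpy(&push_buf[constant_map[i] * 4], param_values[i],
                4 * sizeof(float));
      }
   }
}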
diff --git a/i965/brw_defines.h b/i965/brw_defines.h
index c19510b..bb1b5f5 100644
--- a/i965/brw_defines.h
+++ b/i965/brw_defines.h
@@ -530,6 +530,7 @@
#define BRW_OPCODE_POP 47
#define BRW_OPCODE_WAIT 48
#define BRW_OPCODE_SEND 49
+#define BRW_OPCODE_MATH 56
#define BRW_OPCODE_ADD 64
#define BRW_OPCODE_MUL 65
#define BRW_OPCODE_AVG 66
@@ -727,7 +728,8 @@
#define BRW_MATH_FUNCTION_SIN 6 /* was 7 */
#define BRW_MATH_FUNCTION_COS 7 /* was 8 */
#define BRW_MATH_FUNCTION_SINCOS 8 /* was 6 */
-#define BRW_MATH_FUNCTION_TAN 9
+#define BRW_MATH_FUNCTION_TAN 9 /* gen4 */
+#define BRW_MATH_FUNCTION_FDIV 9 /* gen6+ */
#define BRW_MATH_FUNCTION_POW 10
#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11
#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT 12
@@ -778,17 +780,33 @@
#define CMD_PIPELINED_STATE_POINTERS 0x7800
#define CMD_BINDING_TABLE_PTRS 0x7801
+# define GEN6_BINDING_TABLE_MODIFY_VS (1 << 8)
+# define GEN6_BINDING_TABLE_MODIFY_GS (1 << 9)
+# define GEN6_BINDING_TABLE_MODIFY_PS (1 << 10)
+
+#define CMD_3D_SAMPLER_STATE_POINTERS 0x7802 /* SNB+ */
+# define PS_SAMPLER_STATE_CHANGE (1 << 12)
+# define GS_SAMPLER_STATE_CHANGE (1 << 9)
+# define VS_SAMPLER_STATE_CHANGE (1 << 8)
+/* DW1: VS */
+/* DW2: GS */
+/* DW3: PS */
#define CMD_VERTEX_BUFFER 0x7808
# define BRW_VB0_INDEX_SHIFT 27
+# define GEN6_VB0_INDEX_SHIFT 26
# define BRW_VB0_ACCESS_VERTEXDATA (0 << 26)
# define BRW_VB0_ACCESS_INSTANCEDATA (1 << 26)
+# define GEN6_VB0_ACCESS_VERTEXDATA (0 << 20)
+# define GEN6_VB0_ACCESS_INSTANCEDATA (1 << 20)
# define BRW_VB0_PITCH_SHIFT 0
#define CMD_VERTEX_ELEMENT 0x7809
# define BRW_VE0_INDEX_SHIFT 27
+# define GEN6_VE0_INDEX_SHIFT 26
# define BRW_VE0_FORMAT_SHIFT 16
# define BRW_VE0_VALID (1 << 26)
+# define GEN6_VE0_VALID (1 << 25)
# define BRW_VE0_SRC_OFFSET_SHIFT 0
# define BRW_VE1_COMPONENT_NOSTORE 0
# define BRW_VE1_COMPONENT_STORE_SRC 1
@@ -805,8 +823,219 @@
# define BRW_VE1_DST_OFFSET_SHIFT 0
#define CMD_INDEX_BUFFER 0x780a
-#define CMD_VF_STATISTICS_965 0x780b
+#define CMD_VF_STATISTICS_965 0x780b
#define CMD_VF_STATISTICS_GM45 0x680b
+#define CMD_3D_CC_STATE_POINTERS 0x780e /* GEN6+ */
+
+#define CMD_URB 0x7805 /* GEN6+ */
+# define GEN6_URB_VS_SIZE_SHIFT 16
+# define GEN6_URB_VS_ENTRIES_SHIFT 0
+# define GEN6_URB_GS_SIZE_SHIFT 8
+# define GEN6_URB_GS_ENTRIES_SHIFT 0
+
+#define CMD_VIEWPORT_STATE_POINTERS 0x780d /* GEN6+ */
+# define GEN6_CC_VIEWPORT_MODIFY (1 << 12)
+# define GEN6_SF_VIEWPORT_MODIFY (1 << 11)
+# define GEN6_CLIP_VIEWPORT_MODIFY (1 << 10)
+
+#define CMD_3D_SCISSOR_STATE_POINTERS 0x780f /* GEN6+ */
+
+#define CMD_3D_VS_STATE 0x7810 /* GEN6+ */
+/* DW2 */
+# define GEN6_VS_SPF_MODE (1 << 31)
+# define GEN6_VS_VECTOR_MASK_ENABLE (1 << 30)
+# define GEN6_VS_SAMPLER_COUNT_SHIFT 27
+# define GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
+/* DW4 */
+# define GEN6_VS_DISPATCH_START_GRF_SHIFT 20
+# define GEN6_VS_URB_READ_LENGTH_SHIFT 11
+# define GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT 4
+/* DW5 */
+# define GEN6_VS_MAX_THREADS_SHIFT 25
+# define GEN6_VS_STATISTICS_ENABLE (1 << 10)
+# define GEN6_VS_CACHE_DISABLE (1 << 1)
+# define GEN6_VS_ENABLE (1 << 0)
+
+#define CMD_3D_GS_STATE 0x7811 /* GEN6+ */
+/* DW2 */
+# define GEN6_GS_SPF_MODE (1 << 31)
+# define GEN6_GS_VECTOR_MASK_ENABLE (1 << 30)
+# define GEN6_GS_SAMPLER_COUNT_SHIFT 27
+# define GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
+/* DW4 */
+# define GEN6_GS_URB_READ_LENGTH_SHIFT 11
+# define GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT 4
+# define GEN6_GS_DISPATCH_START_GRF_SHIFT 0
+/* DW5 */
+# define GEN6_GS_MAX_THREADS_SHIFT 25
+# define GEN6_GS_STATISTICS_ENABLE (1 << 10)
+# define GEN6_GS_SO_STATISTICS_ENABLE (1 << 9)
+# define GEN6_GS_RENDERING_ENABLE (1 << 8)
+/* DW6 */
+# define GEN6_GS_ENABLE (1 << 15)
+
+#define CMD_3D_CLIP_STATE 0x7812 /* GEN6+ */
+/* DW1 */
+# define GEN6_CLIP_STATISTICS_ENABLE (1 << 10)
+/* DW2 */
+# define GEN6_CLIP_ENABLE (1 << 31)
+# define GEN6_CLIP_API_OGL (0 << 30)
+# define GEN6_CLIP_API_D3D (1 << 30)
+# define GEN6_CLIP_XY_TEST (1 << 28)
+# define GEN6_CLIP_Z_TEST (1 << 27)
+# define GEN6_CLIP_GB_TEST (1 << 26)
+# define GEN6_CLIP_MODE_NORMAL (0 << 13)
+# define GEN6_CLIP_MODE_REJECT_ALL (3 << 13)
+# define GEN6_CLIP_MODE_ACCEPT_ALL (4 << 13)
+# define GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE (1 << 9)
+# define GEN6_CLIP_BARYCENTRIC_ENABLE (1 << 8)
+# define GEN6_CLIP_TRI_PROVOKE_SHIFT 4
+# define GEN6_CLIP_LINE_PROVOKE_SHIFT 2
+# define GEN6_CLIP_TRIFAN_PROVOKE_SHIFT 0
+/* DW3 */
+# define GEN6_CLIP_MIN_POINT_WIDTH_SHIFT 17
+# define GEN6_CLIP_MAX_POINT_WIDTH_SHIFT 6
+
+#define CMD_3D_SF_STATE 0x7813 /* GEN6+ */
+/* DW1 */
+# define GEN6_SF_NUM_OUTPUTS_SHIFT 22
+# define GEN6_SF_SWIZZLE_ENABLE (1 << 21)
+# define GEN6_SF_POINT_SPRITE_LOWERLEFT (1 << 20)
+# define GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT 11
+# define GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT 4
+/* DW2 */
+# define GEN6_SF_LEGACY_GLOBAL_DEPTH_BIAS (1 << 11)
+# define GEN6_SF_STATISTICS_ENABLE (1 << 10)
+# define GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID (1 << 9)
+# define GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME (1 << 8)
+# define GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT (1 << 7)
+# define GEN6_SF_FRONT_SOLID (0 << 5)
+# define GEN6_SF_FRONT_WIREFRAME (1 << 5)
+# define GEN6_SF_FRONT_POINT (2 << 5)
+# define GEN6_SF_BACK_SOLID (0 << 3)
+# define GEN6_SF_BACK_WIREFRAME (1 << 3)
+# define GEN6_SF_BACK_POINT (2 << 3)
+# define GEN6_SF_VIEWPORT_TRANSFORM_ENABLE (1 << 1)
+# define GEN6_SF_WINDING_CCW (1 << 0)
+/* DW3 */
+# define GEN6_SF_LINE_AA_ENABLE (1 << 31)
+# define GEN6_SF_CULL_BOTH (0 << 29)
+# define GEN6_SF_CULL_NONE (1 << 29)
+# define GEN6_SF_CULL_FRONT (2 << 29)
+# define GEN6_SF_CULL_BACK (3 << 29)
+# define GEN6_SF_LINE_WIDTH_SHIFT 18 /* U3.7 */
+# define GEN6_SF_LINE_END_CAP_WIDTH_0_5 (0 << 16)
+# define GEN6_SF_LINE_END_CAP_WIDTH_1_0 (1 << 16)
+# define GEN6_SF_LINE_END_CAP_WIDTH_2_0 (2 << 16)
+# define GEN6_SF_LINE_END_CAP_WIDTH_4_0 (3 << 16)
+# define GEN6_SF_SCISSOR_ENABLE (1 << 11)
+# define GEN6_SF_MSRAST_OFF_PIXEL (0 << 8)
+# define GEN6_SF_MSRAST_OFF_PATTERN (1 << 8)
+# define GEN6_SF_MSRAST_ON_PIXEL (2 << 8)
+# define GEN6_SF_MSRAST_ON_PATTERN (3 << 8)
+/* DW4 */
+# define GEN6_SF_TRI_PROVOKE_SHIFT 29
+# define GEN6_SF_LINE_PROVOKE_SHIFT 27
+# define GEN6_SF_TRIFAN_PROVOKE_SHIFT 25
+# define GEN6_SF_LINE_AA_MODE_MANHATTAN (0 << 14)
+# define GEN6_SF_LINE_AA_MODE_TRUE (1 << 14)
+# define GEN6_SF_VERTEX_SUBPIXEL_8BITS (0 << 12)
+# define GEN6_SF_VERTEX_SUBPIXEL_4BITS (1 << 12)
+# define GEN6_SF_USE_STATE_POINT_WIDTH (1 << 11)
+# define GEN6_SF_POINT_WIDTH_SHIFT 0 /* U8.3 */
+/* DW5: depth offset constant */
+/* DW6: depth offset scale */
+/* DW7: depth offset clamp */
+/* DW8 */
+# define ATTRIBUTE_1_OVERRIDE_W (1 << 31)
+# define ATTRIBUTE_1_OVERRIDE_Z (1 << 30)
+# define ATTRIBUTE_1_OVERRIDE_Y (1 << 29)
+# define ATTRIBUTE_1_OVERRIDE_X (1 << 28)
+# define ATTRIBUTE_1_CONST_SOURCE_SHIFT 25
+# define ATTRIBUTE_1_SWIZZLE_SHIFT 22
+# define ATTRIBUTE_1_SOURCE_SHIFT 16
+# define ATTRIBUTE_0_OVERRIDE_W (1 << 15)
+# define ATTRIBUTE_0_OVERRIDE_Z (1 << 14)
+# define ATTRIBUTE_0_OVERRIDE_Y (1 << 13)
+# define ATTRIBUTE_0_OVERRIDE_X (1 << 12)
+# define ATTRIBUTE_0_CONST_SOURCE_SHIFT 9
+# define ATTRIBUTE_0_SWIZZLE_SHIFT 6
+# define ATTRIBUTE_0_SOURCE_SHIFT 0
+/* DW16: Point sprite texture coordinate enables */
+/* DW17: Constant interpolation enables */
+/* DW18: attr 0-7 wrap shortest enables */
+/* DW19: attr 8-16 wrap shortest enables */
+
+#define CMD_3D_WM_STATE 0x7814 /* GEN6+ */
+/* DW1: kernel pointer */
+/* DW2 */
+# define GEN6_WM_SPF_MODE (1 << 31)
+# define GEN6_WM_VECTOR_MASK_ENABLE (1 << 30)
+# define GEN6_WM_SAMPLER_COUNT_SHIFT 27
+# define GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
+/* DW3: scratch space */
+/* DW4 */
+# define GEN6_WM_STATISTICS_ENABLE (1 << 31)
+# define GEN6_WM_DEPTH_CLEAR (1 << 30)
+# define GEN6_WM_DEPTH_RESOLVE (1 << 28)
+# define GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE (1 << 27)
+# define GEN6_WM_DISPATCH_START_GRF_SHIFT_0 16
+# define GEN6_WM_DISPATCH_START_GRF_SHIFT_1 8
+# define GEN6_WM_DISPATCH_START_GRF_SHIFT_2 0
+/* DW5 */
+# define GEN6_WM_MAX_THREADS_SHIFT 25
+# define GEN6_WM_KILL_ENABLE (1 << 22)
+# define GEN6_WM_COMPUTED_DEPTH (1 << 21)
+# define GEN6_WM_USES_SOURCE_DEPTH (1 << 20)
+# define GEN6_WM_DISPATCH_ENABLE (1 << 19)
+# define GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5 (0 << 16)
+# define GEN6_WM_LINE_END_CAP_AA_WIDTH_1_0 (1 << 16)
+# define GEN6_WM_LINE_END_CAP_AA_WIDTH_2_0 (2 << 16)
+# define GEN6_WM_LINE_END_CAP_AA_WIDTH_4_0 (3 << 16)
+# define GEN6_WM_LINE_AA_WIDTH_0_5 (0 << 14)
+# define GEN6_WM_LINE_AA_WIDTH_1_0 (1 << 14)
+# define GEN6_WM_LINE_AA_WIDTH_2_0 (2 << 14)
+# define GEN6_WM_LINE_AA_WIDTH_4_0 (3 << 14)
+# define GEN6_WM_POLYGON_STIPPLE_ENABLE (1 << 13)
+# define GEN6_WM_LINE_STIPPLE_ENABLE (1 << 12)
+# define GEN6_WM_OMASK_TO_RENDER_TARGET (1 << 9)
+# define GEN6_WM_USES_SOURCE_W (1 << 8)
+# define GEN6_WM_DUAL_SOURCE_BLEND_ENABLE (1 << 7)
+# define GEN6_WM_32_DISPATCH_ENABLE (1 << 2)
+# define GEN6_WM_16_DISPATCH_ENABLE (1 << 1)
+# define GEN6_WM_8_DISPATCH_ENABLE (1 << 0)
+/* DW6 */
+# define GEN6_WM_NUM_SF_OUTPUTS_SHIFT 20
+# define GEN6_WM_POSOFFSET_NONE (0 << 18)
+# define GEN6_WM_POSOFFSET_CENTROID (2 << 18)
+# define GEN6_WM_POSOFFSET_SAMPLE (3 << 18)
+# define GEN6_WM_POSITION_ZW_PIXEL (0 << 16)
+# define GEN6_WM_POSITION_ZW_CENTROID (2 << 16)
+# define GEN6_WM_POSITION_ZW_SAMPLE (3 << 16)
+# define GEN6_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 15)
+# define GEN6_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 14)
+# define GEN6_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 << 13)
+# define GEN6_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 12)
+# define GEN6_WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 11)
+# define GEN6_WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 10)
+# define GEN6_WM_POINT_RASTRULE_UPPER_RIGHT (1 << 9)
+# define GEN6_WM_MSRAST_OFF_PIXEL (0 << 1)
+# define GEN6_WM_MSRAST_OFF_PATTERN (1 << 1)
+# define GEN6_WM_MSRAST_ON_PIXEL (2 << 1)
+# define GEN6_WM_MSRAST_ON_PATTERN (3 << 1)
+# define GEN6_WM_MSDISPMODE_PERPIXEL (1 << 0)
+/* DW7: kernel 1 pointer */
+/* DW8: kernel 2 pointer */
+
+#define CMD_3D_CONSTANT_VS_STATE 0x7815 /* GEN6+ */
+#define CMD_3D_CONSTANT_GS_STATE 0x7816 /* GEN6+ */
+#define CMD_3D_CONSTANT_PS_STATE 0x7817 /* GEN6+ */
+# define GEN6_CONSTANT_BUFFER_3_ENABLE (1 << 15)
+# define GEN6_CONSTANT_BUFFER_2_ENABLE (1 << 14)
+# define GEN6_CONSTANT_BUFFER_1_ENABLE (1 << 13)
+# define GEN6_CONSTANT_BUFFER_0_ENABLE (1 << 12)
+
+#define CMD_3D_SAMPLE_MASK 0x7818 /* GEN6+ */
#define CMD_DRAW_RECT 0x7900
#define CMD_BLEND_CONSTANT_COLOR 0x7901
@@ -818,6 +1047,25 @@
#define CMD_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909
#define CMD_AA_LINE_PARAMETERS 0x790a
+#define CMD_GS_SVB_INDEX 0x790b /* CTG+ */
+/* DW1 */
+# define SVB_INDEX_SHIFT 29
+# define SVB_LOAD_INTERNAL_VERTEX_COUNT (1 << 0) /* SNB+ */
+/* DW2: SVB index */
+/* DW3: SVB maximum index */
+
+#define CMD_3D_MULTISAMPLE 0x790d /* SNB+ */
+/* DW1 */
+# define MS_PIXEL_LOCATION_CENTER (0 << 4)
+# define MS_PIXEL_LOCATION_UPPER_LEFT (1 << 4)
+# define MS_NUMSAMPLES_1 (0 << 1)
+# define MS_NUMSAMPLES_4 (2 << 1)
+# define MS_NUMSAMPLES_8 (3 << 1)
+
+#define CMD_3D_CLEAR_PARAMS 0x7910 /* ILK+ */
+# define DEPTH_CLEAR_VALID (1 << 15)
+/* DW1: depth clear value */
+
#define CMD_PIPE_CONTROL 0x7a00
#define CMD_3D_PRIM 0x7b00
@@ -832,12 +1080,4 @@
#include "intel_chipset.h"
-#define BRW_IS_G4X(brw) (IS_G4X((brw)->intel.intelScreen->deviceID))
-#define BRW_IS_IGDNG(brw) (IS_IGDNG((brw)->intel.intelScreen->deviceID))
-#define BRW_IS_965(brw) (!(BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)))
-#define CMD_PIPELINE_SELECT(brw) ((BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? CMD_PIPELINE_SELECT_GM45 : CMD_PIPELINE_SELECT_965)
-#define CMD_VF_STATISTICS(brw) ((BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? CMD_VF_STATISTICS_GM45 : CMD_VF_STATISTICS_965)
-#define URB_SIZES(brw) (BRW_IS_IGDNG(brw) ? 1024 : \
- (BRW_IS_G4X(brw) ? 384 : 256)) /* 512 bit units */
-
#endif
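/* (Illustrative sketch only.)  The GEN6_* definitions added above are plain
 * shift and enable bits, so building a hardware DWord from them is just ORing
 * the shifted fields together.  For example, DW5 of 3DSTATE_VS could be
 * packed as below, where max_threads follows the "N - 1" convention this
 * driver already uses (compare clip.thread4.max_threads = 16 - 1 earlier in
 * the diff).  The helper name pack_gen6_vs_dw5 is made up for the example,
 * and the GEN6_VS_* macros from brw_defines.h above are assumed in scope.
 */
#include <stdint.h>

static uint32_t
pack_gen6_vs_dw5(uint32_t max_threads)
{
   return ((max_threads - 1) << GEN6_VS_MAX_THREADS_SHIFT) |
          GEN6_VS_STATISTICS_ENABLE |
          GEN6_VS_ENABLE;
}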
diff --git a/i965/brw_disasm.c b/i965/brw_disasm.c
index 9fef230..a8f6b99 100644
--- a/i965/brw_disasm.c
+++ b/i965/brw_disasm.c
@@ -239,7 +239,7 @@ char *imm_encoding[8] = {
[2] = "UW",
[3] = "W",
[5] = "VF",
- [5] = "V",
+ [6] = "V",
[7] = "F"
};
@@ -365,6 +365,7 @@ static int format (FILE *f, char *format, ...)
va_start (args, format);
vsnprintf (buf, sizeof (buf) - 1, format, args);
+ va_end (args);
string (f, buf);
return 0;
}
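/* (Side note, sketch only.)  The imm_encoding[] change above matters because
 * C99 designated initializers let a later entry override an earlier one with
 * the same index, usually without a diagnostic: the old "[5] = "VF", [5] = "V""
 * dropped the "VF" string and left slot 6 empty.  A tiny illustration:
 */
const char *example[8] = {
   [5] = "VF",
   [5] = "V",   /* overrides "VF"; the commit changes this index to 6 */
   [7] = "F"
};
/* Here example[5] is "V" and example[6] is NULL -- the bug the hunk above fixes. */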
diff --git a/i965/brw_draw.c b/i965/brw_draw.c
index 8bcb608..e348d46 100644
--- a/i965/brw_draw.c
+++ b/i965/brw_draw.c
@@ -39,10 +39,8 @@
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_state.h"
-#include "brw_fallback.h"
#include "intel_batchbuffer.h"
-#include "intel_buffer_objects.h"
#define FILE_DEBUG_FLAG DEBUG_BATCH
@@ -84,7 +82,7 @@ static GLuint brw_set_prim(struct brw_context *brw, GLenum prim)
GLcontext *ctx = &brw->intel.ctx;
if (INTEL_DEBUG & DEBUG_PRIMS)
- _mesa_printf("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim));
+ printf("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim));
/* Slight optimization to avoid the GS program when not needed:
*/
@@ -127,7 +125,7 @@ static void brw_emit_prim(struct brw_context *brw,
struct intel_context *intel = &brw->intel;
if (INTEL_DEBUG & DEBUG_PRIMS)
- _mesa_printf("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode),
+ printf("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode),
prim->start, prim->count);
prim_packet.header.opcode = CMD_3D_PRIM;
@@ -145,7 +143,7 @@ static void brw_emit_prim(struct brw_context *brw,
prim_packet.base_vert_location = prim->basevertex;
/* Can't wrap here, since we rely on the validated state. */
- brw->no_batch_wrap = GL_TRUE;
+ intel->no_batch_wrap = GL_TRUE;
/* If we're set to always flush, do it before and after the primitive emit.
* We want to catch both missed flushes that hurt instruction/state cache
@@ -157,13 +155,13 @@ static void brw_emit_prim(struct brw_context *brw,
}
if (prim_packet.verts_per_instance) {
intel_batchbuffer_data( brw->intel.batch, &prim_packet,
- sizeof(prim_packet), LOOP_CLIPRECTS);
+ sizeof(prim_packet));
}
if (intel->always_flush_cache) {
intel_batchbuffer_emit_mi_flush(intel->batch);
}
- brw->no_batch_wrap = GL_FALSE;
+ intel->no_batch_wrap = GL_FALSE;
}
static void brw_merge_inputs( struct brw_context *brw,
@@ -339,12 +337,7 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx,
* so can't access it earlier.
*/
- LOCK_HARDWARE(intel);
-
- if (!intel->constant_cliprect && intel->driDrawable->numClipRects == 0) {
- UNLOCK_HARDWARE(intel);
- return GL_TRUE;
- }
+ intel_prepare_render(intel);
for (i = 0; i < nr_prims; i++) {
uint32_t hw_prim;
@@ -356,8 +349,7 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx,
* an upper bound of how much we might emit in a single
* brw_try_draw_prims().
*/
- intel_batchbuffer_require_space(intel->batch, intel->batch->size / 4,
- LOOP_CLIPRECTS);
+ intel_batchbuffer_require_space(intel->batch, intel->batch->size / 4);
hw_prim = brw_set_prim(brw, prim[i].mode);
@@ -404,7 +396,6 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx,
if (intel->always_flush_batch)
intel_batchbuffer_flush(intel->batch);
out:
- UNLOCK_HARDWARE(intel);
brw_state_cache_check_size(brw);
diff --git a/i965/brw_draw_upload.c b/i965/brw_draw_upload.c
index ee684f6..71a4357 100644
--- a/i965/brw_draw_upload.c
+++ b/i965/brw_draw_upload.c
@@ -29,19 +29,15 @@
#include "main/glheader.h"
#include "main/bufferobj.h"
#include "main/context.h"
-#include "main/state.h"
-/* #include "main/api_validate.h" */
#include "main/enums.h"
#include "brw_draw.h"
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_state.h"
-#include "brw_fallback.h"
#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"
-#include "intel_tex.h"
static GLuint double_types[5] = {
0,
@@ -59,6 +55,14 @@ static GLuint float_types[5] = {
BRW_SURFACEFORMAT_R32G32B32A32_FLOAT
};
+static GLuint half_float_types[5] = {
+ 0,
+ BRW_SURFACEFORMAT_R16_FLOAT,
+ BRW_SURFACEFORMAT_R16G16_FLOAT,
+ 0, /* can't seem to render this one */
+ BRW_SURFACEFORMAT_R16G16B16A16_FLOAT
+};
+
static GLuint uint_types_norm[5] = {
0,
BRW_SURFACEFORMAT_R32_UNORM,
@@ -165,13 +169,14 @@ static GLuint get_surface_type( GLenum type, GLuint size,
GLenum format, GLboolean normalized )
{
if (INTEL_DEBUG & DEBUG_VERTS)
- _mesa_printf("type %s size %d normalized %d\n",
+ printf("type %s size %d normalized %d\n",
_mesa_lookup_enum_by_nr(type), size, normalized);
if (normalized) {
switch (type) {
case GL_DOUBLE: return double_types[size];
case GL_FLOAT: return float_types[size];
+ case GL_HALF_FLOAT: return half_float_types[size];
case GL_INT: return int_types_norm[size];
case GL_SHORT: return short_types_norm[size];
case GL_BYTE: return byte_types_norm[size];
@@ -194,6 +199,7 @@ static GLuint get_surface_type( GLenum type, GLuint size,
switch (type) {
case GL_DOUBLE: return double_types[size];
case GL_FLOAT: return float_types[size];
+ case GL_HALF_FLOAT: return half_float_types[size];
case GL_INT: return int_types_scale[size];
case GL_SHORT: return short_types_scale[size];
case GL_BYTE: return byte_types_scale[size];
@@ -211,6 +217,7 @@ static GLuint get_size( GLenum type )
switch (type) {
case GL_DOUBLE: return sizeof(GLdouble);
case GL_FLOAT: return sizeof(GLfloat);
+ case GL_HALF_FLOAT: return sizeof(GLhalfARB);
case GL_INT: return sizeof(GLint);
case GL_SHORT: return sizeof(GLshort);
case GL_BYTE: return sizeof(GLbyte);
@@ -243,14 +250,6 @@ static void wrap_buffers( struct brw_context *brw,
dri_bo_unreference(brw->vb.upload.bo);
brw->vb.upload.bo = dri_bo_alloc(brw->intel.bufmgr, "temporary VBO",
size, 1);
-
- /* Set the internal VBO\ to no-backing-store. We only use them as a
- * temporary within a brw_try_draw_prims while the lock is held.
- */
- /* DON'T DO THIS AS IF WE HAVE TO RE-ORG MEMORY WE NEED SOMEWHERE WITH
- FAKE TO PUSH THIS STUFF */
-// if (!brw->intel.ttm)
-// dri_bo_fake_disable_backing_store(brw->vb.upload.bo, NULL, NULL);
}
static void get_space( struct brw_context *brw,
@@ -277,7 +276,6 @@ copy_array_to_vbo_array( struct brw_context *brw,
struct brw_vertex_element *element,
GLuint dst_stride)
{
- struct intel_context *intel = &brw->intel;
GLuint size = element->count * dst_stride;
get_space(brw, size, &element->bo, &element->offset);
@@ -290,52 +288,26 @@ copy_array_to_vbo_array( struct brw_context *brw,
}
if (dst_stride == element->glarray->StrideB) {
- if (intel->intelScreen->kernel_exec_fencing) {
- drm_intel_gem_bo_map_gtt(element->bo);
- memcpy((char *)element->bo->virtual + element->offset,
- element->glarray->Ptr, size);
- drm_intel_gem_bo_unmap_gtt(element->bo);
- } else {
- dri_bo_subdata(element->bo,
- element->offset,
- size,
- element->glarray->Ptr);
- }
+ drm_intel_gem_bo_map_gtt(element->bo);
+ memcpy((char *)element->bo->virtual + element->offset,
+ element->glarray->Ptr, size);
+ drm_intel_gem_bo_unmap_gtt(element->bo);
} else {
char *dest;
const unsigned char *src = element->glarray->Ptr;
int i;
- if (intel->intelScreen->kernel_exec_fencing) {
- drm_intel_gem_bo_map_gtt(element->bo);
- dest = element->bo->virtual;
- dest += element->offset;
-
- for (i = 0; i < element->count; i++) {
- memcpy(dest, src, dst_stride);
- src += element->glarray->StrideB;
- dest += dst_stride;
- }
-
- drm_intel_gem_bo_unmap_gtt(element->bo);
- } else {
- void *data;
-
- data = _mesa_malloc(dst_stride * element->count);
- dest = data;
- for (i = 0; i < element->count; i++) {
- memcpy(dest, src, dst_stride);
- src += element->glarray->StrideB;
- dest += dst_stride;
- }
-
- dri_bo_subdata(element->bo,
- element->offset,
- size,
- data);
+ drm_intel_gem_bo_map_gtt(element->bo);
+ dest = element->bo->virtual;
+ dest += element->offset;
- _mesa_free(data);
+ for (i = 0; i < element->count; i++) {
+ memcpy(dest, src, dst_stride);
+ src += element->glarray->StrideB;
+ dest += dst_stride;
}
+
+ drm_intel_gem_bo_unmap_gtt(element->bo);
}
}
@@ -356,7 +328,7 @@ static void brw_prepare_vertices(struct brw_context *brw)
/* First build an array of pointers to ve's in vb.inputs_read
*/
if (0)
- _mesa_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index);
+ printf("%s %d..%d\n", __FUNCTION__, min_index, max_index);
/* Accumulate the list of enabled arrays. */
brw->vb.nr_enabled = 0;
@@ -502,12 +474,19 @@ static void brw_emit_vertices(struct brw_context *brw)
* a VE loads from them.
*/
if (brw->vb.nr_enabled == 0) {
- BEGIN_BATCH(3, IGNORE_CLIPRECTS);
+ BEGIN_BATCH(3);
OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | 1);
- OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) |
- BRW_VE0_VALID |
- (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
- (0 << BRW_VE0_SRC_OFFSET_SHIFT));
+ if (IS_GEN6(intel->intelScreen->deviceID)) {
+ OUT_BATCH((0 << GEN6_VE0_INDEX_SHIFT) |
+ GEN6_VE0_VALID |
+ (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
+ (0 << BRW_VE0_SRC_OFFSET_SHIFT));
+ } else {
+ OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) |
+ BRW_VE0_VALID |
+ (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
+ (0 << BRW_VE0_SRC_OFFSET_SHIFT));
+ }
OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) |
(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
@@ -522,20 +501,28 @@ static void brw_emit_vertices(struct brw_context *brw)
* are interleaved or from the same VBO. TBD if this makes a
* performance difference.
*/
- BEGIN_BATCH(1 + brw->vb.nr_enabled * 4, IGNORE_CLIPRECTS);
+ BEGIN_BATCH(1 + brw->vb.nr_enabled * 4);
OUT_BATCH((CMD_VERTEX_BUFFER << 16) |
((1 + brw->vb.nr_enabled * 4) - 2));
for (i = 0; i < brw->vb.nr_enabled; i++) {
struct brw_vertex_element *input = brw->vb.enabled[i];
+ uint32_t dw0;
+
+ if (intel->gen >= 6) {
+ dw0 = GEN6_VB0_ACCESS_VERTEXDATA |
+ (i << GEN6_VB0_INDEX_SHIFT);
+ } else {
+ dw0 = BRW_VB0_ACCESS_VERTEXDATA |
+ (i << BRW_VB0_INDEX_SHIFT);
+ }
- OUT_BATCH((i << BRW_VB0_INDEX_SHIFT) |
- BRW_VB0_ACCESS_VERTEXDATA |
+ OUT_BATCH(dw0 |
(input->stride << BRW_VB0_PITCH_SHIFT));
OUT_RELOC(input->bo,
I915_GEM_DOMAIN_VERTEX, 0,
input->offset);
- if (BRW_IS_IGDNG(brw)) {
+ if (intel->is_ironlake || intel->gen >= 6) {
OUT_RELOC(input->bo,
I915_GEM_DOMAIN_VERTEX, 0,
input->bo->size - 1);
@@ -545,7 +532,7 @@ static void brw_emit_vertices(struct brw_context *brw)
}
ADVANCE_BATCH();
- BEGIN_BATCH(1 + brw->vb.nr_enabled * 2, IGNORE_CLIPRECTS);
+ BEGIN_BATCH(1 + brw->vb.nr_enabled * 2);
OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + brw->vb.nr_enabled * 2) - 2));
for (i = 0; i < brw->vb.nr_enabled; i++) {
struct brw_vertex_element *input = brw->vb.enabled[i];
@@ -566,12 +553,19 @@ static void brw_emit_vertices(struct brw_context *brw)
break;
}
- OUT_BATCH((i << BRW_VE0_INDEX_SHIFT) |
- BRW_VE0_VALID |
- (format << BRW_VE0_FORMAT_SHIFT) |
- (0 << BRW_VE0_SRC_OFFSET_SHIFT));
+ if (IS_GEN6(intel->intelScreen->deviceID)) {
+ OUT_BATCH((i << GEN6_VE0_INDEX_SHIFT) |
+ GEN6_VE0_VALID |
+ (format << BRW_VE0_FORMAT_SHIFT) |
+ (0 << BRW_VE0_SRC_OFFSET_SHIFT));
+ } else {
+ OUT_BATCH((i << BRW_VE0_INDEX_SHIFT) |
+ BRW_VE0_VALID |
+ (format << BRW_VE0_FORMAT_SHIFT) |
+ (0 << BRW_VE0_SRC_OFFSET_SHIFT));
+ }
- if (BRW_IS_IGDNG(brw))
+ if (intel->is_ironlake || intel->gen >= 6)
OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
(comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
(comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
@@ -625,13 +619,9 @@ static void brw_prepare_indices(struct brw_context *brw)
/* Straight upload
*/
- if (intel->intelScreen->kernel_exec_fencing) {
- drm_intel_gem_bo_map_gtt(bo);
- memcpy((char *)bo->virtual + offset, index_buffer->ptr, ib_size);
- drm_intel_gem_bo_unmap_gtt(bo);
- } else {
- dri_bo_subdata(bo, offset, ib_size, index_buffer->ptr);
- }
+ drm_intel_gem_bo_map_gtt(bo);
+ memcpy((char *)bo->virtual + offset, index_buffer->ptr, ib_size);
+ drm_intel_gem_bo_unmap_gtt(bo);
} else {
offset = (GLuint) (unsigned long) index_buffer->ptr;
brw->ib.start_vertex_offset = 0;
@@ -712,7 +702,7 @@ static void brw_emit_index_buffer(struct brw_context *brw)
ib.header.bits.index_format = get_index_type(index_buffer->type);
ib.header.bits.cut_index_enable = 0;
- BEGIN_BATCH(4, IGNORE_CLIPRECTS);
+ BEGIN_BATCH(4);
OUT_BATCH( ib.header.dword );
OUT_RELOC(brw->ib.bo,
I915_GEM_DOMAIN_VERTEX, 0,
diff --git a/i965/brw_eu.c b/i965/brw_eu.c
index 1df5613..4e7c122 100644
--- a/i965/brw_eu.c
+++ b/i965/brw_eu.c
@@ -237,7 +237,7 @@ brw_resolve_cals(struct brw_compile *c)
struct brw_glsl_call *call, *next;
for (call = c->first_call; call; call = next) {
next = call->next;
- _mesa_free(call);
+ free(call);
}
c->first_call = NULL;
}
@@ -247,7 +247,7 @@ brw_resolve_cals(struct brw_compile *c)
struct brw_glsl_label *label, *next;
for (label = c->first_label; label; label = next) {
next = label->next;
- _mesa_free(label);
+ free(label);
}
c->first_label = NULL;
}
diff --git a/i965/brw_eu_debug.c b/i965/brw_eu_debug.c
index 29f3f6d..99453af 100644
--- a/i965/brw_eu_debug.c
+++ b/i965/brw_eu_debug.c
@@ -54,9 +54,9 @@ void brw_print_reg( struct brw_reg hwreg )
"f"
};
- _mesa_printf("%s%s",
- hwreg.abs ? "abs/" : "",
- hwreg.negate ? "-" : "");
+ printf("%s%s",
+ hwreg.abs ? "abs/" : "",
+ hwreg.negate ? "-" : "");
if (hwreg.file == BRW_GENERAL_REGISTER_FILE &&
hwreg.nr % 2 == 0 &&
@@ -66,7 +66,7 @@ void brw_print_reg( struct brw_reg hwreg )
hwreg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
hwreg.type == BRW_REGISTER_TYPE_F) {
/* vector register */
- _mesa_printf("vec%d", hwreg.nr);
+ printf("vec%d", hwreg.nr);
}
else if (hwreg.file == BRW_GENERAL_REGISTER_FILE &&
hwreg.vstride == BRW_VERTICAL_STRIDE_0 &&
@@ -74,13 +74,13 @@ void brw_print_reg( struct brw_reg hwreg )
hwreg.hstride == BRW_HORIZONTAL_STRIDE_0 &&
hwreg.type == BRW_REGISTER_TYPE_F) {
/* "scalar" register */
- _mesa_printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4);
+ printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4);
}
else if (hwreg.file == BRW_IMMEDIATE_VALUE) {
- _mesa_printf("imm %f", hwreg.dw1.f);
+ printf("imm %f", hwreg.dw1.f);
}
else {
- _mesa_printf("%s%d.%d<%d;%d,%d>:%s",
+ printf("%s%d.%d<%d;%d,%d>:%s",
file[hwreg.file],
hwreg.nr,
hwreg.subnr / type_sz(hwreg.type),
diff --git a/i965/brw_eu_emit.c b/i965/brw_eu_emit.c
index 7ceabba..f69d529 100644
--- a/i965/brw_eu_emit.c
+++ b/i965/brw_eu_emit.c
@@ -102,8 +102,6 @@ static void brw_set_dest( struct brw_instruction *insn,
static void brw_set_src0( struct brw_instruction *insn,
struct brw_reg reg )
{
- assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
-
if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
assert(reg.nr < 128);
@@ -199,7 +197,7 @@ void brw_set_src1( struct brw_instruction *insn,
* in the future:
*/
assert (reg.address_mode == BRW_ADDRESS_DIRECT);
- //assert (reg.file == BRW_GENERAL_REGISTER_FILE);
+ /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
if (insn->header.access_mode == BRW_ALIGN_1) {
insn->bits3.da1.src1_subreg_nr = reg.subnr;
@@ -252,9 +250,10 @@ static void brw_set_math_message( struct brw_context *brw,
GLboolean saturate,
GLuint dataType )
{
+ struct intel_context *intel = &brw->intel;
brw_set_src1(insn, brw_imm_d(0));
- if (BRW_IS_IGDNG(brw)) {
+ if (intel->is_ironlake) {
insn->bits3.math_igdng.function = function;
insn->bits3.math_igdng.int_type = integer_type;
insn->bits3.math_igdng.precision = low_precision;
@@ -319,9 +318,10 @@ static void brw_set_urb_message( struct brw_context *brw,
GLuint offset,
GLuint swizzle_control )
{
+ struct intel_context *intel = &brw->intel;
brw_set_src1(insn, brw_imm_d(0));
- if (BRW_IS_IGDNG(brw)) {
+ if (intel->is_ironlake || intel->gen >= 6) {
insn->bits3.urb_igdng.opcode = 0; /* ? */
insn->bits3.urb_igdng.offset = offset;
insn->bits3.urb_igdng.swizzle_control = swizzle_control;
@@ -332,8 +332,16 @@ static void brw_set_urb_message( struct brw_context *brw,
insn->bits3.urb_igdng.response_length = response_length;
insn->bits3.urb_igdng.msg_length = msg_length;
insn->bits3.urb_igdng.end_of_thread = end_of_thread;
- insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB;
- insn->bits2.send_igdng.end_of_thread = end_of_thread;
+ if (intel->gen >= 6) {
+ /* For SNB, the SFID bits moved to the condmod bits, and
+ * EOT stayed in bits3 above. Does the EOT bit setting
+ * below on Ironlake even do anything?
+ */
+ insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
+ } else {
+ insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB;
+ insn->bits2.send_igdng.end_of_thread = end_of_thread;
+ }
} else {
insn->bits3.urb.opcode = 0; /* ? */
insn->bits3.urb.offset = offset;
@@ -358,9 +366,10 @@ static void brw_set_dp_write_message( struct brw_context *brw,
GLuint response_length,
GLuint end_of_thread )
{
+ struct intel_context *intel = &brw->intel;
brw_set_src1(insn, brw_imm_d(0));
- if (BRW_IS_IGDNG(brw)) {
+ if (intel->is_ironlake) {
insn->bits3.dp_write_igdng.binding_table_index = binding_table_index;
insn->bits3.dp_write_igdng.msg_control = msg_control;
insn->bits3.dp_write_igdng.pixel_scoreboard_clear = pixel_scoreboard_clear;
@@ -395,9 +404,10 @@ static void brw_set_dp_read_message( struct brw_context *brw,
GLuint response_length,
GLuint end_of_thread )
{
+ struct intel_context *intel = &brw->intel;
brw_set_src1(insn, brw_imm_d(0));
- if (BRW_IS_IGDNG(brw)) {
+ if (intel->is_ironlake) {
insn->bits3.dp_read_igdng.binding_table_index = binding_table_index;
insn->bits3.dp_read_igdng.msg_control = msg_control;
insn->bits3.dp_read_igdng.msg_type = msg_type;
@@ -433,10 +443,11 @@ static void brw_set_sampler_message(struct brw_context *brw,
GLuint header_present,
GLuint simd_mode)
{
+ struct intel_context *intel = &brw->intel;
assert(eot == 0);
brw_set_src1(insn, brw_imm_d(0));
- if (BRW_IS_IGDNG(brw)) {
+ if (intel->is_ironlake) {
insn->bits3.sampler_igdng.binding_table_index = binding_table_index;
insn->bits3.sampler_igdng.sampler = sampler;
insn->bits3.sampler_igdng.msg_type = msg_type;
@@ -447,7 +458,7 @@ static void brw_set_sampler_message(struct brw_context *brw,
insn->bits3.sampler_igdng.end_of_thread = eot;
insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_SAMPLER;
insn->bits2.send_igdng.end_of_thread = eot;
- } else if (BRW_IS_G4X(brw)) {
+ } else if (intel->is_g4x) {
insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
insn->bits3.sampler_g4x.sampler = sampler;
insn->bits3.sampler_g4x.msg_type = msg_type;
@@ -648,10 +659,11 @@ struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
struct brw_instruction *brw_ELSE(struct brw_compile *p,
struct brw_instruction *if_insn)
{
+ struct intel_context *intel = &p->brw->intel;
struct brw_instruction *insn;
GLuint br = 1;
- if (BRW_IS_IGDNG(p->brw))
+ if (intel->is_ironlake)
br = 2;
if (p->single_program_flow) {
@@ -690,9 +702,10 @@ struct brw_instruction *brw_ELSE(struct brw_compile *p,
void brw_ENDIF(struct brw_compile *p,
struct brw_instruction *patch_insn)
{
+ struct intel_context *intel = &p->brw->intel;
GLuint br = 1;
- if (BRW_IS_IGDNG(p->brw))
+ if (intel->is_ironlake)
br = 2;
if (p->single_program_flow) {
@@ -803,10 +816,11 @@ struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
struct brw_instruction *brw_WHILE(struct brw_compile *p,
struct brw_instruction *do_insn)
{
+ struct intel_context *intel = &p->brw->intel;
struct brw_instruction *insn;
GLuint br = 1;
- if (BRW_IS_IGDNG(p->brw))
+ if (intel->is_ironlake)
br = 2;
if (p->single_program_flow)
@@ -846,14 +860,15 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p,
void brw_land_fwd_jump(struct brw_compile *p,
struct brw_instruction *jmp_insn)
{
+ struct intel_context *intel = &p->brw->intel;
struct brw_instruction *landing = &p->store[p->nr_insn];
GLuint jmpi = 1;
- if (BRW_IS_IGDNG(p->brw))
+ if (intel->is_ironlake)
jmpi = 2;
assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
- assert(jmp_insn->bits1.da1.src1_reg_file = BRW_IMMEDIATE_VALUE);
+ assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
}
@@ -908,26 +923,40 @@ void brw_math( struct brw_compile *p,
GLuint data_type,
GLuint precision )
{
- struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
- GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
- GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
+ struct intel_context *intel = &p->brw->intel;
- /* Example code doesn't set predicate_control for send
- * instructions.
- */
- insn->header.predicate_control = 0;
- insn->header.destreg__conditionalmod = msg_reg_nr;
+ if (intel->gen >= 6) {
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
- brw_set_dest(insn, dest);
- brw_set_src0(insn, src);
- brw_set_math_message(p->brw,
- insn,
- msg_length, response_length,
- function,
- BRW_MATH_INTEGER_UNSIGNED,
- precision,
- saturate,
- data_type);
+ /* Math is the same ISA format as other opcodes, except that CondModifier
+ * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
+ */
+ insn->header.destreg__conditionalmod = function;
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src);
+ brw_set_src1(insn, brw_null_reg());
+ } else {
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+ GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
+ GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
+ /* Example code doesn't set predicate_control for send
+ * instructions.
+ */
+ insn->header.predicate_control = 0;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src);
+ brw_set_math_message(p->brw,
+ insn,
+ msg_length, response_length,
+ function,
+ BRW_MATH_INTEGER_UNSIGNED,
+ precision,
+ saturate,
+ data_type);
+ }
}
/**
@@ -1263,7 +1292,7 @@ void brw_SAMPLE(struct brw_compile *p,
GLboolean need_stall = 0;
if (writemask == 0) {
- /*_mesa_printf("%s: zero writemask??\n", __FUNCTION__); */
+ /*printf("%s: zero writemask??\n", __FUNCTION__); */
return;
}
@@ -1295,7 +1324,7 @@ void brw_SAMPLE(struct brw_compile *p,
if (newmask != writemask) {
need_stall = 1;
- /* _mesa_printf("need stall %x %x\n", newmask , writemask); */
+ /* printf("need stall %x %x\n", newmask , writemask); */
}
else {
struct brw_reg m1 = brw_message_reg(msg_reg_nr);
@@ -1368,7 +1397,18 @@ void brw_urb_WRITE(struct brw_compile *p,
GLuint offset,
GLuint swizzle)
{
- struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+ struct intel_context *intel = &p->brw->intel;
+ struct brw_instruction *insn;
+
+ /* Sandybridge doesn't have the implied move for SENDs,
+ * and the first message register index comes from src0.
+ */
+ if (intel->gen >= 6) {
+ brw_MOV(p, brw_message_reg(msg_reg_nr), src0);
+ src0 = brw_message_reg(msg_reg_nr);
+ }
+
+ insn = next_insn(p, BRW_OPCODE_SEND);
assert(msg_length < BRW_MAX_MRF);
@@ -1376,7 +1416,8 @@ void brw_urb_WRITE(struct brw_compile *p,
brw_set_src0(insn, src0);
brw_set_src1(insn, brw_imm_d(0));
- insn->header.destreg__conditionalmod = msg_reg_nr;
+ if (intel->gen < 6)
+ insn->header.destreg__conditionalmod = msg_reg_nr;
brw_set_urb_message(p->brw,
insn,
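
Editor's note, not part of the patch: the brw_math() hunk above says that on gen6 the math function code moves into the instruction itself, with FC[3:0] landing in the CondModifier field and FC[5:4] in ThreadCtrl, instead of riding in a SEND message. A minimal standalone sketch of that bit split; the struct and field widths are illustrative assumptions taken from the comment, not the authoritative encoding.

#include <stdint.h>
#include <stdio.h>

/* Split a 6-bit math function code the way the gen6 comment describes. */
struct fc_split {
   uint8_t cond_modifier;   /* receives FC[3:0] */
   uint8_t thread_ctrl;     /* receives FC[5:4] */
};

static struct fc_split split_fc(uint8_t function)
{
   struct fc_split s;
   s.cond_modifier = function & 0xf;
   s.thread_ctrl = (function >> 4) & 0x3;
   return s;
}

int main(void)
{
   struct fc_split s = split_fc(0x2b);   /* made-up function code */
   printf("cond_modifier=0x%x thread_ctrl=0x%x\n",
          s.cond_modifier, s.thread_ctrl);
   return 0;
}
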
diff --git a/i965/brw_fallback.c b/i965/brw_fallback.c
index 562a178..ba401c2 100644
--- a/i965/brw_fallback.c
+++ b/i965/brw_fallback.c
@@ -36,18 +36,13 @@
#include "swrast/swrast.h"
#include "tnl/tnl.h"
#include "brw_context.h"
-#include "brw_fallback.h"
-#include "intel_chipset.h"
#include "intel_fbo.h"
#include "intel_regions.h"
-#include "glapi/glapi.h"
-
#define FILE_DEBUG_FLAG DEBUG_FALLBACKS
static GLboolean do_check_fallback(struct brw_context *brw)
{
- struct intel_context *intel = &brw->intel;
GLcontext *ctx = &brw->intel.ctx;
GLuint i;
@@ -86,8 +81,7 @@ static GLboolean do_check_fallback(struct brw_context *brw)
}
/* _NEW_BUFFERS */
- if (IS_965(intel->intelScreen->deviceID) &&
- !IS_G4X(intel->intelScreen->deviceID)) {
+ if (!brw->has_surface_tile_offset) {
for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i];
struct intel_renderbuffer *irb = intel_renderbuffer(rb);
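
Editor's note, not part of the patch: the do_check_fallback() change above is one instance of a pattern this import applies throughout, replacing per-call-site device-ID tests (IS_965 && !IS_G4X) with a capability flag computed once, here has_surface_tile_offset. A small hedged sketch of that pattern with stand-in names; the condition checked is hypothetical.

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for the real brw_context; only the flag matters here. */
struct fake_brw {
   bool has_surface_tile_offset;
};

/* Hypothetical check: fall back when the hardware lacks surface tile
 * offsets and the buffer does not start at the origin. */
static bool needs_fallback(const struct fake_brw *brw, int draw_offset)
{
   return !brw->has_surface_tile_offset && draw_offset != 0;
}

int main(void)
{
   struct fake_brw original_965 = { .has_surface_tile_offset = false };
   struct fake_brw g4x = { .has_surface_tile_offset = true };

   printf("965: %d, G4X: %d\n",
          needs_fallback(&original_965, 64),
          needs_fallback(&g4x, 64));
   return 0;
}
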
diff --git a/i965/brw_gs.c b/i965/brw_gs.c
index 610b6c3..7261b31 100644
--- a/i965/brw_gs.c
+++ b/i965/brw_gs.c
@@ -47,6 +47,7 @@
static void compile_gs_prog( struct brw_context *brw,
struct brw_gs_prog_key *key )
{
+ struct intel_context *intel = &brw->intel;
struct brw_gs_compile c;
const GLuint *program;
GLuint program_size;
@@ -54,13 +55,12 @@ static void compile_gs_prog( struct brw_context *brw,
memset(&c, 0, sizeof(c));
c.key = *key;
- c.need_ff_sync = BRW_IS_IGDNG(brw);
/* Need to locate the two positions present in vertex + header.
* These are currently hardcoded:
*/
c.nr_attrs = brw_count_bits(c.key.attrs);
- if (BRW_IS_IGDNG(brw))
+ if (intel->is_ironlake)
c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */
else
c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */
@@ -125,12 +125,13 @@ static void compile_gs_prog( struct brw_context *brw,
/* Upload
*/
dri_bo_unreference(brw->gs.prog_bo);
- brw->gs.prog_bo = brw_upload_cache( &brw->cache, BRW_GS_PROG,
- &c.key, sizeof(c.key),
- NULL, 0,
- program, program_size,
- &c.prog_data,
- &brw->gs.prog_data );
+ brw->gs.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_GS_PROG,
+ &c.key, sizeof(c.key),
+ NULL, 0,
+ program, program_size,
+ &c.prog_data,
+ sizeof(c.prog_data),
+ &brw->gs.prog_data);
}
static const GLenum gs_prim[GL_POLYGON+1] = {
diff --git a/i965/brw_gs.h b/i965/brw_gs.h
index 010c1c2..813b8d4 100644
--- a/i965/brw_gs.h
+++ b/i965/brw_gs.h
@@ -63,7 +63,6 @@ struct brw_gs_compile {
GLuint nr_attrs;
GLuint nr_regs;
GLuint nr_bytes;
- GLboolean need_ff_sync;
};
#define ATTR_SIZE (4*4)
diff --git a/i965/brw_gs_emit.c b/i965/brw_gs_emit.c
index 0fc5b02..dd7b057 100644
--- a/i965/brw_gs_emit.c
+++ b/i965/brw_gs_emit.c
@@ -40,7 +40,6 @@
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_eu.h"
-#include "brw_util.h"
#include "brw_gs.h"
static void brw_gs_alloc_regs( struct brw_gs_compile *c,
@@ -122,12 +121,14 @@ static void brw_gs_ff_sync(struct brw_gs_compile *c, int num_prim)
void brw_gs_quads( struct brw_gs_compile *c, struct brw_gs_prog_key *key )
{
+ struct intel_context *intel = &c->func.brw->intel;
+
brw_gs_alloc_regs(c, 4);
/* Use polygons for correct edgeflag behaviour. Note that vertex 3
* is the PV for quads, but vertex 0 for polygons:
*/
- if (c->need_ff_sync)
+ if (intel->needs_ff_sync)
brw_gs_ff_sync(c, 1);
if (key->pv_first) {
brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START));
@@ -145,9 +146,11 @@ void brw_gs_quads( struct brw_gs_compile *c, struct brw_gs_prog_key *key )
void brw_gs_quad_strip( struct brw_gs_compile *c, struct brw_gs_prog_key *key )
{
+ struct intel_context *intel = &c->func.brw->intel;
+
brw_gs_alloc_regs(c, 4);
- if (c->need_ff_sync)
+ if (intel->needs_ff_sync)
brw_gs_ff_sync(c, 1);
if (key->pv_first) {
brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START));
@@ -165,9 +168,11 @@ void brw_gs_quad_strip( struct brw_gs_compile *c, struct brw_gs_prog_key *key )
void brw_gs_tris( struct brw_gs_compile *c )
{
+ struct intel_context *intel = &c->func.brw->intel;
+
brw_gs_alloc_regs(c, 3);
- if (c->need_ff_sync)
+ if (intel->needs_ff_sync)
brw_gs_ff_sync(c, 1);
brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_TRILIST << 2) | R02_PRIM_START));
brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_TRILIST << 2));
@@ -176,9 +181,11 @@ void brw_gs_tris( struct brw_gs_compile *c )
void brw_gs_lines( struct brw_gs_compile *c )
{
+ struct intel_context *intel = &c->func.brw->intel;
+
brw_gs_alloc_regs(c, 2);
- if (c->need_ff_sync)
+ if (intel->needs_ff_sync)
brw_gs_ff_sync(c, 1);
brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_START));
brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_END));
@@ -186,9 +193,11 @@ void brw_gs_lines( struct brw_gs_compile *c )
void brw_gs_points( struct brw_gs_compile *c )
{
+ struct intel_context *intel = &c->func.brw->intel;
+
brw_gs_alloc_regs(c, 1);
- if (c->need_ff_sync)
+ if (intel->needs_ff_sync)
brw_gs_ff_sync(c, 1);
brw_gs_emit_vue(c, c->reg.vertex[0], 1, ((_3DPRIM_POINTLIST << 2) | R02_PRIM_START | R02_PRIM_END));
}
diff --git a/i965/brw_gs_state.c b/i965/brw_gs_state.c
index ed9d2ff..d8ad5ce 100644
--- a/i965/brw_gs_state.c
+++ b/i965/brw_gs_state.c
@@ -34,7 +34,6 @@
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
-#include "main/macros.h"
struct brw_gs_unit_key {
unsigned int total_grf;
@@ -72,6 +71,7 @@ gs_unit_populate_key(struct brw_context *brw, struct brw_gs_unit_key *key)
static dri_bo *
gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key)
{
+ struct intel_context *intel = &brw->intel;
struct brw_gs_unit_state gs;
dri_bo *bo;
@@ -98,7 +98,7 @@ gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key)
else
gs.thread4.max_threads = 0;
- if (BRW_IS_IGDNG(brw))
+ if (intel->is_ironlake)
gs.thread4.rendering_enable = 1;
if (INTEL_DEBUG & DEBUG_STATS)
@@ -107,8 +107,7 @@ gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key)
bo = brw_upload_cache(&brw->cache, BRW_GS_UNIT,
key, sizeof(*key),
&brw->gs.prog_bo, 1,
- &gs, sizeof(gs),
- NULL, NULL);
+ &gs, sizeof(gs));
if (key->prog_active) {
/* Emit GS program relocation */
diff --git a/i965/brw_misc_state.c b/i965/brw_misc_state.c
index 4b0d598..d030ed4 100644
--- a/i965/brw_misc_state.c
+++ b/i965/brw_misc_state.c
@@ -78,10 +78,7 @@ static void upload_drawing_rect(struct brw_context *brw)
struct intel_context *intel = &brw->intel;
GLcontext *ctx = &intel->ctx;
- if (!intel->constant_cliprect)
- return;
-
- BEGIN_BATCH(4, NO_LOOP_CLIPRECTS);
+ BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_DRAWRECT_INFO_I965);
OUT_BATCH(0); /* xmin, ymin */
OUT_BATCH(((ctx->DrawBuffer->Width - 1) & 0xffff) |
@@ -116,7 +113,7 @@ static void upload_binding_table_pointers(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
- BEGIN_BATCH(6, IGNORE_CLIPRECTS);
+ BEGIN_BATCH(6);
OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2));
if (brw->vs.bind_bo != NULL)
OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* vs */
@@ -139,6 +136,41 @@ const struct brw_tracked_state brw_binding_table_pointers = {
.emit = upload_binding_table_pointers,
};
+/**
+ * Upload the binding table pointers, which point each stage's array of surface
+ * state pointers.
+ *
+ * The binding table pointers are relative to the surface state base address,
+ * which is 0.
+ */
+static void upload_gen6_binding_table_pointers(struct brw_context *brw)
+{
+ struct intel_context *intel = &brw->intel;
+
+ BEGIN_BATCH(4);
+ OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 |
+ GEN6_BINDING_TABLE_MODIFY_VS |
+ GEN6_BINDING_TABLE_MODIFY_GS |
+ GEN6_BINDING_TABLE_MODIFY_PS |
+ (4 - 2));
+ if (brw->vs.bind_bo != NULL)
+ OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* vs */
+ else
+ OUT_BATCH(0);
+ OUT_BATCH(0); /* gs */
+ OUT_RELOC(brw->wm.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* wm/ps */
+ ADVANCE_BATCH();
+}
+
+const struct brw_tracked_state gen6_binding_table_pointers = {
+ .dirty = {
+ .mesa = 0,
+ .brw = BRW_NEW_BATCH,
+ .cache = CACHE_NEW_SURF_BIND,
+ },
+ .prepare = prepare_binding_table_pointers,
+ .emit = upload_gen6_binding_table_pointers,
+};
/**
* Upload pointers to the per-stage state.
@@ -150,7 +182,7 @@ static void upload_pipelined_state_pointers(struct brw_context *brw )
{
struct intel_context *intel = &brw->intel;
- BEGIN_BATCH(7, IGNORE_CLIPRECTS);
+ BEGIN_BATCH(7);
OUT_BATCH(CMD_PIPELINED_STATE_POINTERS << 16 | (7 - 2));
OUT_RELOC(brw->vs.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
if (brw->gs.prog_active)
@@ -212,10 +244,17 @@ static void emit_depthbuffer(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
struct intel_region *region = brw->state.depth_region;
- unsigned int len = (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? 6 : 5;
+ unsigned int len;
+
+ if (intel->gen >= 6)
+ len = 7;
+ else if (intel->is_g4x || intel->is_ironlake)
+ len = 6;
+ else
+ len = 5;
if (region == NULL) {
- BEGIN_BATCH(len, IGNORE_CLIPRECTS);
+ BEGIN_BATCH(len);
OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2));
OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
(BRW_SURFACE_NULL << 29));
@@ -223,9 +262,12 @@ static void emit_depthbuffer(struct brw_context *brw)
OUT_BATCH(0);
OUT_BATCH(0);
- if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))
+ if (intel->is_g4x || intel->is_ironlake || intel->gen >= 6)
OUT_BATCH(0);
+ if (intel->gen >= 6)
+ OUT_BATCH(0);
+
ADVANCE_BATCH();
} else {
unsigned int format;
@@ -246,8 +288,10 @@ static void emit_depthbuffer(struct brw_context *brw)
}
assert(region->tiling != I915_TILING_X);
+ if (IS_GEN6(intel->intelScreen->deviceID))
+ assert(region->tiling != I915_TILING_NONE);
- BEGIN_BATCH(len, IGNORE_CLIPRECTS);
+ BEGIN_BATCH(len);
OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2));
OUT_BATCH(((region->pitch * region->cpp) - 1) |
(format << 18) |
@@ -262,9 +306,20 @@ static void emit_depthbuffer(struct brw_context *brw)
((region->height - 1) << 19));
OUT_BATCH(0);
- if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))
+ if (intel->is_g4x || intel->is_ironlake || intel->gen >= 6)
OUT_BATCH(0);
+ if (intel->gen >= 6)
+ OUT_BATCH(0);
+
+ ADVANCE_BATCH();
+ }
+
+ /* Initialize it for safety. */
+ if (intel->gen >= 6) {
+ BEGIN_BATCH(2);
+ OUT_BATCH(CMD_3D_CLEAR_PARAMS << 16 | (2 - 2));
+ OUT_BATCH(0);
ADVANCE_BATCH();
}
}
@@ -330,7 +385,7 @@ const struct brw_tracked_state brw_polygon_stipple = {
static void upload_polygon_stipple_offset(struct brw_context *brw)
{
- __DRIdrawablePrivate *dPriv = brw->intel.driDrawable;
+ GLcontext *ctx = &brw->intel.ctx;
struct brw_polygon_stipple_offset bpso;
memset(&bpso, 0, sizeof(bpso));
@@ -346,8 +401,8 @@ static void upload_polygon_stipple_offset(struct brw_context *brw)
* worry about.
*/
if (brw->intel.ctx.DrawBuffer->Name == 0) {
- bpso.bits0.x_offset = (32 - (dPriv->x & 31)) & 31;
- bpso.bits0.y_offset = (32 - ((dPriv->y + dPriv->h) & 31)) & 31;
+ bpso.bits0.x_offset = 0;
+ bpso.bits0.y_offset = (32 - (ctx->DrawBuffer->Height & 31)) & 31;
}
else {
bpso.bits0.y_offset = 0;
@@ -374,8 +429,8 @@ const struct brw_tracked_state brw_polygon_stipple_offset = {
static void upload_aa_line_parameters(struct brw_context *brw)
{
struct brw_aa_line_parameters balp;
-
- if (BRW_IS_965(brw))
+
+ if (!brw->has_aa_line_parameters)
return;
/* use legacy aa line coverage computation */
@@ -438,18 +493,20 @@ const struct brw_tracked_state brw_line_stipple = {
static void upload_invarient_state( struct brw_context *brw )
{
+ struct intel_context *intel = &brw->intel;
+
{
/* 0x61040000 Pipeline Select */
/* PipelineSelect : 0 */
struct brw_pipeline_select ps;
memset(&ps, 0, sizeof(ps));
- ps.header.opcode = CMD_PIPELINE_SELECT(brw);
+ ps.header.opcode = brw->CMD_PIPELINE_SELECT;
ps.header.pipeline_select = 0;
BRW_BATCH_STRUCT(brw, &ps);
}
- {
+ if (intel->gen < 6) {
struct brw_global_depth_offset_clamp gdo;
memset(&gdo, 0, sizeof(gdo));
@@ -462,6 +519,32 @@ static void upload_invarient_state( struct brw_context *brw )
BRW_BATCH_STRUCT(brw, &gdo);
}
+ intel_batchbuffer_emit_mi_flush(intel->batch);
+
+ if (intel->gen >= 6) {
+ int i;
+
+ BEGIN_BATCH(3);
+ OUT_BATCH(CMD_3D_MULTISAMPLE << 16 | (3 - 2));
+ OUT_BATCH(MS_PIXEL_LOCATION_CENTER |
+ MS_NUMSAMPLES_1);
+ OUT_BATCH(0); /* positions for 4/8-sample */
+ ADVANCE_BATCH();
+
+ BEGIN_BATCH(2);
+ OUT_BATCH(CMD_3D_SAMPLE_MASK << 16 | (2 - 2));
+ OUT_BATCH(1);
+ ADVANCE_BATCH();
+
+ for (i = 0; i < 4; i++) {
+ BEGIN_BATCH(4);
+ OUT_BATCH(CMD_GS_SVB_INDEX << 16 | (4 - 2));
+ OUT_BATCH(i << SVB_INDEX_SHIFT);
+ OUT_BATCH(0);
+ OUT_BATCH(0xffffffff);
+ ADVANCE_BATCH();
+ }
+ }
/* 0x61020000 State Instruction Pointer */
{
@@ -480,7 +563,7 @@ static void upload_invarient_state( struct brw_context *brw )
struct brw_vf_statistics vfs;
memset(&vfs, 0, sizeof(vfs));
- vfs.opcode = CMD_VF_STATISTICS(brw);
+ vfs.opcode = brw->CMD_VF_STATISTICS;
if (INTEL_DEBUG & DEBUG_STATS)
vfs.statistics_enable = 1;
@@ -512,8 +595,21 @@ static void upload_state_base_address( struct brw_context *brw )
/* Output the structure (brw_state_base_address) directly to the
* batchbuffer, so we can emit relocations inline.
*/
- if (BRW_IS_IGDNG(brw)) {
- BEGIN_BATCH(8, IGNORE_CLIPRECTS);
+ if (intel->gen >= 6) {
+ BEGIN_BATCH(10);
+ OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2));
+ OUT_BATCH(1); /* General state base address */
+ OUT_BATCH(1); /* Surface state base address */
+ OUT_BATCH(1); /* Dynamic state base address */
+ OUT_BATCH(1); /* Indirect object base address */
+ OUT_BATCH(1); /* Instruction base address */
+ OUT_BATCH(1); /* General state upper bound */
+ OUT_BATCH(1); /* Dynamic state upper bound */
+ OUT_BATCH(1); /* Indirect object upper bound */
+ OUT_BATCH(1); /* Instruction access upper bound */
+ ADVANCE_BATCH();
+ } else if (intel->is_ironlake) {
+ BEGIN_BATCH(8);
OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2));
OUT_BATCH(1); /* General state base address */
OUT_BATCH(1); /* Surface state base address */
@@ -524,7 +620,7 @@ static void upload_state_base_address( struct brw_context *brw )
OUT_BATCH(1); /* Instruction access upper bound */
ADVANCE_BATCH();
} else {
- BEGIN_BATCH(6, IGNORE_CLIPRECTS);
+ BEGIN_BATCH(6);
OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2));
OUT_BATCH(1); /* General state base address */
OUT_BATCH(1); /* Surface state base address */
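
Editor's note, not part of the patch: upload_gen6_binding_table_pointers() above shows the shape every new gen6 packet in this file follows: an opcode dword whose low bits carry the DWord length as "total - 2", followed by payload dwords (here the VS/GS/PS binding-table offsets, relative to a surface state base address of 0). A standalone sketch of that packing; the opcode and modify-bit values below are placeholders, not the real brw_defines.h values.

#include <stdint.h>
#include <stdio.h>

#define FAKE_CMD_BINDING_TABLE_PTRS 0x7801u  /* placeholder opcode */
#define FAKE_MODIFY_VS (1u << 8)             /* placeholder bit positions */
#define FAKE_MODIFY_GS (1u << 9)
#define FAKE_MODIFY_PS (1u << 12)

int main(void)
{
   uint32_t batch[4];

   batch[0] = (FAKE_CMD_BINDING_TABLE_PTRS << 16) |
              FAKE_MODIFY_VS | FAKE_MODIFY_GS | FAKE_MODIFY_PS |
              (4 - 2);          /* DWord length is "total - 2" */
   batch[1] = 0;                /* vs binding table offset (or relocation) */
   batch[2] = 0;                /* gs: unused in the hunk above */
   batch[3] = 0x100;            /* wm/ps binding table offset, made up */

   for (int i = 0; i < 4; i++)
      printf("dw%d = 0x%08x\n", i, batch[i]);
   return 0;
}
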
diff --git a/i965/brw_program.c b/i965/brw_program.c
index bac6918..c78f7b3 100644
--- a/i965/brw_program.c
+++ b/i965/brw_program.c
@@ -37,7 +37,6 @@
#include "tnl/tnl.h"
#include "brw_context.h"
-#include "brw_util.h"
#include "brw_wm.h"
static void brwBindProgram( GLcontext *ctx,
@@ -112,9 +111,10 @@ static GLboolean brwIsProgramNative( GLcontext *ctx,
return GL_TRUE;
}
-static void brwProgramStringNotify( GLcontext *ctx,
- GLenum target,
- struct gl_program *prog )
+
+static GLboolean brwProgramStringNotify( GLcontext *ctx,
+ GLenum target,
+ struct gl_program *prog )
{
struct brw_context *brw = brw_context(ctx);
@@ -151,6 +151,9 @@ static void brwProgramStringNotify( GLcontext *ctx,
*/
_tnl_program_string(ctx, target, prog);
}
+
+ /* XXX check if program is legal, within limits */
+ return GL_TRUE;
}
void brwInitFragProgFuncs( struct dd_function_table *functions )
diff --git a/i965/brw_queryobj.c b/i965/brw_queryobj.c
index a195bc3..6cce7e5 100644
--- a/i965/brw_queryobj.c
+++ b/i965/brw_queryobj.c
@@ -73,7 +73,7 @@ brw_new_query_object(GLcontext *ctx, GLuint id)
{
struct brw_query_object *query;
- query = _mesa_calloc(sizeof(struct brw_query_object));
+ query = calloc(1, sizeof(struct brw_query_object));
query->Base.Id = id;
query->Base.Result = 0;
@@ -89,7 +89,7 @@ brw_delete_query(GLcontext *ctx, struct gl_query_object *q)
struct brw_query_object *query = (struct brw_query_object *)q;
dri_bo_unreference(query->bo);
- _mesa_free(query);
+ free(query);
}
static void
@@ -188,7 +188,7 @@ brw_emit_query_begin(struct brw_context *brw)
if (brw->query.active || is_empty_list(&brw->query.active_head))
return;
- BEGIN_BATCH(4, IGNORE_CLIPRECTS);
+ BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_PIPE_CONTROL |
PIPE_CONTROL_DEPTH_STALL |
PIPE_CONTROL_WRITE_DEPTH_COUNT);
@@ -227,7 +227,7 @@ brw_emit_query_end(struct brw_context *brw)
if (!brw->query.active)
return;
- BEGIN_BATCH(4, IGNORE_CLIPRECTS);
+ BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_PIPE_CONTROL |
PIPE_CONTROL_DEPTH_STALL |
PIPE_CONTROL_WRITE_DEPTH_COUNT);
diff --git a/i965/brw_sf.c b/i965/brw_sf.c
index 968890f..8e6839b 100644
--- a/i965/brw_sf.c
+++ b/i965/brw_sf.c
@@ -117,12 +117,13 @@ static void compile_sf_prog( struct brw_context *brw,
/* Upload
*/
dri_bo_unreference(brw->sf.prog_bo);
- brw->sf.prog_bo = brw_upload_cache( &brw->cache, BRW_SF_PROG,
- &c.key, sizeof(c.key),
- NULL, 0,
- program, program_size,
- &c.prog_data,
- &brw->sf.prog_data );
+ brw->sf.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_SF_PROG,
+ &c.key, sizeof(c.key),
+ NULL, 0,
+ program, program_size,
+ &c.prog_data,
+ sizeof(c.prog_data),
+ &brw->sf.prog_data);
}
/* Calculate interpolants for triangle and line rasterization.
diff --git a/i965/brw_sf_emit.c b/i965/brw_sf_emit.c
index 3eae41e..bb08055 100644
--- a/i965/brw_sf_emit.c
+++ b/i965/brw_sf_emit.c
@@ -149,6 +149,7 @@ static void copy_colors( struct brw_sf_compile *c,
static void do_flatshade_triangle( struct brw_sf_compile *c )
{
struct brw_compile *p = &c->func;
+ struct intel_context *intel = &p->brw->intel;
struct brw_reg ip = brw_ip_reg();
GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS);
GLuint jmpi = 1;
@@ -161,7 +162,7 @@ static void do_flatshade_triangle( struct brw_sf_compile *c )
if (c->key.primitive == SF_UNFILLED_TRIS)
return;
- if (BRW_IS_IGDNG(p->brw))
+ if (intel->is_ironlake)
jmpi = 2;
brw_push_insn_state(p);
@@ -187,6 +188,7 @@ static void do_flatshade_triangle( struct brw_sf_compile *c )
static void do_flatshade_line( struct brw_sf_compile *c )
{
struct brw_compile *p = &c->func;
+ struct intel_context *intel = &p->brw->intel;
struct brw_reg ip = brw_ip_reg();
GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS);
GLuint jmpi = 1;
@@ -199,7 +201,7 @@ static void do_flatshade_line( struct brw_sf_compile *c )
if (c->key.primitive == SF_UNFILLED_TRIS)
return;
- if (BRW_IS_IGDNG(p->brw))
+ if (intel->is_ironlake)
jmpi = 2;
brw_push_insn_state(p);
diff --git a/i965/brw_sf_state.c b/i965/brw_sf_state.c
index bb69435..847c886 100644
--- a/i965/brw_sf_state.c
+++ b/i965/brw_sf_state.c
@@ -35,7 +35,6 @@
#include "brw_state.h"
#include "brw_defines.h"
#include "main/macros.h"
-#include "intel_fbo.h"
static void upload_sf_vp(struct brw_context *brw)
{
@@ -70,9 +69,9 @@ static void upload_sf_vp(struct brw_context *brw)
* for DrawBuffer->_[XY]{min,max}
*/
- /* The scissor only needs to handle the intersection of drawable and
- * scissor rect. Clipping to the boundaries of static shared buffers
- * for front/back/depth is covered by looping over cliprects in brw_draw.c.
+ /* The scissor only needs to handle the intersection of drawable
+ * and scissor rect, since there are no longer cliprects for shared
+ * buffers with DRI2.
*
* Note that the hardware's coordinates are inclusive, while Mesa's min is
* inclusive but max is exclusive.
@@ -165,6 +164,7 @@ static dri_bo *
sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
dri_bo **reloc_bufs)
{
+ struct intel_context *intel = &brw->intel;
struct brw_sf_unit_state sf;
dri_bo *bo;
int chipset_max_threads;
@@ -177,7 +177,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
sf.thread3.dispatch_grf_start_reg = 3;
- if (BRW_IS_IGDNG(brw))
+ if (intel->is_ironlake)
sf.thread3.urb_entry_read_offset = 3;
else
sf.thread3.urb_entry_read_offset = 1;
@@ -187,10 +187,10 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
sf.thread4.nr_urb_entries = key->nr_urb_entries;
sf.thread4.urb_entry_allocation_size = key->sfsize - 1;
- /* Each SF thread produces 1 PUE, and there can be up to 24(Pre-IGDNG) or
- * 48(IGDNG) threads
+ /* Each SF thread produces 1 PUE, and there can be up to 24 (Pre-Ironlake) or
+ * 48 (Ironlake) threads.
*/
- if (BRW_IS_IGDNG(brw))
+ if (intel->is_ironlake)
chipset_max_threads = 48;
else
chipset_max_threads = 24;
@@ -308,8 +308,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
bo = brw_upload_cache(&brw->cache, BRW_SF_UNIT,
key, sizeof(*key),
reloc_bufs, 2,
- &sf, sizeof(sf),
- NULL, NULL);
+ &sf, sizeof(sf));
/* STATE_PREFETCH command description describes this state as being
* something loaded through the GPE (L2 ISC), so it's INSTRUCTION domain.
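
Editor's note, not part of the patch: the scissor comment in upload_sf_vp() above points out that the hardware rectangle is inclusive on both ends while Mesa's _Xmax/_Ymax are exclusive, so 1 is subtracted on the way in. A tiny standalone illustration of that conversion; the struct and field names are made up.

#include <stdio.h>

struct hw_scissor { int xmin, ymin, xmax, ymax; };

static struct hw_scissor to_hw_scissor(int xmin, int ymin, int xmax, int ymax)
{
   struct hw_scissor s;
   s.xmin = xmin;
   s.ymin = ymin;
   s.xmax = xmax - 1;   /* exclusive max -> inclusive max */
   s.ymax = ymax - 1;
   return s;
}

int main(void)
{
   struct hw_scissor s = to_hw_scissor(0, 0, 640, 480);
   printf("hw scissor: x %d..%d, y %d..%d\n", s.xmin, s.xmax, s.ymin, s.ymax);
   return 0;
}
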
diff --git a/i965/brw_state.h b/i965/brw_state.h
index b129b1f..f790cfa 100644
--- a/i965/brw_state.h
+++ b/i965/brw_state.h
@@ -35,7 +35,7 @@
#include "brw_context.h"
-static inline void
+static INLINE void
brw_add_validated_bo(struct brw_context *brw, dri_bo *bo)
{
assert(brw->state.validated_bo_count < ARRAY_SIZE(brw->state.validated_bos));
@@ -90,6 +90,23 @@ const struct brw_tracked_state brw_drawing_rect;
const struct brw_tracked_state brw_indices;
const struct brw_tracked_state brw_vertices;
const struct brw_tracked_state brw_index_buffer;
+const struct brw_tracked_state gen6_binding_table_pointers;
+const struct brw_tracked_state gen6_blend_state;
+const struct brw_tracked_state gen6_cc_state_pointers;
+const struct brw_tracked_state gen6_cc_vp;
+const struct brw_tracked_state gen6_clip_state;
+const struct brw_tracked_state gen6_clip_vp;
+const struct brw_tracked_state gen6_color_calc_state;
+const struct brw_tracked_state gen6_depth_stencil_state;
+const struct brw_tracked_state gen6_gs_state;
+const struct brw_tracked_state gen6_sampler_state;
+const struct brw_tracked_state gen6_scissor_state;
+const struct brw_tracked_state gen6_sf_state;
+const struct brw_tracked_state gen6_sf_vp;
+const struct brw_tracked_state gen6_urb;
+const struct brw_tracked_state gen6_viewport_state;
+const struct brw_tracked_state gen6_vs_state;
+const struct brw_tracked_state gen6_wm_state;
/**
* Use same key for WM and VS surfaces.
@@ -124,16 +141,26 @@ dri_bo *brw_cache_data(struct brw_cache *cache,
dri_bo **reloc_bufs,
GLuint nr_reloc_bufs);
-dri_bo *brw_upload_cache( struct brw_cache *cache,
- enum brw_cache_id cache_id,
- const void *key,
- GLuint key_sz,
- dri_bo **reloc_bufs,
- GLuint nr_reloc_bufs,
- const void *data,
- GLuint data_sz,
- const void *aux,
- void *aux_return );
+drm_intel_bo *brw_upload_cache(struct brw_cache *cache,
+ enum brw_cache_id cache_id,
+ const void *key,
+ GLuint key_sz,
+ dri_bo **reloc_bufs,
+ GLuint nr_reloc_bufs,
+ const void *data,
+ GLuint data_sz);
+
+drm_intel_bo *brw_upload_cache_with_auxdata(struct brw_cache *cache,
+ enum brw_cache_id cache_id,
+ const void *key,
+ GLuint key_sz,
+ dri_bo **reloc_bufs,
+ GLuint nr_reloc_bufs,
+ const void *data,
+ GLuint data_sz,
+ const void *aux,
+ GLuint aux_sz,
+ void *aux_return);
dri_bo *brw_search_cache( struct brw_cache *cache,
enum brw_cache_id cache_id,
@@ -151,7 +178,7 @@ void brw_state_cache_bo_delete(struct brw_cache *cache, dri_bo *bo);
/***********************************************************************
* brw_state_batch.c
*/
-#define BRW_BATCH_STRUCT(brw, s) intel_batchbuffer_data( brw->intel.batch, (s), sizeof(*(s)), IGNORE_CLIPRECTS)
+#define BRW_BATCH_STRUCT(brw, s) intel_batchbuffer_data( brw->intel.batch, (s), sizeof(*(s)))
#define BRW_CACHED_BATCH_STRUCT(brw, s) brw_cached_batch_struct( brw, (s), sizeof(*(s)) )
GLboolean brw_cached_batch_struct( struct brw_context *brw,
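
Editor's note, not part of the patch: the prototypes above split the old single entry point into brw_upload_cache() and brw_upload_cache_with_auxdata(), with the aux size now passed explicitly by the caller instead of being looked up per cache id. A generic standalone sketch of that wrapper pattern, using hypothetical function names rather than the Mesa API.

#include <stdio.h>

/* Hypothetical stand-ins, not the Mesa functions. */
static void upload_with_auxdata(const void *data, size_t data_size,
                                const void *aux, size_t aux_size)
{
   (void)data; (void)aux;
   printf("upload %zu bytes of data, %zu bytes of aux\n", data_size, aux_size);
}

static void upload(const void *data, size_t data_size)
{
   /* The plain variant is just the aux variant with no aux data. */
   upload_with_auxdata(data, data_size, NULL, 0);
}

int main(void)
{
   char program[16] = { 0 };
   char prog_data[8] = { 0 };

   upload_with_auxdata(program, sizeof(program), prog_data, sizeof(prog_data));
   upload(program, sizeof(program));
   return 0;
}
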
diff --git a/i965/brw_state_batch.c b/i965/brw_state_batch.c
index 7821898..3901941 100644
--- a/i965/brw_state_batch.c
+++ b/i965/brw_state_batch.c
@@ -48,7 +48,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw,
struct header *newheader = (struct header *)data;
if (brw->emit_state_always) {
- intel_batchbuffer_data(brw->intel.batch, data, sz, IGNORE_CLIPRECTS);
+ intel_batchbuffer_data(brw->intel.batch, data, sz);
return GL_TRUE;
}
@@ -57,8 +57,8 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw,
if (item->sz == sz && memcmp(item->header, newheader, sz) == 0)
return GL_FALSE;
if (item->sz != sz) {
- _mesa_free(item->header);
- item->header = _mesa_malloc(sz);
+ free(item->header);
+ item->header = malloc(sz);
item->sz = sz;
}
goto emit;
@@ -68,14 +68,14 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw,
assert(!item);
item = CALLOC_STRUCT(brw_cached_batch_item);
- item->header = _mesa_malloc(sz);
+ item->header = malloc(sz);
item->sz = sz;
item->next = brw->cached_batch_items;
brw->cached_batch_items = item;
emit:
memcpy(item->header, newheader, sz);
- intel_batchbuffer_data(brw->intel.batch, data, sz, IGNORE_CLIPRECTS);
+ intel_batchbuffer_data(brw->intel.batch, data, sz);
return GL_TRUE;
}
diff --git a/i965/brw_state_cache.c b/i965/brw_state_cache.c
index e4c9ba7..c08cb45 100644
--- a/i965/brw_state_cache.c
+++ b/i965/brw_state_cache.c
@@ -59,37 +59,27 @@
#include "main/imports.h"
#include "brw_state.h"
#include "intel_batchbuffer.h"
-
-/* XXX: Fixme - have to include these to get the sizes of the prog_key
- * structs:
- */
#include "brw_wm.h"
-#include "brw_vs.h"
-#include "brw_clip.h"
-#include "brw_sf.h"
-#include "brw_gs.h"
static GLuint
-hash_key(const void *key, GLuint key_size,
- dri_bo **reloc_bufs, GLuint nr_reloc_bufs)
+hash_key(struct brw_cache_item *item)
{
- GLuint *ikey = (GLuint *)key;
- GLuint hash = 0, i;
+ GLuint *ikey = (GLuint *)item->key;
+ GLuint hash = item->cache_id, i;
- assert(key_size % 4 == 0);
+ assert(item->key_size % 4 == 0);
/* I'm sure this can be improved on:
*/
- for (i = 0; i < key_size/4; i++) {
+ for (i = 0; i < item->key_size/4; i++) {
hash ^= ikey[i];
hash = (hash << 5) | (hash >> 27);
}
/* Include the BO pointers as key data as well */
- ikey = (GLuint *)reloc_bufs;
- key_size = nr_reloc_bufs * sizeof(dri_bo *);
- for (i = 0; i < key_size/4; i++) {
+ ikey = (GLuint *)item->reloc_bufs;
+ for (i = 0; i < item->nr_reloc_bufs * sizeof(drm_intel_bo *) / 4; i++) {
hash ^= ikey[i];
hash = (hash << 5) | (hash >> 27);
}
@@ -114,11 +104,22 @@ update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id,
cache->brw->state.dirty.cache |= 1 << cache_id;
}
+static int
+brw_cache_item_equals(const struct brw_cache_item *a,
+ const struct brw_cache_item *b)
+{
+ return a->cache_id == b->cache_id &&
+ a->hash == b->hash &&
+ a->key_size == b->key_size &&
+ (memcmp(a->key, b->key, a->key_size) == 0) &&
+ a->nr_reloc_bufs == b->nr_reloc_bufs &&
+ (memcmp(a->reloc_bufs, b->reloc_bufs,
+ a->nr_reloc_bufs * sizeof(dri_bo *)) == 0);
+}
static struct brw_cache_item *
-search_cache(struct brw_cache *cache, enum brw_cache_id cache_id,
- GLuint hash, const void *key, GLuint key_size,
- dri_bo **reloc_bufs, GLuint nr_reloc_bufs)
+search_cache(struct brw_cache *cache, GLuint hash,
+ struct brw_cache_item *lookup)
{
struct brw_cache_item *c;
@@ -133,13 +134,7 @@ search_cache(struct brw_cache *cache, enum brw_cache_id cache_id,
#endif
for (c = cache->items[hash % cache->size]; c; c = c->next) {
- if (c->cache_id == cache_id &&
- c->hash == hash &&
- c->key_size == key_size &&
- memcmp(c->key, key, key_size) == 0 &&
- c->nr_reloc_bufs == nr_reloc_bufs &&
- memcmp(c->reloc_bufs, reloc_bufs,
- nr_reloc_bufs * sizeof(dri_bo *)) == 0)
+ if (brw_cache_item_equals(lookup, c))
return c;
}
@@ -155,7 +150,7 @@ rehash(struct brw_cache *cache)
GLuint size, i;
size = cache->size * 3;
- items = (struct brw_cache_item**) _mesa_calloc(size * sizeof(*items));
+ items = (struct brw_cache_item**) calloc(1, size * sizeof(*items));
for (i = 0; i < cache->size; i++)
for (c = cache->items[i]; c; c = next) {
@@ -182,10 +177,18 @@ brw_search_cache(struct brw_cache *cache,
void *aux_return)
{
struct brw_cache_item *item;
- GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs);
+ struct brw_cache_item lookup;
+ GLuint hash;
- item = search_cache(cache, cache_id, hash, key, key_size,
- reloc_bufs, nr_reloc_bufs);
+ lookup.cache_id = cache_id;
+ lookup.key = key;
+ lookup.key_size = key_size;
+ lookup.reloc_bufs = reloc_bufs;
+ lookup.nr_reloc_bufs = nr_reloc_bufs;
+ hash = hash_key(&lookup);
+ lookup.hash = hash;
+
+ item = search_cache(cache, hash, &lookup);
if (item == NULL)
return NULL;
@@ -200,48 +203,52 @@ brw_search_cache(struct brw_cache *cache,
}
-dri_bo *
-brw_upload_cache( struct brw_cache *cache,
- enum brw_cache_id cache_id,
- const void *key,
- GLuint key_size,
- dri_bo **reloc_bufs,
- GLuint nr_reloc_bufs,
- const void *data,
- GLuint data_size,
- const void *aux,
- void *aux_return )
+drm_intel_bo *
+brw_upload_cache_with_auxdata(struct brw_cache *cache,
+ enum brw_cache_id cache_id,
+ const void *key,
+ GLuint key_size,
+ dri_bo **reloc_bufs,
+ GLuint nr_reloc_bufs,
+ const void *data,
+ GLuint data_size,
+ const void *aux,
+ GLuint aux_size,
+ void *aux_return)
{
struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item);
- GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs);
+ GLuint hash;
GLuint relocs_size = nr_reloc_bufs * sizeof(dri_bo *);
- GLuint aux_size = cache->aux_size[cache_id];
void *tmp;
dri_bo *bo;
int i;
+ item->cache_id = cache_id;
+ item->key = key;
+ item->key_size = key_size;
+ item->reloc_bufs = reloc_bufs;
+ item->nr_reloc_bufs = nr_reloc_bufs;
+ hash = hash_key(item);
+ item->hash = hash;
+
/* Create the buffer object to contain the data */
bo = dri_bo_alloc(cache->brw->intel.bufmgr,
cache->name[cache_id], data_size, 1 << 6);
/* Set up the memory containing the key, aux_data, and reloc_bufs */
- tmp = _mesa_malloc(key_size + aux_size + relocs_size);
+ tmp = malloc(key_size + aux_size + relocs_size);
memcpy(tmp, key, key_size);
- memcpy(tmp + key_size, aux, cache->aux_size[cache_id]);
+ memcpy(tmp + key_size, aux, aux_size);
memcpy(tmp + key_size + aux_size, reloc_bufs, relocs_size);
for (i = 0; i < nr_reloc_bufs; i++) {
if (reloc_bufs[i] != NULL)
dri_bo_reference(reloc_bufs[i]);
}
- item->cache_id = cache_id;
item->key = tmp;
- item->hash = hash;
- item->key_size = key_size;
item->reloc_bufs = tmp + key_size + aux_size;
- item->nr_reloc_bufs = nr_reloc_bufs;
item->bo = bo;
dri_bo_reference(bo);
@@ -255,12 +262,11 @@ brw_upload_cache( struct brw_cache *cache,
cache->n_items++;
if (aux_return) {
- assert(cache->aux_size[cache_id]);
*(void **)aux_return = (void *)((char *)item->key + item->key_size);
}
if (INTEL_DEBUG & DEBUG_STATE)
- _mesa_printf("upload %s: %d bytes to cache id %d\n",
+ printf("upload %s: %d bytes to cache id %d\n",
cache->name[cache_id],
data_size, cache_id);
@@ -272,6 +278,23 @@ brw_upload_cache( struct brw_cache *cache,
return bo;
}
+drm_intel_bo *
+brw_upload_cache(struct brw_cache *cache,
+ enum brw_cache_id cache_id,
+ const void *key,
+ GLuint key_size,
+ dri_bo **reloc_bufs,
+ GLuint nr_reloc_bufs,
+ const void *data,
+ GLuint data_size)
+{
+ return brw_upload_cache_with_auxdata(cache, cache_id,
+ key, key_size,
+ reloc_bufs, nr_reloc_bufs,
+ data, data_size,
+ NULL, 0,
+ NULL);
+}
/**
* Wrapper around brw_cache_data_sz using the cache_id's canonical key size.
@@ -292,11 +315,18 @@ brw_cache_data(struct brw_cache *cache,
GLuint nr_reloc_bufs)
{
dri_bo *bo;
- struct brw_cache_item *item;
- GLuint hash = hash_key(data, data_size, reloc_bufs, nr_reloc_bufs);
-
- item = search_cache(cache, cache_id, hash, data, data_size,
- reloc_bufs, nr_reloc_bufs);
+ struct brw_cache_item *item, lookup;
+ GLuint hash;
+
+ lookup.cache_id = cache_id;
+ lookup.key = data;
+ lookup.key_size = data_size;
+ lookup.reloc_bufs = reloc_bufs;
+ lookup.nr_reloc_bufs = nr_reloc_bufs;
+ hash = hash_key(&lookup);
+ lookup.hash = hash;
+
+ item = search_cache(cache, hash, &lookup);
if (item) {
update_cache_last(cache, cache_id, item->bo);
dri_bo_reference(item->bo);
@@ -306,8 +336,7 @@ brw_cache_data(struct brw_cache *cache,
bo = brw_upload_cache(cache, cache_id,
data, data_size,
reloc_bufs, nr_reloc_bufs,
- data, data_size,
- NULL, NULL);
+ data, data_size);
return bo;
}
@@ -321,11 +350,9 @@ enum pool_type {
static void
brw_init_cache_id(struct brw_cache *cache,
const char *name,
- enum brw_cache_id id,
- GLuint aux_size)
+ enum brw_cache_id id)
{
cache->name[id] = strdup(name);
- cache->aux_size[id] = aux_size;
}
@@ -339,82 +366,31 @@ brw_init_non_surface_cache(struct brw_context *brw)
cache->size = 7;
cache->n_items = 0;
cache->items = (struct brw_cache_item **)
- _mesa_calloc(cache->size * sizeof(struct brw_cache_item));
-
- brw_init_cache_id(cache,
- "CC_VP",
- BRW_CC_VP,
- 0);
-
- brw_init_cache_id(cache,
- "CC_UNIT",
- BRW_CC_UNIT,
- 0);
-
- brw_init_cache_id(cache,
- "WM_PROG",
- BRW_WM_PROG,
- sizeof(struct brw_wm_prog_data));
-
- brw_init_cache_id(cache,
- "SAMPLER_DEFAULT_COLOR",
- BRW_SAMPLER_DEFAULT_COLOR,
- 0);
-
- brw_init_cache_id(cache,
- "SAMPLER",
- BRW_SAMPLER,
- 0);
-
- brw_init_cache_id(cache,
- "WM_UNIT",
- BRW_WM_UNIT,
- 0);
-
- brw_init_cache_id(cache,
- "SF_PROG",
- BRW_SF_PROG,
- sizeof(struct brw_sf_prog_data));
-
- brw_init_cache_id(cache,
- "SF_VP",
- BRW_SF_VP,
- 0);
-
- brw_init_cache_id(cache,
- "SF_UNIT",
- BRW_SF_UNIT,
- 0);
-
- brw_init_cache_id(cache,
- "VS_UNIT",
- BRW_VS_UNIT,
- 0);
-
- brw_init_cache_id(cache,
- "VS_PROG",
- BRW_VS_PROG,
- sizeof(struct brw_vs_prog_data));
-
- brw_init_cache_id(cache,
- "CLIP_UNIT",
- BRW_CLIP_UNIT,
- 0);
-
- brw_init_cache_id(cache,
- "CLIP_PROG",
- BRW_CLIP_PROG,
- sizeof(struct brw_clip_prog_data));
-
- brw_init_cache_id(cache,
- "GS_UNIT",
- BRW_GS_UNIT,
- 0);
-
- brw_init_cache_id(cache,
- "GS_PROG",
- BRW_GS_PROG,
- sizeof(struct brw_gs_prog_data));
+ calloc(1, cache->size * sizeof(struct brw_cache_item));
+
+ brw_init_cache_id(cache, "CC_VP", BRW_CC_VP);
+ brw_init_cache_id(cache, "CC_UNIT", BRW_CC_UNIT);
+ brw_init_cache_id(cache, "WM_PROG", BRW_WM_PROG);
+ brw_init_cache_id(cache, "SAMPLER_DEFAULT_COLOR", BRW_SAMPLER_DEFAULT_COLOR);
+ brw_init_cache_id(cache, "SAMPLER", BRW_SAMPLER);
+ brw_init_cache_id(cache, "WM_UNIT", BRW_WM_UNIT);
+ brw_init_cache_id(cache, "SF_PROG", BRW_SF_PROG);
+ brw_init_cache_id(cache, "SF_VP", BRW_SF_VP);
+
+ brw_init_cache_id(cache, "SF_UNIT", BRW_SF_UNIT);
+
+ brw_init_cache_id(cache, "VS_UNIT", BRW_VS_UNIT);
+
+ brw_init_cache_id(cache, "VS_PROG", BRW_VS_PROG);
+
+ brw_init_cache_id(cache, "CLIP_UNIT", BRW_CLIP_UNIT);
+
+ brw_init_cache_id(cache, "CLIP_PROG", BRW_CLIP_PROG);
+
+ brw_init_cache_id(cache, "GS_UNIT", BRW_GS_UNIT);
+
+ brw_init_cache_id(cache, "GS_PROG", BRW_GS_PROG);
+ brw_init_cache_id(cache, "BLEND_STATE", BRW_BLEND_STATE);
}
@@ -428,17 +404,10 @@ brw_init_surface_cache(struct brw_context *brw)
cache->size = 7;
cache->n_items = 0;
cache->items = (struct brw_cache_item **)
- _mesa_calloc(cache->size * sizeof(struct brw_cache_item));
-
- brw_init_cache_id(cache,
- "SS_SURFACE",
- BRW_SS_SURFACE,
- 0);
+ calloc(1, cache->size * sizeof(struct brw_cache_item));
- brw_init_cache_id(cache,
- "SS_SURF_BIND",
- BRW_SS_SURF_BIND,
- 0);
+ brw_init_cache_id(cache, "SS_SURFACE", BRW_SS_SURFACE);
+ brw_init_cache_id(cache, "SS_SURF_BIND", BRW_SS_SURF_BIND);
}
@@ -457,7 +426,7 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
GLuint i;
if (INTEL_DEBUG & DEBUG_STATE)
- _mesa_printf("%s\n", __FUNCTION__);
+ printf("%s\n", __FUNCTION__);
for (i = 0; i < cache->size; i++) {
for (c = cache->items[i]; c; c = next) {
@@ -476,7 +445,7 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
cache->n_items = 0;
if (brw->curbe.last_buf) {
- _mesa_free(brw->curbe.last_buf);
+ free(brw->curbe.last_buf);
brw->curbe.last_buf = NULL;
}
@@ -497,7 +466,7 @@ brw_state_cache_bo_delete(struct brw_cache *cache, dri_bo *bo)
GLuint i;
if (INTEL_DEBUG & DEBUG_STATE)
- _mesa_printf("%s\n", __FUNCTION__);
+ printf("%s\n", __FUNCTION__);
for (i = 0; i < cache->size; i++) {
for (prev = &cache->items[i]; *prev;) {
@@ -525,7 +494,7 @@ void
brw_state_cache_check_size(struct brw_context *brw)
{
if (INTEL_DEBUG & DEBUG_STATE)
- _mesa_printf("%s (n_items=%d)\n", __FUNCTION__, brw->cache.n_items);
+ printf("%s (n_items=%d)\n", __FUNCTION__, brw->cache.n_items);
/* un-tuned guess. We've got around 20 state objects for a total of around
* 32k, so 1000 of them is around 1.5MB.
@@ -544,7 +513,7 @@ brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache)
GLuint i;
if (INTEL_DEBUG & DEBUG_STATE)
- _mesa_printf("%s\n", __FUNCTION__);
+ printf("%s\n", __FUNCTION__);
brw_clear_cache(brw, cache);
for (i = 0; i < BRW_MAX_CACHE; i++) {
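
Editor's note, not part of the patch: hash_key() above now seeds the hash with the cache id and then mixes in each 32-bit word of the key, and of the reloc BO pointer array, with a rotate-and-xor step. A standalone sketch of that mixing loop on made-up data.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Mix 32-bit words of a buffer into a running hash, as in hash_key(). */
static uint32_t mix(uint32_t hash, const void *data, size_t size)
{
   uint32_t word;
   for (size_t i = 0; i + 4 <= size; i += 4) {
      memcpy(&word, (const char *)data + i, 4);
      hash ^= word;
      hash = (hash << 5) | (hash >> 27);   /* rotate left by 5 */
   }
   return hash;
}

int main(void)
{
   uint32_t key[4] = { 1, 2, 3, 4 };       /* invented key contents */
   uint32_t hash = mix(0x10 /* cache id seed */, key, sizeof(key));
   printf("hash = 0x%08x\n", hash);
   return 0;
}
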
diff --git a/i965/brw_state_dump.c b/i965/brw_state_dump.c
index e94fa7d..020ac52 100644
--- a/i965/brw_state_dump.c
+++ b/i965/brw_state_dump.c
@@ -28,7 +28,6 @@
#include "main/mtypes.h"
#include "brw_context.h"
-#include "brw_state.h"
#include "brw_defines.h"
/**
diff --git a/i965/brw_state_upload.c b/i965/brw_state_upload.c
index af8dfb4..9e54f29 100644
--- a/i965/brw_state_upload.c
+++ b/i965/brw_state_upload.c
@@ -35,6 +35,7 @@
#include "brw_state.h"
#include "intel_batchbuffer.h"
#include "intel_buffers.h"
+#include "intel_chipset.h"
/* This is used to initialize brw->state.atoms[]. We could use this
* list directly except for a single atom, brw_constant_buffer, which
@@ -42,7 +43,7 @@
* current fragment and vertex programs, and so cannot be a static
* value.
*/
-const struct brw_tracked_state *atoms[] =
+static const struct brw_tracked_state *gen4_atoms[] =
{
&brw_check_fallback,
@@ -101,6 +102,63 @@ const struct brw_tracked_state *atoms[] =
&brw_constant_buffer
};
+const struct brw_tracked_state *gen6_atoms[] =
+{
+ &brw_check_fallback,
+
+ &brw_wm_input_sizes,
+ &brw_vs_prog,
+ &brw_gs_prog,
+ &brw_wm_prog,
+
+ &gen6_clip_vp,
+ &gen6_sf_vp,
+ &gen6_cc_vp,
+
+ /* Command packets: */
+ &brw_invarient_state,
+
+ &gen6_viewport_state, /* must do after *_vp stages */
+
+ &gen6_urb,
+ &gen6_blend_state, /* must do before cc unit */
+ &gen6_color_calc_state, /* must do before cc unit */
+ &gen6_depth_stencil_state, /* must do before cc unit */
+ &gen6_cc_state_pointers,
+
+ &brw_vs_surfaces, /* must do before unit */
+ &brw_wm_constant_surface, /* must do before wm surfaces/bind bo */
+ &brw_wm_surfaces, /* must do before samplers and unit */
+
+ &brw_wm_samplers,
+ &gen6_sampler_state,
+
+ &gen6_vs_state,
+ &gen6_gs_state,
+ &gen6_clip_state,
+ &gen6_sf_state,
+ &gen6_wm_state,
+
+ &gen6_scissor_state,
+
+ &brw_state_base_address,
+
+ &gen6_binding_table_pointers,
+
+ &brw_depthbuffer,
+
+ &brw_polygon_stipple,
+ &brw_polygon_stipple_offset,
+
+ &brw_line_stipple,
+ &brw_aa_line_parameters,
+
+ &brw_drawing_rect,
+
+ &brw_indices,
+ &brw_index_buffer,
+ &brw_vertices,
+};
void brw_init_state( struct brw_context *brw )
{
@@ -208,7 +266,6 @@ static struct dirty_bit_map brw_bits[] = {
DEFINE_BIT(BRW_NEW_CONTEXT),
DEFINE_BIT(BRW_NEW_WM_INPUT_DIMENSIONS),
DEFINE_BIT(BRW_NEW_PSP),
- DEFINE_BIT(BRW_NEW_FENCE),
DEFINE_BIT(BRW_NEW_INDICES),
DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
DEFINE_BIT(BRW_NEW_VERTICES),
@@ -218,6 +275,7 @@ static struct dirty_bit_map brw_bits[] = {
};
static struct dirty_bit_map cache_bits[] = {
+ DEFINE_BIT(CACHE_NEW_BLEND_STATE),
DEFINE_BIT(CACHE_NEW_CC_VP),
DEFINE_BIT(CACHE_NEW_CC_UNIT),
DEFINE_BIT(CACHE_NEW_WM_PROG),
@@ -277,6 +335,8 @@ void brw_validate_state( struct brw_context *brw )
struct intel_context *intel = &brw->intel;
struct brw_state_flags *state = &brw->state.dirty;
GLuint i;
+ const struct brw_tracked_state **atoms;
+ int num_atoms;
brw_clear_validated_bos(brw);
@@ -285,6 +345,14 @@ void brw_validate_state( struct brw_context *brw )
brw_add_validated_bo(brw, intel->batch->buf);
+ if (IS_GEN6(intel->intelScreen->deviceID)) {
+ atoms = gen6_atoms;
+ num_atoms = ARRAY_SIZE(gen6_atoms);
+ } else {
+ atoms = gen4_atoms;
+ num_atoms = ARRAY_SIZE(gen4_atoms);
+ }
+
if (brw->emit_state_always) {
state->mesa |= ~0;
state->brw |= ~0;
@@ -312,7 +380,7 @@ void brw_validate_state( struct brw_context *brw )
brw->intel.Fallback = GL_FALSE; /* boolean, not bitfield */
/* do prepare stage for all atoms */
- for (i = 0; i < Elements(atoms); i++) {
+ for (i = 0; i < num_atoms; i++) {
const struct brw_tracked_state *atom = atoms[i];
if (brw->intel.Fallback)
@@ -344,9 +412,20 @@ void brw_validate_state( struct brw_context *brw )
void brw_upload_state(struct brw_context *brw)
{
+ struct intel_context *intel = &brw->intel;
struct brw_state_flags *state = &brw->state.dirty;
int i;
static int dirty_count = 0;
+ const struct brw_tracked_state **atoms;
+ int num_atoms;
+
+ if (IS_GEN6(intel->intelScreen->deviceID)) {
+ atoms = gen6_atoms;
+ num_atoms = ARRAY_SIZE(gen6_atoms);
+ } else {
+ atoms = gen4_atoms;
+ num_atoms = ARRAY_SIZE(gen4_atoms);
+ }
brw_clear_validated_bos(brw);
@@ -356,10 +435,10 @@ void brw_upload_state(struct brw_context *brw)
* state atoms are ordered correctly in the list.
*/
struct brw_state_flags examined, prev;
- _mesa_memset(&examined, 0, sizeof(examined));
+ memset(&examined, 0, sizeof(examined));
prev = *state;
- for (i = 0; i < Elements(atoms); i++) {
+ for (i = 0; i < num_atoms; i++) {
const struct brw_tracked_state *atom = atoms[i];
struct brw_state_flags generated;
@@ -388,7 +467,7 @@ void brw_upload_state(struct brw_context *brw)
}
}
else {
- for (i = 0; i < Elements(atoms); i++) {
+ for (i = 0; i < num_atoms; i++) {
const struct brw_tracked_state *atom = atoms[i];
if (brw->intel.Fallback)
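
Editor's note, not part of the patch: brw_validate_state() and brw_upload_state() above now pick between gen4_atoms[] and gen6_atoms[] at run time and then walk whichever table was chosen. A toy standalone version of that table-selection pattern; the atom struct and names are stand-ins for brw_tracked_state.

#include <stdio.h>

#define N_ELEMENTS(a) (sizeof(a) / sizeof((a)[0]))

struct fake_atom {
   const char *name;
   void (*emit)(void);
};

static void emit_wm_unit(void)  { printf("emit brw_wm_unit\n"); }
static void emit_gen6_urb(void) { printf("emit gen6_urb\n"); }
static void emit_gen6_wm(void)  { printf("emit gen6_wm_state\n"); }

static const struct fake_atom gen4_atoms[] = {
   { "brw_wm_unit", emit_wm_unit },
};

static const struct fake_atom gen6_atoms[] = {
   { "gen6_urb", emit_gen6_urb },
   { "gen6_wm_state", emit_gen6_wm },
};

int main(void)
{
   int is_gen6 = 1;   /* would come from the device id in the driver */
   const struct fake_atom *atoms = is_gen6 ? gen6_atoms : gen4_atoms;
   size_t num_atoms = is_gen6 ? N_ELEMENTS(gen6_atoms) : N_ELEMENTS(gen4_atoms);

   for (size_t i = 0; i < num_atoms; i++)
      atoms[i].emit();
   return 0;
}
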
diff --git a/i965/brw_structs.h b/i965/brw_structs.h
index 66d4127..3c2adfc 100644
--- a/i965/brw_structs.h
+++ b/i965/brw_structs.h
@@ -658,7 +658,105 @@ struct brw_clip_unit_state
GLfloat viewport_ymax;
};
+struct gen6_blend_state
+{
+ struct {
+ GLuint dest_blend_factor:5;
+ GLuint source_blend_factor:5;
+ GLuint pad3:1;
+ GLuint blend_func:3;
+ GLuint pad2:1;
+ GLuint ia_dest_blend_factor:5;
+ GLuint ia_source_blend_factor:5;
+ GLuint pad1:1;
+ GLuint ia_blend_func:3;
+ GLuint pad0:1;
+ GLuint ia_blend_enable:1;
+ GLuint blend_enable:1;
+ } blend0;
+
+ struct {
+ GLuint post_blend_clamp_enable:1;
+ GLuint pre_blend_clamp_enable:1;
+ GLuint clamp_range:2;
+ GLuint pad0:4;
+ GLuint x_dither_offset:2;
+ GLuint y_dither_offset:2;
+ GLuint dither_enable:1;
+ GLuint alpha_test_func:3;
+ GLuint alpha_test_enable:1;
+ GLuint pad1:1;
+ GLuint logic_op_func:4;
+ GLuint logic_op_enable:1;
+ GLuint pad2:1;
+ GLuint write_disable_b:1;
+ GLuint write_disable_g:1;
+ GLuint write_disable_r:1;
+ GLuint write_disable_a:1;
+ GLuint pad3:1;
+ GLuint alpha_to_coverage_dither:1;
+ GLuint alpha_to_one:1;
+ GLuint alpha_to_coverage:1;
+ } blend1;
+};
+
+struct gen6_color_calc_state
+{
+ struct {
+ GLuint alpha_test_format:1;
+ GLuint pad0:14;
+ GLuint round_disable:1;
+ GLuint bf_stencil_ref:8;
+ GLuint stencil_ref:8;
+ } cc0;
+ union {
+ GLfloat alpha_ref_f;
+ struct {
+ GLuint ui:8;
+ GLuint pad0:24;
+ } alpha_ref_fi;
+ } cc1;
+
+ GLfloat constant_r;
+ GLfloat constant_g;
+ GLfloat constant_b;
+ GLfloat constant_a;
+};
+
+struct gen6_depth_stencil_state
+{
+ struct {
+ GLuint pad0:3;
+ GLuint bf_stencil_pass_depth_pass_op:3;
+ GLuint bf_stencil_pass_depth_fail_op:3;
+ GLuint bf_stencil_fail_op:3;
+ GLuint bf_stencil_func:3;
+ GLuint bf_stencil_enable:1;
+ GLuint pad1:2;
+ GLuint stencil_write_enable:1;
+ GLuint stencil_pass_depth_pass_op:3;
+ GLuint stencil_pass_depth_fail_op:3;
+ GLuint stencil_fail_op:3;
+ GLuint stencil_func:3;
+ GLuint stencil_enable:1;
+ } ds0;
+
+ struct {
+ GLuint bf_stencil_write_mask:8;
+ GLuint bf_stencil_test_mask:8;
+ GLuint stencil_write_mask:8;
+ GLuint stencil_test_mask:8;
+ } ds1;
+
+ struct {
+ GLuint pad0:25;
+ GLuint depth_write_enable:1;
+ GLuint depth_test_func:3;
+ GLuint pad1:1;
+ GLuint depth_test_enable:1;
+ } ds2;
+};
struct brw_cc_unit_state
{
@@ -752,8 +850,6 @@ struct brw_cc_unit_state
} cc7;
};
-
-
struct brw_sf_unit_state
{
struct thread0 thread0;
@@ -813,6 +909,11 @@ struct brw_sf_unit_state
};
+struct gen6_scissor_state
+{
+ GLuint ymin, xmin;
+ GLuint ymax, xmax;
+};
struct brw_gs_unit_state
{
@@ -1043,6 +1144,15 @@ struct brw_sf_viewport
} scissor;
};
+struct gen6_sf_viewport {
+ GLfloat m00;
+ GLfloat m11;
+ GLfloat m22;
+ GLfloat m30;
+ GLfloat m31;
+ GLfloat m32;
+};
+
/* Documented in the subsystem/shared-functions/sampler chapter...
*/
struct brw_surface_state
diff --git a/i965/brw_tex_layout.c b/i965/brw_tex_layout.c
index e59e52e..09edfd8 100644
--- a/i965/brw_tex_layout.c
+++ b/i965/brw_tex_layout.c
@@ -36,7 +36,6 @@
#include "intel_tex_layout.h"
#include "intel_context.h"
#include "main/macros.h"
-#include "intel_chipset.h"
#define FILE_DEBUG_FLAG DEBUG_MIPTREE
@@ -49,7 +48,7 @@ GLboolean brw_miptree_layout(struct intel_context *intel,
switch (mt->target) {
case GL_TEXTURE_CUBE_MAP:
- if (IS_IGDNG(intel->intelScreen->deviceID)) {
+ if (intel->is_ironlake) {
GLuint align_h = 2, align_w = 4;
GLuint level;
GLuint x = 0;
diff --git a/i965/brw_urb.c b/i965/brw_urb.c
index 8c6f435..4f6b900 100644
--- a/i965/brw_urb.c
+++ b/i965/brw_urb.c
@@ -105,7 +105,8 @@ static GLboolean check_urb_layout( struct brw_context *brw )
brw->urb.sf_start = brw->urb.clip_start + brw->urb.nr_clip_entries * brw->urb.vsize;
brw->urb.cs_start = brw->urb.sf_start + brw->urb.nr_sf_entries * brw->urb.sfsize;
- return brw->urb.cs_start + brw->urb.nr_cs_entries * brw->urb.csize <= URB_SIZES(brw);
+ return brw->urb.cs_start + brw->urb.nr_cs_entries *
+ brw->urb.csize <= brw->urb.size;
}
/* Most minimal update, forces re-emit of URB fence packet after GS
@@ -113,6 +114,7 @@ static GLboolean check_urb_layout( struct brw_context *brw )
*/
static void recalculate_urb_fence( struct brw_context *brw )
{
+ struct intel_context *intel = &brw->intel;
GLuint csize = brw->curbe.total_size;
GLuint vsize = brw->vs.prog_data->urb_entry_size;
GLuint sfsize = brw->sf.prog_data->urb_entry_size;
@@ -146,7 +148,7 @@ static void recalculate_urb_fence( struct brw_context *brw )
brw->urb.constrained = 0;
- if (BRW_IS_IGDNG(brw)) {
+ if (intel->is_ironlake) {
brw->urb.nr_vs_entries = 128;
brw->urb.nr_sf_entries = 48;
if (check_urb_layout(brw)) {
@@ -156,7 +158,7 @@ static void recalculate_urb_fence( struct brw_context *brw )
brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries;
}
- } else if (BRW_IS_G4X(brw)) {
+ } else if (intel->is_g4x) {
brw->urb.nr_vs_entries = 64;
if (check_urb_layout(brw)) {
goto done;
@@ -184,23 +186,23 @@ static void recalculate_urb_fence( struct brw_context *brw )
* entries and the values for minimum nr of entries
* provided above.
*/
- _mesa_printf("couldn't calculate URB layout!\n");
+ printf("couldn't calculate URB layout!\n");
exit(1);
}
if (INTEL_DEBUG & (DEBUG_URB|DEBUG_FALLBACKS))
- _mesa_printf("URB CONSTRAINED\n");
+ printf("URB CONSTRAINED\n");
}
done:
if (INTEL_DEBUG & DEBUG_URB)
- _mesa_printf("URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n",
+ printf("URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n",
brw->urb.vs_start,
brw->urb.gs_start,
brw->urb.clip_start,
brw->urb.sf_start,
brw->urb.cs_start,
- URB_SIZES(brw));
+ brw->urb.size);
brw->state.dirty.brw |= BRW_NEW_URB_FENCE;
}
@@ -244,7 +246,7 @@ void brw_upload_urb_fence(struct brw_context *brw)
uf.bits0.gs_fence = brw->urb.clip_start;
uf.bits0.clp_fence = brw->urb.sf_start;
uf.bits1.sf_fence = brw->urb.cs_start;
- uf.bits1.cs_fence = URB_SIZES(brw);
+ uf.bits1.cs_fence = brw->urb.size;
BRW_BATCH_STRUCT(brw, &uf);
}
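
Editor's note, not part of the patch: check_urb_layout() above lays the stages out back to back, and the change makes the final comparison run against brw->urb.size instead of the old URB_SIZES() macro. A worked standalone example of the same arithmetic with invented entry counts and sizes.

#include <stdio.h>

int main(void)
{
   /* Invented sizes, in URB rows, just to walk the layout once. */
   int vs_start = 0, nr_vs = 32, vsize = 4;
   int gs_start = vs_start + nr_vs * vsize;           /* 128 */
   int nr_gs = 4, clip_start = gs_start + nr_gs * vsize;
   int nr_clip = 6, sf_start = clip_start + nr_clip * vsize;
   int nr_sf = 24, sfsize = 2, cs_start = sf_start + nr_sf * sfsize;
   int nr_cs = 4, csize = 2, urb_size = 256;

   int end = cs_start + nr_cs * csize;
   printf("cs region ends at %d, URB size %d, layout fits: %d\n",
          end, urb_size, end <= urb_size);
   return 0;
}
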
diff --git a/i965/brw_vs.c b/i965/brw_vs.c
index fd055e2..44b085e 100644
--- a/i965/brw_vs.c
+++ b/i965/brw_vs.c
@@ -35,6 +35,7 @@
#include "brw_util.h"
#include "brw_state.h"
#include "shader/prog_print.h"
+#include "shader/prog_parameter.h"
@@ -42,9 +43,11 @@ static void do_vs_prog( struct brw_context *brw,
struct brw_vertex_program *vp,
struct brw_vs_prog_key *key )
{
+ GLcontext *ctx = &brw->intel.ctx;
GLuint program_size;
const GLuint *program;
struct brw_vs_compile c;
+ int aux_size;
memset(&c, 0, sizeof(c));
memcpy(&c.key, key, sizeof(*key));
@@ -73,13 +76,27 @@ static void do_vs_prog( struct brw_context *brw,
*/
program = brw_get_program(&c.func, &program_size);
+ /* We upload from &c.prog_data including the constant_map assuming
+ * they're packed together. It would be nice to have a
+ * compile-time assert macro here.
+ */
+ assert(c.constant_map == (int8_t *)&c.prog_data +
+ sizeof(c.prog_data));
+ assert(ctx->Const.VertexProgram.MaxNativeParameters ==
+ ARRAY_SIZE(c.constant_map));
+
+ aux_size = sizeof(c.prog_data);
+ if (c.vp->use_const_buffer)
+ aux_size += c.vp->program.Base.Parameters->NumParameters;
+
dri_bo_unreference(brw->vs.prog_bo);
- brw->vs.prog_bo = brw_upload_cache( &brw->cache, BRW_VS_PROG,
- &c.key, sizeof(c.key),
- NULL, 0,
- program, program_size,
- &c.prog_data,
- &brw->vs.prog_data );
+ brw->vs.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_VS_PROG,
+ &c.key, sizeof(c.key),
+ NULL, 0,
+ program, program_size,
+ &c.prog_data,
+ aux_size,
+ &brw->vs.prog_data);
}
@@ -109,6 +126,8 @@ static void brw_upload_vs_prog(struct brw_context *brw)
&brw->vs.prog_data);
if (brw->vs.prog_bo == NULL)
do_vs_prog(brw, vp, &key);
+ brw->vs.constant_map = ((int8_t *)brw->vs.prog_data +
+ sizeof(*brw->vs.prog_data));
}
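
Editor's note, not part of the patch: do_vs_prog() above uploads prog_data and the new constant_map as one block of aux data, and asserts that the map really does sit immediately after prog_data inside brw_vs_compile. A scaled-down standalone illustration of that layout check; the structs are stand-ins, not the real ones.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

struct fake_prog_data { int curb_read_length; int nr_params; };

struct fake_vs_compile {
   struct fake_prog_data prog_data;
   int8_t constant_map[16];
};

int main(void)
{
   struct fake_vs_compile c;

   /* Mirrors the assert in do_vs_prog(): the map must directly follow
    * prog_data so both can be uploaded as a single aux-data block. */
   assert((int8_t *)&c.constant_map ==
          (int8_t *)&c.prog_data + sizeof(c.prog_data));

   printf("aux upload size = %zu\n",
          sizeof(c.prog_data) + sizeof(c.constant_map));
   return 0;
}
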
diff --git a/i965/brw_vs.h b/i965/brw_vs.h
index 4a59136..95e0501 100644
--- a/i965/brw_vs.h
+++ b/i965/brw_vs.h
@@ -51,6 +51,7 @@ struct brw_vs_compile {
struct brw_compile func;
struct brw_vs_prog_key key;
struct brw_vs_prog_data prog_data;
+ int8_t constant_map[1024];
struct brw_vertex_program *vp;
@@ -81,6 +82,8 @@ struct brw_vs_compile {
GLint index;
struct brw_reg reg;
} current_const[3];
+
+ GLboolean needs_stack;
};
void brw_vs_emit( struct brw_vs_compile *c );
diff --git a/i965/brw_vs_emit.c b/i965/brw_vs_emit.c
index 27aac8b..a7c4b58 100644
--- a/i965/brw_vs_emit.c
+++ b/i965/brw_vs_emit.c
@@ -67,6 +67,7 @@ static void release_tmps( struct brw_vs_compile *c )
*/
static void brw_vs_alloc_regs( struct brw_vs_compile *c )
{
+ struct intel_context *intel = &c->func.brw->intel;
GLuint i, reg = 0, mrf;
int attributes_in_vue;
@@ -103,9 +104,47 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
/* Vertex program parameters from curbe:
*/
if (c->vp->use_const_buffer) {
- /* get constants from a real constant buffer */
- c->prog_data.curb_read_length = 0;
- c->prog_data.nr_params = 4; /* XXX 0 causes a bug elsewhere... */
+ int max_constant = BRW_MAX_GRF - 20 - c->vp->program.Base.NumTemporaries;
+ int constant = 0;
+
+ /* We've got more constants than we can load with the push
+ * mechanism. This is often correlated with reladdr loads where
+ * we should probably be using a pull mechanism anyway to avoid
+ * excessive reading. However, the pull mechanism is slow in
+ * general. So, we try to allocate as many non-reladdr-loaded
+ * constants through the push buffer as we can before giving up.
+ */
+ memset(c->constant_map, -1, c->vp->program.Base.Parameters->NumParameters);
+ for (i = 0;
+ i < c->vp->program.Base.NumInstructions && constant < max_constant;
+ i++) {
+ struct prog_instruction *inst = &c->vp->program.Base.Instructions[i];
+ int arg;
+
+ for (arg = 0; arg < 3 && constant < max_constant; arg++) {
+ if ((inst->SrcReg[arg].File != PROGRAM_STATE_VAR &&
+ inst->SrcReg[arg].File != PROGRAM_CONSTANT &&
+ inst->SrcReg[arg].File != PROGRAM_UNIFORM &&
+ inst->SrcReg[arg].File != PROGRAM_ENV_PARAM &&
+ inst->SrcReg[arg].File != PROGRAM_LOCAL_PARAM) ||
+ inst->SrcReg[arg].RelAddr)
+ continue;
+
+ if (c->constant_map[inst->SrcReg[arg].Index] == -1) {
+ c->constant_map[inst->SrcReg[arg].Index] = constant++;
+ }
+ }
+ }
+
+ for (i = 0; i < constant; i++) {
+ c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2,
+ (i%2) * 4),
+ 0, 4, 1);
+ }
+ reg += (constant + 1) / 2;
+ c->prog_data.curb_read_length = reg - 1;
+ /* XXX 0 causes a bug elsewhere... */
+ c->prog_data.nr_params = MAX2(constant * 4, 4);
}
else {
/* use a section of the GRF for constants */
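
Editor's note, not part of the patch: the hunk above builds constant_map greedily, scanning instruction sources and handing out push-constant slots to constants that are not accessed through reladdr until the budget runs out; everything else keeps -1 and is fetched through the pull path instead. A standalone toy run of that allocation on made-up data.

#include <stdio.h>
#include <string.h>

#define NUM_PARAMS 8

int main(void)
{
   /* srcs[i] = parameter index used by instruction i, rel[i] = RelAddr flag */
   int srcs[] = { 3, 1, 3, 5, 7 };
   int rel[]  = { 0, 0, 0, 1, 0 };
   int n = sizeof(srcs) / sizeof(srcs[0]);
   signed char constant_map[NUM_PARAMS];
   int constant = 0, max_constant = 3;   /* invented push budget */

   memset(constant_map, -1, sizeof(constant_map));
   for (int i = 0; i < n && constant < max_constant; i++) {
      if (rel[i])
         continue;                       /* reladdr loads stay on the pull path */
      if (constant_map[srcs[i]] == -1)
         constant_map[srcs[i]] = constant++;
   }

   for (int p = 0; p < NUM_PARAMS; p++)
      printf("param %d -> push slot %d\n", p, constant_map[p]);
   return 0;
}
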
@@ -141,10 +180,12 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
c->first_output = reg;
c->first_overflow_output = 0;
- if (BRW_IS_IGDNG(c->func.brw))
- mrf = 8;
+ if (intel->gen >= 6)
+ mrf = 6;
+ else if (intel->is_ironlake)
+ mrf = 8;
else
- mrf = 4;
+ mrf = 4;
for (i = 0; i < VERT_RESULT_MAX; i++) {
if (c->prog_data.outputs_written & BITFIELD64_BIT(i)) {
@@ -213,8 +254,10 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
}
}
- c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0);
- reg += 2;
+ if (c->needs_stack) {
+ c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0);
+ reg += 2;
+ }
/* Some opcodes need an internal temporary:
*/
@@ -238,17 +281,19 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
*/
attributes_in_vue = MAX2(c->nr_outputs, c->nr_inputs);
- if (BRW_IS_IGDNG(c->func.brw))
- c->prog_data.urb_entry_size = (attributes_in_vue + 6 + 3) / 4;
+ if (intel->gen >= 6)
+ c->prog_data.urb_entry_size = (attributes_in_vue + 4 + 7) / 8;
+ else if (intel->is_ironlake)
+ c->prog_data.urb_entry_size = (attributes_in_vue + 6 + 3) / 4;
else
- c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 3) / 4;
+ c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 3) / 4;
c->prog_data.total_grf = reg;
if (INTEL_DEBUG & DEBUG_VS) {
- _mesa_printf("%s NumAddrRegs %d\n", __FUNCTION__, c->vp->program.Base.NumAddressRegs);
- _mesa_printf("%s NumTemps %d\n", __FUNCTION__, c->vp->program.Base.NumTemporaries);
- _mesa_printf("%s reg = %d\n", __FUNCTION__, reg);
+ printf("%s NumAddrRegs %d\n", __FUNCTION__, c->vp->program.Base.NumAddressRegs);
+ printf("%s NumTemps %d\n", __FUNCTION__, c->vp->program.Base.NumTemporaries);
+ printf("%s reg = %d\n", __FUNCTION__, reg);
}
}
@@ -438,9 +483,11 @@ static void emit_math1( struct brw_vs_compile *c,
* whether that turns out to be a simulator bug or not:
*/
struct brw_compile *p = &c->func;
+ struct intel_context *intel = &p->brw->intel;
struct brw_reg tmp = dst;
- GLboolean need_tmp = (dst.dw1.bits.writemask != 0xf ||
- dst.file != BRW_GENERAL_REGISTER_FILE);
+ GLboolean need_tmp = (intel->gen < 6 &&
+ (dst.dw1.bits.writemask != 0xf ||
+ dst.file != BRW_GENERAL_REGISTER_FILE));
if (need_tmp)
tmp = get_tmp(c);
@@ -469,9 +516,11 @@ static void emit_math2( struct brw_vs_compile *c,
GLuint precision)
{
struct brw_compile *p = &c->func;
+ struct intel_context *intel = &p->brw->intel;
struct brw_reg tmp = dst;
- GLboolean need_tmp = (dst.dw1.bits.writemask != 0xf ||
- dst.file != BRW_GENERAL_REGISTER_FILE);
+ GLboolean need_tmp = (intel->gen < 6 &&
+ (dst.dw1.bits.writemask != 0xf ||
+ dst.file != BRW_GENERAL_REGISTER_FILE));
if (need_tmp)
tmp = get_tmp(c);
@@ -761,15 +810,14 @@ get_constant(struct brw_vs_compile *c,
{
const struct prog_src_register *src = &inst->SrcReg[argIndex];
struct brw_compile *p = &c->func;
- struct brw_reg const_reg;
- struct brw_reg const2_reg;
- const GLboolean relAddr = src->RelAddr;
+ struct brw_reg const_reg = c->current_const[argIndex].reg;
assert(argIndex < 3);
- if (c->current_const[argIndex].index != src->Index || relAddr) {
+ if (c->current_const[argIndex].index != src->Index) {
struct brw_reg addrReg = c->regs[PROGRAM_ADDRESS][0];
+ /* Keep track of the last constant loaded in this slot, for reuse. */
c->current_const[argIndex].index = src->Index;
#if 0
@@ -778,48 +826,74 @@ get_constant(struct brw_vs_compile *c,
#endif
/* need to fetch the constant now */
brw_dp_READ_4_vs(p,
- c->current_const[argIndex].reg,/* writeback dest */
+ const_reg, /* writeback dest */
0, /* oword */
- relAddr, /* relative indexing? */
+ 0, /* relative indexing? */
addrReg, /* address register */
16 * src->Index, /* byte offset */
SURF_INDEX_VERT_CONST_BUFFER /* binding table index */
);
-
- if (relAddr) {
- /* second read */
- const2_reg = get_tmp(c);
-
- /* use upper half of address reg for second read */
- addrReg = stride(addrReg, 0, 4, 0);
- addrReg.subnr = 16;
-
- brw_dp_READ_4_vs(p,
- const2_reg, /* writeback dest */
- 1, /* oword */
- relAddr, /* relative indexing? */
- addrReg, /* address register */
- 16 * src->Index, /* byte offset */
- SURF_INDEX_VERT_CONST_BUFFER
- );
- }
}
- const_reg = c->current_const[argIndex].reg;
+ /* replicate lower four floats into upper half (to get XYZWXYZW) */
+ const_reg = stride(const_reg, 0, 4, 0);
+ const_reg.subnr = 0;
- if (relAddr) {
- /* merge the two Owords into the constant register */
- /* const_reg[7..4] = const2_reg[7..4] */
- brw_MOV(p,
- suboffset(stride(const_reg, 0, 4, 1), 4),
- suboffset(stride(const2_reg, 0, 4, 1), 4));
- release_tmp(c, const2_reg);
- }
- else {
- /* replicate lower four floats into upper half (to get XYZWXYZW) */
- const_reg = stride(const_reg, 0, 4, 0);
- const_reg.subnr = 0;
- }
+ return const_reg;
+}
+
+static struct brw_reg
+get_reladdr_constant(struct brw_vs_compile *c,
+ const struct prog_instruction *inst,
+ GLuint argIndex)
+{
+ const struct prog_src_register *src = &inst->SrcReg[argIndex];
+ struct brw_compile *p = &c->func;
+ struct brw_reg const_reg = c->current_const[argIndex].reg;
+ struct brw_reg const2_reg;
+ struct brw_reg addrReg = c->regs[PROGRAM_ADDRESS][0];
+
+ assert(argIndex < 3);
+
+ /* Can't reuse a reladdr constant load. */
+ c->current_const[argIndex].index = -1;
+
+ #if 0
+ printf(" fetch const[a0.x+%d] for arg %d into reg %d\n",
+ src->Index, argIndex, c->current_const[argIndex].reg.nr);
+#endif
+
+ /* fetch the first vec4 */
+ brw_dp_READ_4_vs(p,
+ const_reg, /* writeback dest */
+ 0, /* oword */
+ 1, /* relative indexing? */
+ addrReg, /* address register */
+ 16 * src->Index, /* byte offset */
+ SURF_INDEX_VERT_CONST_BUFFER /* binding table index */
+ );
+ /* second vec4 */
+ const2_reg = get_tmp(c);
+
+ /* use upper half of address reg for second read */
+ addrReg = stride(addrReg, 0, 4, 0);
+ addrReg.subnr = 16;
+
+ brw_dp_READ_4_vs(p,
+ const2_reg, /* writeback dest */
+ 1, /* oword */
+ 1, /* relative indexing? */
+ addrReg, /* address register */
+ 16 * src->Index, /* byte offset */
+ SURF_INDEX_VERT_CONST_BUFFER
+ );
+
+ /* merge the two Owords into the constant register */
+ /* const_reg[7..4] = const2_reg[7..4] */
+ brw_MOV(p,
+ suboffset(stride(const_reg, 0, 4, 1), 4),
+ suboffset(stride(const2_reg, 0, 4, 1), 4));
+ release_tmp(c, const2_reg);
return const_reg;
}
@@ -927,7 +1001,13 @@ get_src_reg( struct brw_vs_compile *c,
case PROGRAM_ENV_PARAM:
case PROGRAM_LOCAL_PARAM:
if (c->vp->use_const_buffer) {
- return get_constant(c, inst, argIndex);
+ if (!relAddr && c->constant_map[index] != -1) {
+ assert(c->regs[PROGRAM_STATE_VAR][c->constant_map[index]].nr != 0);
+ return c->regs[PROGRAM_STATE_VAR][c->constant_map[index]];
+ } else if (relAddr)
+ return get_reladdr_constant(c, inst, argIndex);
+ else
+ return get_constant(c, inst, argIndex);
}
else if (relAddr) {
return deref(c, c->regs[PROGRAM_STATE_VAR][0], index);
@@ -1113,11 +1193,13 @@ static void emit_swz( struct brw_vs_compile *c,
static void emit_vertex_write( struct brw_vs_compile *c)
{
struct brw_compile *p = &c->func;
+ struct brw_context *brw = p->brw;
+ struct intel_context *intel = &brw->intel;
struct brw_reg m0 = brw_message_reg(0);
struct brw_reg pos = c->regs[PROGRAM_OUTPUT][VERT_RESULT_HPOS];
struct brw_reg ndc;
int eot;
- GLuint len_vertext_header = 2;
+ GLuint len_vertex_header = 2;
if (c->key.copy_edgeflag) {
brw_MOV(p,
@@ -1125,18 +1207,20 @@ static void emit_vertex_write( struct brw_vs_compile *c)
get_reg(c, PROGRAM_INPUT, VERT_ATTRIB_EDGEFLAG));
}
- /* Build ndc coords */
- ndc = get_tmp(c);
- /* ndc = 1.0 / pos.w */
- emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL);
- /* ndc.xyz = pos * ndc */
- brw_MUL(p, brw_writemask(ndc, WRITEMASK_XYZ), pos, ndc);
+ if (intel->gen < 6) {
+ /* Build ndc coords */
+ ndc = get_tmp(c);
+ /* ndc = 1.0 / pos.w */
+ emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL);
+ /* ndc.xyz = pos * ndc */
+ brw_MUL(p, brw_writemask(ndc, WRITEMASK_XYZ), pos, ndc);
+ }
/* Update the header for point size, user clipping flags, and -ve rhw
* workaround.
*/
if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) ||
- c->key.nr_userclip || BRW_IS_965(p->brw))
+ c->key.nr_userclip || brw->has_negative_rhw_bug)
{
struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
GLuint i;
@@ -1167,7 +1251,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
* Later, clipping will detect ucp[6] and ensure the primitive is
* clipped against all fixed planes.
*/
- if (BRW_IS_965(p->brw)) {
+ if (brw->has_negative_rhw_bug) {
brw_CMP(p,
vec8(brw_null_reg()),
BRW_CONDITIONAL_L,
@@ -1193,21 +1277,41 @@ static void emit_vertex_write( struct brw_vs_compile *c)
* of zeros followed by two sets of NDC coordinates:
*/
brw_set_access_mode(p, BRW_ALIGN_1);
- brw_MOV(p, offset(m0, 2), ndc);
-
- if (BRW_IS_IGDNG(p->brw)) {
- /* There are 20 DWs (D0-D19) in VUE vertex header on IGDNG */
- brw_MOV(p, offset(m0, 3), pos); /* a portion of vertex header */
- /* m4, m5 contain the distances from vertex to the user clip planeXXX.
- * Seems it is useless for us.
- * m6 is used for aligning, so that the remainder of vertex element is
- * reg-aligned.
- */
- brw_MOV(p, offset(m0, 7), pos); /* the remainder of vertex element */
- len_vertext_header = 6;
+
+ if (intel->gen >= 6) {
+ /* There are 16 DWs (D0-D15) in VUE header on Sandybridge:
+ * dword 0-3 (m1) of the header is indices, point width, clip flags.
+ * dword 4-7 (m2) is the 4D space position
+ * dword 8-15 (m3,m4) of the vertex header is the user clip distance.
+ * m5 is the first vertex data we fill, which is the vertex position.
+ */
+ brw_MOV(p, offset(m0, 2), pos);
+ brw_MOV(p, offset(m0, 5), pos);
+ len_vertex_header = 4;
+ } else if (intel->is_ironlake) {
+ /* There are 20 DWs (D0-D19) in VUE header on Ironlake:
+ * dword 0-3 (m1) of the header is indices, point width, clip flags.
+ * dword 4-7 (m2) is the ndc position (set above)
+ * dword 8-11 (m3) of the vertex header is the 4D space position
+ * dword 12-19 (m4,m5) of the vertex header is the user clip distance.
+ * m6 is a pad so that the vertex element data is aligned
+ * m7 is the first vertex data we fill, which is the vertex position.
+ */
+ brw_MOV(p, offset(m0, 2), ndc);
+ brw_MOV(p, offset(m0, 3), pos);
+ brw_MOV(p, offset(m0, 7), pos);
+ len_vertex_header = 6;
} else {
- brw_MOV(p, offset(m0, 3), pos);
- len_vertext_header = 2;
+ /* There are 8 dwords in VUE header pre-Ironlake:
+ * dword 0-3 (m1) is indices, point width, clip flags.
+ * dword 4-7 (m2) is ndc position (set above)
+ *
+	 * dword 8-11 (m3) is the first vertex data we fill, which is always
+	 * the vertex position.
+ */
+ brw_MOV(p, offset(m0, 2), ndc);
+ brw_MOV(p, offset(m0, 3), pos);
+ len_vertex_header = 2;
}
eot = (c->first_overflow_output == 0);
@@ -1218,7 +1322,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
c->r0, /* src */
0, /* allocate */
1, /* used */
- MIN2(c->nr_outputs + 1 + len_vertext_header, (BRW_MAX_MRF-1)), /* msg len */
+ MIN2(c->nr_outputs + 1 + len_vertex_header, (BRW_MAX_MRF-1)), /* msg len */
0, /* response len */
eot, /* eot */
eot, /* writes complete */
@@ -1359,29 +1463,32 @@ void brw_vs_emit(struct brw_vs_compile *c )
#define MAX_LOOP_DEPTH 32
struct brw_compile *p = &c->func;
struct brw_context *brw = p->brw;
+ struct intel_context *intel = &brw->intel;
const GLuint nr_insns = c->vp->program.Base.NumInstructions;
GLuint insn, if_depth = 0, loop_depth = 0;
GLuint end_offset = 0;
struct brw_instruction *end_inst, *last_inst;
- struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH];
+ struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH] = { 0 };
const struct brw_indirect stack_index = brw_indirect(0, 0);
GLuint index;
GLuint file;
if (INTEL_DEBUG & DEBUG_VS) {
- _mesa_printf("vs-mesa:\n");
+ printf("vs-mesa:\n");
_mesa_print_program(&c->vp->program.Base);
- _mesa_printf("\n");
+ printf("\n");
}
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_access_mode(p, BRW_ALIGN_16);
-
- /* Message registers can't be read, so copy the output into GRF register
- if they are used in source registers */
+
for (insn = 0; insn < nr_insns; insn++) {
GLuint i;
struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn];
+
+ /* Message registers can't be read, so copy the output into GRF
+ * register if they are used in source registers
+ */
for (i = 0; i < 3; i++) {
struct prog_src_register *src = &inst->SrcReg[i];
GLuint index = src->Index;
@@ -1389,12 +1496,23 @@ void brw_vs_emit(struct brw_vs_compile *c )
if (file == PROGRAM_OUTPUT && index != VERT_RESULT_HPOS)
c->output_regs[index].used_in_src = GL_TRUE;
}
+
+ switch (inst->Opcode) {
+ case OPCODE_CAL:
+ case OPCODE_RET:
+ c->needs_stack = GL_TRUE;
+ break;
+ default:
+ break;
+ }
}
/* Static register allocation
*/
brw_vs_alloc_regs(c);
- brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack));
+
+ if (c->needs_stack)
+ brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack));
for (insn = 0; insn < nr_insns; insn++) {
@@ -1592,7 +1710,7 @@ void brw_vs_emit(struct brw_vs_compile *c )
loop_depth--;
- if (BRW_IS_IGDNG(brw))
+ if (intel->is_ironlake)
br = 2;
inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]);
@@ -1708,9 +1826,9 @@ void brw_vs_emit(struct brw_vs_compile *c )
if (INTEL_DEBUG & DEBUG_VS) {
int i;
- _mesa_printf("vs-native:\n");
+ printf("vs-native:\n");
for (i = 0; i < p->nr_insn; i++)
brw_disasm(stderr, &p->store[i]);
- _mesa_printf("\n");
+ printf("\n");
}
}
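
The constant_map setup added at the top of brw_vs_alloc_regs() splits constants between the fast push (CURBE) path and the slower pull (constant buffer read) path. Below is a minimal standalone sketch of that allocation policy, outside the driver; every name in it is made up for illustration and nothing here is mesa API:

    #include <stdio.h>
    #include <string.h>

    /* Hypothetical stand-in for one instruction operand: the constant it
     * reads (-1 if none) and whether it uses relative addressing, which
     * forces the pull path at run time.
     */
    struct operand {
       int const_index;
       int reladdr;
    };

    /* Hand out push-buffer slots in first-use order until the budget is
     * exhausted; anything still mapped to -1 afterwards is read through
     * the pull (constant buffer) path instead.
     */
    static void
    map_push_constants(const struct operand *ops, int nr_ops,
                       int nr_constants, int max_push, int *map)
    {
       int used = 0, i;

       memset(map, -1, nr_constants * sizeof(*map));

       for (i = 0; i < nr_ops && used < max_push; i++) {
          if (ops[i].const_index < 0 || ops[i].reladdr)
             continue;
          if (map[ops[i].const_index] == -1)
             map[ops[i].const_index] = used++;
       }
    }

    int
    main(void)
    {
       const struct operand ops[] = {
          { 3, 0 }, { 0, 0 }, { 5, 1 }, { 3, 0 }, { 1, 0 },
       };
       int map[8], i;

       map_push_constants(ops, 5, 8, 2, map);
       for (i = 0; i < 8; i++)
          printf("constant %d -> %s\n", i, map[i] >= 0 ? "push" : "pull");
       return 0;
    }

The driver additionally reserves GRF space for the pushed range and records curb_read_length; the sketch only shows the mapping decision itself.
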
diff --git a/i965/brw_vs_state.c b/i965/brw_vs_state.c
index 7285466..fd9f2fe 100644
--- a/i965/brw_vs_state.c
+++ b/i965/brw_vs_state.c
@@ -82,9 +82,9 @@ vs_unit_populate_key(struct brw_context *brw, struct brw_vs_unit_key *key)
static dri_bo *
vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
{
+ struct intel_context *intel = &brw->intel;
struct brw_vs_unit_state vs;
dri_bo *bo;
- int chipset_max_threads;
memset(&vs, 0, sizeof(vs));
@@ -98,7 +98,7 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
*/
vs.thread1.single_program_flow = 0;
- if (BRW_IS_IGDNG(brw))
+ if (intel->is_ironlake)
vs.thread1.binding_table_entry_count = 0; /* hardware requirement */
else
vs.thread1.binding_table_entry_count = key->nr_surfaces;
@@ -109,7 +109,7 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
vs.thread3.urb_entry_read_offset = 0;
vs.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
- if (BRW_IS_IGDNG(brw)) {
+ if (intel->is_ironlake) {
switch (key->nr_urb_entries) {
case 8:
case 12:
@@ -135,7 +135,7 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
case 32:
break;
case 64:
- assert(BRW_IS_G4X(brw));
+ assert(intel->is_g4x);
break;
default:
assert(0);
@@ -145,17 +145,8 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
vs.thread4.urb_entry_allocation_size = key->urb_size - 1;
- if (BRW_IS_IGDNG(brw))
- chipset_max_threads = 72;
- else if (BRW_IS_G4X(brw))
- chipset_max_threads = 32;
- else
- chipset_max_threads = 16;
vs.thread4.max_threads = CLAMP(key->nr_urb_entries / 2,
- 1, chipset_max_threads) - 1;
-
- if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
- vs.thread4.max_threads = 0;
+ 1, brw->vs_max_threads) - 1;
/* No samplers for ARB_vp programs:
*/
@@ -173,8 +164,7 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
bo = brw_upload_cache(&brw->cache, BRW_VS_UNIT,
key, sizeof(*key),
&brw->vs.prog_bo, 1,
- &vs, sizeof(vs),
- NULL, NULL);
+ &vs, sizeof(vs));
/* Emit VS program relocation */
dri_bo_emit_reloc(bo,
diff --git a/i965/brw_vs_surface_state.c b/i965/brw_vs_surface_state.c
index 3bc9840..4007b5a 100644
--- a/i965/brw_vs_surface_state.c
+++ b/i965/brw_vs_surface_state.c
@@ -35,7 +35,6 @@
#include "brw_context.h"
#include "brw_state.h"
-#include "brw_defines.h"
/* Creates a new VS constant buffer reflecting the current VS program's
* constants, if needed by the VS program.
@@ -68,13 +67,13 @@ brw_vs_update_constant_buffer(struct brw_context *brw)
*/
_mesa_load_state_parameters(&brw->intel.ctx, vp->program.Base.Parameters);
- intel_bo_map_gtt_preferred(intel, const_buffer, GL_TRUE);
+ drm_intel_gem_bo_map_gtt(const_buffer);
for (i = 0; i < params->NumParameters; i++) {
memcpy(const_buffer->virtual + i * 4 * sizeof(float),
params->ParameterValues[i],
4 * sizeof(float));
}
- intel_bo_unmap_gtt_preferred(intel, const_buffer);
+ drm_intel_gem_bo_unmap_gtt(const_buffer);
return const_buffer;
}
@@ -105,7 +104,7 @@ brw_update_vs_constant_surface( GLcontext *ctx,
/* If there's no constant buffer, then no surface BO is needed to point at
* it.
*/
- if (vp->const_buffer == 0) {
+ if (vp->const_buffer == NULL) {
drm_intel_bo_unreference(brw->vs.surf_bo[surf]);
brw->vs.surf_bo[surf] = NULL;
return;
@@ -133,7 +132,7 @@ brw_update_vs_constant_surface( GLcontext *ctx,
brw->vs.surf_bo[surf] = brw_search_cache(&brw->surface_cache,
BRW_SS_SURFACE,
&key, sizeof(key),
- &key.bo, key.bo ? 1 : 0,
+ &key.bo, 1,
NULL);
if (brw->vs.surf_bo[surf] == NULL) {
brw->vs.surf_bo[surf] = brw_create_constant_surface(brw, &key);
@@ -156,7 +155,7 @@ brw_vs_get_binding_table(struct brw_context *brw)
if (bind_bo == NULL) {
GLuint data_size = BRW_VS_MAX_SURF * sizeof(GLuint);
- uint32_t *data = malloc(data_size);
+ uint32_t data[BRW_VS_MAX_SURF];
int i;
for (i = 0; i < BRW_VS_MAX_SURF; i++)
@@ -168,8 +167,7 @@ brw_vs_get_binding_table(struct brw_context *brw)
bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND,
NULL, 0,
brw->vs.surf_bo, BRW_VS_MAX_SURF,
- data, data_size,
- NULL, NULL);
+ data, data_size);
/* Emit binding table relocations to surface state */
for (i = 0; i < BRW_VS_MAX_SURF; i++) {
@@ -182,8 +180,6 @@ brw_vs_get_binding_table(struct brw_context *brw)
I915_GEM_DOMAIN_INSTRUCTION, 0);
}
}
-
- free(data);
}
return bind_bo;
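
For reference, the constant-buffer fill in brw_vs_update_constant_buffer() above copies each parameter's four floats back to back into the GTT-mapped BO. A tiny sketch of the same copy, with an ordinary array standing in for the mapped buffer (illustrative only):

    #include <stdio.h>
    #include <string.h>

    /* Pack nr_params vec4 parameters contiguously into dst, the way the
     * driver fills the mapped constant buffer.
     */
    static void
    fill_constant_buffer(float *dst, const float params[][4], int nr_params)
    {
       int i;

       for (i = 0; i < nr_params; i++)
          memcpy(dst + i * 4, params[i], 4 * sizeof(float));
    }

    int
    main(void)
    {
       const float params[2][4] = {
          { 1.0f, 0.0f, 0.0f, 1.0f },
          { 0.0f, 0.5f, 0.0f, 1.0f },
       };
       float buffer[2 * 4];

       fill_constant_buffer(buffer, params, 2);
       printf("second constant starts with %.1f\n", buffer[4]);
       return 0;
    }
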
diff --git a/i965/brw_vtbl.c b/i965/brw_vtbl.c
index 34aaea3..96a44bf 100644
--- a/i965/brw_vtbl.c
+++ b/i965/brw_vtbl.c
@@ -44,7 +44,6 @@
#include "brw_state.h"
#include "brw_draw.h"
#include "brw_state.h"
-#include "brw_fallback.h"
#include "brw_vs.h"
#include "brw_wm.h"
@@ -68,11 +67,11 @@ static void brw_destroy_context( struct intel_context *intel )
brw_draw_destroy( brw );
brw_clear_validated_bos(brw);
if (brw->wm.compile_data) {
- _mesa_free(brw->wm.compile_data->instruction);
- _mesa_free(brw->wm.compile_data->vreg);
- _mesa_free(brw->wm.compile_data->refs);
- _mesa_free(brw->wm.compile_data->prog_instructions);
- _mesa_free(brw->wm.compile_data);
+ free(brw->wm.compile_data->instruction);
+ free(brw->wm.compile_data->vreg);
+ free(brw->wm.compile_data->refs);
+ free(brw->wm.compile_data->prog_instructions);
+ free(brw->wm.compile_data);
}
for (i = 0; i < brw->state.nr_color_regions; i++)
@@ -103,6 +102,9 @@ static void brw_destroy_context( struct intel_context *intel )
dri_bo_release(&brw->cc.prog_bo);
dri_bo_release(&brw->cc.state_bo);
dri_bo_release(&brw->cc.vp_bo);
+ dri_bo_release(&brw->cc.blend_state_bo);
+ dri_bo_release(&brw->cc.depth_stencil_state_bo);
+ dri_bo_release(&brw->cc.color_calc_state_bo);
}
@@ -140,6 +142,12 @@ static void brw_finish_batch(struct intel_context *intel)
{
struct brw_context *brw = brw_context(&intel->ctx);
brw_emit_query_end(brw);
+
+ if (brw->curbe.curbe_bo) {
+ drm_intel_gem_bo_unmap_gtt(brw->curbe.curbe_bo);
+ drm_intel_bo_unreference(brw->curbe.curbe_bo);
+ brw->curbe.curbe_bo = NULL;
+ }
}
@@ -150,11 +158,6 @@ static void brw_new_batch( struct intel_context *intel )
{
struct brw_context *brw = brw_context(&intel->ctx);
- /* Check that we didn't just wrap our batchbuffer at a bad time. */
- assert(!brw->no_batch_wrap);
-
- brw->curbe.need_new_bo = GL_TRUE;
-
/* Mark all context state as needing to be re-emitted.
* This is probably not as severe as on 915, since almost all of our state
* is just in referenced buffers.
@@ -175,12 +178,6 @@ static void brw_new_batch( struct intel_context *intel )
}
}
-
-static void brw_note_fence( struct intel_context *intel, GLuint fence )
-{
- brw_context(&intel->ctx)->state.dirty.brw |= BRW_NEW_FENCE;
-}
-
static void brw_invalidate_state( struct intel_context *intel, GLuint new_state )
{
/* nothing */
@@ -196,7 +193,6 @@ void brwInitVtbl( struct brw_context *brw )
brw->intel.vtbl.update_texture_state = 0;
brw->intel.vtbl.invalidate_state = brw_invalidate_state;
- brw->intel.vtbl.note_fence = brw_note_fence;
brw->intel.vtbl.new_batch = brw_new_batch;
brw->intel.vtbl.finish_batch = brw_finish_batch;
brw->intel.vtbl.destroy = brw_destroy_context;
diff --git a/i965/brw_wm.c b/i965/brw_wm.c
index 6895f64..991e1b9 100644
--- a/i965/brw_wm.c
+++ b/i965/brw_wm.c
@@ -30,7 +30,6 @@
*/
#include "brw_context.h"
-#include "brw_util.h"
#include "brw_wm.h"
#include "brw_state.h"
@@ -152,11 +151,11 @@ static void do_wm_prog( struct brw_context *brw,
*/
return;
}
- c->instruction = _mesa_calloc(BRW_WM_MAX_INSN * sizeof(*c->instruction));
- c->prog_instructions = _mesa_calloc(BRW_WM_MAX_INSN *
+ c->instruction = calloc(1, BRW_WM_MAX_INSN * sizeof(*c->instruction));
+ c->prog_instructions = calloc(1, BRW_WM_MAX_INSN *
sizeof(*c->prog_instructions));
- c->vreg = _mesa_calloc(BRW_WM_MAX_VREG * sizeof(*c->vreg));
- c->refs = _mesa_calloc(BRW_WM_MAX_REF * sizeof(*c->refs));
+ c->vreg = calloc(1, BRW_WM_MAX_VREG * sizeof(*c->vreg));
+ c->refs = calloc(1, BRW_WM_MAX_REF * sizeof(*c->refs));
} else {
void *instruction = c->instruction;
void *prog_instructions = c->prog_instructions;
@@ -199,12 +198,13 @@ static void do_wm_prog( struct brw_context *brw,
program = brw_get_program(&c->func, &program_size);
dri_bo_unreference(brw->wm.prog_bo);
- brw->wm.prog_bo = brw_upload_cache( &brw->cache, BRW_WM_PROG,
- &c->key, sizeof(c->key),
- NULL, 0,
- program, program_size,
- &c->prog_data,
- &brw->wm.prog_data );
+ brw->wm.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_WM_PROG,
+ &c->key, sizeof(c->key),
+ NULL, 0,
+ program, program_size,
+ &c->prog_data,
+ sizeof(c->prog_data),
+ &brw->wm.prog_data);
}
@@ -336,11 +336,7 @@ static void brw_wm_populate_key( struct brw_context *brw,
* drawable height in order to invert the Y axis.
*/
if (fp->program.Base.InputsRead & FRAG_BIT_WPOS) {
- if (brw->intel.driDrawable != NULL) {
- key->origin_x = brw->intel.driDrawable->x;
- key->origin_y = brw->intel.driDrawable->y;
- key->drawable_height = brw->intel.driDrawable->h;
- }
+ key->drawable_height = ctx->DrawBuffer->Height;
}
key->nr_color_regions = brw->state.nr_color_regions;
diff --git a/i965/brw_wm.h b/i965/brw_wm.h
index 9dcb6e1..88d84ee 100644
--- a/i965/brw_wm.h
+++ b/i965/brw_wm.h
@@ -76,10 +76,9 @@ struct brw_wm_prog_key {
GLushort tex_swizzles[BRW_MAX_TEX_UNIT];
- GLuint program_string_id:32;
- GLushort origin_x, origin_y;
GLushort drawable_height;
GLbitfield64 vp_outputs_written;
+ GLuint program_string_id:32;
};
diff --git a/i965/brw_wm_debug.c b/i965/brw_wm_debug.c
index 2208210..a78cc8b 100644
--- a/i965/brw_wm_debug.c
+++ b/i965/brw_wm_debug.c
@@ -41,21 +41,21 @@ void brw_wm_print_value( struct brw_wm_compile *c,
if (c->state >= PASS2_DONE)
brw_print_reg(value->hw_reg);
else if( value == &c->undef_value )
- _mesa_printf("undef");
+ printf("undef");
else if( value - c->vreg >= 0 &&
value - c->vreg < BRW_WM_MAX_VREG)
- _mesa_printf("r%d", value - c->vreg);
+ printf("r%d", value - c->vreg);
else if (value - c->creg >= 0 &&
value - c->creg < BRW_WM_MAX_PARAM)
- _mesa_printf("c%d", value - c->creg);
+ printf("c%d", value - c->creg);
else if (value - c->payload.input_interp >= 0 &&
value - c->payload.input_interp < FRAG_ATTRIB_MAX)
- _mesa_printf("i%d", value - c->payload.input_interp);
+ printf("i%d", value - c->payload.input_interp);
else if (value - c->payload.depth >= 0 &&
value - c->payload.depth < FRAG_ATTRIB_MAX)
- _mesa_printf("d%d", value - c->payload.depth);
+ printf("d%d", value - c->payload.depth);
else
- _mesa_printf("?");
+ printf("?");
}
void brw_wm_print_ref( struct brw_wm_compile *c,
@@ -64,16 +64,16 @@ void brw_wm_print_ref( struct brw_wm_compile *c,
struct brw_reg hw_reg = ref->hw_reg;
if (ref->unspill_reg)
- _mesa_printf("UNSPILL(%x)/", ref->value->spill_slot);
+ printf("UNSPILL(%x)/", ref->value->spill_slot);
if (c->state >= PASS2_DONE)
brw_print_reg(ref->hw_reg);
else {
- _mesa_printf("%s", hw_reg.negate ? "-" : "");
- _mesa_printf("%s", hw_reg.abs ? "abs/" : "");
+ printf("%s", hw_reg.negate ? "-" : "");
+ printf("%s", hw_reg.abs ? "abs/" : "");
brw_wm_print_value(c, ref->value);
if ((hw_reg.nr&1) || hw_reg.subnr) {
- _mesa_printf("->%d.%d", (hw_reg.nr&1), hw_reg.subnr);
+ printf("->%d.%d", (hw_reg.nr&1), hw_reg.subnr);
}
}
}
@@ -84,22 +84,22 @@ void brw_wm_print_insn( struct brw_wm_compile *c,
GLuint i, arg;
GLuint nr_args = brw_wm_nr_args(inst->opcode);
- _mesa_printf("[");
+ printf("[");
for (i = 0; i < 4; i++) {
if (inst->dst[i]) {
brw_wm_print_value(c, inst->dst[i]);
if (inst->dst[i]->spill_slot)
- _mesa_printf("/SPILL(%x)",inst->dst[i]->spill_slot);
+ printf("/SPILL(%x)",inst->dst[i]->spill_slot);
}
else
- _mesa_printf("#");
+ printf("#");
if (i < 3)
- _mesa_printf(",");
+ printf(",");
}
- _mesa_printf("]");
+ printf("]");
if (inst->writemask != WRITEMASK_XYZW)
- _mesa_printf(".%s%s%s%s",
+ printf(".%s%s%s%s",
GET_BIT(inst->writemask, 0) ? "x" : "",
GET_BIT(inst->writemask, 1) ? "y" : "",
GET_BIT(inst->writemask, 2) ? "z" : "",
@@ -107,58 +107,58 @@ void brw_wm_print_insn( struct brw_wm_compile *c,
switch (inst->opcode) {
case WM_PIXELXY:
- _mesa_printf(" = PIXELXY");
+ printf(" = PIXELXY");
break;
case WM_DELTAXY:
- _mesa_printf(" = DELTAXY");
+ printf(" = DELTAXY");
break;
case WM_PIXELW:
- _mesa_printf(" = PIXELW");
+ printf(" = PIXELW");
break;
case WM_WPOSXY:
- _mesa_printf(" = WPOSXY");
+ printf(" = WPOSXY");
break;
case WM_PINTERP:
- _mesa_printf(" = PINTERP");
+ printf(" = PINTERP");
break;
case WM_LINTERP:
- _mesa_printf(" = LINTERP");
+ printf(" = LINTERP");
break;
case WM_CINTERP:
- _mesa_printf(" = CINTERP");
+ printf(" = CINTERP");
break;
case WM_FB_WRITE:
- _mesa_printf(" = FB_WRITE");
+ printf(" = FB_WRITE");
break;
case WM_FRONTFACING:
- _mesa_printf(" = FRONTFACING");
+ printf(" = FRONTFACING");
break;
default:
- _mesa_printf(" = %s", _mesa_opcode_string(inst->opcode));
+ printf(" = %s", _mesa_opcode_string(inst->opcode));
break;
}
if (inst->saturate)
- _mesa_printf("_SAT");
+ printf("_SAT");
for (arg = 0; arg < nr_args; arg++) {
- _mesa_printf(" [");
+ printf(" [");
for (i = 0; i < 4; i++) {
if (inst->src[arg][i]) {
brw_wm_print_ref(c, inst->src[arg][i]);
}
else
- _mesa_printf("%%");
+ printf("%%");
if (i < 3)
- _mesa_printf(",");
+ printf(",");
else
- _mesa_printf("]");
+ printf("]");
}
}
- _mesa_printf("\n");
+ printf("\n");
}
void brw_wm_print_program( struct brw_wm_compile *c,
@@ -166,9 +166,9 @@ void brw_wm_print_program( struct brw_wm_compile *c,
{
GLuint insn;
- _mesa_printf("%s:\n", stage);
+ printf("%s:\n", stage);
for (insn = 0; insn < c->nr_insns; insn++)
brw_wm_print_insn(c, &c->instruction[insn]);
- _mesa_printf("\n");
+ printf("\n");
}
diff --git a/i965/brw_wm_emit.c b/i965/brw_wm_emit.c
index 5390fd2..9315bca 100644
--- a/i965/brw_wm_emit.c
+++ b/i965/brw_wm_emit.c
@@ -138,19 +138,43 @@ void emit_wpos_xy(struct brw_wm_compile *c,
* X and Y channels.
*/
if (mask & WRITEMASK_X) {
- /* X' = X - origin */
- brw_ADD(p,
- dst[0],
- retype(arg0[0], BRW_REGISTER_TYPE_W),
- brw_imm_d(0 - c->key.origin_x));
+ if (c->fp->program.PixelCenterInteger) {
+ /* X' = X */
+ brw_MOV(p,
+ dst[0],
+ retype(arg0[0], BRW_REGISTER_TYPE_W));
+ } else {
+ /* X' = X + 0.5 */
+ brw_ADD(p,
+ dst[0],
+ retype(arg0[0], BRW_REGISTER_TYPE_W),
+ brw_imm_f(0.5));
+ }
}
if (mask & WRITEMASK_Y) {
- /* Y' = height - (Y - origin_y) = height + origin_y - Y */
- brw_ADD(p,
- dst[1],
- negate(retype(arg0[1], BRW_REGISTER_TYPE_W)),
- brw_imm_d(c->key.origin_y + c->key.drawable_height - 1));
+ if (c->fp->program.OriginUpperLeft) {
+ if (c->fp->program.PixelCenterInteger) {
+ /* Y' = Y */
+ brw_MOV(p,
+ dst[1],
+ retype(arg0[1], BRW_REGISTER_TYPE_W));
+ } else {
+ /* Y' = Y + 0.5 */
+ brw_ADD(p,
+ dst[1],
+ retype(arg0[1], BRW_REGISTER_TYPE_W),
+ brw_imm_f(0.5));
+ }
+ } else {
+ float center_offset = c->fp->program.PixelCenterInteger ? 0.0 : 0.5;
+
+ /* Y' = (height - 1) - Y + center */
+ brw_ADD(p,
+ dst[1],
+ negate(retype(arg0[1], BRW_REGISTER_TYPE_W)),
+ brw_imm_f(c->key.drawable_height - 1 + center_offset));
+ }
}
}
@@ -692,7 +716,7 @@ void emit_xpd(struct brw_compile *p,
{
GLuint i;
- assert(!(mask & WRITEMASK_W) == WRITEMASK_X);
+ assert((mask & WRITEMASK_W) != WRITEMASK_W);
for (i = 0 ; i < 3; i++) {
if (mask & (1<<i)) {
@@ -830,6 +854,7 @@ void emit_tex(struct brw_wm_compile *c,
GLboolean shadow)
{
struct brw_compile *p = &c->func;
+ struct intel_context *intel = &p->brw->intel;
struct brw_reg dst_retyped;
GLuint cur_mrf = 2, response_length;
GLuint i, nr_texcoords;
@@ -873,7 +898,7 @@ void emit_tex(struct brw_wm_compile *c,
}
/* Pre-Ironlake, the 8-wide sampler always took u,v,r. */
- if (!BRW_IS_IGDNG(p->brw) && c->dispatch_width == 8)
+ if (!intel->is_ironlake && c->dispatch_width == 8)
nr_texcoords = 3;
/* For shadow comparisons, we have to supply u,v,r. */
@@ -891,7 +916,7 @@ void emit_tex(struct brw_wm_compile *c,
/* Fill in the shadow comparison reference value. */
if (shadow) {
- if (BRW_IS_IGDNG(p->brw)) {
+ if (intel->is_ironlake) {
/* Fill in the cube map array index value. */
brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
cur_mrf += mrf_per_channel;
@@ -904,7 +929,7 @@ void emit_tex(struct brw_wm_compile *c,
cur_mrf += mrf_per_channel;
}
- if (BRW_IS_IGDNG(p->brw)) {
+ if (intel->is_ironlake) {
if (shadow)
msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_IGDNG;
else
@@ -944,6 +969,7 @@ void emit_txb(struct brw_wm_compile *c,
GLuint sampler)
{
struct brw_compile *p = &c->func;
+ struct intel_context *intel = &p->brw->intel;
GLuint msgLength;
GLuint msg_type;
GLuint mrf_per_channel;
@@ -955,8 +981,8 @@ void emit_txb(struct brw_wm_compile *c,
* undefined, and trust the execution mask to keep the undefined pixels
* from mattering.
*/
- if (c->dispatch_width == 16 || !BRW_IS_IGDNG(p->brw)) {
- if (BRW_IS_IGDNG(p->brw))
+ if (c->dispatch_width == 16 || !intel->is_ironlake) {
+ if (intel->is_ironlake)
msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG;
else
msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
@@ -1084,7 +1110,7 @@ static void emit_kil_nv( struct brw_wm_compile *c )
brw_push_insn_state(p);
brw_set_mask_control(p, BRW_MASK_DISABLE);
- brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK
+ brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); /* IMASK */
brw_AND(p, r0uw, c->emit_mask_reg, r0uw);
brw_pop_insn_state(p);
}
@@ -1174,7 +1200,7 @@ void emit_fb_write(struct brw_wm_compile *c,
brw_push_insn_state(p);
for (channel = 0; channel < 4; channel++) {
- if (c->dispatch_width == 16 && (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))) {
+ if (c->dispatch_width == 16 && brw->has_compr4) {
/* By setting the high bit of the MRF register number, we indicate
* that we want COMPR4 mode - instead of doing the usual destination
* + 1 for the second half we get destination + 4.
@@ -1596,10 +1622,10 @@ void brw_wm_emit( struct brw_wm_compile *c )
break;
default:
- _mesa_printf("Unsupported opcode %i (%s) in fragment shader\n",
- inst->opcode, inst->opcode < MAX_OPCODE ?
- _mesa_opcode_string(inst->opcode) :
- "unknown");
+ printf("Unsupported opcode %i (%s) in fragment shader\n",
+ inst->opcode, inst->opcode < MAX_OPCODE ?
+ _mesa_opcode_string(inst->opcode) :
+ "unknown");
}
for (i = 0; i < 4; i++)
@@ -1612,9 +1638,9 @@ void brw_wm_emit( struct brw_wm_compile *c )
if (INTEL_DEBUG & DEBUG_WM) {
int i;
- _mesa_printf("wm-native:\n");
+ printf("wm-native:\n");
for (i = 0; i < p->nr_insn; i++)
brw_disasm(stderr, &p->store[i]);
- _mesa_printf("\n");
+ printf("\n");
}
}
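
emit_wpos_xy() now derives the fragment window position from the fragment program's own origin and pixel-center conventions instead of the old drawable origin key. A small host-side sketch of the same arithmetic (not driver code; plain C for illustration):

    #include <stdio.h>

    /* Compute the gl_FragCoord value the shader should see from the
     * hardware pixel coordinate, following the same cases as
     * emit_wpos_xy(): integer pixel centers add nothing, otherwise add
     * 0.5; a lower-left origin flips Y against (height - 1).
     */
    static void
    adjust_wpos(int hw_x, int hw_y, int drawable_height,
                int origin_upper_left, int pixel_center_integer,
                float *out_x, float *out_y)
    {
       float center = pixel_center_integer ? 0.0f : 0.5f;

       *out_x = hw_x + center;

       if (origin_upper_left)
          *out_y = hw_y + center;
       else
          *out_y = (drawable_height - 1) - hw_y + center;
    }

    int
    main(void)
    {
       float x, y;

       /* Default GL convention: lower-left origin, half-integer centers. */
       adjust_wpos(10, 20, 480, 0, 0, &x, &y);
       printf("WPOS = (%.1f, %.1f)\n", x, y);   /* (10.5, 459.5) */
       return 0;
    }
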
diff --git a/i965/brw_wm_fp.c b/i965/brw_wm_fp.c
index 7d03179..d73c391 100644
--- a/i965/brw_wm_fp.c
+++ b/i965/brw_wm_fp.c
@@ -138,7 +138,6 @@ static struct prog_dst_register dst_reg(GLuint file, GLuint idx)
reg.CondMask = COND_TR;
reg.CondSwizzle = 0;
reg.CondSrc = 0;
- reg.pad = 0;
return reg;
}
@@ -160,7 +159,7 @@ static struct prog_dst_register get_temp( struct brw_wm_compile *c )
int bit = _mesa_ffs( ~c->fp_temp );
if (!bit) {
- _mesa_printf("%s: out of temporaries\n", __FILE__);
+ printf("%s: out of temporaries\n", __FILE__);
exit(1);
}
@@ -1035,7 +1034,7 @@ static void print_insns( const struct prog_instruction *insn,
{
GLuint i;
for (i = 0; i < nr; i++, insn++) {
- _mesa_printf("%3d: ", i);
+ printf("%3d: ", i);
if (insn->Opcode < MAX_OPCODE)
_mesa_print_instruction(insn);
else if (insn->Opcode < MAX_WM_OPCODE) {
@@ -1046,7 +1045,7 @@ static void print_insns( const struct prog_instruction *insn,
3);
}
else
- _mesa_printf("965 Opcode %d\n", insn->Opcode);
+ printf("965 Opcode %d\n", insn->Opcode);
}
}
@@ -1061,9 +1060,9 @@ void brw_wm_pass_fp( struct brw_wm_compile *c )
GLuint insn;
if (INTEL_DEBUG & DEBUG_WM) {
- _mesa_printf("pre-fp:\n");
+ printf("pre-fp:\n");
_mesa_print_program(&fp->program.Base);
- _mesa_printf("\n");
+ printf("\n");
}
c->pixel_xy = src_undef();
@@ -1169,9 +1168,9 @@ void brw_wm_pass_fp( struct brw_wm_compile *c )
}
if (INTEL_DEBUG & DEBUG_WM) {
- _mesa_printf("pass_fp:\n");
+ printf("pass_fp:\n");
print_insns( c->prog_instructions, c->nr_fp_insns );
- _mesa_printf("\n");
+ printf("\n");
}
}
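
get_temp() in this file (visible in the hunk context above) allocates temporaries from a bitmask with _mesa_ffs(~c->fp_temp). A standalone sketch of that free-bit allocator, with POSIX ffs() standing in for _mesa_ffs():

    #include <stdio.h>
    #include <strings.h>   /* ffs() */

    /* ffs() on the complement of the mask finds the lowest clear bit,
     * i.e. the first free register.  Returns -1 when the mask is full.
     */
    static int
    alloc_temp(unsigned *mask)
    {
       int bit = ffs(~(int) *mask);

       if (!bit)
          return -1;

       *mask |= 1u << (bit - 1);
       return bit - 1;
    }

    int
    main(void)
    {
       unsigned mask = 0;
       int a = alloc_temp(&mask);
       int b = alloc_temp(&mask);

       printf("allocated temps %d and %d, mask 0x%x\n", a, b, mask);
       return 0;
    }
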
diff --git a/i965/brw_wm_glsl.c b/i965/brw_wm_glsl.c
index e8c2cb6..562608e 100644
--- a/i965/brw_wm_glsl.c
+++ b/i965/brw_wm_glsl.c
@@ -743,7 +743,7 @@ static void emit_kil(struct brw_wm_compile *c)
struct brw_reg depth = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
brw_push_insn_state(p);
brw_set_mask_control(p, BRW_MASK_DISABLE);
- brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK
+ brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); /* IMASK */
brw_AND(p, depth, c->emit_mask_reg, depth);
brw_pop_insn_state(p);
}
@@ -1826,6 +1826,7 @@ get_argument_regs(struct brw_wm_compile *c,
static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
{
+ struct intel_context *intel = &brw->intel;
#define MAX_IF_DEPTH 32
#define MAX_LOOP_DEPTH 32
struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH];
@@ -1848,7 +1849,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
c->cur_inst = i;
#if 0
- _mesa_printf("Inst %d: ", i);
+ printf("Inst %d: ", i);
_mesa_print_instruction(inst);
#endif
@@ -1876,10 +1877,6 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
else
brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE);
- dst_flags = inst->DstReg.WriteMask;
- if (inst->SaturateMode == SATURATE_ZERO_ONE)
- dst_flags |= SATURATE;
-
switch (inst->Opcode) {
case WM_PIXELXY:
emit_pixel_xy(c, dst, dst_flags);
@@ -2043,6 +2040,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
if_inst[if_depth++] = brw_IF(p, BRW_EXECUTE_8);
break;
case OPCODE_ELSE:
+ assert(if_depth > 0);
if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]);
break;
case OPCODE_ENDIF:
@@ -2096,9 +2094,10 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
struct brw_instruction *inst0, *inst1;
GLuint br = 1;
- if (BRW_IS_IGDNG(brw))
+ if (intel->is_ironlake)
br = 2;
-
+
+ assert(loop_depth > 0);
loop_depth--;
inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]);
/* patch all the BREAK/CONT instructions from last BGNLOOP */
@@ -2116,7 +2115,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
}
break;
default:
- _mesa_printf("unsupported IR in fragment shader %d\n",
+ printf("unsupported IR in fragment shader %d\n",
inst->Opcode);
}
@@ -2128,10 +2127,10 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
post_wm_emit(c);
if (INTEL_DEBUG & DEBUG_WM) {
- _mesa_printf("wm-native:\n");
+ printf("wm-native:\n");
for (i = 0; i < p->nr_insn; i++)
brw_disasm(stderr, &p->store[i]);
- _mesa_printf("\n");
+ printf("\n");
}
}
@@ -2142,7 +2141,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
{
if (INTEL_DEBUG & DEBUG_WM) {
- _mesa_printf("brw_wm_glsl_emit:\n");
+ printf("brw_wm_glsl_emit:\n");
}
/* initial instruction translation/simplification */
diff --git a/i965/brw_wm_pass0.c b/i965/brw_wm_pass0.c
index ff4c082..60bd92e 100644
--- a/i965/brw_wm_pass0.c
+++ b/i965/brw_wm_pass0.c
@@ -105,7 +105,7 @@ static const struct brw_wm_ref *get_param_ref( struct brw_wm_compile *c,
GLuint i = c->prog_data.nr_params++;
if (i >= BRW_WM_MAX_PARAM) {
- _mesa_printf("%s: out of params\n", __FUNCTION__);
+ printf("%s: out of params\n", __FUNCTION__);
c->prog_data.error = 1;
return NULL;
}
@@ -154,7 +154,7 @@ static const struct brw_wm_ref *get_const_ref( struct brw_wm_compile *c,
return c->constref[i].ref;
}
else {
- _mesa_printf("%s: out of constrefs\n", __FUNCTION__);
+ printf("%s: out of constrefs\n", __FUNCTION__);
c->prog_data.error = 1;
return NULL;
}
diff --git a/i965/brw_wm_sampler_state.c b/i965/brw_wm_sampler_state.c
index aa2e519..d7650af 100644
--- a/i965/brw_wm_sampler_state.c
+++ b/i965/brw_wm_sampler_state.c
@@ -89,7 +89,6 @@ struct wm_sampler_key {
float max_aniso;
GLenum minfilter, magfilter;
GLenum comparemode, comparefunc;
- dri_bo *sdc_bo;
/** If target is cubemap, take context setting.
*/
@@ -105,7 +104,7 @@ static void brw_update_sampler_state(struct wm_sampler_entry *key,
dri_bo *sdc_bo,
struct brw_sampler_state *sampler)
{
- _mesa_memset(sampler, 0, sizeof(*sampler));
+ memset(sampler, 0, sizeof(*sampler));
switch (key->minfilter) {
case GL_NEAREST:
@@ -230,7 +229,7 @@ brw_wm_sampler_populate_key(struct brw_context *brw,
GLcontext *ctx = &brw->intel.ctx;
int unit;
- memset(key, 0, sizeof(*key));
+ key->sampler_count = 0;
for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) {
if (ctx->Texture.Unit[unit]._ReallyEnabled) {
@@ -241,6 +240,8 @@ brw_wm_sampler_populate_key(struct brw_context *brw,
struct gl_texture_image *firstImage =
texObj->Image[0][intelObj->firstLevel];
+ memset(entry, 0, sizeof(*entry));
+
entry->tex_target = texObj->Target;
entry->seamless_cube_map = (texObj->Target == GL_TEXTURE_CUBE_MAP)
@@ -262,10 +263,10 @@ brw_wm_sampler_populate_key(struct brw_context *brw,
dri_bo_unreference(brw->wm.sdc_bo[unit]);
if (firstImage->_BaseFormat == GL_DEPTH_COMPONENT) {
float bordercolor[4] = {
- texObj->BorderColor[0],
- texObj->BorderColor[0],
- texObj->BorderColor[0],
- texObj->BorderColor[0]
+ texObj->BorderColor.f[0],
+ texObj->BorderColor.f[0],
+ texObj->BorderColor.f[0],
+ texObj->BorderColor.f[0]
};
/* GL specs that border color for depth textures is taken from the
* R channel, while the hardware uses A. Spam R into all the
@@ -274,7 +275,7 @@ brw_wm_sampler_populate_key(struct brw_context *brw,
brw->wm.sdc_bo[unit] = upload_default_color(brw, bordercolor);
} else {
brw->wm.sdc_bo[unit] = upload_default_color(brw,
- texObj->BorderColor);
+ texObj->BorderColor.f);
}
key->sampler_count = unit + 1;
}
@@ -289,7 +290,7 @@ static void upload_wm_samplers( struct brw_context *brw )
{
GLcontext *ctx = &brw->intel.ctx;
struct wm_sampler_key key;
- int i;
+ int i, sampler_key_size;
brw_wm_sampler_populate_key(brw, &key);
@@ -303,8 +304,11 @@ static void upload_wm_samplers( struct brw_context *brw )
if (brw->wm.sampler_count == 0)
return;
+ /* Only include the populated portion of the key in the search. */
+ sampler_key_size = offsetof(struct wm_sampler_key,
+ sampler[key.sampler_count]);
brw->wm.sampler_bo = brw_search_cache(&brw->cache, BRW_SAMPLER,
- &key, sizeof(key),
+ &key, sampler_key_size,
brw->wm.sdc_bo, key.sampler_count,
NULL);
@@ -324,10 +328,9 @@ static void upload_wm_samplers( struct brw_context *brw )
}
brw->wm.sampler_bo = brw_upload_cache(&brw->cache, BRW_SAMPLER,
- &key, sizeof(key),
+ &key, sampler_key_size,
brw->wm.sdc_bo, key.sampler_count,
- &sampler, sizeof(sampler),
- NULL, NULL);
+ &sampler, sizeof(sampler));
/* Emit SDC relocations */
for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
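
The sampler-state change above searches and uploads the cache entry with only the populated prefix of the key, sized via offsetof() on the sampler array. A tiny illustration of that sizing trick with made-up structures; note that a non-constant index inside offsetof() is a compiler extension (GCC accepts it, and the driver uses the same form):

    #include <stdio.h>
    #include <stddef.h>

    #define MAX_SAMPLERS 16

    /* Made-up stand-ins for the per-sampler entry and the key holding an
     * array of them plus a count of how many are actually populated.
     */
    struct sampler_entry {
       int wrap_s, wrap_t, min_filter, mag_filter;
    };

    struct sampler_key {
       int sampler_count;
       struct sampler_entry sampler[MAX_SAMPLERS];
    };

    int
    main(void)
    {
       struct sampler_key key = { .sampler_count = 3 };

       /* Size of the key up to and including the populated entries only,
        * so stale data in unused trailing entries never affects cache
        * lookups.  (GCC extension: runtime index in offsetof; the fully
        * portable form is offsetof(...) + n * sizeof(struct sampler_entry).)
        */
       size_t used = offsetof(struct sampler_key, sampler[key.sampler_count]);

       printf("full key: %zu bytes, populated prefix: %zu bytes\n",
              sizeof(key), used);
       return 0;
    }
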
diff --git a/i965/brw_wm_state.c b/i965/brw_wm_state.c
index f89ed9b..a7f80db 100644
--- a/i965/brw_wm_state.c
+++ b/i965/brw_wm_state.c
@@ -49,8 +49,6 @@ struct brw_wm_unit_key {
unsigned int curbe_offset;
unsigned int urb_size;
- unsigned int max_threads;
-
unsigned int nr_surfaces, sampler_count;
GLboolean uses_depth, computes_depth, uses_kill, is_glsl;
GLboolean polygon_stipple, stats_wm, line_stipple, offset_enable;
@@ -67,18 +65,6 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
memset(key, 0, sizeof(*key));
- if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
- key->max_threads = 1;
- else {
- /* WM maximum threads is number of EUs times number of threads per EU. */
- if (BRW_IS_IGDNG(brw))
- key->max_threads = 12 * 6;
- else if (BRW_IS_G4X(brw))
- key->max_threads = 10 * 5;
- else
- key->max_threads = 8 * 4;
- }
-
/* CACHE_NEW_WM_PROG */
key->total_grf = brw->wm.prog_data->total_grf;
key->urb_entry_read_length = brw->wm.prog_data->urb_read_length;
@@ -140,6 +126,7 @@ static dri_bo *
wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
dri_bo **reloc_bufs)
{
+ struct intel_context *intel = &brw->intel;
struct brw_wm_unit_state wm;
dri_bo *bo;
@@ -150,7 +137,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
wm.thread1.depth_coef_urb_read_offset = 1;
wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
- if (BRW_IS_IGDNG(brw))
+ if (intel->is_ironlake)
wm.thread1.binding_table_entry_count = 0; /* hardware requirement */
else
wm.thread1.binding_table_entry_count = key->nr_surfaces;
@@ -170,7 +157,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
wm.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
wm.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
- if (BRW_IS_IGDNG(brw))
+ if (intel->is_ironlake)
wm.wm4.sampler_count = 0; /* hardware requirement */
else
wm.wm4.sampler_count = (key->sampler_count + 1) / 4;
@@ -191,7 +178,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
else
wm.wm5.enable_16_pix = 1;
- wm.wm5.max_threads = key->max_threads - 1;
+ wm.wm5.max_threads = brw->wm_max_threads - 1;
wm.wm5.thread_dispatch_enable = 1; /* AKA: color_write */
wm.wm5.legacy_line_rast = 0;
wm.wm5.legacy_global_depth_bias = 0;
@@ -223,8 +210,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
bo = brw_upload_cache(&brw->cache, BRW_WM_UNIT,
key, sizeof(*key),
reloc_bufs, 3,
- &wm, sizeof(wm),
- NULL, NULL);
+ &wm, sizeof(wm));
/* Emit WM program relocation */
dri_bo_emit_reloc(bo,
@@ -268,7 +254,7 @@ static void upload_wm_unit( struct brw_context *brw )
*/
assert(key.total_scratch <= 12 * 1024);
if (key.total_scratch) {
- GLuint total = key.total_scratch * key.max_threads;
+ GLuint total = key.total_scratch * brw->wm_max_threads;
if (brw->wm.scratch_bo && total > brw->wm.scratch_bo->size) {
dri_bo_unreference(brw->wm.scratch_bo);
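
upload_wm_unit() above sizes the scratch space as per-thread scratch times the chip-wide wm_max_threads and only replaces the buffer when it has to grow. A rough sketch of that pattern with malloc standing in for the driver's buffer objects; the reallocation half is inferred from the visible context, so treat it as an assumption:

    #include <stdio.h>
    #include <stdlib.h>

    struct scratch {
       void *bo;
       size_t size;
    };

    /* Total scratch = per-thread amount * maximum thread count; keep the
     * existing buffer when it is already big enough, otherwise grow it.
     */
    static void
    ensure_scratch(struct scratch *s, size_t per_thread, unsigned max_threads)
    {
       size_t total = per_thread * max_threads;

       if (s->bo && total > s->size) {
          free(s->bo);
          s->bo = NULL;
       }
       if (!s->bo) {
          s->bo = malloc(total);
          s->size = total;
       }
    }

    int
    main(void)
    {
       struct scratch s = { NULL, 0 };

       ensure_scratch(&s, 1024, 8 * 4);   /* original 965: 8 EUs x 4 threads */
       printf("scratch size: %zu bytes\n", s.size);
       free(s.bo);
       return 0;
    }
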
diff --git a/i965/brw_wm_surface_state.c b/i965/brw_wm_surface_state.c
index 8335e5a..ce0bf0b 100644
--- a/i965/brw_wm_surface_state.c
+++ b/i965/brw_wm_surface_state.c
@@ -207,33 +207,14 @@ brw_create_texture_surface( struct brw_context *brw,
surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
surf.ss0.surface_type = translate_tex_target(key->target);
- if (key->bo) {
- surf.ss0.surface_format = translate_tex_format(key->format,
- key->internal_format,
- key->depthmode);
- }
- else {
- switch (key->depth) {
- case 32:
- surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
- break;
- default:
- case 24:
- surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8X8_UNORM;
- break;
- case 16:
- surf.ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM;
- break;
- }
- }
+ surf.ss0.surface_format = translate_tex_format(key->format,
+ key->internal_format,
+ key->depthmode);
/* This is ok for all textures with channel width 8bit or less:
*/
/* surf.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */
- if (key->bo)
- surf.ss1.base_addr = key->bo->offset; /* reloc */
- else
- surf.ss1.base_addr = key->offset;
+ surf.ss1.base_addr = key->bo->offset; /* reloc */
surf.ss2.mip_count = key->last_level - key->first_level;
surf.ss2.width = key->width - 1;
@@ -255,18 +236,14 @@ brw_create_texture_surface( struct brw_context *brw,
bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE,
key, sizeof(*key),
- &key->bo, key->bo ? 1 : 0,
- &surf, sizeof(surf),
- NULL, NULL);
-
- if (key->bo) {
- /* Emit relocation to surface contents */
- dri_bo_emit_reloc(bo,
- I915_GEM_DOMAIN_SAMPLER, 0,
- 0,
- offsetof(struct brw_surface_state, ss1),
- key->bo);
- }
+ &key->bo, 1,
+ &surf, sizeof(surf));
+
+ /* Emit relocation to surface contents */
+ drm_intel_bo_emit_reloc(bo, offsetof(struct brw_surface_state, ss1),
+ key->bo, 0,
+ I915_GEM_DOMAIN_SAMPLER, 0);
+
return bo;
}
@@ -282,19 +259,12 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit )
memset(&key, 0, sizeof(key));
- if (intelObj->imageOverride) {
- key.pitch = intelObj->pitchOverride / intelObj->mt->cpp;
- key.depth = intelObj->depthOverride;
- key.bo = NULL;
- key.offset = intelObj->textureOffset;
- } else {
- key.format = firstImage->TexFormat;
- key.internal_format = firstImage->InternalFormat;
- key.pitch = intelObj->mt->pitch;
- key.depth = firstImage->Depth;
- key.bo = intelObj->mt->region->buffer;
- key.offset = 0;
- }
+ key.format = firstImage->TexFormat;
+ key.internal_format = firstImage->InternalFormat;
+ key.pitch = intelObj->mt->pitch;
+ key.depth = firstImage->Depth;
+ key.bo = intelObj->mt->region->buffer;
+ key.offset = 0;
key.target = tObj->Target;
key.depthmode = tObj->DepthMode;
@@ -309,7 +279,7 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit )
brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache,
BRW_SS_SURFACE,
&key, sizeof(key),
- &key.bo, key.bo ? 1 : 0,
+ &key.bo, 1,
NULL);
if (brw->wm.surf_bo[surf] == NULL) {
brw->wm.surf_bo[surf] = brw_create_texture_surface(brw, &key);
@@ -337,10 +307,7 @@ brw_create_constant_surface( struct brw_context *brw,
surf.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
assert(key->bo);
- if (key->bo)
- surf.ss1.base_addr = key->bo->offset; /* reloc */
- else
- surf.ss1.base_addr = key->offset;
+ surf.ss1.base_addr = key->bo->offset; /* reloc */
surf.ss2.width = w & 0x7f; /* bits 6:0 of size or width */
surf.ss2.height = (w >> 7) & 0x1fff; /* bits 19:7 of size or width */
@@ -350,21 +317,16 @@ brw_create_constant_surface( struct brw_context *brw,
bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE,
key, sizeof(*key),
- &key->bo, key->bo ? 1 : 0,
- &surf, sizeof(surf),
- NULL, NULL);
-
- if (key->bo) {
- /* Emit relocation to surface contents. Section 5.1.1 of the gen4
- * bspec ("Data Cache") says that the data cache does not exist as
- * a separate cache and is just the sampler cache.
- */
- dri_bo_emit_reloc(bo,
- I915_GEM_DOMAIN_SAMPLER, 0,
- 0,
- offsetof(struct brw_surface_state, ss1),
- key->bo);
- }
+ &key->bo, 1,
+ &surf, sizeof(surf));
+
+ /* Emit relocation to surface contents. Section 5.1.1 of the gen4
+ * bspec ("Data Cache") says that the data cache does not exist as
+ * a separate cache and is just the sampler cache.
+ */
+ drm_intel_bo_emit_reloc(bo, offsetof(struct brw_surface_state, ss1),
+ key->bo, 0,
+ I915_GEM_DOMAIN_SAMPLER, 0);
return bo;
}
@@ -422,7 +384,7 @@ brw_update_wm_constant_surface( GLcontext *ctx,
/* If there's no constant buffer, then no surface BO is needed to point at
* it.
*/
- if (fp->const_buffer == 0) {
+ if (fp->const_buffer == NULL) {
drm_intel_bo_unreference(brw->wm.surf_bo[surf]);
brw->wm.surf_bo[surf] = NULL;
return;
@@ -450,7 +412,7 @@ brw_update_wm_constant_surface( GLcontext *ctx,
brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache,
BRW_SS_SURFACE,
&key, sizeof(key),
- &key.bo, key.bo ? 1 : 0,
+ &key.bo, 1,
NULL);
if (brw->wm.surf_bo[surf] == NULL) {
brw->wm.surf_bo[surf] = brw_create_constant_surface(brw, &key);
@@ -511,7 +473,8 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
struct gl_renderbuffer *rb,
unsigned int unit)
{
- GLcontext *ctx = &brw->intel.ctx;
+ struct intel_context *intel = &brw->intel;
+ GLcontext *ctx = &intel->ctx;
dri_bo *region_bo = NULL;
struct intel_renderbuffer *irb = intel_renderbuffer(rb);
struct intel_region *region = irb ? irb->region : NULL;
@@ -522,7 +485,8 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
GLubyte color_mask[4];
GLboolean color_blend;
uint32_t tiling;
- uint32_t draw_offset;
+ uint32_t draw_x;
+ uint32_t draw_y;
} key;
memset(&key, 0, sizeof(key));
@@ -564,7 +528,8 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
}
key.pitch = region->pitch;
key.cpp = region->cpp;
- key.draw_offset = region->draw_offset; /* cur 3d or cube face offset */
+ key.draw_x = region->draw_x;
+ key.draw_y = region->draw_y;
} else {
key.surface_type = BRW_SURFACE_NULL;
key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
@@ -572,20 +537,24 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
key.width = 1;
key.height = 1;
key.cpp = 4;
- key.draw_offset = 0;
+ key.draw_x = 0;
+ key.draw_y = 0;
}
- /* _NEW_COLOR */
- memcpy(key.color_mask, ctx->Color.ColorMask,
- sizeof(key.color_mask));
- /* As mentioned above, disable writes to the alpha component when the
- * renderbuffer is XRGB.
- */
- if (ctx->DrawBuffer->Visual.alphaBits == 0)
- key.color_mask[3] = GL_FALSE;
+ if (intel->gen < 6) {
+ /* _NEW_COLOR */
+ memcpy(key.color_mask, ctx->Color.ColorMask[unit],
+ sizeof(key.color_mask));
- key.color_blend = (!ctx->Color._LogicOpEnabled &&
- ctx->Color.BlendEnabled);
+ /* As mentioned above, disable writes to the alpha component when the
+ * renderbuffer is XRGB.
+ */
+ if (ctx->DrawBuffer->Visual.alphaBits == 0)
+ key.color_mask[3] = GL_FALSE;
+
+ key.color_blend = (!ctx->Color._LogicOpEnabled &&
+ (ctx->Color.BlendEnabled & (1 << unit)));
+ }
dri_bo_unreference(brw->wm.surf_bo[unit]);
brw->wm.surf_bo[unit] = brw_search_cache(&brw->surface_cache,
@@ -602,25 +571,32 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
surf.ss0.surface_format = key.surface_format;
surf.ss0.surface_type = key.surface_type;
if (key.tiling == I915_TILING_NONE) {
- surf.ss1.base_addr = key.draw_offset;
+ surf.ss1.base_addr = (key.draw_x + key.draw_y * key.pitch) * key.cpp;
} else {
- uint32_t tile_offset = key.draw_offset % 4096;
-
- surf.ss1.base_addr = key.draw_offset - tile_offset;
-
- assert(BRW_IS_G4X(brw) || tile_offset == 0);
- if (BRW_IS_G4X(brw)) {
- if (key.tiling == I915_TILING_X) {
- /* Note that the low bits of these fields are missing, so
- * there's the possibility of getting in trouble.
- */
- surf.ss5.x_offset = (tile_offset % 512) / key.cpp / 4;
- surf.ss5.y_offset = tile_offset / 512 / 2;
- } else {
- surf.ss5.x_offset = (tile_offset % 128) / key.cpp / 4;
- surf.ss5.y_offset = tile_offset / 128 / 2;
- }
+ uint32_t tile_base, tile_x, tile_y;
+ uint32_t pitch = key.pitch * key.cpp;
+
+ if (key.tiling == I915_TILING_X) {
+ tile_x = key.draw_x % (512 / key.cpp);
+ tile_y = key.draw_y % 8;
+ tile_base = ((key.draw_y / 8) * (8 * pitch));
+ tile_base += (key.draw_x - tile_x) / (512 / key.cpp) * 4096;
+ } else {
+ /* Y */
+ tile_x = key.draw_x % (128 / key.cpp);
+ tile_y = key.draw_y % 32;
+ tile_base = ((key.draw_y / 32) * (32 * pitch));
+ tile_base += (key.draw_x - tile_x) / (128 / key.cpp) * 4096;
}
+ assert(intel->is_g4x || (tile_x == 0 && tile_y == 0));
+ assert(tile_x % 4 == 0);
+ assert(tile_y % 2 == 0);
+ /* Note that the low bits of these fields are missing, so
+ * there's the possibility of getting in trouble.
+ */
+ surf.ss1.base_addr = tile_base;
+ surf.ss5.x_offset = tile_x / 4;
+ surf.ss5.y_offset = tile_y / 2;
}
if (region_bo != NULL)
surf.ss1.base_addr += region_bo->offset; /* reloc */
@@ -630,20 +606,21 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
brw_set_surface_tiling(&surf, key.tiling);
surf.ss3.pitch = (key.pitch * key.cpp) - 1;
- /* _NEW_COLOR */
- surf.ss0.color_blend = key.color_blend;
- surf.ss0.writedisable_red = !key.color_mask[0];
- surf.ss0.writedisable_green = !key.color_mask[1];
- surf.ss0.writedisable_blue = !key.color_mask[2];
- surf.ss0.writedisable_alpha = !key.color_mask[3];
+ if (intel->gen < 6) {
+ /* _NEW_COLOR */
+ surf.ss0.color_blend = key.color_blend;
+ surf.ss0.writedisable_red = !key.color_mask[0];
+ surf.ss0.writedisable_green = !key.color_mask[1];
+ surf.ss0.writedisable_blue = !key.color_mask[2];
+ surf.ss0.writedisable_alpha = !key.color_mask[3];
+ }
/* Key size will never match key size for textures, so we're safe. */
brw->wm.surf_bo[unit] = brw_upload_cache(&brw->surface_cache,
BRW_SS_SURFACE,
&key, sizeof(key),
&region_bo, 1,
- &surf, sizeof(surf),
- NULL, NULL);
+ &surf, sizeof(surf));
if (region_bo != NULL) {
/* We might sample from it, and we might render to it, so flag
* them both. We might be able to figure out from other state
@@ -690,8 +667,7 @@ brw_wm_get_binding_table(struct brw_context *brw)
bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND,
NULL, 0,
brw->wm.surf_bo, brw->wm.nr_surfaces,
- data, data_size,
- NULL, NULL);
+ data, data_size);
/* Emit binding table relocations to surface state */
for (i = 0; i < BRW_WM_MAX_SURF; i++) {
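
brw_update_renderbuffer_surface() now derives the surface base address from draw_x/draw_y, splitting the coordinate into a 4KB-tile-aligned byte offset plus a small intra-tile x/y offset. A standalone sketch of that arithmetic, assuming the same tile geometry the patch uses (X tiles: 512 bytes by 8 rows; Y tiles: 128 bytes by 32 rows):

    #include <stdio.h>
    #include <stdint.h>

    enum tiling { TILING_X, TILING_Y };

    /* Split a pixel coordinate within a tiled surface into a tile-aligned
     * byte offset plus the remaining x/y offset inside the tile.  Both
     * tile formats are 4096 bytes in total.
     */
    static void
    tile_offsets(enum tiling tiling, uint32_t draw_x, uint32_t draw_y,
                 uint32_t pitch_bytes, uint32_t cpp,
                 uint32_t *tile_base, uint32_t *tile_x, uint32_t *tile_y)
    {
       uint32_t tile_w = (tiling == TILING_X ? 512 : 128) / cpp; /* pixels */
       uint32_t tile_h = tiling == TILING_X ? 8 : 32;            /* rows */

       *tile_x = draw_x % tile_w;
       *tile_y = draw_y % tile_h;
       *tile_base = (draw_y / tile_h) * (tile_h * pitch_bytes);
       *tile_base += (draw_x - *tile_x) / tile_w * 4096;
    }

    int
    main(void)
    {
       uint32_t base, tx, ty;

       /* A 1024-pixel-wide BGRA (cpp = 4) surface, drawing at (300, 50). */
       tile_offsets(TILING_X, 300, 50, 1024 * 4, 4, &base, &tx, &ty);
       printf("X tiling: base %u, tile_x %u, tile_y %u\n", base, tx, ty);
       return 0;
    }

The hardware surface state then takes tile_base as ss1.base_addr and the low bits of tile_x/tile_y (divided by 4 and 2) as the ss5 offsets, as in the hunk above.
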
diff --git a/i965/gen6_cc.c b/i965/gen6_cc.c
new file mode 100644
index 0000000..f7acad6
--- /dev/null
+++ b/i965/gen6_cc.c
@@ -0,0 +1,296 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_util.h"
+#include "intel_batchbuffer.h"
+#include "main/macros.h"
+
+struct gen6_blend_state_key {
+ GLboolean color_blend, alpha_enabled;
+ GLboolean dither;
+
+ GLenum logic_op;
+
+ GLenum blend_eq_rgb, blend_eq_a;
+ GLenum blend_src_rgb, blend_src_a;
+ GLenum blend_dst_rgb, blend_dst_a;
+
+ GLenum alpha_func;
+};
+
+static void
+blend_state_populate_key(struct brw_context *brw,
+ struct gen6_blend_state_key *key)
+{
+ GLcontext *ctx = &brw->intel.ctx;
+
+ memset(key, 0, sizeof(*key));
+
+ /* _NEW_COLOR */
+ if (ctx->Color._LogicOpEnabled)
+ key->logic_op = ctx->Color.LogicOp;
+ else
+ key->logic_op = GL_COPY;
+
+ /* _NEW_COLOR */
+ key->color_blend = ctx->Color.BlendEnabled;
+ if (key->color_blend) {
+ key->blend_eq_rgb = ctx->Color.BlendEquationRGB;
+ key->blend_eq_a = ctx->Color.BlendEquationA;
+ key->blend_src_rgb = ctx->Color.BlendSrcRGB;
+ key->blend_dst_rgb = ctx->Color.BlendDstRGB;
+ key->blend_src_a = ctx->Color.BlendSrcA;
+ key->blend_dst_a = ctx->Color.BlendDstA;
+ }
+
+ /* _NEW_COLOR */
+ key->alpha_enabled = ctx->Color.AlphaEnabled;
+ if (key->alpha_enabled) {
+ key->alpha_func = ctx->Color.AlphaFunc;
+ }
+
+ /* _NEW_COLOR */
+ key->dither = ctx->Color.DitherFlag;
+}
+
+/**
+ * Creates the state cache entry for the given CC unit key.
+ */
+static drm_intel_bo *
+blend_state_create_from_key(struct brw_context *brw,
+ struct gen6_blend_state_key *key)
+{
+ struct gen6_blend_state blend;
+ drm_intel_bo *bo;
+
+ memset(&blend, 0, sizeof(blend));
+
+ if (key->logic_op != GL_COPY) {
+ blend.blend1.logic_op_enable = 1;
+ blend.blend1.logic_op_func = intel_translate_logic_op(key->logic_op);
+ } else if (key->color_blend) {
+ GLenum eqRGB = key->blend_eq_rgb;
+ GLenum eqA = key->blend_eq_a;
+ GLenum srcRGB = key->blend_src_rgb;
+ GLenum dstRGB = key->blend_dst_rgb;
+ GLenum srcA = key->blend_src_a;
+ GLenum dstA = key->blend_dst_a;
+
+ if (eqRGB == GL_MIN || eqRGB == GL_MAX) {
+ srcRGB = dstRGB = GL_ONE;
+ }
+
+ if (eqA == GL_MIN || eqA == GL_MAX) {
+ srcA = dstA = GL_ONE;
+ }
+
+ blend.blend0.dest_blend_factor = brw_translate_blend_factor(dstRGB);
+ blend.blend0.source_blend_factor = brw_translate_blend_factor(srcRGB);
+ blend.blend0.blend_func = brw_translate_blend_equation(eqRGB);
+
+ blend.blend0.ia_dest_blend_factor = brw_translate_blend_factor(dstA);
+ blend.blend0.ia_source_blend_factor = brw_translate_blend_factor(srcA);
+ blend.blend0.ia_blend_func = brw_translate_blend_equation(eqA);
+
+ blend.blend0.blend_enable = 1;
+ blend.blend0.ia_blend_enable = (srcA != srcRGB ||
+ dstA != dstRGB ||
+ eqA != eqRGB);
+ }
+
+ if (key->alpha_enabled) {
+ blend.blend1.alpha_test_enable = 1;
+ blend.blend1.alpha_test_func = intel_translate_compare_func(key->alpha_func);
+
+ }
+
+ if (key->dither) {
+ blend.blend1.dither_enable = 1;
+ blend.blend1.y_dither_offset = 0;
+ blend.blend1.x_dither_offset = 0;
+ }
+
+ bo = brw_upload_cache(&brw->cache, BRW_BLEND_STATE,
+ key, sizeof(*key),
+ NULL, 0,
+ &blend, sizeof(blend));
+
+ return bo;
+}
+
+static void
+prepare_blend_state(struct brw_context *brw)
+{
+ struct gen6_blend_state_key key;
+
+ blend_state_populate_key(brw, &key);
+
+ drm_intel_bo_unreference(brw->cc.blend_state_bo);
+ brw->cc.blend_state_bo = brw_search_cache(&brw->cache, BRW_BLEND_STATE,
+ &key, sizeof(key),
+ NULL, 0,
+ NULL);
+
+ if (brw->cc.blend_state_bo == NULL)
+ brw->cc.blend_state_bo = blend_state_create_from_key(brw, &key);
+}
+
+const struct brw_tracked_state gen6_blend_state = {
+ .dirty = {
+ .mesa = _NEW_COLOR,
+ .brw = 0,
+ .cache = 0,
+ },
+ .prepare = prepare_blend_state,
+};
+
+struct gen6_color_calc_state_key {
+ GLubyte blend_constant_color[4];
+ GLclampf alpha_ref;
+ GLubyte stencil_ref[2];
+};
+
+static void
+color_calc_state_populate_key(struct brw_context *brw,
+ struct gen6_color_calc_state_key *key)
+{
+ GLcontext *ctx = &brw->intel.ctx;
+
+ memset(key, 0, sizeof(*key));
+
+ /* _NEW_STENCIL */
+ if (ctx->Stencil._Enabled) {
+ const unsigned back = ctx->Stencil._BackFace;
+
+ key->stencil_ref[0] = ctx->Stencil.Ref[0];
+ if (ctx->Stencil._TestTwoSide)
+ key->stencil_ref[1] = ctx->Stencil.Ref[back];
+ }
+
+ /* _NEW_COLOR */
+ if (ctx->Color.AlphaEnabled)
+ key->alpha_ref = ctx->Color.AlphaRef;
+
+ key->blend_constant_color[0] = ctx->Color.BlendColor[0];
+ key->blend_constant_color[1] = ctx->Color.BlendColor[1];
+ key->blend_constant_color[2] = ctx->Color.BlendColor[2];
+ key->blend_constant_color[3] = ctx->Color.BlendColor[3];
+}
+
+/**
+ * Creates the state cache entry for the given CC state key.
+ */
+static drm_intel_bo *
+color_calc_state_create_from_key(struct brw_context *brw,
+ struct gen6_color_calc_state_key *key)
+{
+ struct gen6_color_calc_state cc;
+ drm_intel_bo *bo;
+
+ memset(&cc, 0, sizeof(cc));
+
+ cc.cc0.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8;
+ UNCLAMPED_FLOAT_TO_UBYTE(cc.cc1.alpha_ref_fi.ui, key->alpha_ref);
+
+ cc.cc0.stencil_ref = key->stencil_ref[0];
+ cc.cc0.bf_stencil_ref = key->stencil_ref[1];
+
+ cc.constant_r = key->blend_constant_color[0];
+ cc.constant_g = key->blend_constant_color[1];
+ cc.constant_b = key->blend_constant_color[2];
+ cc.constant_a = key->blend_constant_color[3];
+
+ bo = brw_upload_cache(&brw->cache, BRW_COLOR_CALC_STATE,
+ key, sizeof(*key),
+ NULL, 0,
+ &cc, sizeof(cc));
+
+ return bo;
+}
+
+static void
+prepare_color_calc_state(struct brw_context *brw)
+{
+ struct gen6_color_calc_state_key key;
+
+ color_calc_state_populate_key(brw, &key);
+
+ drm_intel_bo_unreference(brw->cc.state_bo);
+ brw->cc.state_bo = brw_search_cache(&brw->cache, BRW_COLOR_CALC_STATE,
+ &key, sizeof(key),
+ NULL, 0,
+ NULL);
+
+ if (brw->cc.state_bo == NULL)
+ brw->cc.state_bo = color_calc_state_create_from_key(brw, &key);
+}
+
+const struct brw_tracked_state gen6_color_calc_state = {
+ .dirty = {
+ .mesa = _NEW_COLOR,
+ .brw = 0,
+ .cache = 0,
+ },
+ .prepare = prepare_color_calc_state,
+};
+
+static void upload_cc_state_pointers(struct brw_context *brw)
+{
+ struct intel_context *intel = &brw->intel;
+
+ BEGIN_BATCH(4);
+ OUT_BATCH(CMD_3D_CC_STATE_POINTERS << 16 | (4 - 2));
+ OUT_RELOC(brw->cc.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+ OUT_RELOC(brw->cc.blend_state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+ OUT_RELOC(brw->cc.depth_stencil_state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+ ADVANCE_BATCH();
+
+ intel_batchbuffer_emit_mi_flush(intel->batch);
+}
+
+
+static void prepare_cc_state_pointers(struct brw_context *brw)
+{
+ brw_add_validated_bo(brw, brw->cc.state_bo);
+ brw_add_validated_bo(brw, brw->cc.blend_state_bo);
+ brw_add_validated_bo(brw, brw->cc.depth_stencil_state_bo);
+}
+
+const struct brw_tracked_state gen6_cc_state_pointers = {
+ .dirty = {
+ .mesa = 0,
+ .brw = BRW_NEW_BATCH,
+ .cache = (CACHE_NEW_BLEND_STATE |
+ CACHE_NEW_COLOR_CALC_STATE |
+ CACHE_NEW_DEPTH_STENCIL_STATE)
+ },
+ .prepare = prepare_cc_state_pointers,
+ .emit = upload_cc_state_pointers,
+};
diff --git a/i965/gen6_clip_state.c b/i965/gen6_clip_state.c
new file mode 100644
index 0000000..06f8145
--- /dev/null
+++ b/i965/gen6_clip_state.c
@@ -0,0 +1,75 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "intel_batchbuffer.h"
+
+static void
+upload_clip_state(struct brw_context *brw)
+{
+ struct intel_context *intel = &brw->intel;
+ GLcontext *ctx = &intel->ctx;
+ uint32_t depth_clamp = 0;
+ uint32_t provoking;
+
+ if (!ctx->Transform.DepthClamp)
+ depth_clamp = GEN6_CLIP_Z_TEST;
+
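+ /* The *_PROVOKE fields select which vertex of a primitive supplies the
+ * flat-shaded attributes; 0 picks the first vertex, which matches
+ * GL_FIRST_VERTEX_CONVENTION for most primitive types.
+ */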
+ if (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION) {
+ provoking = 0;
+ } else {
+ provoking =
+ (2 << GEN6_CLIP_TRI_PROVOKE_SHIFT) |
+ (2 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT) |
+ (1 << GEN6_CLIP_LINE_PROVOKE_SHIFT);
+ }
+
+ BEGIN_BATCH(4);
+ OUT_BATCH(CMD_3D_CLIP_STATE << 16 | (4 - 2));
+ OUT_BATCH(GEN6_CLIP_STATISTICS_ENABLE);
+ OUT_BATCH(GEN6_CLIP_ENABLE |
+ GEN6_CLIP_API_OGL |
+ GEN6_CLIP_MODE_REJECT_ALL | /* XXX: debug: get VS working */
+ GEN6_CLIP_XY_TEST |
+ depth_clamp |
+ provoking);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+
+ intel_batchbuffer_emit_mi_flush(intel->batch);
+}
+
+const struct brw_tracked_state gen6_clip_state = {
+ .dirty = {
+ .mesa = _NEW_TRANSFORM,
+ .brw = BRW_NEW_CONTEXT,
+ .cache = 0
+ },
+ .emit = upload_clip_state,
+};
diff --git a/i965/gen6_depthstencil.c b/i965/gen6_depthstencil.c
new file mode 100644
index 0000000..4924f0f
--- /dev/null
+++ b/i965/gen6_depthstencil.c
@@ -0,0 +1,165 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+
+struct brw_depth_stencil_state_key {
+ GLenum depth_func;
+ GLboolean depth_test, depth_write;
+ GLboolean stencil, stencil_two_side;
+ GLenum stencil_func[2], stencil_fail_op[2];
+ GLenum stencil_pass_depth_fail_op[2], stencil_pass_depth_pass_op[2];
+ GLubyte stencil_write_mask[2], stencil_test_mask[2];
+};
+
+static void
+depth_stencil_state_populate_key(struct brw_context *brw,
+ struct brw_depth_stencil_state_key *key)
+{
+ GLcontext *ctx = &brw->intel.ctx;
+ const unsigned back = ctx->Stencil._BackFace;
+
+ memset(key, 0, sizeof(*key));
+
+ /* _NEW_STENCIL */
+ key->stencil = ctx->Stencil._Enabled;
+ key->stencil_two_side = ctx->Stencil._TestTwoSide;
+
+ if (key->stencil) {
+ key->stencil_func[0] = ctx->Stencil.Function[0];
+ key->stencil_fail_op[0] = ctx->Stencil.FailFunc[0];
+ key->stencil_pass_depth_fail_op[0] = ctx->Stencil.ZFailFunc[0];
+ key->stencil_pass_depth_pass_op[0] = ctx->Stencil.ZPassFunc[0];
+ key->stencil_write_mask[0] = ctx->Stencil.WriteMask[0];
+ key->stencil_test_mask[0] = ctx->Stencil.ValueMask[0];
+ }
+ if (key->stencil_two_side) {
+ key->stencil_func[1] = ctx->Stencil.Function[back];
+ key->stencil_fail_op[1] = ctx->Stencil.FailFunc[back];
+ key->stencil_pass_depth_fail_op[1] = ctx->Stencil.ZFailFunc[back];
+ key->stencil_pass_depth_pass_op[1] = ctx->Stencil.ZPassFunc[back];
+ key->stencil_write_mask[1] = ctx->Stencil.WriteMask[back];
+ key->stencil_test_mask[1] = ctx->Stencil.ValueMask[back];
+ }
+
+ key->depth_test = ctx->Depth.Test;
+ if (key->depth_test) {
+ key->depth_func = ctx->Depth.Func;
+ key->depth_write = ctx->Depth.Mask;
+ }
+}
+
+/**
+ * Creates the state cache entry for the given DEPTH_STENCIL_STATE state key.
+ */
+static dri_bo *
+depth_stencil_state_create_from_key(struct brw_context *brw,
+ struct brw_depth_stencil_state_key *key)
+{
+ struct gen6_depth_stencil_state ds;
+ dri_bo *bo;
+
+ memset(&ds, 0, sizeof(ds));
+
+ /* _NEW_STENCIL */
+ if (key->stencil) {
+ ds.ds0.stencil_enable = 1;
+ ds.ds0.stencil_func =
+ intel_translate_compare_func(key->stencil_func[0]);
+ ds.ds0.stencil_fail_op =
+ intel_translate_stencil_op(key->stencil_fail_op[0]);
+ ds.ds0.stencil_pass_depth_fail_op =
+ intel_translate_stencil_op(key->stencil_pass_depth_fail_op[0]);
+ ds.ds0.stencil_pass_depth_pass_op =
+ intel_translate_stencil_op(key->stencil_pass_depth_pass_op[0]);
+ ds.ds1.stencil_write_mask = key->stencil_write_mask[0];
+ ds.ds1.stencil_test_mask = key->stencil_test_mask[0];
+
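+ /* Back-face stencil state only matters when two-sided stenciling is in
+ * use; otherwise the BF fields are left disabled.
+ */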
+ if (key->stencil_two_side) {
+ ds.ds0.bf_stencil_enable = 1;
+ ds.ds0.bf_stencil_func =
+ intel_translate_compare_func(key->stencil_func[1]);
+ ds.ds0.bf_stencil_fail_op =
+ intel_translate_stencil_op(key->stencil_fail_op[1]);
+ ds.ds0.bf_stencil_pass_depth_fail_op =
+ intel_translate_stencil_op(key->stencil_pass_depth_fail_op[1]);
+ ds.ds0.bf_stencil_pass_depth_pass_op =
+ intel_translate_stencil_op(key->stencil_pass_depth_pass_op[1]);
+ ds.ds1.bf_stencil_write_mask = key->stencil_write_mask[1];
+ ds.ds1.bf_stencil_test_mask = key->stencil_test_mask[1];
+ }
+
+ /* Not really sure about this: enable stencil buffer writes whenever
+ * either write mask is nonzero.
+ */
+ if (key->stencil_write_mask[0] ||
+ (key->stencil_two_side && key->stencil_write_mask[1]))
+ ds.ds0.stencil_write_enable = 1;
+ }
+
+ /* _NEW_DEPTH */
+ if (key->depth_test) {
+ ds.ds2.depth_test_enable = 1;
+ ds.ds2.depth_test_func = intel_translate_compare_func(key->depth_func);
+ ds.ds2.depth_write_enable = key->depth_write;
+ }
+
+ bo = brw_upload_cache(&brw->cache, BRW_DEPTH_STENCIL_STATE,
+ key, sizeof(*key),
+ NULL, 0,
+ &ds, sizeof(ds));
+
+ return bo;
+}
+
+static void
+prepare_depth_stencil_state(struct brw_context *brw)
+{
+ struct brw_depth_stencil_state_key key;
+
+ depth_stencil_state_populate_key(brw, &key);
+
+ dri_bo_unreference(brw->cc.depth_stencil_state_bo);
+ brw->cc.depth_stencil_state_bo = brw_search_cache(&brw->cache,
+ BRW_DEPTH_STENCIL_STATE,
+ &key, sizeof(key),
+ NULL, 0,
+ NULL);
+
+ if (brw->cc.depth_stencil_state_bo == NULL)
+ brw->cc.depth_stencil_state_bo =
+ depth_stencil_state_create_from_key(brw, &key);
+}
+
+const struct brw_tracked_state gen6_depth_stencil_state = {
+ .dirty = {
+ .mesa = _NEW_DEPTH | _NEW_STENCIL,
+ .brw = 0,
+ .cache = 0,
+ },
+ .prepare = prepare_depth_stencil_state,
+};
diff --git a/i965/gen6_gs_state.c b/i965/gen6_gs_state.c
new file mode 100644
index 0000000..161e7b8
--- /dev/null
+++ b/i965/gen6_gs_state.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "intel_batchbuffer.h"
+
+static void
+upload_gs_state(struct brw_context *brw)
+{
+ struct intel_context *intel = &brw->intel;
+
+ /* Disable all the constant buffers. */
+ BEGIN_BATCH(5);
+ OUT_BATCH(CMD_3D_CONSTANT_GS_STATE << 16 | (5 - 2));
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+
+ intel_batchbuffer_emit_mi_flush(intel->batch);
+
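+ /* A GS kernel is only built for primitive types the rest of the pipeline
+ * can't consume directly (e.g. quads); without one, the same packet is
+ * emitted with the unit disabled.
+ */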
+ if (brw->gs.prog_bo) {
+ BEGIN_BATCH(7);
+ OUT_BATCH(CMD_3D_GS_STATE << 16 | (7 - 2));
+ OUT_RELOC(brw->gs.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+ OUT_BATCH((0 << GEN6_GS_SAMPLER_COUNT_SHIFT) |
+ (0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+ OUT_BATCH(0); /* scratch space base offset */
+ OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) |
+ (brw->gs.prog_data->urb_read_length << GEN6_GS_URB_READ_LENGTH_SHIFT) |
+ (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT));
+ OUT_BATCH((0 << GEN6_GS_MAX_THREADS_SHIFT) |
+ GEN6_GS_STATISTICS_ENABLE |
+ GEN6_GS_RENDERING_ENABLE);
+ OUT_BATCH(GEN6_GS_ENABLE);
+ ADVANCE_BATCH();
+ } else {
+ BEGIN_BATCH(7);
+ OUT_BATCH(CMD_3D_GS_STATE << 16 | (7 - 2));
+ OUT_BATCH(0); /* prog_bo */
+ OUT_BATCH((0 << GEN6_GS_SAMPLER_COUNT_SHIFT) |
+ (0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+ OUT_BATCH(0); /* scratch space base offset */
+ OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) |
+ (0 << GEN6_GS_URB_READ_LENGTH_SHIFT) |
+ (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT));
+ OUT_BATCH((0 << GEN6_GS_MAX_THREADS_SHIFT) |
+ GEN6_GS_STATISTICS_ENABLE |
+ GEN6_GS_RENDERING_ENABLE);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+ }
+}
+
+const struct brw_tracked_state gen6_gs_state = {
+ .dirty = {
+ .mesa = _NEW_TRANSFORM,
+ .brw = (BRW_NEW_CURBE_OFFSETS |
+ BRW_NEW_URB_FENCE |
+ BRW_NEW_CONTEXT),
+ .cache = CACHE_NEW_GS_PROG
+ },
+ .emit = upload_gs_state,
+};
diff --git a/i965/gen6_sampler_state.c b/i965/gen6_sampler_state.c
new file mode 100644
index 0000000..ab8e751
--- /dev/null
+++ b/i965/gen6_sampler_state.c
@@ -0,0 +1,71 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "intel_batchbuffer.h"
+
+static void
+upload_sampler_state_pointers(struct brw_context *brw)
+{
+ struct intel_context *intel = &brw->intel;
+
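+ /* Only the WM currently uses samplers, so the VS and GS sampler state
+ * pointers are written as zero.
+ */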
+ BEGIN_BATCH(4);
+ OUT_BATCH(CMD_3D_SAMPLER_STATE_POINTERS << 16 |
+ VS_SAMPLER_STATE_CHANGE |
+ GS_SAMPLER_STATE_CHANGE |
+ PS_SAMPLER_STATE_CHANGE |
+ (4 - 2));
+ OUT_BATCH(0); /* VS */
+ OUT_BATCH(0); /* GS */
+ if (brw->wm.sampler_bo)
+ OUT_RELOC(brw->wm.sampler_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+ else
+ OUT_BATCH(0);
+
+ ADVANCE_BATCH();
+
+ intel_batchbuffer_emit_mi_flush(intel->batch);
+}
+
+
+static void
+prepare_sampler_state_pointers(struct brw_context *brw)
+{
+ brw_add_validated_bo(brw, brw->wm.sampler_bo);
+}
+
+const struct brw_tracked_state gen6_sampler_state = {
+ .dirty = {
+ .mesa = 0,
+ .brw = BRW_NEW_BATCH,
+ .cache = CACHE_NEW_SAMPLER
+ },
+ .prepare = prepare_sampler_state_pointers,
+ .emit = upload_sampler_state_pointers,
+};
diff --git a/i965/gen6_scissor_state.c b/i965/gen6_scissor_state.c
new file mode 100644
index 0000000..2e21e5f
--- /dev/null
+++ b/i965/gen6_scissor_state.c
@@ -0,0 +1,105 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "intel_batchbuffer.h"
+
+static void
+prepare_scissor_state(struct brw_context *brw)
+{
+ GLcontext *ctx = &brw->intel.ctx;
+ const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0);
+ struct gen6_scissor_state scissor;
+
+ /* _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT */
+
+ /* The scissor only needs to handle the intersection of drawable and
+ * scissor rect. Clipping to the boundaries of static shared buffers
+ * for front/back/depth is covered by looping over cliprects in brw_draw.c.
+ *
+ * Note that the hardware's coordinates are inclusive, while Mesa's min is
+ * inclusive but max is exclusive.
+ */
+ if (render_to_fbo) {
+ /* texmemory: Y=0=bottom */
+ scissor.xmin = ctx->DrawBuffer->_Xmin;
+ scissor.xmax = ctx->DrawBuffer->_Xmax - 1;
+ scissor.ymin = ctx->DrawBuffer->_Ymin;
+ scissor.ymax = ctx->DrawBuffer->_Ymax - 1;
+ }
+ else {
+ /* memory: Y=0=top */
+ scissor.xmin = ctx->DrawBuffer->_Xmin;
+ scissor.xmax = ctx->DrawBuffer->_Xmax - 1;
+ scissor.ymin = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymax;
+ scissor.ymax = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymin - 1;
+ }
+
+ drm_intel_bo_unreference(brw->sf.state_bo);
+ brw->sf.state_bo = brw_cache_data(&brw->cache, BRW_SF_UNIT,
+ &scissor, sizeof(scissor),
+ NULL, 0);
+}
+
+const struct brw_tracked_state gen6_scissor_state = {
+ .dirty = {
+ .mesa = _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT,
+ .brw = 0,
+ .cache = 0,
+ },
+ .prepare = prepare_scissor_state,
+};
+
+static void upload_scissor_state_pointers(struct brw_context *brw)
+{
+ struct intel_context *intel = &brw->intel;
+
+ BEGIN_BATCH(2);
+ OUT_BATCH(CMD_3D_SCISSOR_STATE_POINTERS << 16 | (2 - 2));
+ OUT_RELOC(brw->sf.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+ ADVANCE_BATCH();
+
+ intel_batchbuffer_emit_mi_flush(intel->batch);
+}
+
+
+static void prepare_scissor_state_pointers(struct brw_context *brw)
+{
+ brw_add_validated_bo(brw, brw->sf.state_bo);
+}
+
+const struct brw_tracked_state gen6_scissor_state_pointers = {
+ .dirty = {
+ .mesa = 0,
+ .brw = BRW_NEW_BATCH,
+ .cache = CACHE_NEW_SF_UNIT
+ },
+ .prepare = prepare_scissor_state_pointers,
+ .emit = upload_scissor_state_pointers,
+};
diff --git a/i965/gen6_sf_state.c b/i965/gen6_sf_state.c
new file mode 100644
index 0000000..8d96b44
--- /dev/null
+++ b/i965/gen6_sf_state.c
@@ -0,0 +1,187 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_util.h"
+#include "main/macros.h"
+#include "intel_batchbuffer.h"
+
+static uint32_t
+get_attr_override(struct brw_context *brw, int attr)
+{
+ int attr_index = 0, i;
+
+ /* Find the source index (0 = first attribute after the 4D position)
+ * for this output attribute. attr is currently a VERT_RESULT_* but should
+ * be FRAG_ATTRIB_*.
+ */
+ for (i = 0; i < attr; i++) {
+ if (brw->vs.prog_data->outputs_written & BITFIELD64_BIT(i))
+ attr_index++;
+ }
+ return attr_index;
+}
+
+static void
+upload_sf_state(struct brw_context *brw)
+{
+ struct intel_context *intel = &brw->intel;
+ GLcontext *ctx = &intel->ctx;
+ /* CACHE_NEW_VS_PROG */
+ uint32_t num_inputs = brw_count_bits(brw->vs.prog_data->outputs_written);
+ /* This should probably be FS inputs read */
+ uint32_t num_outputs = brw_count_bits(brw->vs.prog_data->outputs_written);
+ uint32_t dw1, dw2, dw3, dw4;
+ int i;
+ /* _NEW_BUFFERS */
+ GLboolean render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
+ int attr = 0;
+
+ dw1 =
+ num_outputs << GEN6_SF_NUM_OUTPUTS_SHIFT |
+ (num_inputs + 1) / 2 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
+ 3 << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT;
+ dw2 = GEN6_SF_VIEWPORT_TRANSFORM_ENABLE |
+ GEN6_SF_STATISTICS_ENABLE;
+ dw3 = 0;
+ dw4 = 0;
+
+ /* _NEW_POLYGON */
+ if ((ctx->Polygon.FrontFace == GL_CCW) ^ render_to_fbo)
+ dw2 |= GEN6_SF_WINDING_CCW;
+
+ /* _NEW_SCISSOR */
+ if (ctx->Scissor.Enabled)
+ dw3 |= GEN6_SF_SCISSOR_ENABLE;
+
+ /* _NEW_POLYGON */
+ if (ctx->Polygon.CullFlag) {
+ switch (ctx->Polygon.CullFaceMode) {
+ case GL_FRONT:
+ dw3 |= GEN6_SF_CULL_FRONT;
+ break;
+ case GL_BACK:
+ dw3 |= GEN6_SF_CULL_BACK;
+ break;
+ case GL_FRONT_AND_BACK:
+ dw3 |= GEN6_SF_CULL_BOTH;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+ } else {
+ dw3 |= GEN6_SF_CULL_NONE;
+ }
+
+ /* _NEW_LINE */
+ dw3 |= U_FIXED(CLAMP(ctx->Line.Width, 0.0, 7.99), 7) <<
+ GEN6_SF_LINE_WIDTH_SHIFT;
+ if (ctx->Line.SmoothFlag) {
+ dw3 |= GEN6_SF_LINE_AA_ENABLE;
+ dw3 |= GEN6_SF_LINE_AA_MODE_TRUE;
+ dw3 |= GEN6_SF_LINE_END_CAP_WIDTH_1_0;
+ }
+
+ /* _NEW_POINT */
+ if (ctx->Point._Attenuated)
+ dw4 |= GEN6_SF_USE_STATE_POINT_WIDTH;
+
+ dw4 |= U_FIXED(CLAMP(ctx->Point.Size, 0.125, 255.875), 3) <<
+ GEN6_SF_POINT_WIDTH_SHIFT;
+ if (render_to_fbo)
+ dw1 |= GEN6_SF_POINT_SPRITE_LOWERLEFT;
+
+ /* _NEW_LIGHT */
+ if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) {
+ dw4 |=
+ (2 << GEN6_SF_TRI_PROVOKE_SHIFT) |
+ (2 << GEN6_SF_TRIFAN_PROVOKE_SHIFT) |
+ (1 << GEN6_SF_LINE_PROVOKE_SHIFT);
+ } else {
+ dw4 |=
+ (1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT);
+ }
+
+ BEGIN_BATCH(20);
+ OUT_BATCH(CMD_3D_SF_STATE << 16 | (20 - 2));
+ OUT_BATCH(dw1);
+ OUT_BATCH(dw2);
+ OUT_BATCH(dw3);
+ OUT_BATCH(dw4);
+ OUT_BATCH_F(ctx->Polygon.OffsetUnits * 2); /* constant. copied from gen4 */
+ OUT_BATCH_F(ctx->Polygon.OffsetFactor); /* scale */
+ OUT_BATCH_F(0.0); /* XXX: global depth offset clamp */
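+ /* Each of the eight override DWORDs packs two attribute overrides: the
+ * low 16 bits remap one FS input and the high 16 bits the next, using the
+ * written VS output slots found above.
+ */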
+ for (i = 0; i < 8; i++) {
+ uint32_t attr_overrides = 0;
+
+ /* These should be generating FS inputs read instead of VS
+ * outputs written
+ */
+ for (; attr < 64; attr++) {
+ if (brw->vs.prog_data->outputs_written & BITFIELD64_BIT(attr)) {
+ attr_overrides |= get_attr_override(brw, attr);
+ attr++;
+ break;
+ }
+ }
+
+ for (; attr < 64; attr++) {
+ if (brw->vs.prog_data->outputs_written & BITFIELD64_BIT(attr)) {
+ attr_overrides |= get_attr_override(brw, attr) << 16;
+ attr++;
+ break;
+ }
+ }
+ OUT_BATCH(attr_overrides);
+ }
+ OUT_BATCH(0); /* point sprite texcoord bitmask */
+ OUT_BATCH(0); /* constant interp bitmask */
+ OUT_BATCH(0); /* wrapshortest enables 0-7 */
+ OUT_BATCH(0); /* wrapshortest enables 8-15 */
+ ADVANCE_BATCH();
+
+ intel_batchbuffer_emit_mi_flush(intel->batch);
+}
+
+const struct brw_tracked_state gen6_sf_state = {
+ .dirty = {
+ .mesa = (_NEW_LIGHT |
+ _NEW_POLYGON |
+ _NEW_LINE |
+ _NEW_SCISSOR |
+ _NEW_BUFFERS),
+ .brw = BRW_NEW_CONTEXT,
+ .cache = CACHE_NEW_VS_PROG
+ },
+ .emit = upload_sf_state,
+};
diff --git a/i965/gen6_urb.c b/i965/gen6_urb.c
new file mode 100644
index 0000000..5445e40
--- /dev/null
+++ b/i965/gen6_urb.c
@@ -0,0 +1,83 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "main/macros.h"
+#include "intel_batchbuffer.h"
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+
+static void
+prepare_urb( struct brw_context *brw )
+{
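+ /* Use a fixed, conservative allocation: 24 appears to be the minimum
+ * number of VS URB entries the hardware accepts, and GS entries are only
+ * needed when a GS kernel is present.
+ */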
+ brw->urb.nr_vs_entries = 24;
+ if (brw->gs.prog_bo)
+ brw->urb.nr_gs_entries = 4;
+ else
+ brw->urb.nr_gs_entries = 0;
+ /* CACHE_NEW_VS_PROG */
+ brw->urb.vs_size = MAX2(brw->vs.prog_data->urb_entry_size, 1);
+
+ /* Check that the number of URB rows (8 floats each) allocated is less
+ * than the URB space.
+ */
+ assert((brw->urb.nr_vs_entries +
+ brw->urb.nr_gs_entries) * brw->urb.vs_size * 8 < 64 * 1024);
+}
+
+static void
+upload_urb(struct brw_context *brw)
+{
+ struct intel_context *intel = &brw->intel;
+
+ assert(brw->urb.nr_vs_entries % 4 == 0);
+ assert(brw->urb.nr_gs_entries % 4 == 0);
+ /* GS requirement */
+ assert(!brw->gs.prog_bo || brw->urb.vs_size < 5);
+
+ intel_batchbuffer_emit_mi_flush(intel->batch);
+
+ BEGIN_BATCH(3);
+ OUT_BATCH(CMD_URB << 16 | (3 - 2));
+ OUT_BATCH(((brw->urb.vs_size - 1) << GEN6_URB_VS_SIZE_SHIFT) |
+ ((brw->urb.nr_vs_entries) << GEN6_URB_VS_ENTRIES_SHIFT));
+ OUT_BATCH(((brw->urb.vs_size - 1) << GEN6_URB_GS_SIZE_SHIFT) |
+ ((brw->urb.nr_gs_entries) << GEN6_URB_GS_ENTRIES_SHIFT));
+ ADVANCE_BATCH();
+
+ intel_batchbuffer_emit_mi_flush(intel->batch);
+}
+
+const struct brw_tracked_state gen6_urb = {
+ .dirty = {
+ .mesa = 0,
+ .brw = BRW_NEW_CONTEXT,
+ .cache = CACHE_NEW_VS_PROG,
+ },
+ .prepare = prepare_urb,
+ .emit = upload_urb,
+};
diff --git a/i965/gen6_viewport_state.c b/i965/gen6_viewport_state.c
new file mode 100644
index 0000000..0c2aa42
--- /dev/null
+++ b/i965/gen6_viewport_state.c
@@ -0,0 +1,173 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "intel_batchbuffer.h"
+#include "main/macros.h"
+
+/* The clip VP defines the guardband region where expensive clipping is skipped
+ * and fragments are allowed to be generated and clipped out cheaply by the SF.
+ *
+ * By setting it to the NDC bounds of [-1,1], we don't do GB clipping.
+ * Guardband clipping is said to cause seams to become visible in apps,
+ * because shared edges take different clip/no-clip paths depending on
+ * whether the rest of the primitive ends up in the guardband or not.
+ */
+static void
+prepare_clip_vp(struct brw_context *brw)
+{
+ struct brw_clipper_viewport vp;
+
+ vp.xmin = -1.0;
+ vp.xmax = 1.0;
+ vp.ymin = -1.0;
+ vp.ymax = 1.0;
+
+ drm_intel_bo_unreference(brw->clip.vp_bo);
+ brw->clip.vp_bo = brw_cache_data(&brw->cache, BRW_CLIP_VP,
+ &vp, sizeof(vp),
+ NULL, 0);
+}
+
+const struct brw_tracked_state gen6_clip_vp = {
+ .dirty = {
+ .mesa = _NEW_VIEWPORT, /* XXX: not really, but we need nonzero */
+ .brw = 0,
+ .cache = 0,
+ },
+ .prepare = prepare_clip_vp,
+};
+
+static void
+prepare_sf_vp(struct brw_context *brw)
+{
+ GLcontext *ctx = &brw->intel.ctx;
+ const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
+ struct brw_sf_viewport sfv;
+ GLfloat y_scale, y_bias;
+ const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0);
+ const GLfloat *v = ctx->Viewport._WindowMap.m;
+
+ memset(&sfv, 0, sizeof(sfv));
+
+ /* _NEW_BUFFERS */
+ if (render_to_fbo) {
+ y_scale = 1.0;
+ y_bias = 0;
+ } else {
+ y_scale = -1.0;
+ y_bias = ctx->DrawBuffer->Height;
+ }
+
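+ /* Fold Mesa's window/viewport transform into the SF viewport matrix,
+ * flipping Y when drawing to the window system (Y=0=top) rather than to
+ * an FBO (Y=0=bottom).
+ */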
+ /* _NEW_VIEWPORT */
+ sfv.viewport.m00 = v[MAT_SX];
+ sfv.viewport.m11 = v[MAT_SY] * y_scale;
+ sfv.viewport.m22 = v[MAT_SZ] * depth_scale;
+ sfv.viewport.m30 = v[MAT_TX];
+ sfv.viewport.m31 = v[MAT_TY] * y_scale + y_bias;
+ sfv.viewport.m32 = v[MAT_TZ] * depth_scale;
+
+ drm_intel_bo_unreference(brw->sf.vp_bo);
+ brw->sf.vp_bo = brw_cache_data(&brw->cache, BRW_SF_VP,
+ &sfv, sizeof(sfv),
+ NULL, 0);
+}
+
+const struct brw_tracked_state gen6_sf_vp = {
+ .dirty = {
+ .mesa = _NEW_VIEWPORT | _NEW_BUFFERS,
+ .brw = 0,
+ .cache = 0,
+ },
+ .prepare = prepare_sf_vp,
+};
+
+static void
+prepare_cc_vp(struct brw_context *brw)
+{
+ GLcontext *ctx = &brw->intel.ctx;
+ struct brw_cc_viewport ccv;
+
+ /* _NEW_TRANSFORM */
+ if (ctx->Transform.DepthClamp) {
+ /* _NEW_VIEWPORT */
+ ccv.min_depth = MIN2(ctx->Viewport.Near, ctx->Viewport.Far);
+ ccv.max_depth = MAX2(ctx->Viewport.Near, ctx->Viewport.Far);
+ } else {
+ ccv.min_depth = 0.0;
+ ccv.max_depth = 1.0;
+ }
+
+ drm_intel_bo_unreference(brw->cc.vp_bo);
+ brw->cc.vp_bo = brw_cache_data(&brw->cache, BRW_CC_VP, &ccv, sizeof(ccv),
+ NULL, 0);
+}
+
+const struct brw_tracked_state gen6_cc_vp = {
+ .dirty = {
+ .mesa = _NEW_VIEWPORT | _NEW_TRANSFORM,
+ .brw = 0,
+ .cache = 0,
+ },
+ .prepare = prepare_cc_vp,
+};
+
+static void prepare_viewport_state_pointers(struct brw_context *brw)
+{
+ brw_add_validated_bo(brw, brw->sf.state_bo);
+}
+
+static void upload_viewport_state_pointers(struct brw_context *brw)
+{
+ struct intel_context *intel = &brw->intel;
+
+ BEGIN_BATCH(4);
+ OUT_BATCH(CMD_VIEWPORT_STATE_POINTERS << 16 | (4 - 2) |
+ GEN6_CC_VIEWPORT_MODIFY |
+ GEN6_SF_VIEWPORT_MODIFY |
+ GEN6_CLIP_VIEWPORT_MODIFY);
+ OUT_RELOC(brw->clip.vp_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+ OUT_RELOC(brw->sf.vp_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+ OUT_RELOC(brw->cc.vp_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+ ADVANCE_BATCH();
+
+ intel_batchbuffer_emit_mi_flush(intel->batch);
+}
+
+const struct brw_tracked_state gen6_viewport_state = {
+ .dirty = {
+ .mesa = 0,
+ .brw = BRW_NEW_BATCH,
+ .cache = (CACHE_NEW_CLIP_VP |
+ CACHE_NEW_SF_VP |
+ CACHE_NEW_CC_VP)
+ },
+ .prepare = prepare_viewport_state_pointers,
+ .emit = upload_viewport_state_pointers,
+};
diff --git a/i965/gen6_vs_state.c b/i965/gen6_vs_state.c
new file mode 100644
index 0000000..fe597df
--- /dev/null
+++ b/i965/gen6_vs_state.c
@@ -0,0 +1,119 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_util.h"
+#include "shader/prog_parameter.h"
+#include "shader/prog_statevars.h"
+#include "intel_batchbuffer.h"
+
+static void
+upload_vs_state(struct brw_context *brw)
+{
+ struct intel_context *intel = &brw->intel;
+ GLcontext *ctx = &intel->ctx;
+ const struct brw_vertex_program *vp =
+ brw_vertex_program_const(brw->vertex_program);
+ unsigned int nr_params = vp->program.Base.Parameters->NumParameters;
+ drm_intel_bo *constant_bo;
+ int i;
+
+ if (vp->use_const_buffer || nr_params == 0) {
+ /* Disable the push constant buffers. */
+ BEGIN_BATCH(5);
+ OUT_BATCH(CMD_3D_CONSTANT_VS_STATE << 16 | (5 - 2));
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+ } else {
+ if (brw->vertex_program->IsNVProgram)
+ _mesa_load_tracked_matrices(ctx);
+
+ /* Updates the ParameterValues[i] pointers for all parameters of the
+ * basic type of PROGRAM_STATE_VAR.
+ */
+ _mesa_load_state_parameters(ctx, vp->program.Base.Parameters);
+
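+ /* Copy the current parameter values into a freshly allocated push
+ * constant buffer; each parameter occupies one vec4 (four floats).
+ */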
+ constant_bo = drm_intel_bo_alloc(intel->bufmgr, "VS constant_bo",
+ nr_params * 4 * sizeof(float),
+ 4096);
+ drm_intel_gem_bo_map_gtt(constant_bo);
+ for (i = 0; i < nr_params; i++) {
+ memcpy((char *)constant_bo->virtual + i * 4 * sizeof(float),
+ vp->program.Base.Parameters->ParameterValues[i],
+ 4 * sizeof(float));
+ }
+ drm_intel_gem_bo_unmap_gtt(constant_bo);
+
+ BEGIN_BATCH(5);
+ OUT_BATCH(CMD_3D_CONSTANT_VS_STATE << 16 |
+ GEN6_CONSTANT_BUFFER_0_ENABLE |
+ (5 - 2));
+ OUT_RELOC(constant_bo,
+ I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */
+ ALIGN(nr_params, 2) / 2 - 1);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+
+ drm_intel_bo_unreference(constant_bo);
+ }
+
+ intel_batchbuffer_emit_mi_flush(intel->batch);
+
+ BEGIN_BATCH(6);
+ OUT_BATCH(CMD_3D_VS_STATE << 16 | (6 - 2));
+ OUT_RELOC(brw->vs.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+ OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) |
+ (brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+ OUT_BATCH(0); /* scratch space base offset */
+ OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) |
+ (brw->vs.prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
+ (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT));
+ OUT_BATCH((0 << GEN6_VS_MAX_THREADS_SHIFT) |
+ GEN6_VS_STATISTICS_ENABLE);
+ ADVANCE_BATCH();
+
+ intel_batchbuffer_emit_mi_flush(intel->batch);
+}
+
+const struct brw_tracked_state gen6_vs_state = {
+ .dirty = {
+ .mesa = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS,
+ .brw = (BRW_NEW_CURBE_OFFSETS |
+ BRW_NEW_NR_VS_SURFACES |
+ BRW_NEW_URB_FENCE |
+ BRW_NEW_CONTEXT),
+ .cache = CACHE_NEW_VS_PROG
+ },
+ .emit = upload_vs_state,
+};
diff --git a/i965/gen6_wm_state.c b/i965/gen6_wm_state.c
new file mode 100644
index 0000000..1eb17ca
--- /dev/null
+++ b/i965/gen6_wm_state.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_util.h"
+#include "shader/prog_parameter.h"
+#include "shader/prog_statevars.h"
+#include "intel_batchbuffer.h"
+
+static void
+upload_wm_state(struct brw_context *brw)
+{
+ struct intel_context *intel = &brw->intel;
+ GLcontext *ctx = &intel->ctx;
+ const struct brw_fragment_program *fp =
+ brw_fragment_program_const(brw->fragment_program);
+ unsigned int nr_params = fp->program.Base.Parameters->NumParameters;
+ drm_intel_bo *constant_bo;
+ int i;
+ uint32_t dw2, dw4, dw5, dw6;
+
+ if (fp->use_const_buffer || nr_params == 0) {
+ /* Disable the push constant buffers. */
+ BEGIN_BATCH(5);
+ OUT_BATCH(CMD_3D_CONSTANT_PS_STATE << 16 | (5 - 2));
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+ } else {
+ /* Updates the ParameterValues[i] pointers for all parameters of the
+ * basic type of PROGRAM_STATE_VAR.
+ */
+ _mesa_load_state_parameters(ctx, fp->program.Base.Parameters);
+
+ constant_bo = drm_intel_bo_alloc(intel->bufmgr, "WM constant_bo",
+ nr_params * 4 * sizeof(float),
+ 4096);
+ drm_intel_gem_bo_map_gtt(constant_bo);
+ for (i = 0; i < nr_params; i++) {
+ memcpy((char *)constant_bo->virtual + i * 4 * sizeof(float),
+ fp->program.Base.Parameters->ParameterValues[i],
+ 4 * sizeof(float));
+ }
+ drm_intel_gem_bo_unmap_gtt(constant_bo);
+
+ BEGIN_BATCH(5);
+ OUT_BATCH(CMD_3D_CONSTANT_PS_STATE << 16 |
+ GEN6_CONSTANT_BUFFER_0_ENABLE |
+ (5 - 2));
+ OUT_RELOC(constant_bo,
+ I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */
+ ALIGN(nr_params, 2) / 2 - 1);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+
+ drm_intel_bo_unreference(constant_bo);
+ }
+
+ intel_batchbuffer_emit_mi_flush(intel->batch);
+
+ dw2 = dw4 = dw5 = dw6 = 0;
+ dw4 |= GEN6_WM_STATISTICS_ENABLE;
+ dw5 |= GEN6_WM_LINE_AA_WIDTH_1_0;
+ dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5;
+
+ /* BRW_NEW_NR_SURFACES */
+ dw2 |= brw->wm.nr_surfaces << GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT;
+
+ /* CACHE_NEW_SAMPLER */
+ dw2 |= (ALIGN(brw->wm.sampler_count, 4) / 4) << GEN6_WM_SAMPLER_COUNT_SHIFT;
+ dw4 |= (1 << GEN6_WM_DISPATCH_START_GRF_SHIFT_0);
+
+ dw5 |= (40 - 1) << GEN6_WM_MAX_THREADS_SHIFT;
+ dw5 |= GEN6_WM_DISPATCH_ENABLE;
+
+ /* BRW_NEW_FRAGMENT_PROGRAM */
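+ /* Only an 8-wide kernel is generated on the GLSL path at this point,
+ * while the non-GLSL path uses the 16-wide kernel.
+ */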
+ if (fp->isGLSL)
+ dw5 |= GEN6_WM_8_DISPATCH_ENABLE;
+ else
+ dw5 |= GEN6_WM_16_DISPATCH_ENABLE;
+
+ /* _NEW_LINE */
+ if (ctx->Line.StippleFlag)
+ dw5 |= GEN6_WM_LINE_STIPPLE_ENABLE;
+
+ /* _NEW_POLYGONSTIPPLE */
+ if (ctx->Polygon.StippleFlag)
+ dw5 |= GEN6_WM_POLYGON_STIPPLE_ENABLE;
+
+ /* BRW_NEW_FRAGMENT_PROGRAM */
+ if (fp->program.Base.InputsRead & (1 << FRAG_ATTRIB_WPOS))
+ dw5 |= GEN6_WM_USES_SOURCE_DEPTH | GEN6_WM_USES_SOURCE_W;
+ if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
+ dw5 |= GEN6_WM_COMPUTED_DEPTH;
+
+ /* _NEW_COLOR */
+ if (fp->program.UsesKill || ctx->Color.AlphaEnabled)
+ dw5 |= GEN6_WM_KILL_ENABLE;
+
+ /* This should probably be FS inputs read */
+ dw6 |= brw_count_bits(brw->vs.prog_data->outputs_written) <<
+ GEN6_WM_NUM_SF_OUTPUTS_SHIFT;
+
+ BEGIN_BATCH(9);
+ OUT_BATCH(CMD_3D_WM_STATE << 16 | (9 - 2));
+ OUT_RELOC(brw->wm.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+ OUT_BATCH(dw2);
+ OUT_BATCH(0); /* scratch space base offset */
+ OUT_BATCH(dw4);
+ OUT_BATCH(dw5);
+ OUT_BATCH(dw6);
+ OUT_BATCH(0); /* kernel 1 pointer */
+ OUT_BATCH(0); /* kernel 2 pointer */
+ ADVANCE_BATCH();
+
+ intel_batchbuffer_emit_mi_flush(intel->batch);
+}
+
+const struct brw_tracked_state gen6_wm_state = {
+ .dirty = {
+ .mesa = _NEW_LINE | _NEW_POLYGONSTIPPLE | _NEW_COLOR,
+ .brw = (BRW_NEW_CURBE_OFFSETS |
+ BRW_NEW_FRAGMENT_PROGRAM |
+ BRW_NEW_NR_WM_SURFACES |
+ BRW_NEW_URB_FENCE |
+ BRW_NEW_BATCH),
+ .cache = CACHE_NEW_SAMPLER
+ },
+ .emit = upload_wm_state,
+};