summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Zhenyu Wang <zhenyuw@linux.intel.com> 2010-08-19 10:41:54 +0800
committer Zhenyu Wang <zhenyuw@linux.intel.com> 2010-08-19 10:41:54 +0800
commit 2ae2625133237c1f1df19c29c8e7616382256ce7 (patch)
tree 19f7bd8530b93570dc33dfc0393a35f8920f661c
parent a575067d7029c7af3bb6d650d6bd944ac8bb6bb7 (diff)
i965: get vertex position right on sandybridge
Just pull in the current hacks that get the vertex position output right on Sandybridge. The trickiest part is the align16 instruction: even though the horizontal stride is not used there, it still needs to be set to 1!

Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
-rw-r--r-- src/mesa/drivers/dri/i965/brw_context.c | 9
-rw-r--r-- src/mesa/drivers/dri/i965/brw_defines.h | 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_disasm.c | 140
-rw-r--r-- src/mesa/drivers/dri/i965/brw_draw_upload.c | 4
-rw-r--r-- src/mesa/drivers/dri/i965/brw_eu.c | 26
-rw-r--r-- src/mesa/drivers/dri/i965/brw_eu.h | 1
-rw-r--r-- src/mesa/drivers/dri/i965/brw_eu_emit.c | 110
-rw-r--r-- src/mesa/drivers/dri/i965/brw_structs.h | 16
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vs_emit.c | 16
-rw-r--r-- src/mesa/drivers/dri/i965/brw_wm_emit.c | 74
-rw-r--r-- src/mesa/drivers/dri/i965/brw_wm_glsl.c | 16
-rw-r--r-- src/mesa/drivers/dri/i965/gen6_gs_state.c | 8
-rw-r--r-- src/mesa/drivers/dri/i965/gen6_viewport_state.c | 4
-rw-r--r-- src/mesa/drivers/dri/i965/gen6_vs_state.c | 5
-rw-r--r-- src/mesa/drivers/dri/intel/intel_chipset.h | 4
-rw-r--r-- src/mesa/drivers/dri/intel/intel_clear.c | 11
-rw-r--r-- src/mesa/drivers/dri/intel/intel_context.c | 10
-rw-r--r-- src/mesa/drivers/dri/intel/intel_context.h | 2
-rw-r--r-- src/mesa/drivers/dri/intel/intel_decode.c | 80
-rw-r--r-- src/mesa/drivers/dri/intel/intel_reg.h | 12
20 files changed, 448 insertions, 102 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 6d064b822e5..d2b20165f9d 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -144,7 +144,8 @@ GLboolean brwCreateContext( int api,
brw->CMD_VF_STATISTICS = CMD_VF_STATISTICS_GM45;
brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_GM45;
brw->has_surface_tile_offset = GL_TRUE;
- brw->has_compr4 = GL_TRUE;
+ if (intel->gen < 6)
+ brw->has_compr4 = GL_TRUE;
brw->has_aa_line_parameters = GL_TRUE;
brw->has_pln = GL_TRUE;
} else {
@@ -153,7 +154,11 @@ GLboolean brwCreateContext( int api,
}
/* WM maximum threads is number of EUs times number of threads per EU. */
- if (intel->gen == 5) {
+ if (intel->gen >= 6) {
+ brw->urb.size = 1024;
+ brw->vs_max_threads = 60;
+ brw->wm_max_threads = 80;
+ } else if (intel->gen == 5) {
brw->urb.size = 1024;
brw->vs_max_threads = 72;
brw->wm_max_threads = 12 * 6;
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index f7a68cead7c..6f8439c6673 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -904,6 +904,8 @@
# define GEN6_GS_SO_STATISTICS_ENABLE (1 << 9)
# define GEN6_GS_RENDERING_ENABLE (1 << 8)
/* DW6 */
+# define GEN6_GS_REORDER_ENABLE (1 << 30)
+# define GEN6_GS_DISCARD_ADJACENY (1 << 29)
# define GEN6_GS_ENABLE (1 << 15)
#define CMD_3D_CLIP_STATE 0x7812 /* GEN6+ */
diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c
index c148dbc087a..f4d7b132f33 100644
--- a/src/mesa/drivers/dri/i965/brw_disasm.c
+++ b/src/mesa/drivers/dri/i965/brw_disasm.c
@@ -159,6 +159,11 @@ char *saturate[2] = {
[1] = ".sat"
};
+char *accwr[2] = { /* text printed for each value of the 1-bit header.acc_wr_control field; 0 prints nothing */
+ [0] = "",
+ [1] = "AccWrEnable"
+};
+
char *exec_size[8] = {
[0] = "1",
[1] = "2",
@@ -227,23 +232,34 @@ char *access_mode[2] = {
};
char *reg_encoding[8] = {
- [0] = "UD",
- [1] = "D",
- [2] = "UW",
- [3] = "W",
- [4] = "UB",
- [5] = "B",
- [7] = "F"
+ [0] = ":UD",
+ [1] = ":D",
+ [2] = ":UW",
+ [3] = ":W",
+ [4] = ":UB",
+ [5] = ":B",
+ [7] = ":F"
+};
+
+/*
+ * Size in bytes of one element for each 3-bit register type encoding,
+ * in the same order as reg_encoding[].  The disassembler divides
+ * subregister numbers by these values before printing them, so no
+ * entry may be zero.
+ *
+ * Encoding 6 has no name in reg_encoding[]; it is given a size of 1
+ * here so that a malformed instruction carrying that type cannot
+ * trigger a division by zero before the invalid encoding is reported.
+ */
+int reg_type_size[8] = {
+    [0] = 4,
+    [1] = 4,
+    [2] = 2,
+    [3] = 2,
+    [4] = 1,
+    [5] = 1,
+    [6] = 1,
+    [7] = 4
};
char *imm_encoding[8] = {
- [0] = "UD",
- [1] = "D",
- [2] = "UW",
- [3] = "W",
- [5] = "VF",
- [6] = "V",
- [7] = "F"
+ [0] = ":UD",
+ [1] = ":D",
+ [2] = ":UW",
+ [3] = ":W",
+ [4] = ":UV",
+ [5] = ":VF",
+ [6] = ":V",
+ [7] = ":F"
};
char *reg_file[4] = {
@@ -482,7 +498,8 @@ static int dest (FILE *file, struct brw_instruction *inst)
if (err == -1)
return 0;
if (inst->bits1.da1.dest_subreg_nr)
- format (file, ".%d", inst->bits1.da1.dest_subreg_nr);
+ format (file, ".%d", inst->bits1.da1.dest_subreg_nr /
+ reg_type_size[inst->bits1.da1.dest_reg_type]);
format (file, "<%d>", inst->bits1.da1.dest_horiz_stride);
err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da1.dest_reg_type, NULL);
}
@@ -490,7 +507,8 @@ static int dest (FILE *file, struct brw_instruction *inst)
{
string (file, "g[a0");
if (inst->bits1.ia1.dest_subreg_nr)
- format (file, ".%d", inst->bits1.ia1.dest_subreg_nr);
+ format (file, ".%d", inst->bits1.ia1.dest_subreg_nr /
+ reg_type_size[inst->bits1.ia1.dest_reg_type]);
if (inst->bits1.ia1.dest_indirect_offset)
format (file, " %d", inst->bits1.ia1.dest_indirect_offset);
string (file, "]");
@@ -506,7 +524,8 @@ static int dest (FILE *file, struct brw_instruction *inst)
if (err == -1)
return 0;
if (inst->bits1.da16.dest_subreg_nr)
- format (file, ".%d", inst->bits1.da16.dest_subreg_nr);
+ format (file, ".%d", inst->bits1.da16.dest_subreg_nr /
+ reg_type_size[inst->bits1.da16.dest_reg_type]);
string (file, "<1>");
err |= control (file, "writemask", writemask, inst->bits1.da16.dest_writemask, NULL);
err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da16.dest_reg_type, NULL);
@@ -547,7 +566,7 @@ static int src_da1 (FILE *file, GLuint type, GLuint _reg_file,
if (err == -1)
return 0;
if (sub_reg_num)
- format (file, ".%d", sub_reg_num);
+ format (file, ".%d", sub_reg_num / reg_type_size[type]); /* use formal style like spec */
src_align1_region (file, _vert_stride, _width, _horiz_stride);
err |= control (file, "src reg encoding", reg_encoding, type, NULL);
return err;
@@ -601,11 +620,12 @@ static int src_da16 (FILE *file,
if (err == -1)
return 0;
if (_subreg_nr)
- format (file, ".%d", _subreg_nr);
+ /* bit4 for subreg number byte addressing. Make this same meaning as
+ in da1 case, so output looks consistent. */
+ format (file, ".%d", 16 / reg_type_size[_reg_type]);
string (file, "<");
err |= control (file, "vert stride", vert_stride, _vert_stride, NULL);
string (file, ",4,1>");
- err |= control (file, "src da16 reg type", reg_encoding, _reg_type, NULL);
/*
* Three kinds of swizzle display:
* identity - nothing printed
@@ -617,13 +637,17 @@ static int src_da16 (FILE *file,
swz_z == BRW_CHANNEL_Z &&
swz_w == BRW_CHANNEL_W)
{
- ;
+ string (file, ".xyzw");
}
+/*
+ XXX try to also print all channels for 1->all.
+
else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w)
{
string (file, ".");
err |= control (file, "channel select", chan_sel, swz_x, NULL);
}
+*/
else
{
string (file, ".");
@@ -632,6 +656,7 @@ static int src_da16 (FILE *file,
err |= control (file, "channel select", chan_sel, swz_z, NULL);
err |= control (file, "channel select", chan_sel, swz_w, NULL);
}
+ err |= control (file, "src da16 reg type", reg_encoding, _reg_type, NULL);
return err;
}
@@ -785,11 +810,56 @@ static int src1 (FILE *file, struct brw_instruction *inst)
}
}
+/*
+ * Channel count for each encoding of the 3-bit header.execution_size
+ * field.  The table is sized for every possible field value; encodings
+ * 6 and 7 have no initializer, read as 0, and therefore match neither
+ * of the widths handled in qtr_ctl().
+ */
+int esize[8] = {
+    [0] = 1,
+    [1] = 2,
+    [2] = 4,
+    [3] = 8,
+    [4] = 16,
+    [5] = 32,
+};
+
+/*
+ * Print the quarter/half control annotation of an instruction.
+ *
+ * header.compression_control is interpreted as a quarter selector:
+ * an 8-wide instruction prints " 1Q" .. " 4Q", a 16-wide instruction
+ * prints " 1H" (selector 0 or 1) or " 2H" (selector 2 or 3).  Any other
+ * execution width prints nothing.
+ *
+ * Always returns 0, so the caller can OR the result into its error
+ * accumulator like the other printing helpers.
+ */
+static int qtr_ctl(FILE *file, struct brw_instruction *inst)
+{
+    /* Locals are named so they do not shadow this function or the
+     * file-scope exec_size[] string table. */
+    int quarter = inst->header.compression_control;
+    int width = esize[inst->header.execution_size];
+
+    if (width == 8) {
+        switch (quarter) {
+        case 0:
+            string (file, " 1Q");
+            break;
+        case 1:
+            string (file, " 2Q");
+            break;
+        case 2:
+            string (file, " 3Q");
+            break;
+        case 3:
+            string (file, " 4Q");
+            break;
+        default:
+            break;
+        }
+    } else if (width == 16) {
+        if (quarter < 2)
+            string (file, " 1H");
+        else
+            string (file, " 2H");
+    }
+
+    return 0;
+}
+
int brw_disasm (FILE *file, struct brw_instruction *inst, int gen)
{
int err = 0;
int space = 0;
+#if 1
+ fprintf(stderr, "[0x%08x 0x%08x 0x%08x 0x%08x]:\n",
+ inst->bits3.ud,
+ inst->bits2.ud,
+ inst->bits1.ud,
+ *((unsigned int *)&inst->header));
+#endif
+
if (inst->header.predicate_control) {
string (file, "(");
err |= control (file, "predicate inverse", pred_inv, inst->header.predicate_inverse, NULL);
@@ -935,6 +1005,11 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen)
inst->bits3.urb.used, &space);
err |= control (file, "urb complete", urb_complete,
inst->bits3.urb.complete, &space);
+ if (gen >= 5) {
+ format (file, " msg_length %d, reponse_length %d\n",
+ inst->bits3.urb_gen5.msg_length,
+ inst->bits3.urb_gen5.response_length);
+ }
break;
case BRW_MESSAGE_TARGET_THREAD_SPAWNER:
break;
@@ -964,16 +1039,23 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen)
err |= control (file, "mask control", mask_ctrl, inst->header.mask_control, &space);
err |= control (file, "dependency control", dep_ctrl, inst->header.dependency_control, &space);
- if (inst->header.compression_control == BRW_COMPRESSION_COMPRESSED &&
- opcode[inst->header.opcode].ndst > 0 &&
- inst->bits1.da1.dest_reg_file == BRW_MESSAGE_REGISTER_FILE &&
- inst->bits1.da1.dest_reg_nr & (1 << 7)) {
- format (file, " compr4");
- } else {
- err |= control (file, "compression control", compr_ctrl,
- inst->header.compression_control, &space);
+ if (gen >= 6)
+ err |= qtr_ctl (file, inst);
+ else {
+ if (inst->header.compression_control == BRW_COMPRESSION_COMPRESSED &&
+ opcode[inst->header.opcode].ndst > 0 &&
+ inst->bits1.da1.dest_reg_file == BRW_MESSAGE_REGISTER_FILE &&
+ inst->bits1.da1.dest_reg_nr & (1 << 7)) {
+ format (file, " compr4");
+ } else {
+ err |= control (file, "compression control", compr_ctrl,
+ inst->header.compression_control, &space);
+ }
}
+
err |= control (file, "thread control", thread_ctrl, inst->header.thread_control, &space);
+ if (gen >= 6)
+ err |= control (file, "acc write control", accwr, inst->header.acc_wr_control, &space);
if (inst->header.opcode == BRW_OPCODE_SEND)
err |= control (file, "end of thread", end_of_thread,
inst->bits3.generic.end_of_thread, &space);
diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index f07aab86e90..249e874ab1a 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -476,7 +476,7 @@ static void brw_emit_vertices(struct brw_context *brw)
if (brw->vb.nr_enabled == 0) {
BEGIN_BATCH(3);
OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | 1);
- if (IS_GEN6(intel->intelScreen->deviceID)) {
+ if (intel->gen >= 6) {
OUT_BATCH((0 << GEN6_VE0_INDEX_SHIFT) |
GEN6_VE0_VALID |
(BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
@@ -553,7 +553,7 @@ static void brw_emit_vertices(struct brw_context *brw)
break;
}
- if (IS_GEN6(intel->intelScreen->deviceID)) {
+ if (intel->gen >= 6) {
OUT_BATCH((i << GEN6_VE0_INDEX_SHIFT) |
GEN6_VE0_VALID |
(format << BRW_VE0_FORMAT_SHIFT) |
diff --git a/src/mesa/drivers/dri/i965/brw_eu.c b/src/mesa/drivers/dri/i965/brw_eu.c
index 4e7c1226ad4..e1f04906bc5 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.c
+++ b/src/mesa/drivers/dri/i965/brw_eu.c
@@ -72,12 +72,29 @@ void brw_set_access_mode( struct brw_compile *p, GLuint access_mode )
void brw_set_compression_control( struct brw_compile *p, GLboolean compression_control )
{
- p->current->header.compression_control = compression_control;
+#if 0
+ struct brw_context *brw = p->brw;
+ struct intel_context *intel = &brw->intel;
+
+ /* XXX gen6 has new quarter control, no more compression control */
+ if (intel->gen >= 6)
+ p->current->header.compression_control = 0;
+ else
+#endif
+ /* XXX used for exec_size! */
+ p->current->header.compression_control = compression_control;
}
void brw_set_mask_control( struct brw_compile *p, GLuint value )
{
- p->current->header.mask_control = value;
+ struct brw_context *brw = p->brw;
+ struct intel_context *intel = &brw->intel;
+
+ /* Hack to always set normal mask */
+ if (intel->gen >= 6)
+ p->current->header.mask_control = BRW_WE_NORMAL;
+ else
+ p->current->header.mask_control = value;
}
void brw_set_saturate( struct brw_compile *p, GLuint value )
@@ -85,6 +102,11 @@ void brw_set_saturate( struct brw_compile *p, GLuint value )
p->current->header.saturate = value;
}
+void brw_set_acc_write_control(struct brw_compile *p, GLuint value) /* stores value in the acc_wr_control header bit of p->current */
+{
+ p->current->header.acc_wr_control = value;
+}
+
void brw_push_insn_state( struct brw_compile *p )
{
assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]);
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index ffdddd0a388..1c938542e1d 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -766,6 +766,7 @@ void brw_set_compression_control( struct brw_compile *p, GLboolean control );
void brw_set_predicate_control_flag_value( struct brw_compile *p, GLuint value );
void brw_set_predicate_control( struct brw_compile *p, GLuint pc );
void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional );
+void brw_set_acc_write_control(struct brw_compile *p, GLuint value);
void brw_init_compile( struct brw_context *, struct brw_compile *p );
const GLuint *brw_get_program( struct brw_compile *p, GLuint *sz );
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 0d5d17f501d..25ce1bf41f4 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -75,6 +75,8 @@ static void brw_set_dest( struct brw_instruction *insn,
else {
insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
+ /* even ignored in da16, still need to set as '01' */
+ insn->bits1.da16.dest_horiz_stride = 1;
}
}
else {
@@ -90,6 +92,8 @@ static void brw_set_dest( struct brw_instruction *insn,
}
else {
insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
+ /* even ignored in da16, still need to set as '01' */
+ insn->bits1.ia16.dest_horiz_stride = 1;
}
}
@@ -338,6 +342,8 @@ static void brw_set_urb_message( struct brw_context *brw,
* below on Ironlake even do anything?
*/
insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
+ insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
+ insn->bits2.send_gen5.end_of_thread = end_of_thread;
} else {
insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
insn->bits2.send_gen5.end_of_thread = end_of_thread;
@@ -368,9 +374,23 @@ static void brw_set_dp_write_message( struct brw_context *brw,
GLuint send_commit_msg)
{
struct intel_context *intel = &brw->intel;
- brw_set_src1(insn, brw_imm_d(0));
+ brw_set_src1(insn, brw_imm_ud(0));
- if (intel->gen == 5) {
+ if (intel->gen >= 6) {
+ insn->bits3.dp_render_cache.binding_table_index = binding_table_index;
+ insn->bits3.dp_render_cache.msg_control = msg_control;
+ insn->bits3.dp_render_cache.pixel_scoreboard_clear = pixel_scoreboard_clear;
+ insn->bits3.dp_render_cache.msg_type = msg_type;
+ insn->bits3.dp_render_cache.send_commit_msg = send_commit_msg;
+ insn->bits3.dp_render_cache.header_present = 0; /* XXX */
+ insn->bits3.dp_render_cache.response_length = response_length;
+ insn->bits3.dp_render_cache.msg_length = msg_length;
+ insn->bits3.dp_render_cache.end_of_thread = end_of_thread;
+ insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
+ /* XXX really need below? */
+ insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
+ insn->bits2.send_gen5.end_of_thread = end_of_thread;
+ } else if (intel->gen == 5) {
insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
insn->bits3.dp_write_gen5.msg_control = msg_control;
insn->bits3.dp_write_gen5.pixel_scoreboard_clear = pixel_scoreboard_clear;
@@ -1340,24 +1360,72 @@ void brw_fb_WRITE(struct brw_compile *p,
GLuint response_length,
GLboolean eot)
{
- struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
-
- insn->header.predicate_control = 0; /* XXX */
- insn->header.compression_control = BRW_COMPRESSION_NONE;
- insn->header.destreg__conditionalmod = msg_reg_nr;
-
- brw_set_dest(insn, dest);
- brw_set_src0(insn, src0);
- brw_set_dp_write_message(p->brw,
- insn,
- binding_table_index,
- BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */
- BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */
- msg_length,
- 1, /* pixel scoreboard */
- response_length,
- eot,
- 0 /* send_commit_msg */);
+ struct intel_context *intel = &p->brw->intel;
+ struct brw_instruction *insn;
+
+ fprintf(stderr, "brw_fb_WRITE: msg %d, bind %d, msg_len %d, response len %d\n",
+ msg_reg_nr, binding_table_index, msg_length, response_length);
+
+ if (intel->gen >= 6) {
+ /* headerless version, just submit color payload */
+ src0 = brw_message_reg(msg_reg_nr);
+ insn = next_insn(p, BRW_OPCODE_SEND);
+
+ insn->header.predicate_control = 0; /* XXX */
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src0);
+
+ msg_length -= 2; //XXX remove old header?
+
+ /* as gen6 SIMD16 write changed from previous layout, should
+ care for SIMD8 write. use the real SIMD8 write source control */
+ if (msg_length >= 8) {
+ fprintf(stderr, "[WM] SIMD16 fb write\n");
+ brw_set_dp_write_message(p->brw,
+ insn,
+ binding_table_index,
+ BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */
+ BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE_GEN6, /* msg_type */
+ msg_length,
+ 1, /* pixel scoreboard */
+ response_length,
+ eot,
+ 0);
+ } else {
+
+ fprintf(stderr, "[WM] SIMD8 fb write\n");
+ brw_set_dp_write_message(p->brw,
+ insn,
+ binding_table_index,
+ BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01, /* msg_control */
+ BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE_GEN6, /* msg_type */
+ msg_length,
+ 1, /* pixel scoreboard */
+ response_length,
+ eot,
+ 0);
+ }
+ } else {
+ insn = next_insn(p, BRW_OPCODE_SEND);
+ insn->header.predicate_control = 0; /* XXX */
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src0);
+ brw_set_dp_write_message(p->brw,
+ insn,
+ binding_table_index,
+ BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */
+ BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */
+ msg_length,
+ 1, /* pixel scoreboard */
+ response_length,
+ eot,
+ 0 /* send_commit_msg */);
+ }
}
@@ -1520,7 +1588,7 @@ void brw_urb_WRITE(struct brw_compile *p,
brw_set_dest(insn, dest);
brw_set_src0(insn, src0);
- brw_set_src1(insn, brw_imm_d(0));
+ brw_set_src1(insn, brw_imm_ud(0));
if (intel->gen < 6)
insn->header.destreg__conditionalmod = msg_reg_nr;
diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h
index 2fde42a7060..851a4d92abb 100644
--- a/src/mesa/drivers/dri/i965/brw_structs.h
+++ b/src/mesa/drivers/dri/i965/brw_structs.h
@@ -1210,10 +1210,9 @@ struct brw_surface_state
struct {
GLuint pad1:16;
- GLuint llc_mapping:1;
- GLuint mlc_mapping:1;
+ GLuint cache_control:2;
GLuint gfdt:1;
- GLuint gfdt_src:1;
+ GLuint encrypt:1;
GLuint y_offset:4;
GLuint pad0:1;
GLuint x_offset:7;
@@ -1305,13 +1304,14 @@ struct brw_instruction
GLuint access_mode:1;
GLuint mask_control:1;
GLuint dependency_control:2;
- GLuint compression_control:2;
+ GLuint compression_control:2; /* gen6: quater control */
GLuint thread_control:2;
GLuint predicate_control:4;
GLuint predicate_inverse:1;
GLuint execution_size:3;
GLuint destreg__conditionalmod:4; /* destreg - send, conditionalmod - others */
- GLuint pad0:2;
+ GLuint acc_wr_control:1;
+ GLuint cmpt_control:1;
GLuint debug_control:1;
GLuint saturate:1;
} header;
@@ -1359,7 +1359,7 @@ struct brw_instruction
GLuint dest_writemask:4;
GLuint dest_subreg_nr:1;
GLuint dest_reg_nr:8;
- GLuint pad1:2;
+ GLuint dest_horiz_stride:2;
GLuint dest_address_mode:1;
} da16;
@@ -1373,9 +1373,10 @@ struct brw_instruction
GLuint dest_writemask:4;
GLint dest_indirect_offset:6;
GLuint dest_subreg_nr:3;
- GLuint pad1:2;
+ GLuint dest_horiz_stride:2;
GLuint dest_address_mode:1;
} ia16;
+ GLuint ud;
} bits1;
@@ -1450,6 +1451,7 @@ struct brw_instruction
GLuint sfid:4;
} send_gen5; /* for Ironlake only */
+ GLuint ud;
} bits2;
union
diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index 7b946eb0d8e..ff21bbe4208 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -121,7 +121,11 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
else
c->vp->use_const_buffer = GL_FALSE;
- /*printf("use_const_buffer = %d\n", c->vp->use_const_buffer);*/
+ /* Gen6 only support push constant */
+ if (intel->gen >= 6)
+ c->vp->use_const_buffer = GL_FALSE;
+
+ fprintf(stderr, "[VS] use_const_buffer = %d\n", c->vp->use_const_buffer);
/* r0 -- reserved as usual
*/
@@ -1348,8 +1352,11 @@ static void emit_vertex_write( struct brw_vs_compile *c)
if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
struct brw_reg psiz = c->regs[PROGRAM_OUTPUT][VERT_RESULT_PSIZ];
- brw_MUL(p, brw_writemask(header1, WRITEMASK_W), brw_swizzle1(psiz, 0), brw_imm_f(1<<11));
- brw_AND(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(0x7ff<<8));
+ if (intel->gen < 6) {
+ brw_MUL(p, brw_writemask(header1, WRITEMASK_W), brw_swizzle1(psiz, 0), brw_imm_f(1<<11));
+ brw_AND(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(0x7ff<<8));
+ } else
+ brw_MOV(p, brw_writemask(header1, WRITEMASK_W), brw_swizzle1(psiz, 0));
}
for (i = 0; i < c->key.nr_userclip; i++) {
@@ -1394,6 +1401,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
* of zeros followed by two sets of NDC coordinates:
*/
brw_set_access_mode(p, BRW_ALIGN_1);
+ brw_set_acc_write_control(p, 0);
/* The VUE layout is documented in Volume 2a. */
if (intel->gen >= 6) {
@@ -1572,6 +1580,8 @@ void brw_vs_emit(struct brw_vs_compile *c )
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_access_mode(p, BRW_ALIGN_16);
+ if (intel->gen >= 6)
+ brw_set_acc_write_control(p, 1);
for (insn = 0; insn < nr_insns; insn++) {
GLuint i;
diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c
index f01fffbd5c8..dbbd797132c 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c
@@ -173,6 +173,7 @@ void emit_delta_xy(struct brw_compile *p,
GLuint mask,
const struct brw_reg *arg0)
{
+ struct intel_context *intel = &p->brw->intel;
struct brw_reg r1 = brw_vec1_grf(1, 0);
if (mask == 0)
@@ -180,6 +181,21 @@ void emit_delta_xy(struct brw_compile *p,
assert(mask == WRITEMASK_XY);
+ if (intel->gen >= 6) {
+ /* XXX Gen6 WM doesn't have Xstart/Ystart in payload r1.0/r1.1.
+ Just add them with 0.0 for dst reg.. */
+ r1 = brw_imm_v(0x00000000);
+ brw_ADD(p,
+ dst[0],
+ retype(arg0[0], BRW_REGISTER_TYPE_UW),
+ r1);
+ brw_ADD(p,
+ dst[1],
+ retype(arg0[1], BRW_REGISTER_TYPE_UW),
+ r1);
+ return;
+ }
+
/* Calc delta X,Y by subtracting origin in r1 from the pixel
* centers produced by emit_pixel_xy().
*/
@@ -253,6 +269,13 @@ void emit_pixel_w(struct brw_wm_compile *c,
{
struct brw_compile *p = &c->func;
struct intel_context *intel = &p->brw->intel;
+ struct brw_reg src;
+ struct brw_reg temp_dst;
+
+ if (intel->gen >= 6)
+ temp_dst = dst[3];
+ else
+ temp_dst = brw_message_reg(2);
/* Don't need this if all you are doing is interpolating color, for
* instance.
@@ -264,24 +287,29 @@ void emit_pixel_w(struct brw_wm_compile *c,
* result straight into a message reg.
*/
if (can_do_pln(intel, deltas)) {
- brw_PLN(p, brw_message_reg(2), interp3, deltas[0]);
+ brw_PLN(p, temp_dst, interp3, deltas[0]);
} else {
brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
- brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
+ brw_MAC(p, temp_dst, suboffset(interp3, 1), deltas[1]);
}
/* Calc w */
+ if (intel->gen >= 6)
+ src = temp_dst;
+ else
+ src = brw_null_reg();
+
if (c->dispatch_width == 16) {
brw_math_16(p, dst[3],
BRW_MATH_FUNCTION_INV,
BRW_MATH_SATURATE_NONE,
- 2, brw_null_reg(),
+ 2, src,
BRW_MATH_PRECISION_FULL);
} else {
brw_math(p, dst[3],
BRW_MATH_FUNCTION_INV,
BRW_MATH_SATURATE_NONE,
- 2, brw_null_reg(),
+ 2, src,
BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);
}
@@ -1230,6 +1258,7 @@ static void fire_fb_write( struct brw_wm_compile *c,
GLuint eot )
{
struct brw_compile *p = &c->func;
+ struct intel_context *intel = &p->brw->intel;
struct brw_reg dst;
if (c->dispatch_width == 16)
@@ -1240,6 +1269,7 @@ static void fire_fb_write( struct brw_wm_compile *c,
/* Pass through control information:
*/
/* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
+ if (intel->gen < 6) /* gen6, use headerless for fb write */
{
brw_push_insn_state(p);
brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
@@ -1294,8 +1324,11 @@ void emit_fb_write(struct brw_wm_compile *c,
{
struct brw_compile *p = &c->func;
struct brw_context *brw = p->brw;
+ struct intel_context *intel = &brw->intel;
GLuint nr = 2;
GLuint channel;
+ int step = 0, inc;
+ int base_reg; /* For gen6 fb write with no header, starting from color payload directly!. */
/* Reserve a space for AA - may not be needed:
*/
@@ -1307,6 +1340,17 @@ void emit_fb_write(struct brw_wm_compile *c,
*/
brw_push_insn_state(p);
+ if (intel->gen >= 6)
+ base_reg = nr;
+ else
+ base_reg = 0;
+
+ /* XXX hack for const output...*/
+ if (c->dispatch_width == 16)
+ inc = 2;
+ else
+ inc = 1;
+
for (channel = 0; channel < 4; channel++) {
if (c->dispatch_width == 16 && brw->has_compr4) {
/* By setting the high bit of the MRF register number, we indicate
@@ -1316,6 +1360,16 @@ void emit_fb_write(struct brw_wm_compile *c,
brw_MOV(p,
brw_message_reg(nr + channel + BRW_MRF_COMPR4),
arg0[channel]);
+ } else if (intel->gen >= 6) {
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_MOV(p, brw_message_reg(nr + channel + step), arg0[channel]);
+ if (c->dispatch_width == 16) {
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_MOV(p,
+ brw_message_reg(nr + channel + step + 1),
+ sechalf(arg0[channel]));
+ ++step;
+ }
} else {
/* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
/* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
@@ -1334,7 +1388,12 @@ void emit_fb_write(struct brw_wm_compile *c,
}
/* skip over the regs populated above:
*/
- nr += 8;
+ /* XXX care for SIMD8 write? */
+ if (c->dispatch_width == 16)
+ nr += 8;
+ else
+ nr += 4;
+
brw_pop_insn_state(p);
if (c->key.source_depth_to_render_target)
@@ -1371,7 +1430,7 @@ void emit_fb_write(struct brw_wm_compile *c,
if (c->key.aa_dest_stencil_reg)
emit_aa(c, arg1, 2);
- fire_fb_write(c, 0, nr, target, eot);
+ fire_fb_write(c, base_reg, nr, target, eot);
}
else {
struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
@@ -1496,9 +1555,12 @@ static void spill_values( struct brw_wm_compile *c,
void brw_wm_emit( struct brw_wm_compile *c )
{
struct brw_compile *p = &c->func;
+ struct intel_context *intel = &p->brw->intel;
GLuint insn;
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+ if (intel->gen >= 6)
+ brw_set_acc_write_control(p, 1);
/* Check if any of the payload regs need to be spilled:
*/
diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
index f13b0aaf957..ebce4857deb 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
@@ -429,7 +429,7 @@ static void prealloc_reg(struct brw_wm_compile *c)
if (inst->DstReg.WriteMask == WRITEMASK_XY &&
!c->wm_regs[inst->DstReg.File][inst->DstReg.Index][0].inited &&
!c->wm_regs[inst->DstReg.File][inst->DstReg.Index][1].inited &&
- (IS_G4X(intel->intelScreen->deviceID) || intel->gen == 5)) {
+ (IS_G4X(intel->intelScreen->deviceID) || intel->gen >= 5)) {
int grf;
for (grf = c->first_free_grf & ~1;
@@ -1813,6 +1813,9 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack));
+ if (intel->gen >= 6)
+ brw_set_acc_write_control(p, 1);
+
for (i = 0; i < c->nr_fp_insns; i++) {
const struct prog_instruction *inst = &c->prog_instructions[i];
int dst_flags;
@@ -1846,10 +1849,13 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
if (inst->SaturateMode == SATURATE_ZERO_ONE)
dst_flags |= SATURATE;
- if (inst->CondUpdate)
- brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
- else
- brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE);
+ /* gen6 has different usage for conditionalmod */
+ if (intel->gen < 6) {
+ if (inst->CondUpdate)
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ else
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE);
+ }
switch (inst->Opcode) {
case WM_PIXELXY:
diff --git a/src/mesa/drivers/dri/i965/gen6_gs_state.c b/src/mesa/drivers/dri/i965/gen6_gs_state.c
index cefc93ba48b..4dcd82b7692 100644
--- a/src/mesa/drivers/dri/i965/gen6_gs_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_gs_state.c
@@ -54,15 +54,19 @@ upload_gs_state(struct brw_context *brw)
(0 << GEN6_GS_SAMPLER_COUNT_SHIFT) |
(0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
OUT_BATCH(0); /* scratch space base offset */
+ fprintf(stderr, "[GS] vertex urb read length %d\n", brw->gs.prog_data->urb_read_length);
OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) |
(brw->gs.prog_data->urb_read_length << GEN6_GS_URB_READ_LENGTH_SHIFT) |
(0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT));
OUT_BATCH((0 << GEN6_GS_MAX_THREADS_SHIFT) |
GEN6_GS_STATISTICS_ENABLE |
- GEN6_GS_RENDERING_ENABLE);
- OUT_BATCH(GEN6_GS_ENABLE);
+ GEN6_GS_RENDERING_ENABLE); /* XXX true for debug VS? */
+ OUT_BATCH(GEN6_GS_DISCARD_ADJACENY |
+ //GEN6_GS_ENABLE);
+ 0);
ADVANCE_BATCH();
} else {
+ fprintf(stderr, "[GS] disabled\n");
BEGIN_BATCH(7);
OUT_BATCH(CMD_3D_GS_STATE << 16 | (7 - 2));
OUT_BATCH(0); /* prog_bo */
diff --git a/src/mesa/drivers/dri/i965/gen6_viewport_state.c b/src/mesa/drivers/dri/i965/gen6_viewport_state.c
index 301c68e7f9e..84bea323f8a 100644
--- a/src/mesa/drivers/dri/i965/gen6_viewport_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_viewport_state.c
@@ -107,7 +107,9 @@ const struct brw_tracked_state gen6_sf_vp = {
static void prepare_viewport_state_pointers(struct brw_context *brw)
{
- brw_add_validated_bo(brw, brw->sf.state_bo);
+ brw_add_validated_bo(brw, brw->clip.vp_bo);
+ brw_add_validated_bo(brw, brw->sf.vp_bo);
+ brw_add_validated_bo(brw, brw->cc.vp_bo);
}
static void upload_viewport_state_pointers(struct brw_context *brw)
diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c
index 4080a9dedfd..b97af92df74 100644
--- a/src/mesa/drivers/dri/i965/gen6_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
@@ -44,7 +44,10 @@ upload_vs_state(struct brw_context *brw)
drm_intel_bo *constant_bo;
int i;
- if (vp->use_const_buffer || nr_params == 0) {
+ fprintf(stderr, "[VS] nr_params %d\n", nr_params);
+
+ if (nr_params == 0) {
+ fprintf(stderr, "[VS] disable constant buffer\n");
/* Disable the push constant buffers. */
BEGIN_BATCH(5);
OUT_BATCH(CMD_3D_CONSTANT_VS_STATE << 16 | (5 - 2));
diff --git a/src/mesa/drivers/dri/intel/intel_chipset.h b/src/mesa/drivers/dri/intel/intel_chipset.h
index 72a74322ee5..b5f180bbc88 100644
--- a/src/mesa/drivers/dri/intel/intel_chipset.h
+++ b/src/mesa/drivers/dri/intel/intel_chipset.h
@@ -73,6 +73,7 @@
#define PCI_CHIP_SANDYBRIDGE 0x0102
#define PCI_CHIP_SANDYBRIDGE_M 0x0106
+#define PCI_CHIP_SANDYBRIDGE_M_D0 0x0126
#define IS_MOBILE(devid) (devid == PCI_CHIP_I855_GM || \
devid == PCI_CHIP_I915_GM || \
@@ -119,7 +120,8 @@
#define IS_IRONLAKE(devid) IS_GEN5(devid)
#define IS_GEN6(devid) (devid == PCI_CHIP_SANDYBRIDGE || \
- devid == PCI_CHIP_SANDYBRIDGE_M)
+ devid == PCI_CHIP_SANDYBRIDGE_M || \
+ devid == PCI_CHIP_SANDYBRIDGE_M_D0)
#define IS_965(devid) (IS_GEN4(devid) || \
IS_G4X(devid) || \
diff --git a/src/mesa/drivers/dri/intel/intel_clear.c b/src/mesa/drivers/dri/intel/intel_clear.c
index 3c221188660..432ffe50d83 100644
--- a/src/mesa/drivers/dri/intel/intel_clear.c
+++ b/src/mesa/drivers/dri/intel/intel_clear.c
@@ -76,7 +76,7 @@ intelClear(GLcontext *ctx, GLbitfield mask)
intel->front_buffer_dirty = GL_TRUE;
}
- if (0)
+ if (1)
fprintf(stderr, "%s\n", __FUNCTION__);
/* HW color buffers (front, back, aux, generic FBO, etc) */
@@ -146,6 +146,15 @@ intelClear(GLcontext *ctx, GLbitfield mask)
/* SW fallback clearing */
swrast_mask = mask & ~tri_mask & ~blit_mask;
+#if 1
+ /* XXX debug */
+ if (intel->gen >= 6) {
+ blit_mask = 0;
+ tri_mask = 0;
+ swrast_mask = 1;
+ }
+#endif
+
{
/* look for non-Intel renderbuffers (clear them with swrast) */
GLbitfield blit_or_tri = blit_mask | tri_mask;
diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c
index e19f44035fd..f2c901b6628 100644
--- a/src/mesa/drivers/dri/intel/intel_context.c
+++ b/src/mesa/drivers/dri/intel/intel_context.c
@@ -508,6 +508,7 @@ static const struct dri_debug_control debug_control[] = {
{ "urb", DEBUG_URB },
{ "vs", DEBUG_VS },
{ "clip", DEBUG_CLIP },
+ { "aub", DEBUG_AUB },
{ NULL, 0 }
};
@@ -783,6 +784,15 @@ intelInitContext(struct intel_context *intel,
if (INTEL_DEBUG & DEBUG_BUFMGR)
dri_bufmgr_set_debug(intel->bufmgr, GL_TRUE);
+ if (INTEL_DEBUG & DEBUG_AUB) {
+ fprintf(stderr, "Enable Aub file dump.\n");
+ intel->aub_file = fopen("i965.aub", "w");
+ if (intel->aub_file)
+ drm_intel_bufmgr_gem_set_aubfile(intel->bufmgr, intel->aub_file);
+ else
+ fprintf(stderr, "Fail to create aub file.\n");
+ }
+
intel->batch = intel_batchbuffer_alloc(intel);
intel_fbo_init(intel);
diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h
index f245ba843bd..89babe7f943 100644
--- a/src/mesa/drivers/dri/intel/intel_context.h
+++ b/src/mesa/drivers/dri/intel/intel_context.h
@@ -250,6 +250,7 @@ struct intel_context
* Configuration cache
*/
driOptionCache optionCache;
+ FILE *aub_file;
};
extern char *__progname;
@@ -344,6 +345,7 @@ extern int INTEL_DEBUG;
#define DEBUG_VS 0x2000000
#define DEBUG_GLSL_FORCE 0x4000000
#define DEBUG_CLIP 0x8000000
+#define DEBUG_AUB 0x10000000
#define DBG(...) do { \
if (INTEL_DEBUG & FILE_DEBUG_FLAG) \
diff --git a/src/mesa/drivers/dri/intel/intel_decode.c b/src/mesa/drivers/dri/intel/intel_decode.c
index 25b4131594f..d1dcb1e1cfb 100644
--- a/src/mesa/drivers/dri/intel/intel_decode.c
+++ b/src/mesa/drivers/dri/intel/intel_decode.c
@@ -1582,14 +1582,15 @@ decode_3d_965(uint32_t *data, int count, uint32_t hw_offset, uint32_t devid, int
{ 0x6101, 6, 6, "STATE_BASE_ADDRESS" },
{ 0x6102, 2, 2 , "STATE_SIP" },
{ 0x6104, 1, 1, "3DSTATE_PIPELINE_SELECT" },
+ { 0x6904, 1, 1, "3DSTATE_PIPELINE_SELECT_CTG+" },
{ 0x680b, 1, 1, "3DSTATE_VF_STATISTICS" },
{ 0x6904, 1, 1, "3DSTATE_PIPELINE_SELECT" },
{ 0x7800, 7, 7, "3DSTATE_PIPELINED_POINTERS" },
{ 0x7801, 6, 6, "3DSTATE_BINDING_TABLE_POINTERS" },
- { 0x780b, 1, 1, "3DSTATE_VF_STATISTICS" },
{ 0x7808, 5, 257, "3DSTATE_VERTEX_BUFFERS" },
{ 0x7809, 3, 256, "3DSTATE_VERTEX_ELEMENTS" },
{ 0x780a, 3, 3, "3DSTATE_INDEX_BUFFER" },
+ { 0x780b, 1, 1, "3DSTATE_VF_STATISTICS" },
{ 0x7900, 4, 4, "3DSTATE_DRAWING_RECTANGLE" },
{ 0x7901, 5, 5, "3DSTATE_CONSTANT_COLOR" },
{ 0x7905, 5, 7, "3DSTATE_DEPTH_BUFFER" },
@@ -1601,16 +1602,19 @@ decode_3d_965(uint32_t *data, int count, uint32_t hw_offset, uint32_t devid, int
{ 0x790a, 3, 3, "3DSTATE_AA_LINE_PARAMETERS" },
{ 0x790b, 4, 4, "3DSTATE_GS_SVB_INDEX" },
{ 0x790d, 3, 3, "3DSTATE_MULTISAMPLE" },
+ { 0x7910, 2, 2, "3DSTATE_CLEAR_PARAMS" },
{ 0x7b00, 6, 6, "3DPRIMITIVE" },
+ /* gen6 */
{ 0x7802, 4, 4, "3DSTATE_SAMPLER_STATE_POINTERS" },
{ 0x7805, 3, 3, "3DSTATE_URB" },
+ { 0x780d, 4, 4, "3DSTATE_VIEWPORT_STATE_POINTERS" },
{ 0x780e, 4, 4, "3DSTATE_CC_STATE_POINTERS" },
+ { 0x780f, 2, 2, "3DSTATE_SCISSOR_STATE_POINTERS" },
{ 0x7810, 6, 6, "3DSTATE_VS_STATE" },
{ 0x7811, 7, 7, "3DSTATE_GS_STATE" },
{ 0x7812, 4, 4, "3DSTATE_CLIP_STATE" },
{ 0x7813, 20, 20, "3DSTATE_SF_STATE" },
{ 0x7814, 9, 9, "3DSTATE_WM_STATE" },
- { 0x7812, 4, 4, "3DSTATE_CLIP_STATE" },
{ 0x7815, 5, 5, "3DSTATE_CONSTANT_VS_STATE" },
{ 0x7816, 5, 5, "3DSTATE_CONSTANT_GS_STATE" },
{ 0x7817, 5, 5, "3DSTATE_CONSTANT_PS_STATE" },
@@ -1709,7 +1713,7 @@ decode_3d_965(uint32_t *data, int count, uint32_t hw_offset, uint32_t devid, int
"GS mod %d, PS mod %d\n",
(data[0] & (1 << 8)) != 0,
(data[0] & (1 << 9)) != 0,
- (data[0] & (1 << 10)) != 0);
+ (data[0] & (1 << 12)) != 0);
instr_out(data, hw_offset, 1, "VS binding table\n");
instr_out(data, hw_offset, 2, "GS binding table\n");
instr_out(data, hw_offset, 3, "WM binding table\n");
@@ -1726,9 +1730,17 @@ decode_3d_965(uint32_t *data, int count, uint32_t hw_offset, uint32_t devid, int
instr_out(data, hw_offset, 0, "3DSTATE_VERTEX_BUFFERS\n");
for (i = 1; i < len;) {
+ int idx, access;
+ if (IS_GEN6(devid)) {
+ idx = 26;
+ access = 20;
+ } else {
+ idx = 27;
+ access = 26;
+ }
instr_out(data, hw_offset, i, "buffer %d: %s, pitch %db\n",
- data[i] >> 27,
- data[i] & (1 << 26) ? "random" : "sequential",
+ data[i] >> idx,
+ data[i] & (1 << access) ? "random" : "sequential",
data[i] & 0x07ff);
i++;
instr_out(data, hw_offset, i++, "buffer address\n");
@@ -1746,10 +1758,18 @@ decode_3d_965(uint32_t *data, int count, uint32_t hw_offset, uint32_t devid, int
instr_out(data, hw_offset, 0, "3DSTATE_VERTEX_ELEMENTS\n");
for (i = 1; i < len;) {
+ int index, valid;
+ if (IS_GEN6(devid)) {
+ index = 26;
+ valid = 25;
+ } else {
+ index = 27;
+ valid = 26;
+ }
instr_out(data, hw_offset, i, "buffer %d: %svalid, type 0x%04x, "
"src offset 0x%04x bytes\n",
- data[i] >> 27,
- data[i] & (1 << 26) ? "" : "in",
+ data[i] >> index,
+ data[i] & (1 << valid) ? "" : "in",
(data[i] >> 16) & 0x1ff,
data[i] & 0x07ff);
i++;
@@ -1834,27 +1854,49 @@ decode_3d_965(uint32_t *data, int count, uint32_t hw_offset, uint32_t devid, int
case 0x7a00:
len = (data[0] & 0xff) + 2;
- if (len != 4)
+ if (IS_GEN6(devid)) {
+ if (len != 4 && len != 5)
+ fprintf(out, "Bad count in PIPE_CONTROL\n");
+ } else if (len != 4)
fprintf(out, "Bad count in PIPE_CONTROL\n");
if (count < len)
BUFFER_FAIL(count, len, "PIPE_CONTROL");
- switch ((data[0] >> 14) & 0x3) {
+ if (IS_GEN6(devid))
+ i = data[1];
+ else
+ i = data[0];
+ switch ((i >> 14) & 0x3) {
case 0: desc1 = "no write"; break;
case 1: desc1 = "qword write"; break;
case 2: desc1 = "PS_DEPTH_COUNT write"; break;
case 3: desc1 = "TIMESTAMP write"; break;
}
- instr_out(data, hw_offset, 0,
- "PIPE_CONTROL: %s, %sdepth stall, %sRC write flush, "
- "%sinst flush\n",
- desc1,
- data[0] & (1 << 13) ? "" : "no ",
- data[0] & (1 << 12) ? "" : "no ",
- data[0] & (1 << 11) ? "" : "no ");
- instr_out(data, hw_offset, 1, "destination address\n");
- instr_out(data, hw_offset, 2, "immediate dword low\n");
- instr_out(data, hw_offset, 3, "immediate dword high\n");
+ if (IS_GEN6(devid)) {
+ instr_out(data, hw_offset, 0, "PIPE_CONTROL:\n");
+ instr_out(data, hw_offset, 1,
+ "%s, %sdepth stall, %sRC write flush, "
+ "%sinst flush\n",
+ desc1,
+ data[1] & (1 << 13) ? "" : "no ",
+ data[1] & (1 << 12) ? "" : "no ",
+ data[1] & (1 << 11) ? "" : "no ");
+ instr_out(data, hw_offset, 2, "destination address\n");
+ instr_out(data, hw_offset, 3, "immediate dword low\n");
+ if (len == 5)
+ instr_out(data, hw_offset, 4, "immediate dword high\n");
+ } else {
+ instr_out(data, hw_offset, 0,
+ "PIPE_CONTROL: %s, %sdepth stall, %sRC write flush, "
+ "%sinst flush\n",
+ desc1,
+ data[0] & (1 << 13) ? "" : "no ",
+ data[0] & (1 << 12) ? "" : "no ",
+ data[0] & (1 << 11) ? "" : "no ");
+ instr_out(data, hw_offset, 1, "destination address\n");
+ instr_out(data, hw_offset, 2, "immediate dword low\n");
+ instr_out(data, hw_offset, 3, "immediate dword high\n");
+ }
return len;
case 0x7b00:
diff --git a/src/mesa/drivers/dri/intel/intel_reg.h b/src/mesa/drivers/dri/intel/intel_reg.h
index c1a281f261e..955b100b212 100644
--- a/src/mesa/drivers/dri/intel/intel_reg.h
+++ b/src/mesa/drivers/dri/intel/intel_reg.h
@@ -55,6 +55,11 @@
* additional flushing control.
*/
#define _3DSTATE_PIPE_CONTROL (CMD_3D | (3 << 27) | (2 << 24) | 2)
+#define PIPE_CONTROL_CS_STALL (1 << 20)
+#define PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET (1 << 19)
+#define PIPE_CONTROL_TLB_INVALIDATE (1 << 18)
+#define PIPE_CONTROL_SYNC_GFDT (1 << 17)
+#define PIPE_CONTROL_MEDIA_STATE_CLEAR (1 << 16)
#define PIPE_CONTROL_NO_WRITE (0 << 14)
#define PIPE_CONTROL_WRITE_IMMEDIATE (1 << 14)
#define PIPE_CONTROL_WRITE_DEPTH_COUNT (2 << 14)
@@ -62,7 +67,14 @@
#define PIPE_CONTROL_DEPTH_STALL (1 << 13)
#define PIPE_CONTROL_WRITE_FLUSH (1 << 12)
#define PIPE_CONTROL_INSTRUCTION_FLUSH (1 << 11)
+#define PIPE_CONTROL_TC_FLUSH (1 << 10) /* GM45+ only */
+#define PIPE_CONTROL_ISP_DIS (1 << 9)
#define PIPE_CONTROL_INTERRUPT_ENABLE (1 << 8)
+/* GT */
+#define PIPE_CONTROL_VF_CACHE_INVALIDATE (1 << 4)
+#define PIPE_CONTROL_CONST_CACHE_INVALIDATE (1 << 3)
+#define PIPE_CONTROL_STATE_CACHE_INVALIDATE (1 << 2)
+#define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0)
#define PIPE_CONTROL_PPGTT_WRITE (0 << 2)
#define PIPE_CONTROL_GLOBAL_GTT_WRITE (1 << 2)