author     Luc Verhaegen <libv@skynet.be>    2010-03-14 05:46:05 +0100
committer  Luc Verhaegen <libv@skynet.be>    2010-03-14 05:46:05 +0100
commit     1dece368be5a87d9ddc972103b4d9f6b6e1aec24 (patch)
tree       dd5fb17f4d0fc790e7a63387e82d42faaedf5644
parent     da7796a7d40ca2cd4a03e99ddf38e3a7256a401f (diff)
Import i915 and i965 dri drivers from mesa 7.5.2. (tag: 7.5.2)
-rw-r--r--  configure.ac                     2
-rw-r--r--  i915/Makefile.am                 2
-rw-r--r--  i965/Makefile.am                 1
-rw-r--r--  i965/brw_clip.c                  3
-rw-r--r--  i965/brw_context.h               5
-rw-r--r--  i965/brw_draw.c                  1
-rw-r--r--  i965/brw_draw_upload.c          90
-rw-r--r--  i965/brw_eu.c                    2
-rw-r--r--  i965/brw_eu_emit.c              26
-rw-r--r--  i965/brw_sf_state.c              6
-rw-r--r--  i965/brw_structs.h               2
-rw-r--r--  i965/brw_tex_layout.c           10
-rw-r--r--  i965/brw_vs_emit.c              74
-rw-r--r--  i965/brw_wm.c                    2
-rw-r--r--  i965/brw_wm.h                    1
-rw-r--r--  i965/brw_wm_fp.c                 6
-rw-r--r--  i965/brw_wm_iz.c                 3
-rw-r--r--  shared/intel_batchbuffer.c       5
-rw-r--r--  shared/intel_buffer_objects.c    5
-rw-r--r--  shared/intel_buffers.c          17
-rw-r--r--  shared/intel_chipset.h           4
-rw-r--r--  shared/intel_context.c          45
-rw-r--r--  shared/intel_context.h           9
-rw-r--r--  shared/intel_fbo.c               5
-rw-r--r--  shared/intel_pixel.c             4
-rw-r--r--  shared/intel_pixel_bitmap.c      6
-rw-r--r--  shared/intel_pixel_read.c (renamed from i915/intel_pixel_read.c)  29
-rw-r--r--  shared/intel_regions.c           7
-rw-r--r--  shared/intel_screen.c            8
29 files changed, 293 insertions, 87 deletions
diff --git a/configure.ac b/configure.ac
index b73fe88..4214eda 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,7 +1,7 @@
# Process this file with autoconf to produce a configure script
AC_PREREQ(2.57)
-AC_INIT([mesa-dri-i9xx], 7.5.0, [], mesa-dri-i9xx)
+AC_INIT([mesa-dri-i9xx], 7.5.2, [], mesa-dri-i9xx)
AM_INIT_AUTOMAKE([dist-bzip2])
diff --git a/i915/Makefile.am b/i915/Makefile.am
index 2dc608c..3f74c00 100644
--- a/i915/Makefile.am
+++ b/i915/Makefile.am
@@ -32,7 +32,7 @@ i915_dri_la_SOURCES = \
../shared/intel_pixel_bitmap.c \
../shared/intel_pixel_copy.c \
../shared/intel_pixel_draw.c \
- intel_pixel_read.c \
+ ../shared/intel_pixel_read.c \
../shared/intel_buffers.c \
../shared/intel_blit.c \
../shared/intel_swapbuffers.c \
diff --git a/i965/Makefile.am b/i965/Makefile.am
index 5a118af..546d378 100644
--- a/i965/Makefile.am
+++ b/i965/Makefile.am
@@ -25,6 +25,7 @@ i965_dri_la_SOURCES = \
../shared/intel_pixel_bitmap.c \
../shared/intel_pixel_copy.c \
../shared/intel_pixel_draw.c \
+ ../shared/intel_pixel_read.c \
../shared/intel_state.c \
../shared/intel_swapbuffers.c \
../shared/intel_tex.c \
diff --git a/i965/brw_clip.c b/i965/brw_clip.c
index 5cffceb..8fc9f89 100644
--- a/i965/brw_clip.c
+++ b/i965/brw_clip.c
@@ -152,7 +152,8 @@ static void upload_clip_prog(struct brw_context *brw)
/* _NEW_POLYGON */
if (key.primitive == GL_TRIANGLES) {
- if (ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK)
+ if (ctx->Polygon.CullFlag &&
+ ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK)
key.clip_mode = BRW_CLIPMODE_REJECT_ALL;
else {
GLuint fill_front = CLIP_CULL;
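Note on the brw_clip.c hunk above: the old test rejected all triangles whenever CullFaceMode was GL_FRONT_AND_BACK, even with culling disabled; the fix also requires CullFlag. A minimal sketch of the application state that used to trigger the bug (vertex_count is illustrative, nothing here is driver-specific):

    #include <GL/gl.h>

    /* Sketch only: cull mode is latched even while GL_CULL_FACE is
     * disabled; before the fix this combination drew nothing. */
    static void draw_with_latched_cull_mode(GLsizei vertex_count)
    {
        glCullFace(GL_FRONT_AND_BACK);
        glDisable(GL_CULL_FACE);
        glDrawArrays(GL_TRIANGLES, 0, vertex_count);
    }
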
diff --git a/i965/brw_context.h b/i965/brw_context.h
index 577497b..5cf12fb 100644
--- a/i965/brw_context.h
+++ b/i965/brw_context.h
@@ -386,6 +386,8 @@ struct brw_cached_batch_item {
struct brw_vertex_element {
const struct gl_client_array *glarray;
+ /** The corresponding Mesa vertex attribute */
+ gl_vert_attrib attrib;
/** Size of a complete element */
GLuint element_size;
/** Number of uploaded elements for this input. */
@@ -477,6 +479,9 @@ struct brw_context
struct {
struct brw_vertex_element inputs[VERT_ATTRIB_MAX];
+ struct brw_vertex_element *enabled[VERT_ATTRIB_MAX];
+ GLuint nr_enabled;
+
#define BRW_NR_UPLOAD_BUFS 17
#define BRW_UPLOAD_INIT_SIZE (128*1024)
diff --git a/i965/brw_draw.c b/i965/brw_draw.c
index 5342622..54b0661 100644
--- a/i965/brw_draw.c
+++ b/i965/brw_draw.c
@@ -185,6 +185,7 @@ static void brw_merge_inputs( struct brw_context *brw,
for (i = 0; i < VERT_ATTRIB_MAX; i++) {
brw->vb.inputs[i].glarray = arrays[i];
+ brw->vb.inputs[i].attrib = (gl_vert_attrib) i;
if (arrays[i]->StrideB != 0)
brw->vb.info.varying |= 1 << i;
diff --git a/i965/brw_draw_upload.c b/i965/brw_draw_upload.c
index b91b20b..fd9c391 100644
--- a/i965/brw_draw_upload.c
+++ b/i965/brw_draw_upload.c
@@ -343,16 +343,13 @@ static void brw_prepare_vertices(struct brw_context *brw)
{
GLcontext *ctx = &brw->intel.ctx;
struct intel_context *intel = intel_context(ctx);
- GLuint tmp = brw->vs.prog_data->inputs_read;
+ GLbitfield vs_inputs = brw->vs.prog_data->inputs_read;
GLuint i;
const unsigned char *ptr = NULL;
GLuint interleave = 0;
unsigned int min_index = brw->vb.min_index;
unsigned int max_index = brw->vb.max_index;
- struct brw_vertex_element *enabled[VERT_ATTRIB_MAX];
- GLuint nr_enabled = 0;
-
struct brw_vertex_element *upload[VERT_ATTRIB_MAX];
GLuint nr_uploads = 0;
@@ -362,12 +359,13 @@ static void brw_prepare_vertices(struct brw_context *brw)
_mesa_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index);
/* Accumulate the list of enabled arrays. */
- while (tmp) {
- GLuint i = _mesa_ffsll(tmp)-1;
+ brw->vb.nr_enabled = 0;
+ while (vs_inputs) {
+ GLuint i = _mesa_ffsll(vs_inputs) - 1;
struct brw_vertex_element *input = &brw->vb.inputs[i];
- tmp &= ~(1<<i);
- enabled[nr_enabled++] = input;
+ vs_inputs &= ~(1 << i);
+ brw->vb.enabled[brw->vb.nr_enabled++] = input;
}
/* XXX: In the rare cases where this happens we fallback all
@@ -376,13 +374,13 @@ static void brw_prepare_vertices(struct brw_context *brw)
* cases with > 17 vertex attributes enabled, so it probably
* isn't an issue at this point.
*/
- if (nr_enabled >= BRW_VEP_MAX) {
+ if (brw->vb.nr_enabled >= BRW_VEP_MAX) {
intel->Fallback = 1;
return;
}
- for (i = 0; i < nr_enabled; i++) {
- struct brw_vertex_element *input = enabled[i];
+ for (i = 0; i < brw->vb.nr_enabled; i++) {
+ struct brw_vertex_element *input = brw->vb.enabled[i];
input->element_size = get_size(input->glarray->Type) * input->glarray->Size;
input->count = input->glarray->StrideB ? max_index + 1 - min_index : 1;
@@ -398,6 +396,20 @@ static void brw_prepare_vertices(struct brw_context *brw)
dri_bo_reference(input->bo);
input->offset = (unsigned long)input->glarray->Ptr;
input->stride = input->glarray->StrideB;
+
+ /* This is a common place to reach if the user mistakenly supplies
+ * a pointer in place of a VBO offset. If we just let it go through,
+ * we may end up dereferencing a pointer beyond the bounds of the
+ * GTT. We would hope that the VBO's max_index would save us, but
+ * Mesa appears to hand us min/max values not clipped to the
+ * array object's _MaxElement, and _MaxElement frequently appears
+ * to be wrong anyway.
+ *
+ * The VBO spec allows application termination in this case, and it's
+ * probably a service to the poor programmer to do so rather than
+ * trying to just not render.
+ */
+ assert(input->offset < input->bo->size);
} else {
if (input->bo != NULL) {
/* Already-uploaded vertex data is present from a previous
@@ -410,7 +422,7 @@ static void brw_prepare_vertices(struct brw_context *brw)
/* Queue the buffer object up to be uploaded in the next pass,
* when we've decided if we're doing interleaved or not.
*/
- if (i == 0) {
+ if (input->attrib == VERT_ATTRIB_POS) {
/* Position array not properly enabled:
*/
if (input->glarray->StrideB == 0) {
@@ -466,8 +478,8 @@ static void brw_prepare_vertices(struct brw_context *brw)
brw_prepare_query_begin(brw);
- for (i = 0; i < nr_enabled; i++) {
- struct brw_vertex_element *input = enabled[i];
+ for (i = 0; i < brw->vb.nr_enabled; i++) {
+ struct brw_vertex_element *input = brw->vb.enabled[i];
brw_add_validated_bo(brw, input->bo);
}
@@ -477,34 +489,44 @@ static void brw_emit_vertices(struct brw_context *brw)
{
GLcontext *ctx = &brw->intel.ctx;
struct intel_context *intel = intel_context(ctx);
- GLuint tmp = brw->vs.prog_data->inputs_read;
- struct brw_vertex_element *enabled[VERT_ATTRIB_MAX];
GLuint i;
- GLuint nr_enabled = 0;
- /* Accumulate the list of enabled arrays. */
- while (tmp) {
- i = _mesa_ffsll(tmp)-1;
- struct brw_vertex_element *input = &brw->vb.inputs[i];
+ brw_emit_query_begin(brw);
- tmp &= ~(1<<i);
- enabled[nr_enabled++] = input;
+ /* If the VS doesn't read any inputs (calculating vertex position from
+ * a state variable for some reason, for example), emit a single pad
+ * VERTEX_ELEMENT struct and bail.
+ *
+ * The stale VB state stays in place, but they don't do anything unless
+ * a VE loads from them.
+ */
+ if (brw->vb.nr_enabled == 0) {
+ BEGIN_BATCH(3, IGNORE_CLIPRECTS);
+ OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | 1);
+ OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) |
+ BRW_VE0_VALID |
+ (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
+ (0 << BRW_VE0_SRC_OFFSET_SHIFT));
+ OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) |
+ (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
+ (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
+ (BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT));
+ ADVANCE_BATCH();
+ return;
}
- brw_emit_query_begin(brw);
-
/* Now emit VB and VEP state packets.
*
* This still defines a hardware VB for each input, even if they
* are interleaved or from the same VBO. TBD if this makes a
* performance difference.
*/
- BEGIN_BATCH(1 + nr_enabled * 4, IGNORE_CLIPRECTS);
+ BEGIN_BATCH(1 + brw->vb.nr_enabled * 4, IGNORE_CLIPRECTS);
OUT_BATCH((CMD_VERTEX_BUFFER << 16) |
- ((1 + nr_enabled * 4) - 2));
+ ((1 + brw->vb.nr_enabled * 4) - 2));
- for (i = 0; i < nr_enabled; i++) {
- struct brw_vertex_element *input = enabled[i];
+ for (i = 0; i < brw->vb.nr_enabled; i++) {
+ struct brw_vertex_element *input = brw->vb.enabled[i];
OUT_BATCH((i << BRW_VB0_INDEX_SHIFT) |
BRW_VB0_ACCESS_VERTEXDATA |
@@ -517,10 +539,10 @@ static void brw_emit_vertices(struct brw_context *brw)
}
ADVANCE_BATCH();
- BEGIN_BATCH(1 + nr_enabled * 2, IGNORE_CLIPRECTS);
- OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + nr_enabled * 2) - 2));
- for (i = 0; i < nr_enabled; i++) {
- struct brw_vertex_element *input = enabled[i];
+ BEGIN_BATCH(1 + brw->vb.nr_enabled * 2, IGNORE_CLIPRECTS);
+ OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + brw->vb.nr_enabled * 2) - 2));
+ for (i = 0; i < brw->vb.nr_enabled; i++) {
+ struct brw_vertex_element *input = brw->vb.enabled[i];
uint32_t format = get_surface_type(input->glarray->Type,
input->glarray->Size,
input->glarray->Format,
@@ -635,7 +657,7 @@ static void brw_emit_indices(struct brw_context *brw)
if (index_buffer == NULL)
return;
- ib_size = get_size(index_buffer->type) * index_buffer->count;
+ ib_size = get_size(index_buffer->type) * index_buffer->count - 1;
/* Emit the indexbuffer packet:
*/
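The new assert in brw_prepare_vertices guards against the client error the comment describes: passing a CPU pointer where a VBO-relative offset is expected. A hedged app-side sketch of the mistake (vbo and cpu_array are illustrative names):

    #include <GL/gl.h>

    /* Sketch only: 'vbo' is assumed bound and populated elsewhere. */
    static void set_vertex_pointer(GLuint vbo, const GLfloat *cpu_array)
    {
        glBindBuffer(GL_ARRAY_BUFFER, vbo);

        /* Correct: with a VBO bound, the last argument is a byte offset. */
        glVertexPointer(3, GL_FLOAT, 0, (const GLvoid *) 0);

        /* The mistake the assert catches: a client pointer reinterpreted
         * as a huge offset, potentially walking off the end of the GTT. */
        glVertexPointer(3, GL_FLOAT, 0, cpu_array);
    }
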
diff --git a/i965/brw_eu.c b/i965/brw_eu.c
index c53efba..1df5613 100644
--- a/i965/brw_eu.c
+++ b/i965/brw_eu.c
@@ -62,7 +62,7 @@ void brw_set_predicate_control( struct brw_compile *p, GLuint pc )
void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional )
{
- p->current->header.destreg__conditonalmod = conditional;
+ p->current->header.destreg__conditionalmod = conditional;
}
void brw_set_access_mode( struct brw_compile *p, GLuint access_mode )
diff --git a/i965/brw_eu_emit.c b/i965/brw_eu_emit.c
index 2a147fb..48243e1 100644
--- a/i965/brw_eu_emit.c
+++ b/i965/brw_eu_emit.c
@@ -377,8 +377,8 @@ static struct brw_instruction *next_insn( struct brw_compile *p,
/* Reset this one-shot flag:
*/
- if (p->current->header.destreg__conditonalmod) {
- p->current->header.destreg__conditonalmod = 0;
+ if (p->current->header.destreg__conditionalmod) {
+ p->current->header.destreg__conditionalmod = 0;
p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
}
@@ -746,7 +746,7 @@ void brw_CMP(struct brw_compile *p,
{
struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
- insn->header.destreg__conditonalmod = conditional;
+ insn->header.destreg__conditionalmod = conditional;
brw_set_dest(insn, dest);
brw_set_src0(insn, src0);
brw_set_src1(insn, src1);
@@ -790,7 +790,7 @@ void brw_math( struct brw_compile *p,
* instructions.
*/
insn->header.predicate_control = 0;
- insn->header.destreg__conditonalmod = msg_reg_nr;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
brw_set_dest(insn, dest);
brw_set_src0(insn, src);
@@ -826,7 +826,7 @@ void brw_math_16( struct brw_compile *p,
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
insn = next_insn(p, BRW_OPCODE_SEND);
- insn->header.destreg__conditonalmod = msg_reg_nr;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
brw_set_dest(insn, dest);
brw_set_src0(insn, src);
@@ -842,7 +842,7 @@ void brw_math_16( struct brw_compile *p,
*/
insn = next_insn(p, BRW_OPCODE_SEND);
insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
- insn->header.destreg__conditonalmod = msg_reg_nr+1;
+ insn->header.destreg__conditionalmod = msg_reg_nr+1;
brw_set_dest(insn, offset(dest,1));
brw_set_src0(insn, src);
@@ -888,7 +888,7 @@ void brw_dp_WRITE_16( struct brw_compile *p,
insn->header.predicate_control = 0; /* XXX */
insn->header.compression_control = BRW_COMPRESSION_NONE;
- insn->header.destreg__conditonalmod = msg_reg_nr;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
brw_set_dest(insn, dest);
brw_set_src0(insn, src);
@@ -933,7 +933,7 @@ void brw_dp_READ_16( struct brw_compile *p,
insn->header.predicate_control = 0; /* XXX */
insn->header.compression_control = BRW_COMPRESSION_NONE;
- insn->header.destreg__conditonalmod = msg_reg_nr;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
brw_set_dest(insn, dest); /* UW? */
brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
@@ -986,7 +986,7 @@ void brw_dp_READ_4( struct brw_compile *p,
insn->header.predicate_control = BRW_PREDICATE_NONE;
insn->header.compression_control = BRW_COMPRESSION_NONE;
- insn->header.destreg__conditonalmod = msg_reg_nr;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
insn->header.mask_control = BRW_MASK_DISABLE;
/* cast dest to a uword[8] vector */
@@ -1059,7 +1059,7 @@ void brw_dp_READ_4_vs(struct brw_compile *p,
insn->header.predicate_control = BRW_PREDICATE_NONE;
insn->header.compression_control = BRW_COMPRESSION_NONE;
- insn->header.destreg__conditonalmod = msg_reg_nr;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
insn->header.mask_control = BRW_MASK_DISABLE;
/*insn->header.access_mode = BRW_ALIGN_16;*/
@@ -1092,7 +1092,7 @@ void brw_fb_WRITE(struct brw_compile *p,
insn->header.predicate_control = 0; /* XXX */
insn->header.compression_control = BRW_COMPRESSION_NONE;
- insn->header.destreg__conditonalmod = msg_reg_nr;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
brw_set_dest(insn, dest);
brw_set_src0(insn, src0);
@@ -1187,7 +1187,7 @@ void brw_SAMPLE(struct brw_compile *p,
insn->header.predicate_control = 0; /* XXX */
insn->header.compression_control = BRW_COMPRESSION_NONE;
- insn->header.destreg__conditonalmod = msg_reg_nr;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
brw_set_dest(insn, dest);
brw_set_src0(insn, src0);
@@ -1238,7 +1238,7 @@ void brw_urb_WRITE(struct brw_compile *p,
brw_set_src0(insn, src0);
brw_set_src1(insn, brw_imm_d(0));
- insn->header.destreg__conditonalmod = msg_reg_nr;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
brw_set_urb_message(insn,
allocate,
diff --git a/i965/brw_sf_state.c b/i965/brw_sf_state.c
index c999187..38733c8 100644
--- a/i965/brw_sf_state.c
+++ b/i965/brw_sf_state.c
@@ -233,7 +233,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
else if (sf.sf6.line_width <= 0x2)
sf.sf6.line_width = 0;
- /* _NEW_POINT */
+ /* _NEW_BUFFERS */
key->render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
if (!key->render_to_fbo) {
/* Rendering to an OpenGL window */
@@ -263,6 +263,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
}
/* XXX clamp max depends on AA vs. non-AA */
+ /* _NEW_POINT */
sf.sf7.sprite_point = key->point_sprite;
sf.sf7.point_size = CLAMP(rint(key->point_size), 1, 255) * (1<<3);
sf.sf7.use_point_size_state = !key->point_attenuated;
@@ -328,7 +329,8 @@ const struct brw_tracked_state brw_sf_unit = {
.mesa = (_NEW_POLYGON |
_NEW_LINE |
_NEW_POINT |
- _NEW_SCISSOR),
+ _NEW_SCISSOR |
+ _NEW_BUFFERS),
.brw = BRW_NEW_URB_FENCE,
.cache = (CACHE_NEW_SF_VP |
CACHE_NEW_SF_PROG)
diff --git a/i965/brw_structs.h b/i965/brw_structs.h
index 89e2981..6011a4a 100644
--- a/i965/brw_structs.h
+++ b/i965/brw_structs.h
@@ -1168,7 +1168,7 @@ struct brw_instruction
GLuint predicate_control:4;
GLuint predicate_inverse:1;
GLuint execution_size:3;
- GLuint destreg__conditonalmod:4; /* destreg - send, conditionalmod - others */
+ GLuint destreg__conditionalmod:4; /* destreg - send, conditionalmod - others */
GLuint pad0:2;
GLuint debug_control:1;
GLuint saturate:1;
diff --git a/i965/brw_tex_layout.c b/i965/brw_tex_layout.c
index 51a617f..3ab27c2 100644
--- a/i965/brw_tex_layout.c
+++ b/i965/brw_tex_layout.c
@@ -119,6 +119,16 @@ GLboolean brw_miptree_layout( struct intel_context *intel, struct intel_mipmap_t
}
}
+ /* The 965's sampler lays cachelines out according to how accesses
+ * in the texture surfaces run, so they may be "vertical" through
+ * memory. As a result, the docs say in Surface Padding Requirements:
+ * Sampling Engine Surfaces that two extra rows of padding are required.
+ * We don't know of similar requirements for pre-965, but given that
+ * those docs are silent on padding requirements in general, let's play
+ * it safe.
+ */
+ if (mt->target == GL_TEXTURE_CUBE_MAP)
+ mt->total_height += 2;
break;
}
diff --git a/i965/brw_vs_emit.c b/i965/brw_vs_emit.c
index b69616d..72cd36a 100644
--- a/i965/brw_vs_emit.c
+++ b/i965/brw_vs_emit.c
@@ -68,6 +68,7 @@ static void release_tmps( struct brw_vs_compile *c )
static void brw_vs_alloc_regs( struct brw_vs_compile *c )
{
GLuint i, reg = 0, mrf;
+ int attributes_in_vue;
#if 0
if (c->vp->program.Base.Parameters->NumParameters >= 6)
@@ -123,6 +124,11 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
reg++;
}
}
+ /* If there are no inputs, we'll still be reading one attribute's worth
+ * because it's required -- see urb_read_length setting.
+ */
+ if (c->nr_inputs == 0)
+ reg++;
/* Allocate outputs: TODO: could organize the non-position outputs
* to go straight into message regs.
@@ -200,8 +206,20 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
* vertex urb, so is half the amount:
*/
c->prog_data.urb_read_length = (c->nr_inputs + 1) / 2;
+ /* Setting this field to 0 leads to undefined behavior according to the
+ * VS_STATE docs. Our VUEs will always have at least one attribute
+ * sitting in them, even if it's padding.
+ */
+ if (c->prog_data.urb_read_length == 0)
+ c->prog_data.urb_read_length = 1;
+
+ /* The VS VUEs are shared by VF (outputting our inputs) and VS, so size
+ * them to fit the biggest thing they need to.
+ */
+ attributes_in_vue = MAX2(c->nr_outputs, c->nr_inputs);
+
- c->prog_data.urb_entry_size = (c->nr_outputs + 2 + 3) / 4;
+ c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 3) / 4;
c->prog_data.total_grf = reg;
if (INTEL_DEBUG & DEBUG_VS) {
@@ -1172,20 +1190,36 @@ post_vs_emit( struct brw_vs_compile *c,
brw_set_src1(end_inst, brw_imm_d(offset * 16));
}
+static uint32_t
+get_predicate(uint32_t swizzle)
+{
+ switch (swizzle) {
+ case SWIZZLE_XXXX:
+ return BRW_PREDICATE_ALIGN16_REPLICATE_X;
+ case SWIZZLE_YYYY:
+ return BRW_PREDICATE_ALIGN16_REPLICATE_Y;
+ case SWIZZLE_ZZZZ:
+ return BRW_PREDICATE_ALIGN16_REPLICATE_Z;
+ case SWIZZLE_WWWW:
+ return BRW_PREDICATE_ALIGN16_REPLICATE_W;
+ default:
+ _mesa_problem(NULL, "Unexpected predicate: 0x%08x\n", swizzle);
+ return BRW_PREDICATE_NORMAL;
+ }
+}
/* Emit the vertex program instructions here.
*/
void brw_vs_emit(struct brw_vs_compile *c )
{
-#define MAX_IFSN 32
+#define MAX_IF_DEPTH 32
struct brw_compile *p = &c->func;
- GLuint nr_insns = c->vp->program.Base.NumInstructions;
- GLuint insn, if_insn = 0;
+ const GLuint nr_insns = c->vp->program.Base.NumInstructions;
+ GLuint insn, if_depth = 0;
GLuint end_offset = 0;
struct brw_instruction *end_inst, *last_inst;
- struct brw_instruction *if_inst[MAX_IFSN];
- struct brw_indirect stack_index = brw_indirect(0, 0);
-
+ struct brw_instruction *if_inst[MAX_IF_DEPTH];
+ const struct brw_indirect stack_index = brw_indirect(0, 0);
GLuint index;
GLuint file;
@@ -1376,15 +1410,18 @@ void brw_vs_emit(struct brw_vs_compile *c )
emit_xpd(p, dst, args[0], args[1]);
break;
case OPCODE_IF:
- assert(if_insn < MAX_IFSN);
- if_inst[if_insn++] = brw_IF(p, BRW_EXECUTE_8);
+ assert(if_depth < MAX_IF_DEPTH);
+ if_inst[if_depth] = brw_IF(p, BRW_EXECUTE_8);
+ if_inst[if_depth]->header.predicate_control =
+ get_predicate(inst->DstReg.CondSwizzle);
+ if_depth++;
break;
case OPCODE_ELSE:
- if_inst[if_insn-1] = brw_ELSE(p, if_inst[if_insn-1]);
+ if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]);
break;
case OPCODE_ENDIF:
- assert(if_insn > 0);
- brw_ENDIF(p, if_inst[--if_insn]);
+ assert(if_depth > 0);
+ brw_ENDIF(p, if_inst[--if_depth]);
break;
case OPCODE_BRA:
brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
@@ -1430,6 +1467,19 @@ void brw_vs_emit(struct brw_vs_compile *c )
"unknown");
}
+ /* Set the predication update on the last instruction of the native
+ * instruction sequence.
+ *
+ * This would be problematic if it was set on a math instruction,
+ * but that shouldn't be the case with the current GLSL compiler.
+ */
+ if (inst->CondUpdate) {
+ struct brw_instruction *hw_insn = &p->store[p->nr_insn - 1];
+
+ assert(hw_insn->header.destreg__conditionalmod == 0);
+ hw_insn->header.destreg__conditionalmod = BRW_CONDITIONAL_NZ;
+ }
+
if ((inst->DstReg.File == PROGRAM_OUTPUT)
&& (inst->DstReg.Index != VERT_RESULT_HPOS)
&& c->output_regs[inst->DstReg.Index].used_in_src) {
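The renamed if_depth counter in brw_vs_emit above is a plain nesting stack: OPCODE_IF pushes the emitted instruction so that OPCODE_ELSE and OPCODE_ENDIF can patch its jump target once the block end is known. A minimal sketch of that pattern, with hypothetical emit_if_placeholder/patch_else/patch_endif helpers standing in for the real brw_IF/brw_ELSE/brw_ENDIF emitters:

    #include <assert.h>

    #define MAX_IF_DEPTH 32

    struct insn;                                        /* opaque instruction */
    extern struct insn *emit_if_placeholder(void);      /* hypothetical */
    extern struct insn *patch_else(struct insn *ifi);   /* hypothetical */
    extern void patch_endif(struct insn *ifi);          /* hypothetical */

    static struct insn *if_stack[MAX_IF_DEPTH];  /* pending IFs to patch */
    static unsigned if_depth;

    static void on_if(void)
    {
        assert(if_depth < MAX_IF_DEPTH);
        if_stack[if_depth++] = emit_if_placeholder();
    }

    static void on_else(void)
    {
        /* Rewrite the innermost pending IF to branch to the else block. */
        if_stack[if_depth - 1] = patch_else(if_stack[if_depth - 1]);
    }

    static void on_endif(void)
    {
        assert(if_depth > 0);
        patch_endif(if_stack[--if_depth]);   /* pop and fix jump distance */
    }
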
diff --git a/i965/brw_wm.c b/i965/brw_wm.c
index 8a3b7df..d4e22d5 100644
--- a/i965/brw_wm.c
+++ b/i965/brw_wm.c
@@ -202,6 +202,7 @@ static void brw_wm_populate_key( struct brw_context *brw,
/* BRW_NEW_FRAGMENT_PROGRAM */
const struct brw_fragment_program *fp =
(struct brw_fragment_program *)brw->fragment_program;
+ GLboolean uses_depth = (fp->program.Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0;
GLuint lookup = 0;
GLuint line_aa;
GLuint i;
@@ -263,6 +264,7 @@ static void brw_wm_populate_key( struct brw_context *brw,
brw_wm_lookup_iz(line_aa,
lookup,
+ uses_depth,
key);
diff --git a/i965/brw_wm.h b/i965/brw_wm.h
index 295fed8..307b5e5 100644
--- a/i965/brw_wm.h
+++ b/i965/brw_wm.h
@@ -286,6 +286,7 @@ void brw_wm_print_program( struct brw_wm_compile *c,
void brw_wm_lookup_iz( GLuint line_aa,
GLuint lookup,
+ GLboolean ps_uses_depth,
struct brw_wm_prog_key *key );
GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp);
diff --git a/i965/brw_wm_fp.c b/i965/brw_wm_fp.c
index 49aad28..8cf3a1f 100644
--- a/i965/brw_wm_fp.c
+++ b/i965/brw_wm_fp.c
@@ -705,7 +705,11 @@ static void precalc_tex( struct brw_wm_compile *c,
tmpcoord,
0,
inst->SrcReg[0],
- scale,
+ src_swizzle(scale,
+ SWIZZLE_X,
+ SWIZZLE_Y,
+ SWIZZLE_ONE,
+ SWIZZLE_ONE),
src_undef());
coord = src_reg_from_dst(tmpcoord);
diff --git a/i965/brw_wm_iz.c b/i965/brw_wm_iz.c
index bd60ac9..7e2b1c7 100644
--- a/i965/brw_wm_iz.c
+++ b/i965/brw_wm_iz.c
@@ -118,6 +118,7 @@ const struct {
void brw_wm_lookup_iz( GLuint line_aa,
GLuint lookup,
+ GLboolean ps_uses_depth,
struct brw_wm_prog_key *key )
{
GLuint reg = 2;
@@ -127,7 +128,7 @@ void brw_wm_lookup_iz( GLuint line_aa,
if (lookup & IZ_PS_COMPUTES_DEPTH_BIT)
key->computes_depth = 1;
- if (wm_iz_table[lookup].sd_present) {
+ if (wm_iz_table[lookup].sd_present || ps_uses_depth) {
key->source_depth_reg = reg;
reg += 2;
}
diff --git a/shared/intel_batchbuffer.c b/shared/intel_batchbuffer.c
index 29dc05c..82a92a9 100644
--- a/shared/intel_batchbuffer.c
+++ b/shared/intel_batchbuffer.c
@@ -197,6 +197,11 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file,
GLuint used = batch->ptr - batch->map;
GLboolean was_locked = intel->locked;
+ if (intel->first_post_swapbuffers_batch == NULL) {
+ intel->first_post_swapbuffers_batch = intel->batch->buf;
+ drm_intel_bo_reference(intel->first_post_swapbuffers_batch);
+ }
+
if (used == 0) {
batch->cliprect_mode = IGNORE_CLIPRECTS;
return;
diff --git a/shared/intel_buffer_objects.c b/shared/intel_buffer_objects.c
index 2e6b778..db0de03 100644
--- a/shared/intel_buffer_objects.c
+++ b/shared/intel_buffer_objects.c
@@ -209,7 +209,10 @@ intel_bufferobj_get_subdata(GLcontext * ctx,
struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
assert(intel_obj);
- dri_bo_get_subdata(intel_obj->buffer, offset, size, data);
+ if (intel_obj->sys_buffer)
+ memcpy(data, (char *)intel_obj->sys_buffer + offset, size);
+ else
+ dri_bo_get_subdata(intel_obj->buffer, offset, size, data);
}
diff --git a/shared/intel_buffers.c b/shared/intel_buffers.c
index d2fad9e..44e3433 100644
--- a/shared/intel_buffers.c
+++ b/shared/intel_buffers.c
@@ -345,6 +345,23 @@ intelDrawBuffer(GLcontext * ctx, GLenum mode)
static void
intelReadBuffer(GLcontext * ctx, GLenum mode)
{
+ if ((ctx->DrawBuffer != NULL) && (ctx->DrawBuffer->Name == 0)) {
+ struct intel_context *const intel = intel_context(ctx);
+ const GLboolean was_front_buffer_reading =
+ intel->is_front_buffer_reading;
+
+ intel->is_front_buffer_reading = (mode == GL_FRONT_LEFT)
+ || (mode == GL_FRONT);
+
+ /* If we weren't front-buffer reading before but we are now, make sure
+ * that the front-buffer has actually been allocated.
+ */
+ if (!was_front_buffer_reading && intel->is_front_buffer_reading) {
+ intel_update_renderbuffers(intel->driContext,
+ intel->driContext->driDrawablePriv);
+ }
+ }
+
if (ctx->ReadBuffer == ctx->DrawBuffer) {
/* This will update FBO completeness status.
* A framebuffer will be incomplete if the GL_READ_BUFFER setting
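intelReadBuffer now tracks the transition into front-buffer reading so the DRI2 (fake) front buffer is allocated on demand. A sketch of the app-side sequence that exercises the new path (w, h, and pixels are illustrative):

    #include <GL/gl.h>

    /* Sketch only: the first front-buffer read after back-buffer
     * rendering triggers the renderbuffer update added above. */
    static void read_front(GLsizei w, GLsizei h, void *pixels)
    {
        glReadBuffer(GL_FRONT);
        glReadPixels(0, 0, w, h, GL_RGBA, GL_UNSIGNED_BYTE, pixels);
    }
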
diff --git a/shared/intel_chipset.h b/shared/intel_chipset.h
index 4593d90..a528d99 100644
--- a/shared/intel_chipset.h
+++ b/shared/intel_chipset.h
@@ -66,6 +66,7 @@
#define PCI_CHIP_Q45_G 0x2E12
#define PCI_CHIP_G45_G 0x2E22
#define PCI_CHIP_G41_G 0x2E32
+#define PCI_CHIP_B43_G 0x2E42
#define IS_MOBILE(devid) (devid == PCI_CHIP_I855_GM || \
devid == PCI_CHIP_I915_GM || \
@@ -78,7 +79,8 @@
#define IS_G45(devid) (devid == PCI_CHIP_IGD_E_G || \
devid == PCI_CHIP_Q45_G || \
devid == PCI_CHIP_G45_G || \
- devid == PCI_CHIP_G41_G)
+ devid == PCI_CHIP_G41_G || \
+ devid == PCI_CHIP_B43_G)
#define IS_GM45(devid) (devid == PCI_CHIP_GM45_GM)
#define IS_G4X(devid) (IS_G45(devid) || IS_GM45(devid))
diff --git a/shared/intel_context.c b/shared/intel_context.c
index cfd983d..e593b23 100644
--- a/shared/intel_context.c
+++ b/shared/intel_context.c
@@ -161,6 +161,9 @@ intelGetString(GLcontext * ctx, GLenum name)
case PCI_CHIP_G41_G:
chipset = "Intel(R) G41";
break;
+ case PCI_CHIP_B43_G:
+ chipset = "Intel(R) B43";
+ break;
default:
chipset = "Unknown Intel Chipset";
break;
@@ -220,7 +223,9 @@ intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
struct intel_renderbuffer *stencil_rb;
i = 0;
- if ((intel->is_front_buffer_rendering || !intel_fb->color_rb[1])
+ if ((intel->is_front_buffer_rendering ||
+ intel->is_front_buffer_reading ||
+ !intel_fb->color_rb[1])
&& intel_fb->color_rb[0]) {
attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
attachments[i++] = intel_bits_per_pixel(intel_fb->color_rb[0]);
@@ -495,7 +500,8 @@ intel_flush(GLcontext *ctx, GLboolean needs_mi_flush)
if (screen->dri2.loader &&
(screen->dri2.loader->base.version >= 2)
- && (screen->dri2.loader->flushFrontBuffer != NULL)) {
+ && (screen->dri2.loader->flushFrontBuffer != NULL) &&
+ intel->driDrawable && intel->driDrawable->loaderPrivate) {
(*screen->dri2.loader->flushFrontBuffer)(intel->driDrawable,
intel->driDrawable->loaderPrivate);
@@ -505,7 +511,7 @@ intel_flush(GLcontext *ctx, GLboolean needs_mi_flush)
* each of N places that do rendering. This has worse performance,
* but it is much easier to get correct.
*/
- if (intel->is_front_buffer_rendering) {
+ if (!intel->is_front_buffer_rendering) {
intel->front_buffer_dirty = GL_FALSE;
}
}
@@ -521,7 +527,27 @@ intelFlush(GLcontext * ctx)
static void
intel_glFlush(GLcontext *ctx)
{
+ struct intel_context *intel = intel_context(ctx);
+
intel_flush(ctx, GL_TRUE);
+
+ /* We're using glFlush as an indicator that a frame is done, which is
+ * what DRI2 does before calling SwapBuffers (and means we should catch
+ * people doing front-buffer rendering, as well).
+ *
+ * Wait for the swapbuffers before the one we just emitted, so we don't
+ * get too many swaps outstanding for apps that are GPU-heavy but not
+ * CPU-heavy.
+ *
+ * Unfortunately, we don't have a handle to the batch containing the swap,
+ * and getting our hands on that doesn't seem worth it, so we just use the
+ * first batch we emitted after the last swap.
+ */
+ if (intel->first_post_swapbuffers_batch != NULL) {
+ drm_intel_bo_wait_rendering(intel->first_post_swapbuffers_batch);
+ drm_intel_bo_unreference(intel->first_post_swapbuffers_batch);
+ intel->first_post_swapbuffers_batch = NULL;
+ }
}
void
@@ -787,6 +813,8 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv)
intel->prim.vb = NULL;
dri_bo_unreference(intel->prim.vb_bo);
intel->prim.vb_bo = NULL;
+ dri_bo_unreference(intel->first_post_swapbuffers_batch);
+ intel->first_post_swapbuffers_batch = NULL;
if (release_texture_heaps) {
/* This share group is about to go away, free our private
@@ -804,12 +832,23 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv)
/* free the Mesa context */
_mesa_free_context_data(&intel->ctx);
+
+ FREE(intel);
+ driContextPriv->driverPrivate = NULL;
}
}
GLboolean
intelUnbindContext(__DRIcontextPrivate * driContextPriv)
{
+ struct intel_context *intel =
+ (struct intel_context *) driContextPriv->driverPrivate;
+
+ /* Deassociate the context with the drawables.
+ */
+ intel->driDrawable = NULL;
+ intel->driReadDrawable = NULL;
+
return GL_TRUE;
}
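Taken together, the intel_batchbuffer.c and intel_context.c hunks implement a simple frame throttle: the first batch emitted after a swap is referenced, and the next glFlush blocks until it retires. Condensed from the two hunks (the drm_intel_bo_* calls are the real libdrm API used by the diff):

    /* In _intel_batchbuffer_flush(): remember the frame's first batch. */
    if (intel->first_post_swapbuffers_batch == NULL) {
        intel->first_post_swapbuffers_batch = intel->batch->buf;
        drm_intel_bo_reference(intel->first_post_swapbuffers_batch);
    }

    /* In intel_glFlush(): block until that batch retires, so a GPU-bound
     * app cannot queue arbitrarily many frames ahead of the GPU. */
    if (intel->first_post_swapbuffers_batch != NULL) {
        drm_intel_bo_wait_rendering(intel->first_post_swapbuffers_batch);
        drm_intel_bo_unreference(intel->first_post_swapbuffers_batch);
        intel->first_post_swapbuffers_batch = NULL;
    }
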
diff --git a/shared/intel_context.h b/shared/intel_context.h
index b5cf7e6..e2b3943 100644
--- a/shared/intel_context.h
+++ b/shared/intel_context.h
@@ -191,6 +191,7 @@ struct intel_context
GLboolean ttm;
struct intel_batchbuffer *batch;
+ drm_intel_bo *first_post_swapbuffers_batch;
GLboolean no_batch_wrap;
unsigned batch_id;
@@ -294,6 +295,14 @@ struct intel_context
* easily.
*/
GLboolean is_front_buffer_rendering;
+ /**
+ * Track whether front-buffer is the current read target.
+ *
+ * This is closely associated with is_front_buffer_rendering, but may
+ * be set separately. The DRI2 fake front buffer must be referenced
+ * either way.
+ */
+ GLboolean is_front_buffer_reading;
drm_clip_rect_t fboRect; /**< cliprect for FBO rendering */
diff --git a/shared/intel_fbo.c b/shared/intel_fbo.c
index 30f58b1..ed7c78e 100644
--- a/shared/intel_fbo.c
+++ b/shared/intel_fbo.c
@@ -680,6 +680,11 @@ intel_validate_framebuffer(GLcontext *ctx, struct gl_framebuffer *fb)
if (rb == NULL)
continue;
+ if (irb == NULL) {
+ fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT;
+ continue;
+ }
+
switch (irb->texformat->MesaFormat) {
case MESA_FORMAT_ARGB8888:
case MESA_FORMAT_RGB565:
diff --git a/shared/intel_pixel.c b/shared/intel_pixel.c
index fc0ac0b..defb80f 100644
--- a/shared/intel_pixel.c
+++ b/shared/intel_pixel.c
@@ -342,10 +342,8 @@ intelInitPixelFuncs(struct dd_function_table *functions)
functions->Bitmap = intelBitmap;
functions->CopyPixels = intelCopyPixels;
functions->DrawPixels = intelDrawPixels;
-#ifdef I915
- functions->ReadPixels = intelReadPixels;
-#endif
}
+ functions->ReadPixels = intelReadPixels;
}
void
diff --git a/shared/intel_pixel_bitmap.c b/shared/intel_pixel_bitmap.c
index a2ccae1..d137aef 100644
--- a/shared/intel_pixel_bitmap.c
+++ b/shared/intel_pixel_bitmap.c
@@ -409,6 +409,12 @@ intel_texture_bitmap(GLcontext * ctx,
return GL_FALSE;
}
+ if (ctx->Fog.Enabled) {
+ if (INTEL_DEBUG & DEBUG_FALLBACKS)
+ fprintf(stderr, "glBitmap() fallback: fog\n");
+ return GL_FALSE;
+ }
+
/* Check that we can load in a texture this big. */
if (width > (1 << (ctx->Const.MaxTextureLevels - 1)) ||
height > (1 << (ctx->Const.MaxTextureLevels - 1))) {
diff --git a/i915/intel_pixel_read.c b/shared/intel_pixel_read.c
index 56087aa..0370255 100644
--- a/i915/intel_pixel_read.c
+++ b/shared/intel_pixel_read.c
@@ -1,8 +1,8 @@
/**************************************************************************
- *
+ *
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
- *
+ *
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
@@ -10,11 +10,11 @@
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
- *
+ *
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
- *
+ *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
@@ -22,7 +22,7 @@
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
+ *
**************************************************************************/
#include "main/glheader.h"
@@ -31,6 +31,7 @@
#include "main/macros.h"
#include "main/image.h"
#include "main/bufferobj.h"
+#include "main/state.h"
#include "swrast/swrast.h"
#include "intel_screen.h"
@@ -45,10 +46,10 @@
/* For many applications, the new ability to pull the source buffers
* back out of the GTT and then do the packing/conversion operations
* in software will be as much of an improvement as trying to get the
- * blitter and/or texture engine to do the work.
+ * blitter and/or texture engine to do the work.
*
* This step is gated on private backbuffers.
- *
+ *
* Obviously the frontbuffer can't be pulled back, so that is either
* an argument for blit/texture readpixels, or for blitting to a
* temporary and then pulling that back.
@@ -291,6 +292,7 @@ intelReadPixels(GLcontext * ctx,
intelFlush(ctx);
+#ifdef I915
if (do_blit_readpixels
(ctx, x, y, width, height, format, type, pack, pixels))
return;
@@ -298,9 +300,22 @@ intelReadPixels(GLcontext * ctx,
if (do_texture_readpixels
(ctx, x, y, width, height, format, type, pack, pixels))
return;
+#else
+ (void)do_blit_readpixels;
+ (void)do_texture_readpixels;
+#endif
if (INTEL_DEBUG & DEBUG_PIXEL)
_mesa_printf("%s: fallback to swrast\n", __FUNCTION__);
+ /* Update Mesa state before calling down into _swrast_ReadPixels, as
+ * the spans code requires the computed buffer states to be up to date,
+ * but _swrast_ReadPixels only updates Mesa state after setting up
+ * the spans code.
+ */
+
+ if (ctx->NewState)
+ _mesa_update_state(ctx);
+
_swrast_ReadPixels(ctx, x, y, width, height, format, type, pack, pixels);
}
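After this change intelReadPixels follows the usual try-hardware-then-fall-back shape, with the new _mesa_update_state call making sure derived buffer state is valid before the swrast spans code runs. The resulting control flow, condensed (accelerated paths are i915-only per the #ifdef above):

    intelFlush(ctx);
    #ifdef I915
    if (do_blit_readpixels(ctx, x, y, width, height, format, type, pack, pixels))
        return;                      /* blitter fast path */
    if (do_texture_readpixels(ctx, x, y, width, height, format, type, pack, pixels))
        return;                      /* texture-engine fast path */
    #endif
    if (ctx->NewState)
        _mesa_update_state(ctx);     /* validate derived state first */
    _swrast_ReadPixels(ctx, x, y, width, height, format, type, pack, pixels);
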
diff --git a/shared/intel_regions.c b/shared/intel_regions.c
index 0aa5b8c..b8d2dec 100644
--- a/shared/intel_regions.c
+++ b/shared/intel_regions.c
@@ -114,6 +114,13 @@ intel_region_alloc(struct intel_context *intel,
{
dri_bo *buffer;
+ /* If we're untiled, we have to align to 2 rows high because the
+ * data port accesses 2x2 blocks even if the bottom row isn't to be
+ * rendered, so failure to align means we could walk off the end of the
+ * GTT and fault.
+ */
+ height = ALIGN(height, 2);
+
if (expect_accelerated_upload) {
buffer = drm_intel_bo_alloc_for_render(intel->bufmgr, "region",
pitch * cpp * height, 64);
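ALIGN above is assumed to be Mesa's usual power-of-two round-up macro; a minimal equivalent, and the effect on an odd-height region:

    #define ALIGN(value, alignment) \
        (((value) + (alignment) - 1) & ~((alignment) - 1))

    /* ALIGN(5, 2) == 6: an odd-height untiled region gains one padding
     * row, keeping 2x2 data-port accesses inside the allocation. */
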
diff --git a/shared/intel_screen.c b/shared/intel_screen.c
index 0f278b3..f810850 100644
--- a/shared/intel_screen.c
+++ b/shared/intel_screen.c
@@ -305,7 +305,7 @@ intelDestroyScreen(__DRIscreenPrivate * sPriv)
dri_bufmgr_destroy(intelScreen->bufmgr);
intelUnmapScreenRegions(intelScreen);
- driDestroyOptionCache(&intelScreen->optionCache);
+ driDestroyOptionInfo(&intelScreen->optionCache);
FREE(intelScreen);
sPriv->private = NULL;
@@ -617,10 +617,10 @@ intel_init_bufmgr(intelScreenPrivate *intelScreen)
/* Otherwise, use the classic buffer manager. */
if (intelScreen->bufmgr == NULL) {
if (gem_disable) {
- fprintf(stderr, "GEM disabled. Using classic.\n");
+ _mesa_warning(NULL, "GEM disabled. Using classic.");
} else {
- fprintf(stderr, "Failed to initialize GEM. "
- "Falling back to classic.\n");
+ _mesa_warning(NULL,
+ "Failed to initialize GEM. Falling back to classic.");
}
if (intelScreen->tex.size == 0) {