diff options
author | Luc Verhaegen <libv@skynet.be> | 2010-03-14 05:46:05 +0100 |
---|---|---|
committer | Luc Verhaegen <libv@skynet.be> | 2010-03-14 05:46:05 +0100 |
commit | 1dece368be5a87d9ddc972103b4d9f6b6e1aec24 (patch) | |
tree | dd5fb17f4d0fc790e7a63387e82d42faaedf5644 | |
parent | da7796a7d40ca2cd4a03e99ddf38e3a7256a401f (diff) |
Import i915 and i965 dri drivers from mesa 7.5.2.7.5.2
-rw-r--r-- | configure.ac | 2 | ||||
-rw-r--r-- | i915/Makefile.am | 2 | ||||
-rw-r--r-- | i965/Makefile.am | 1 | ||||
-rw-r--r-- | i965/brw_clip.c | 3 | ||||
-rw-r--r-- | i965/brw_context.h | 5 | ||||
-rw-r--r-- | i965/brw_draw.c | 1 | ||||
-rw-r--r-- | i965/brw_draw_upload.c | 90 | ||||
-rw-r--r-- | i965/brw_eu.c | 2 | ||||
-rw-r--r-- | i965/brw_eu_emit.c | 26 | ||||
-rw-r--r-- | i965/brw_sf_state.c | 6 | ||||
-rw-r--r-- | i965/brw_structs.h | 2 | ||||
-rw-r--r-- | i965/brw_tex_layout.c | 10 | ||||
-rw-r--r-- | i965/brw_vs_emit.c | 74 | ||||
-rw-r--r-- | i965/brw_wm.c | 2 | ||||
-rw-r--r-- | i965/brw_wm.h | 1 | ||||
-rw-r--r-- | i965/brw_wm_fp.c | 6 | ||||
-rw-r--r-- | i965/brw_wm_iz.c | 3 | ||||
-rw-r--r-- | shared/intel_batchbuffer.c | 5 | ||||
-rw-r--r-- | shared/intel_buffer_objects.c | 5 | ||||
-rw-r--r-- | shared/intel_buffers.c | 17 | ||||
-rw-r--r-- | shared/intel_chipset.h | 4 | ||||
-rw-r--r-- | shared/intel_context.c | 45 | ||||
-rw-r--r-- | shared/intel_context.h | 9 | ||||
-rw-r--r-- | shared/intel_fbo.c | 5 | ||||
-rw-r--r-- | shared/intel_pixel.c | 4 | ||||
-rw-r--r-- | shared/intel_pixel_bitmap.c | 6 | ||||
-rw-r--r-- | shared/intel_pixel_read.c (renamed from i915/intel_pixel_read.c) | 29 | ||||
-rw-r--r-- | shared/intel_regions.c | 7 | ||||
-rw-r--r-- | shared/intel_screen.c | 8 |
29 files changed, 293 insertions, 87 deletions
diff --git a/configure.ac b/configure.ac index b73fe88..4214eda 100644 --- a/configure.ac +++ b/configure.ac @@ -1,7 +1,7 @@ # Process this file with autoconf to produce a configure script AC_PREREQ(2.57) -AC_INIT([mesa-dri-i9xx], 7.5.0, [], mesa-dri-i9xx) +AC_INIT([mesa-dri-i9xx], 7.5.2, [], mesa-dri-i9xx) AM_INIT_AUTOMAKE([dist-bzip2]) diff --git a/i915/Makefile.am b/i915/Makefile.am index 2dc608c..3f74c00 100644 --- a/i915/Makefile.am +++ b/i915/Makefile.am @@ -32,7 +32,7 @@ i915_dri_la_SOURCES = \ ../shared/intel_pixel_bitmap.c \ ../shared/intel_pixel_copy.c \ ../shared/intel_pixel_draw.c \ - intel_pixel_read.c \ + ../shared/intel_pixel_read.c \ ../shared/intel_buffers.c \ ../shared/intel_blit.c \ ../shared/intel_swapbuffers.c \ diff --git a/i965/Makefile.am b/i965/Makefile.am index 5a118af..546d378 100644 --- a/i965/Makefile.am +++ b/i965/Makefile.am @@ -25,6 +25,7 @@ i965_dri_la_SOURCES = \ ../shared/intel_pixel_bitmap.c \ ../shared/intel_pixel_copy.c \ ../shared/intel_pixel_draw.c \ + ../shared/intel_pixel_read.c \ ../shared/intel_state.c \ ../shared/intel_swapbuffers.c \ ../shared/intel_tex.c \ diff --git a/i965/brw_clip.c b/i965/brw_clip.c index 5cffceb..8fc9f89 100644 --- a/i965/brw_clip.c +++ b/i965/brw_clip.c @@ -152,7 +152,8 @@ static void upload_clip_prog(struct brw_context *brw) /* _NEW_POLYGON */ if (key.primitive == GL_TRIANGLES) { - if (ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK) + if (ctx->Polygon.CullFlag && + ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK) key.clip_mode = BRW_CLIPMODE_REJECT_ALL; else { GLuint fill_front = CLIP_CULL; diff --git a/i965/brw_context.h b/i965/brw_context.h index 577497b..5cf12fb 100644 --- a/i965/brw_context.h +++ b/i965/brw_context.h @@ -386,6 +386,8 @@ struct brw_cached_batch_item { struct brw_vertex_element { const struct gl_client_array *glarray; + /** The corresponding Mesa vertex attribute */ + gl_vert_attrib attrib; /** Size of a complete element */ GLuint element_size; /** Number of uploaded elements for this input. */ @@ -477,6 +479,9 @@ struct brw_context struct { struct brw_vertex_element inputs[VERT_ATTRIB_MAX]; + struct brw_vertex_element *enabled[VERT_ATTRIB_MAX]; + GLuint nr_enabled; + #define BRW_NR_UPLOAD_BUFS 17 #define BRW_UPLOAD_INIT_SIZE (128*1024) diff --git a/i965/brw_draw.c b/i965/brw_draw.c index 5342622..54b0661 100644 --- a/i965/brw_draw.c +++ b/i965/brw_draw.c @@ -185,6 +185,7 @@ static void brw_merge_inputs( struct brw_context *brw, for (i = 0; i < VERT_ATTRIB_MAX; i++) { brw->vb.inputs[i].glarray = arrays[i]; + brw->vb.inputs[i].attrib = (gl_vert_attrib) i; if (arrays[i]->StrideB != 0) brw->vb.info.varying |= 1 << i; diff --git a/i965/brw_draw_upload.c b/i965/brw_draw_upload.c index b91b20b..fd9c391 100644 --- a/i965/brw_draw_upload.c +++ b/i965/brw_draw_upload.c @@ -343,16 +343,13 @@ static void brw_prepare_vertices(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; struct intel_context *intel = intel_context(ctx); - GLuint tmp = brw->vs.prog_data->inputs_read; + GLbitfield vs_inputs = brw->vs.prog_data->inputs_read; GLuint i; const unsigned char *ptr = NULL; GLuint interleave = 0; unsigned int min_index = brw->vb.min_index; unsigned int max_index = brw->vb.max_index; - struct brw_vertex_element *enabled[VERT_ATTRIB_MAX]; - GLuint nr_enabled = 0; - struct brw_vertex_element *upload[VERT_ATTRIB_MAX]; GLuint nr_uploads = 0; @@ -362,12 +359,13 @@ static void brw_prepare_vertices(struct brw_context *brw) _mesa_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index); /* Accumulate the list of enabled arrays. */ - while (tmp) { - GLuint i = _mesa_ffsll(tmp)-1; + brw->vb.nr_enabled = 0; + while (vs_inputs) { + GLuint i = _mesa_ffsll(vs_inputs) - 1; struct brw_vertex_element *input = &brw->vb.inputs[i]; - tmp &= ~(1<<i); - enabled[nr_enabled++] = input; + vs_inputs &= ~(1 << i); + brw->vb.enabled[brw->vb.nr_enabled++] = input; } /* XXX: In the rare cases where this happens we fallback all @@ -376,13 +374,13 @@ static void brw_prepare_vertices(struct brw_context *brw) * cases with > 17 vertex attributes enabled, so it probably * isn't an issue at this point. */ - if (nr_enabled >= BRW_VEP_MAX) { + if (brw->vb.nr_enabled >= BRW_VEP_MAX) { intel->Fallback = 1; return; } - for (i = 0; i < nr_enabled; i++) { - struct brw_vertex_element *input = enabled[i]; + for (i = 0; i < brw->vb.nr_enabled; i++) { + struct brw_vertex_element *input = brw->vb.enabled[i]; input->element_size = get_size(input->glarray->Type) * input->glarray->Size; input->count = input->glarray->StrideB ? max_index + 1 - min_index : 1; @@ -398,6 +396,20 @@ static void brw_prepare_vertices(struct brw_context *brw) dri_bo_reference(input->bo); input->offset = (unsigned long)input->glarray->Ptr; input->stride = input->glarray->StrideB; + + /* This is a common place to reach if the user mistakenly supplies + * a pointer in place of a VBO offset. If we just let it go through, + * we may end up dereferencing a pointer beyond the bounds of the + * GTT. We would hope that the VBO's max_index would save us, but + * Mesa appears to hand us min/max values not clipped to the + * array object's _MaxElement, and _MaxElement frequently appears + * to be wrong anyway. + * + * The VBO spec allows application termination in this case, and it's + * probably a service to the poor programmer to do so rather than + * trying to just not render. + */ + assert(input->offset < input->bo->size); } else { if (input->bo != NULL) { /* Already-uploaded vertex data is present from a previous @@ -410,7 +422,7 @@ static void brw_prepare_vertices(struct brw_context *brw) /* Queue the buffer object up to be uploaded in the next pass, * when we've decided if we're doing interleaved or not. */ - if (i == 0) { + if (input->attrib == VERT_ATTRIB_POS) { /* Position array not properly enabled: */ if (input->glarray->StrideB == 0) { @@ -466,8 +478,8 @@ static void brw_prepare_vertices(struct brw_context *brw) brw_prepare_query_begin(brw); - for (i = 0; i < nr_enabled; i++) { - struct brw_vertex_element *input = enabled[i]; + for (i = 0; i < brw->vb.nr_enabled; i++) { + struct brw_vertex_element *input = brw->vb.enabled[i]; brw_add_validated_bo(brw, input->bo); } @@ -477,34 +489,44 @@ static void brw_emit_vertices(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; struct intel_context *intel = intel_context(ctx); - GLuint tmp = brw->vs.prog_data->inputs_read; - struct brw_vertex_element *enabled[VERT_ATTRIB_MAX]; GLuint i; - GLuint nr_enabled = 0; - /* Accumulate the list of enabled arrays. */ - while (tmp) { - i = _mesa_ffsll(tmp)-1; - struct brw_vertex_element *input = &brw->vb.inputs[i]; + brw_emit_query_begin(brw); - tmp &= ~(1<<i); - enabled[nr_enabled++] = input; + /* If the VS doesn't read any inputs (calculating vertex position from + * a state variable for some reason, for example), emit a single pad + * VERTEX_ELEMENT struct and bail. + * + * The stale VB state stays in place, but they don't do anything unless + * a VE loads from them. + */ + if (brw->vb.nr_enabled == 0) { + BEGIN_BATCH(3, IGNORE_CLIPRECTS); + OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | 1); + OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) | + BRW_VE0_VALID | + (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) | + (0 << BRW_VE0_SRC_OFFSET_SHIFT)); + OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) | + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | + (BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT)); + ADVANCE_BATCH(); + return; } - brw_emit_query_begin(brw); - /* Now emit VB and VEP state packets. * * This still defines a hardware VB for each input, even if they * are interleaved or from the same VBO. TBD if this makes a * performance difference. */ - BEGIN_BATCH(1 + nr_enabled * 4, IGNORE_CLIPRECTS); + BEGIN_BATCH(1 + brw->vb.nr_enabled * 4, IGNORE_CLIPRECTS); OUT_BATCH((CMD_VERTEX_BUFFER << 16) | - ((1 + nr_enabled * 4) - 2)); + ((1 + brw->vb.nr_enabled * 4) - 2)); - for (i = 0; i < nr_enabled; i++) { - struct brw_vertex_element *input = enabled[i]; + for (i = 0; i < brw->vb.nr_enabled; i++) { + struct brw_vertex_element *input = brw->vb.enabled[i]; OUT_BATCH((i << BRW_VB0_INDEX_SHIFT) | BRW_VB0_ACCESS_VERTEXDATA | @@ -517,10 +539,10 @@ static void brw_emit_vertices(struct brw_context *brw) } ADVANCE_BATCH(); - BEGIN_BATCH(1 + nr_enabled * 2, IGNORE_CLIPRECTS); - OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + nr_enabled * 2) - 2)); - for (i = 0; i < nr_enabled; i++) { - struct brw_vertex_element *input = enabled[i]; + BEGIN_BATCH(1 + brw->vb.nr_enabled * 2, IGNORE_CLIPRECTS); + OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + brw->vb.nr_enabled * 2) - 2)); + for (i = 0; i < brw->vb.nr_enabled; i++) { + struct brw_vertex_element *input = brw->vb.enabled[i]; uint32_t format = get_surface_type(input->glarray->Type, input->glarray->Size, input->glarray->Format, @@ -635,7 +657,7 @@ static void brw_emit_indices(struct brw_context *brw) if (index_buffer == NULL) return; - ib_size = get_size(index_buffer->type) * index_buffer->count; + ib_size = get_size(index_buffer->type) * index_buffer->count - 1; /* Emit the indexbuffer packet: */ diff --git a/i965/brw_eu.c b/i965/brw_eu.c index c53efba..1df5613 100644 --- a/i965/brw_eu.c +++ b/i965/brw_eu.c @@ -62,7 +62,7 @@ void brw_set_predicate_control( struct brw_compile *p, GLuint pc ) void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional ) { - p->current->header.destreg__conditonalmod = conditional; + p->current->header.destreg__conditionalmod = conditional; } void brw_set_access_mode( struct brw_compile *p, GLuint access_mode ) diff --git a/i965/brw_eu_emit.c b/i965/brw_eu_emit.c index 2a147fb..48243e1 100644 --- a/i965/brw_eu_emit.c +++ b/i965/brw_eu_emit.c @@ -377,8 +377,8 @@ static struct brw_instruction *next_insn( struct brw_compile *p, /* Reset this one-shot flag: */ - if (p->current->header.destreg__conditonalmod) { - p->current->header.destreg__conditonalmod = 0; + if (p->current->header.destreg__conditionalmod) { + p->current->header.destreg__conditionalmod = 0; p->current->header.predicate_control = BRW_PREDICATE_NORMAL; } @@ -746,7 +746,7 @@ void brw_CMP(struct brw_compile *p, { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP); - insn->header.destreg__conditonalmod = conditional; + insn->header.destreg__conditionalmod = conditional; brw_set_dest(insn, dest); brw_set_src0(insn, src0); brw_set_src1(insn, src1); @@ -790,7 +790,7 @@ void brw_math( struct brw_compile *p, * instructions. */ insn->header.predicate_control = 0; - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_dest(insn, dest); brw_set_src0(insn, src); @@ -826,7 +826,7 @@ void brw_math_16( struct brw_compile *p, brw_set_compression_control(p, BRW_COMPRESSION_NONE); insn = next_insn(p, BRW_OPCODE_SEND); - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_dest(insn, dest); brw_set_src0(insn, src); @@ -842,7 +842,7 @@ void brw_math_16( struct brw_compile *p, */ insn = next_insn(p, BRW_OPCODE_SEND); insn->header.compression_control = BRW_COMPRESSION_2NDHALF; - insn->header.destreg__conditonalmod = msg_reg_nr+1; + insn->header.destreg__conditionalmod = msg_reg_nr+1; brw_set_dest(insn, offset(dest,1)); brw_set_src0(insn, src); @@ -888,7 +888,7 @@ void brw_dp_WRITE_16( struct brw_compile *p, insn->header.predicate_control = 0; /* XXX */ insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_dest(insn, dest); brw_set_src0(insn, src); @@ -933,7 +933,7 @@ void brw_dp_READ_16( struct brw_compile *p, insn->header.predicate_control = 0; /* XXX */ insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_dest(insn, dest); /* UW? */ brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); @@ -986,7 +986,7 @@ void brw_dp_READ_4( struct brw_compile *p, insn->header.predicate_control = BRW_PREDICATE_NONE; insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; insn->header.mask_control = BRW_MASK_DISABLE; /* cast dest to a uword[8] vector */ @@ -1059,7 +1059,7 @@ void brw_dp_READ_4_vs(struct brw_compile *p, insn->header.predicate_control = BRW_PREDICATE_NONE; insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; insn->header.mask_control = BRW_MASK_DISABLE; /*insn->header.access_mode = BRW_ALIGN_16;*/ @@ -1092,7 +1092,7 @@ void brw_fb_WRITE(struct brw_compile *p, insn->header.predicate_control = 0; /* XXX */ insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_dest(insn, dest); brw_set_src0(insn, src0); @@ -1187,7 +1187,7 @@ void brw_SAMPLE(struct brw_compile *p, insn->header.predicate_control = 0; /* XXX */ insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_dest(insn, dest); brw_set_src0(insn, src0); @@ -1238,7 +1238,7 @@ void brw_urb_WRITE(struct brw_compile *p, brw_set_src0(insn, src0); brw_set_src1(insn, brw_imm_d(0)); - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_urb_message(insn, allocate, diff --git a/i965/brw_sf_state.c b/i965/brw_sf_state.c index c999187..38733c8 100644 --- a/i965/brw_sf_state.c +++ b/i965/brw_sf_state.c @@ -233,7 +233,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, else if (sf.sf6.line_width <= 0x2) sf.sf6.line_width = 0; - /* _NEW_POINT */ + /* _NEW_BUFFERS */ key->render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0; if (!key->render_to_fbo) { /* Rendering to an OpenGL window */ @@ -263,6 +263,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, } /* XXX clamp max depends on AA vs. non-AA */ + /* _NEW_POINT */ sf.sf7.sprite_point = key->point_sprite; sf.sf7.point_size = CLAMP(rint(key->point_size), 1, 255) * (1<<3); sf.sf7.use_point_size_state = !key->point_attenuated; @@ -328,7 +329,8 @@ const struct brw_tracked_state brw_sf_unit = { .mesa = (_NEW_POLYGON | _NEW_LINE | _NEW_POINT | - _NEW_SCISSOR), + _NEW_SCISSOR | + _NEW_BUFFERS), .brw = BRW_NEW_URB_FENCE, .cache = (CACHE_NEW_SF_VP | CACHE_NEW_SF_PROG) diff --git a/i965/brw_structs.h b/i965/brw_structs.h index 89e2981..6011a4a 100644 --- a/i965/brw_structs.h +++ b/i965/brw_structs.h @@ -1168,7 +1168,7 @@ struct brw_instruction GLuint predicate_control:4; GLuint predicate_inverse:1; GLuint execution_size:3; - GLuint destreg__conditonalmod:4; /* destreg - send, conditionalmod - others */ + GLuint destreg__conditionalmod:4; /* destreg - send, conditionalmod - others */ GLuint pad0:2; GLuint debug_control:1; GLuint saturate:1; diff --git a/i965/brw_tex_layout.c b/i965/brw_tex_layout.c index 51a617f..3ab27c2 100644 --- a/i965/brw_tex_layout.c +++ b/i965/brw_tex_layout.c @@ -119,6 +119,16 @@ GLboolean brw_miptree_layout( struct intel_context *intel, struct intel_mipmap_t } } + /* The 965's sampler lays cachelines out according to how accesses + * in the texture surfaces run, so they may be "vertical" through + * memory. As a result, the docs say in Surface Padding Requirements: + * Sampling Engine Surfaces that two extra rows of padding are required. + * We don't know of similar requirements for pre-965, but given that + * those docs are silent on padding requirements in general, let's play + * it safe. + */ + if (mt->target == GL_TEXTURE_CUBE_MAP) + mt->total_height += 2; break; } diff --git a/i965/brw_vs_emit.c b/i965/brw_vs_emit.c index b69616d..72cd36a 100644 --- a/i965/brw_vs_emit.c +++ b/i965/brw_vs_emit.c @@ -68,6 +68,7 @@ static void release_tmps( struct brw_vs_compile *c ) static void brw_vs_alloc_regs( struct brw_vs_compile *c ) { GLuint i, reg = 0, mrf; + int attributes_in_vue; #if 0 if (c->vp->program.Base.Parameters->NumParameters >= 6) @@ -123,6 +124,11 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) reg++; } } + /* If there are no inputs, we'll still be reading one attribute's worth + * because it's required -- see urb_read_length setting. + */ + if (c->nr_inputs == 0) + reg++; /* Allocate outputs: TODO: could organize the non-position outputs * to go straight into message regs. @@ -200,8 +206,20 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) * vertex urb, so is half the amount: */ c->prog_data.urb_read_length = (c->nr_inputs + 1) / 2; + /* Setting this field to 0 leads to undefined behavior according to the + * the VS_STATE docs. Our VUEs will always have at least one attribute + * sitting in them, even if it's padding. + */ + if (c->prog_data.urb_read_length == 0) + c->prog_data.urb_read_length = 1; + + /* The VS VUEs are shared by VF (outputting our inputs) and VS, so size + * them to fit the biggest thing they need to. + */ + attributes_in_vue = MAX2(c->nr_outputs, c->nr_inputs); + + c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 3) / 4; - c->prog_data.urb_entry_size = (c->nr_outputs + 2 + 3) / 4; c->prog_data.total_grf = reg; if (INTEL_DEBUG & DEBUG_VS) { @@ -1172,20 +1190,36 @@ post_vs_emit( struct brw_vs_compile *c, brw_set_src1(end_inst, brw_imm_d(offset * 16)); } +static uint32_t +get_predicate(uint32_t swizzle) +{ + switch (swizzle) { + case SWIZZLE_XXXX: + return BRW_PREDICATE_ALIGN16_REPLICATE_X; + case SWIZZLE_YYYY: + return BRW_PREDICATE_ALIGN16_REPLICATE_Y; + case SWIZZLE_ZZZZ: + return BRW_PREDICATE_ALIGN16_REPLICATE_Z; + case SWIZZLE_WWWW: + return BRW_PREDICATE_ALIGN16_REPLICATE_W; + default: + _mesa_problem(NULL, "Unexpected predicate: 0x%08x\n", swizzle); + return BRW_PREDICATE_NORMAL; + } +} /* Emit the vertex program instructions here. */ void brw_vs_emit(struct brw_vs_compile *c ) { -#define MAX_IFSN 32 +#define MAX_IF_DEPTH 32 struct brw_compile *p = &c->func; - GLuint nr_insns = c->vp->program.Base.NumInstructions; - GLuint insn, if_insn = 0; + const GLuint nr_insns = c->vp->program.Base.NumInstructions; + GLuint insn, if_depth = 0; GLuint end_offset = 0; struct brw_instruction *end_inst, *last_inst; - struct brw_instruction *if_inst[MAX_IFSN]; - struct brw_indirect stack_index = brw_indirect(0, 0); - + struct brw_instruction *if_inst[MAX_IF_DEPTH]; + const struct brw_indirect stack_index = brw_indirect(0, 0); GLuint index; GLuint file; @@ -1376,15 +1410,18 @@ void brw_vs_emit(struct brw_vs_compile *c ) emit_xpd(p, dst, args[0], args[1]); break; case OPCODE_IF: - assert(if_insn < MAX_IFSN); - if_inst[if_insn++] = brw_IF(p, BRW_EXECUTE_8); + assert(if_depth < MAX_IF_DEPTH); + if_inst[if_depth] = brw_IF(p, BRW_EXECUTE_8); + if_inst[if_depth]->header.predicate_control = + get_predicate(inst->DstReg.CondSwizzle); + if_depth++; break; case OPCODE_ELSE: - if_inst[if_insn-1] = brw_ELSE(p, if_inst[if_insn-1]); + if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]); break; case OPCODE_ENDIF: - assert(if_insn > 0); - brw_ENDIF(p, if_inst[--if_insn]); + assert(if_depth > 0); + brw_ENDIF(p, if_inst[--if_depth]); break; case OPCODE_BRA: brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); @@ -1430,6 +1467,19 @@ void brw_vs_emit(struct brw_vs_compile *c ) "unknown"); } + /* Set the predication update on the last instruction of the native + * instruction sequence. + * + * This would be problematic if it was set on a math instruction, + * but that shouldn't be the case with the current GLSL compiler. + */ + if (inst->CondUpdate) { + struct brw_instruction *hw_insn = &p->store[p->nr_insn - 1]; + + assert(hw_insn->header.destreg__conditionalmod == 0); + hw_insn->header.destreg__conditionalmod = BRW_CONDITIONAL_NZ; + } + if ((inst->DstReg.File == PROGRAM_OUTPUT) && (inst->DstReg.Index != VERT_RESULT_HPOS) && c->output_regs[inst->DstReg.Index].used_in_src) { diff --git a/i965/brw_wm.c b/i965/brw_wm.c index 8a3b7df..d4e22d5 100644 --- a/i965/brw_wm.c +++ b/i965/brw_wm.c @@ -202,6 +202,7 @@ static void brw_wm_populate_key( struct brw_context *brw, /* BRW_NEW_FRAGMENT_PROGRAM */ const struct brw_fragment_program *fp = (struct brw_fragment_program *)brw->fragment_program; + GLboolean uses_depth = (fp->program.Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0; GLuint lookup = 0; GLuint line_aa; GLuint i; @@ -263,6 +264,7 @@ static void brw_wm_populate_key( struct brw_context *brw, brw_wm_lookup_iz(line_aa, lookup, + uses_depth, key); diff --git a/i965/brw_wm.h b/i965/brw_wm.h index 295fed8..307b5e5 100644 --- a/i965/brw_wm.h +++ b/i965/brw_wm.h @@ -286,6 +286,7 @@ void brw_wm_print_program( struct brw_wm_compile *c, void brw_wm_lookup_iz( GLuint line_aa, GLuint lookup, + GLboolean ps_uses_depth, struct brw_wm_prog_key *key ); GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp); diff --git a/i965/brw_wm_fp.c b/i965/brw_wm_fp.c index 49aad28..8cf3a1f 100644 --- a/i965/brw_wm_fp.c +++ b/i965/brw_wm_fp.c @@ -705,7 +705,11 @@ static void precalc_tex( struct brw_wm_compile *c, tmpcoord, 0, inst->SrcReg[0], - scale, + src_swizzle(scale, + SWIZZLE_X, + SWIZZLE_Y, + SWIZZLE_ONE, + SWIZZLE_ONE), src_undef()); coord = src_reg_from_dst(tmpcoord); diff --git a/i965/brw_wm_iz.c b/i965/brw_wm_iz.c index bd60ac9..7e2b1c7 100644 --- a/i965/brw_wm_iz.c +++ b/i965/brw_wm_iz.c @@ -118,6 +118,7 @@ const struct { void brw_wm_lookup_iz( GLuint line_aa, GLuint lookup, + GLboolean ps_uses_depth, struct brw_wm_prog_key *key ) { GLuint reg = 2; @@ -127,7 +128,7 @@ void brw_wm_lookup_iz( GLuint line_aa, if (lookup & IZ_PS_COMPUTES_DEPTH_BIT) key->computes_depth = 1; - if (wm_iz_table[lookup].sd_present) { + if (wm_iz_table[lookup].sd_present || ps_uses_depth) { key->source_depth_reg = reg; reg += 2; } diff --git a/shared/intel_batchbuffer.c b/shared/intel_batchbuffer.c index 29dc05c..82a92a9 100644 --- a/shared/intel_batchbuffer.c +++ b/shared/intel_batchbuffer.c @@ -197,6 +197,11 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file, GLuint used = batch->ptr - batch->map; GLboolean was_locked = intel->locked; + if (intel->first_post_swapbuffers_batch == NULL) { + intel->first_post_swapbuffers_batch = intel->batch->buf; + drm_intel_bo_reference(intel->first_post_swapbuffers_batch); + } + if (used == 0) { batch->cliprect_mode = IGNORE_CLIPRECTS; return; diff --git a/shared/intel_buffer_objects.c b/shared/intel_buffer_objects.c index 2e6b778..db0de03 100644 --- a/shared/intel_buffer_objects.c +++ b/shared/intel_buffer_objects.c @@ -209,7 +209,10 @@ intel_bufferobj_get_subdata(GLcontext * ctx, struct intel_buffer_object *intel_obj = intel_buffer_object(obj); assert(intel_obj); - dri_bo_get_subdata(intel_obj->buffer, offset, size, data); + if (intel_obj->sys_buffer) + memcpy(data, (char *)intel_obj->sys_buffer + offset, size); + else + dri_bo_get_subdata(intel_obj->buffer, offset, size, data); } diff --git a/shared/intel_buffers.c b/shared/intel_buffers.c index d2fad9e..44e3433 100644 --- a/shared/intel_buffers.c +++ b/shared/intel_buffers.c @@ -345,6 +345,23 @@ intelDrawBuffer(GLcontext * ctx, GLenum mode) static void intelReadBuffer(GLcontext * ctx, GLenum mode) { + if ((ctx->DrawBuffer != NULL) && (ctx->DrawBuffer->Name == 0)) { + struct intel_context *const intel = intel_context(ctx); + const GLboolean was_front_buffer_reading = + intel->is_front_buffer_reading; + + intel->is_front_buffer_reading = (mode == GL_FRONT_LEFT) + || (mode == GL_FRONT); + + /* If we weren't front-buffer reading before but we are now, make sure + * that the front-buffer has actually been allocated. + */ + if (!was_front_buffer_reading && intel->is_front_buffer_reading) { + intel_update_renderbuffers(intel->driContext, + intel->driContext->driDrawablePriv); + } + } + if (ctx->ReadBuffer == ctx->DrawBuffer) { /* This will update FBO completeness status. * A framebuffer will be incomplete if the GL_READ_BUFFER setting diff --git a/shared/intel_chipset.h b/shared/intel_chipset.h index 4593d90..a528d99 100644 --- a/shared/intel_chipset.h +++ b/shared/intel_chipset.h @@ -66,6 +66,7 @@ #define PCI_CHIP_Q45_G 0x2E12 #define PCI_CHIP_G45_G 0x2E22 #define PCI_CHIP_G41_G 0x2E32 +#define PCI_CHIP_B43_G 0x2E42 #define IS_MOBILE(devid) (devid == PCI_CHIP_I855_GM || \ devid == PCI_CHIP_I915_GM || \ @@ -78,7 +79,8 @@ #define IS_G45(devid) (devid == PCI_CHIP_IGD_E_G || \ devid == PCI_CHIP_Q45_G || \ devid == PCI_CHIP_G45_G || \ - devid == PCI_CHIP_G41_G) + devid == PCI_CHIP_G41_G || \ + devid == PCI_CHIP_B43_G) #define IS_GM45(devid) (devid == PCI_CHIP_GM45_GM) #define IS_G4X(devid) (IS_G45(devid) || IS_GM45(devid)) diff --git a/shared/intel_context.c b/shared/intel_context.c index cfd983d..e593b23 100644 --- a/shared/intel_context.c +++ b/shared/intel_context.c @@ -161,6 +161,9 @@ intelGetString(GLcontext * ctx, GLenum name) case PCI_CHIP_G41_G: chipset = "Intel(R) G41"; break; + case PCI_CHIP_B43_G: + chipset = "Intel(R) B43"; + break; default: chipset = "Unknown Intel Chipset"; break; @@ -220,7 +223,9 @@ intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) struct intel_renderbuffer *stencil_rb; i = 0; - if ((intel->is_front_buffer_rendering || !intel_fb->color_rb[1]) + if ((intel->is_front_buffer_rendering || + intel->is_front_buffer_reading || + !intel_fb->color_rb[1]) && intel_fb->color_rb[0]) { attachments[i++] = __DRI_BUFFER_FRONT_LEFT; attachments[i++] = intel_bits_per_pixel(intel_fb->color_rb[0]); @@ -495,7 +500,8 @@ intel_flush(GLcontext *ctx, GLboolean needs_mi_flush) if (screen->dri2.loader && (screen->dri2.loader->base.version >= 2) - && (screen->dri2.loader->flushFrontBuffer != NULL)) { + && (screen->dri2.loader->flushFrontBuffer != NULL) && + intel->driDrawable && intel->driDrawable->loaderPrivate) { (*screen->dri2.loader->flushFrontBuffer)(intel->driDrawable, intel->driDrawable->loaderPrivate); @@ -505,7 +511,7 @@ intel_flush(GLcontext *ctx, GLboolean needs_mi_flush) * each of N places that do rendering. This has worse performances, * but it is much easier to get correct. */ - if (intel->is_front_buffer_rendering) { + if (!intel->is_front_buffer_rendering) { intel->front_buffer_dirty = GL_FALSE; } } @@ -521,7 +527,27 @@ intelFlush(GLcontext * ctx) static void intel_glFlush(GLcontext *ctx) { + struct intel_context *intel = intel_context(ctx); + intel_flush(ctx, GL_TRUE); + + /* We're using glFlush as an indicator that a frame is done, which is + * what DRI2 does before calling SwapBuffers (and means we should catch + * people doing front-buffer rendering, as well).. + * + * Wait for the swapbuffers before the one we just emitted, so we don't + * get too many swaps outstanding for apps that are GPU-heavy but not + * CPU-heavy. + * + * Unfortunately, we don't have a handle to the batch containing the swap, + * and getting our hands on that doesn't seem worth it, so we just us the + * first batch we emitted after the last swap. + */ + if (intel->first_post_swapbuffers_batch != NULL) { + drm_intel_bo_wait_rendering(intel->first_post_swapbuffers_batch); + drm_intel_bo_unreference(intel->first_post_swapbuffers_batch); + intel->first_post_swapbuffers_batch = NULL; + } } void @@ -787,6 +813,8 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv) intel->prim.vb = NULL; dri_bo_unreference(intel->prim.vb_bo); intel->prim.vb_bo = NULL; + dri_bo_unreference(intel->first_post_swapbuffers_batch); + intel->first_post_swapbuffers_batch = NULL; if (release_texture_heaps) { /* This share group is about to go away, free our private @@ -804,12 +832,23 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv) /* free the Mesa context */ _mesa_free_context_data(&intel->ctx); + + FREE(intel); + driContextPriv->driverPrivate = NULL; } } GLboolean intelUnbindContext(__DRIcontextPrivate * driContextPriv) { + struct intel_context *intel = + (struct intel_context *) driContextPriv->driverPrivate; + + /* Deassociate the context with the drawables. + */ + intel->driDrawable = NULL; + intel->driReadDrawable = NULL; + return GL_TRUE; } diff --git a/shared/intel_context.h b/shared/intel_context.h index b5cf7e6..e2b3943 100644 --- a/shared/intel_context.h +++ b/shared/intel_context.h @@ -191,6 +191,7 @@ struct intel_context GLboolean ttm; struct intel_batchbuffer *batch; + drm_intel_bo *first_post_swapbuffers_batch; GLboolean no_batch_wrap; unsigned batch_id; @@ -294,6 +295,14 @@ struct intel_context * easily. */ GLboolean is_front_buffer_rendering; + /** + * Track whether front-buffer is the current read target. + * + * This is closely associated with is_front_buffer_rendering, but may + * be set separately. The DRI2 fake front buffer must be referenced + * either way. + */ + GLboolean is_front_buffer_reading; drm_clip_rect_t fboRect; /**< cliprect for FBO rendering */ diff --git a/shared/intel_fbo.c b/shared/intel_fbo.c index 30f58b1..ed7c78e 100644 --- a/shared/intel_fbo.c +++ b/shared/intel_fbo.c @@ -680,6 +680,11 @@ intel_validate_framebuffer(GLcontext *ctx, struct gl_framebuffer *fb) if (rb == NULL) continue; + if (irb == NULL) { + fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT; + continue; + } + switch (irb->texformat->MesaFormat) { case MESA_FORMAT_ARGB8888: case MESA_FORMAT_RGB565: diff --git a/shared/intel_pixel.c b/shared/intel_pixel.c index fc0ac0b..defb80f 100644 --- a/shared/intel_pixel.c +++ b/shared/intel_pixel.c @@ -342,10 +342,8 @@ intelInitPixelFuncs(struct dd_function_table *functions) functions->Bitmap = intelBitmap; functions->CopyPixels = intelCopyPixels; functions->DrawPixels = intelDrawPixels; -#ifdef I915 - functions->ReadPixels = intelReadPixels; -#endif } + functions->ReadPixels = intelReadPixels; } void diff --git a/shared/intel_pixel_bitmap.c b/shared/intel_pixel_bitmap.c index a2ccae1..d137aef 100644 --- a/shared/intel_pixel_bitmap.c +++ b/shared/intel_pixel_bitmap.c @@ -409,6 +409,12 @@ intel_texture_bitmap(GLcontext * ctx, return GL_FALSE; } + if (ctx->Fog.Enabled) { + if (INTEL_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, "glBitmap() fallback: fog\n"); + return GL_FALSE; + } + /* Check that we can load in a texture this big. */ if (width > (1 << (ctx->Const.MaxTextureLevels - 1)) || height > (1 << (ctx->Const.MaxTextureLevels - 1))) { diff --git a/i915/intel_pixel_read.c b/shared/intel_pixel_read.c index 56087aa..0370255 100644 --- a/i915/intel_pixel_read.c +++ b/shared/intel_pixel_read.c @@ -1,8 +1,8 @@ /************************************************************************** - * + * * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. - * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including @@ -10,11 +10,11 @@ * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: - * + * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. @@ -22,7 +22,7 @@ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * + * **************************************************************************/ #include "main/glheader.h" @@ -31,6 +31,7 @@ #include "main/macros.h" #include "main/image.h" #include "main/bufferobj.h" +#include "main/state.h" #include "swrast/swrast.h" #include "intel_screen.h" @@ -45,10 +46,10 @@ /* For many applications, the new ability to pull the source buffers * back out of the GTT and then do the packing/conversion operations * in software will be as much of an improvement as trying to get the - * blitter and/or texture engine to do the work. + * blitter and/or texture engine to do the work. * * This step is gated on private backbuffers. - * + * * Obviously the frontbuffer can't be pulled back, so that is either * an argument for blit/texture readpixels, or for blitting to a * temporary and then pulling that back. @@ -291,6 +292,7 @@ intelReadPixels(GLcontext * ctx, intelFlush(ctx); +#ifdef I915 if (do_blit_readpixels (ctx, x, y, width, height, format, type, pack, pixels)) return; @@ -298,9 +300,22 @@ intelReadPixels(GLcontext * ctx, if (do_texture_readpixels (ctx, x, y, width, height, format, type, pack, pixels)) return; +#else + (void)do_blit_readpixels; + (void)do_texture_readpixels; +#endif if (INTEL_DEBUG & DEBUG_PIXEL) _mesa_printf("%s: fallback to swrast\n", __FUNCTION__); + /* Update Mesa state before calling down into _swrast_ReadPixels, as + * the spans code requires the computed buffer states to be up to date, + * but _swrast_ReadPixels only updates Mesa state after setting up + * the spans code. + */ + + if (ctx->NewState) + _mesa_update_state(ctx); + _swrast_ReadPixels(ctx, x, y, width, height, format, type, pack, pixels); } diff --git a/shared/intel_regions.c b/shared/intel_regions.c index 0aa5b8c..b8d2dec 100644 --- a/shared/intel_regions.c +++ b/shared/intel_regions.c @@ -114,6 +114,13 @@ intel_region_alloc(struct intel_context *intel, { dri_bo *buffer; + /* If we're untiled, we have to align to 2 rows high because the + * data port accesses 2x2 blocks even if the bottom row isn't to be + * rendered, so failure to align means we could walk off the end of the + * GTT and fault. + */ + height = ALIGN(height, 2); + if (expect_accelerated_upload) { buffer = drm_intel_bo_alloc_for_render(intel->bufmgr, "region", pitch * cpp * height, 64); diff --git a/shared/intel_screen.c b/shared/intel_screen.c index 0f278b3..f810850 100644 --- a/shared/intel_screen.c +++ b/shared/intel_screen.c @@ -305,7 +305,7 @@ intelDestroyScreen(__DRIscreenPrivate * sPriv) dri_bufmgr_destroy(intelScreen->bufmgr); intelUnmapScreenRegions(intelScreen); - driDestroyOptionCache(&intelScreen->optionCache); + driDestroyOptionInfo(&intelScreen->optionCache); FREE(intelScreen); sPriv->private = NULL; @@ -617,10 +617,10 @@ intel_init_bufmgr(intelScreenPrivate *intelScreen) /* Otherwise, use the classic buffer manager. */ if (intelScreen->bufmgr == NULL) { if (gem_disable) { - fprintf(stderr, "GEM disabled. Using classic.\n"); + _mesa_warning(NULL, "GEM disabled. Using classic."); } else { - fprintf(stderr, "Failed to initialize GEM. " - "Falling back to classic.\n"); + _mesa_warning(NULL, + "Failed to initialize GEM. Falling back to classic."); } if (intelScreen->tex.size == 0) { |