diff options
author | Luc Verhaegen <libv@skynet.be> | 2010-04-02 12:46:10 +0200 |
---|---|---|
committer | Luc Verhaegen <libv@skynet.be> | 2010-04-02 12:46:10 +0200 |
commit | e8bf66970f1ee08b214e5b88f8f4b62fc1a73509 (patch) | |
tree | 84830fb169ba2c393ca250d75341bee9eeb5ae6f | |
parent | 54a8e9cc3988d908b5b846a752679127cacefd3b (diff) |
-rw-r--r-- | i915/intel_tris.c | 4 | ||||
-rw-r--r-- | i965/brw_eu_emit.c | 18 | ||||
-rw-r--r-- | i965/brw_program.c | 14 | ||||
-rw-r--r-- | i965/brw_sf.c | 21 | ||||
-rw-r--r-- | i965/brw_sf.h | 6 | ||||
-rw-r--r-- | i965/brw_sf_emit.c | 135 | ||||
-rw-r--r-- | i965/brw_vs_emit.c | 6 | ||||
-rw-r--r-- | i965/brw_wm.h | 6 | ||||
-rw-r--r-- | i965/brw_wm_emit.c | 12 | ||||
-rw-r--r-- | i965/brw_wm_glsl.c | 147 | ||||
-rw-r--r-- | shared/intel_blit.c | 2 | ||||
-rw-r--r-- | shared/intel_buffers.c | 6 | ||||
-rw-r--r-- | shared/intel_context.c | 4 | ||||
-rw-r--r-- | shared/intel_fbo.c | 9 | ||||
-rw-r--r-- | shared/intel_mipmap_tree.c | 18 | ||||
-rw-r--r-- | shared/intel_pixel_copy.c | 32 | ||||
-rw-r--r-- | shared/intel_reg.h | 4 | ||||
-rw-r--r-- | shared/intel_regions.c | 29 | ||||
-rw-r--r-- | shared/intel_regions.h | 4 | ||||
-rw-r--r-- | shared/intel_span.c | 16 |
20 files changed, 228 insertions, 265 deletions
diff --git a/i915/intel_tris.c b/i915/intel_tris.c index fb191fe..81c4ade 100644 --- a/i915/intel_tris.c +++ b/i915/intel_tris.c @@ -251,7 +251,7 @@ void intel_flush_prim(struct intel_context *intel) BEGIN_BATCH(5); OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(0) | I1_LOAD_S(1) | 1); - assert((offset & !S0_VB_OFFSET_MASK) == 0); + assert((offset & ~S0_VB_OFFSET_MASK) == 0); OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0, offset); OUT_BATCH((intel->vertex_size << S1_VERTEX_WIDTH_SHIFT) | (intel->vertex_size << S1_VERTEX_PITCH_SHIFT)); @@ -270,7 +270,7 @@ void intel_flush_prim(struct intel_context *intel) OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(0) | I1_LOAD_S(2) | 1); /* S0 */ - assert((offset & !S0_VB_OFFSET_MASK_830) == 0); + assert((offset & ~S0_VB_OFFSET_MASK_830) == 0); OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0, offset | (intel->vertex_size << S0_VB_PITCH_SHIFT_830) | S0_VB_ENABLE_830); diff --git a/i965/brw_eu_emit.c b/i965/brw_eu_emit.c index f69d529..82f2fda 100644 --- a/i965/brw_eu_emit.c +++ b/i965/brw_eu_emit.c @@ -1290,7 +1290,7 @@ void brw_SAMPLE(struct brw_compile *p, GLuint simd_mode) { GLboolean need_stall = 0; - + if (writemask == 0) { /*printf("%s: zero writemask??\n", __FUNCTION__); */ return; @@ -1327,8 +1327,14 @@ void brw_SAMPLE(struct brw_compile *p, /* printf("need stall %x %x\n", newmask , writemask); */ } else { + GLboolean dispatch_16 = GL_FALSE; + struct brw_reg m1 = brw_message_reg(msg_reg_nr); - + + guess_execution_size(p->current, dest); + if (p->current->header.execution_size == BRW_EXECUTE_16) + dispatch_16 = GL_TRUE; + newmask = ~newmask & WRITEMASK_XYZW; brw_push_insn_state(p); @@ -1343,7 +1349,13 @@ void brw_SAMPLE(struct brw_compile *p, src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); dest = offset(dest, dst_offset); - response_length = len * 2; + + /* For 16-wide dispatch, masked channels are skipped in the + * response. For 8-wide, masked channels still take up slots, + * and are just not written to. 
+ */ + if (dispatch_16) + response_length = len * 2; } } diff --git a/i965/brw_program.c b/i965/brw_program.c index c78f7b3..1fd957b 100644 --- a/i965/brw_program.c +++ b/i965/brw_program.c @@ -95,9 +95,17 @@ static void brwDeleteProgram( GLcontext *ctx, struct gl_program *prog ) { if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) { - struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog; - struct brw_fragment_program *brw_fprog = brw_fragment_program(fprog); - dri_bo_unreference(brw_fprog->const_buffer); + struct gl_fragment_program *fp = (struct gl_fragment_program *) prog; + struct brw_fragment_program *brw_fp = brw_fragment_program(fp); + + dri_bo_unreference(brw_fp->const_buffer); + } + + if (prog->Target == GL_VERTEX_PROGRAM_ARB) { + struct gl_vertex_program *vp = (struct gl_vertex_program *) prog; + struct brw_vertex_program *brw_vp = brw_vertex_program(vp); + + dri_bo_unreference(brw_vp->const_buffer); } _mesa_delete_program( ctx, prog ); diff --git a/i965/brw_sf.c b/i965/brw_sf.c index 8e6839b..57d1c29 100644 --- a/i965/brw_sf.c +++ b/i965/brw_sf.c @@ -46,7 +46,6 @@ static void compile_sf_prog( struct brw_context *brw, struct brw_sf_prog_key *key ) { - GLcontext *ctx = &brw->intel.ctx; struct brw_sf_compile c; const GLuint *program; GLuint program_size; @@ -69,20 +68,14 @@ static void compile_sf_prog( struct brw_context *brw, /* Construct map from attribute number to position in the vertex. */ - for (i = idx = 0; i < VERT_RESULT_MAX; i++) + for (i = idx = 0; i < VERT_RESULT_MAX; i++) { if (c.key.attrs & BITFIELD64_BIT(i)) { c.attr_to_idx[i] = idx; c.idx_to_attr[idx] = i; - if (i >= VERT_RESULT_TEX0 && i <= VERT_RESULT_TEX7) { - c.point_attrs[i].CoordReplace = - ctx->Point.CoordReplace[i - VERT_RESULT_TEX0]; - } - else { - c.point_attrs[i].CoordReplace = GL_FALSE; - } idx++; } - + } + /* Which primitive? Or all three? 
*/ switch (key->primitive) { @@ -162,6 +155,14 @@ static void upload_sf_prog(struct brw_context *brw) } key.do_point_sprite = ctx->Point.PointSprite; + if (key.do_point_sprite) { + int i; + + for (i = 0; i < 8; i++) { + if (ctx->Point.CoordReplace[i]) + key.point_sprite_coord_replace |= (1 << i); + } + } key.sprite_origin_lower_left = (ctx->Point.SpriteOrigin == GL_LOWER_LEFT); /* _NEW_LIGHT */ key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT); diff --git a/i965/brw_sf.h b/i965/brw_sf.h index 0ba731f..a0680a5 100644 --- a/i965/brw_sf.h +++ b/i965/brw_sf.h @@ -46,6 +46,7 @@ struct brw_sf_prog_key { GLbitfield64 attrs; + uint8_t point_sprite_coord_replace; GLuint primitive:2; GLuint do_twoside_color:1; GLuint do_flat_shading:1; @@ -56,10 +57,6 @@ struct brw_sf_prog_key { GLuint pad:24; }; -struct brw_sf_point_tex { - GLboolean CoordReplace; -}; - struct brw_sf_compile { struct brw_compile func; struct brw_sf_prog_key key; @@ -100,7 +97,6 @@ struct brw_sf_compile { GLubyte attr_to_idx[VERT_RESULT_MAX]; GLubyte idx_to_attr[VERT_RESULT_MAX]; - struct brw_sf_point_tex point_attrs[VERT_RESULT_MAX]; }; diff --git a/i965/brw_sf_emit.c b/i965/brw_sf_emit.c index bb08055..56f7c98 100644 --- a/i965/brw_sf_emit.c +++ b/i965/brw_sf_emit.c @@ -354,6 +354,33 @@ static GLboolean calculate_masks( struct brw_sf_compile *c, return is_last_attr; } +/* Calculates the predicate control for which channels of a reg + * (containing 2 attrs) to do point sprite coordinate replacement on. 
+ */ +static uint16_t +calculate_point_sprite_mask(struct brw_sf_compile *c, GLuint reg) +{ + int attr1, attr2; + uint16_t pc = 0; + + attr1 = c->idx_to_attr[reg * 2]; + if (attr1 >= VERT_RESULT_TEX0 && attr1 <= VERT_RESULT_TEX7) { + if (c->key.point_sprite_coord_replace & (1 << (attr1 - VERT_RESULT_TEX0))) + pc |= 0x0f; + } + + if (reg * 2 + 1 < c->nr_setup_attrs) { + attr2 = c->idx_to_attr[reg * 2 + 1]; + if (attr2 >= VERT_RESULT_TEX0 && attr2 <= VERT_RESULT_TEX7) { + if (c->key.point_sprite_coord_replace & (1 << (attr2 - + VERT_RESULT_TEX0))) + pc |= 0xf0; + } + } + + return pc; +} + void brw_emit_tri_setup( struct brw_sf_compile *c, GLboolean allocate) @@ -529,22 +556,27 @@ void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate) copy_z_inv_w(c); for (i = 0; i < c->nr_setup_regs; i++) { - struct brw_sf_point_tex *tex = &c->point_attrs[c->idx_to_attr[2*i]]; struct brw_reg a0 = offset(c->vert[0], i); - GLushort pc, pc_persp, pc_linear; + GLushort pc, pc_persp, pc_linear, pc_coord_replace; GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); - - if (pc_persp) - { - if (!tex->CoordReplace) { - brw_set_predicate_control_flag_value(p, pc_persp); - brw_MUL(p, a0, a0, c->inv_w[0]); - } + + pc_coord_replace = calculate_point_sprite_mask(c, i); + pc_persp &= ~pc_coord_replace; + + if (pc_persp) { + brw_set_predicate_control_flag_value(p, pc_persp); + brw_MUL(p, a0, a0, c->inv_w[0]); } - if (tex->CoordReplace) { - /* Caculate 1.0/PointWidth */ - brw_math(&c->func, + /* Point sprite coordinate replacement: A texcoord with this + * enabled gets replaced with the value (x, y, 0, 1) where x and + * y vary from 0 to 1 across the horizontal and vertical of the + * point. 
+ */ + if (pc_coord_replace) { + brw_set_predicate_control_flag_value(p, pc_coord_replace); + /* Caculate 1.0/PointWidth */ + brw_math(&c->func, c->tmp, BRW_MATH_FUNCTION_INV, BRW_MATH_SATURATE_NONE, @@ -553,50 +585,51 @@ void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate) BRW_MATH_DATA_SCALAR, BRW_MATH_PRECISION_FULL); - if (c->key.sprite_origin_lower_left) { - brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]); - brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0)); - brw_MUL(p, c->m2Cy, c->tmp, negate(c->inv_w[0])); - brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0)); - } else { - brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]); - brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0)); - brw_MUL(p, c->m2Cy, c->tmp, c->inv_w[0]); - brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0)); - } - } else { - brw_MOV(p, c->m1Cx, brw_imm_ud(0)); - brw_MOV(p, c->m2Cy, brw_imm_ud(0)); - } + brw_set_access_mode(p, BRW_ALIGN_16); - { - brw_set_predicate_control_flag_value(p, pc); - if (tex->CoordReplace) { - if (c->key.sprite_origin_lower_left) { - brw_MUL(p, c->m3C0, c->inv_w[0], brw_imm_f(1.0)); - brw_MOV(p, vec1(suboffset(c->m3C0, 0)), brw_imm_f(0.0)); - } - else - brw_MOV(p, c->m3C0, brw_imm_f(0.0)); + /* dA/dx, dA/dy */ + brw_MOV(p, c->m1Cx, brw_imm_f(0.0)); + brw_MOV(p, c->m2Cy, brw_imm_f(0.0)); + brw_MOV(p, brw_writemask(c->m1Cx, WRITEMASK_X), c->tmp); + if (c->key.sprite_origin_lower_left) { + brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), negate(c->tmp)); } else { - brw_MOV(p, c->m3C0, a0); /* constant value */ + brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), c->tmp); } - /* Copy m0..m3 to URB. 
- */ - brw_urb_WRITE(p, - brw_null_reg(), - 0, - brw_vec8_grf(0, 0), - 0, /* allocate */ - 1, /* used */ - 4, /* msg len */ - 0, /* response len */ - last, /* eot */ - last, /* writes complete */ - i*4, /* urb destination offset */ - BRW_URB_SWIZZLE_TRANSPOSE); + /* attribute constant offset */ + brw_MOV(p, c->m3C0, brw_imm_f(0.0)); + if (c->key.sprite_origin_lower_left) { + brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_YW), brw_imm_f(1.0)); + } else { + brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_W), brw_imm_f(1.0)); + } + + brw_set_access_mode(p, BRW_ALIGN_1); } + + if (pc & ~pc_coord_replace) { + brw_set_predicate_control_flag_value(p, pc & ~pc_coord_replace); + brw_MOV(p, c->m1Cx, brw_imm_ud(0)); + brw_MOV(p, c->m2Cy, brw_imm_ud(0)); + brw_MOV(p, c->m3C0, a0); /* constant value */ + } + + + brw_set_predicate_control_flag_value(p, pc); + /* Copy m0..m3 to URB. */ + brw_urb_WRITE(p, + brw_null_reg(), + 0, + brw_vec8_grf(0, 0), + 0, /* allocate */ + 1, /* used */ + 4, /* msg len */ + 0, /* response len */ + last, /* eot */ + last, /* writes complete */ + i*4, /* urb destination offset */ + BRW_URB_SWIZZLE_TRANSPOSE); } } diff --git a/i965/brw_vs_emit.c b/i965/brw_vs_emit.c index a7c4b58..a48804a 100644 --- a/i965/brw_vs_emit.c +++ b/i965/brw_vs_emit.c @@ -1717,11 +1717,13 @@ void brw_vs_emit(struct brw_vs_compile *c ) /* patch all the BREAK/CONT instructions from last BEGINLOOP */ while (inst0 > loop_inst[loop_depth]) { inst0--; - if (inst0->header.opcode == BRW_OPCODE_BREAK) { + if (inst0->header.opcode == BRW_OPCODE_BREAK && + inst0->bits3.if_else.jump_count == 0) { inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); inst0->bits3.if_else.pop_count = 0; } - else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) { + else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && + inst0->bits3.if_else.jump_count == 0) { inst0->bits3.if_else.jump_count = br * (inst1 - inst0); inst0->bits3.if_else.pop_count = 0; } diff --git a/i965/brw_wm.h b/i965/brw_wm.h index 
88d84ee..47b764d 100644 --- a/i965/brw_wm.h +++ b/i965/brw_wm.h @@ -328,6 +328,12 @@ void emit_cinterp(struct brw_compile *p, const struct brw_reg *dst, GLuint mask, const struct brw_reg *arg0); +void emit_cmp(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1, + const struct brw_reg *arg2); void emit_ddxy(struct brw_compile *p, const struct brw_reg *dst, GLuint mask, diff --git a/i965/brw_wm_emit.c b/i965/brw_wm_emit.c index 9315bca..c7d87b9 100644 --- a/i965/brw_wm_emit.c +++ b/i965/brw_wm_emit.c @@ -566,12 +566,12 @@ static void emit_sne( struct brw_compile *p, emit_sop(p, dst, mask, BRW_CONDITIONAL_NEQ, arg0, arg1); } -static void emit_cmp( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1, - const struct brw_reg *arg2 ) +void emit_cmp(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1, + const struct brw_reg *arg2) { GLuint i; diff --git a/i965/brw_wm_glsl.c b/i965/brw_wm_glsl.c index 562608e..315b030 100644 --- a/i965/brw_wm_glsl.c +++ b/i965/brw_wm_glsl.c @@ -614,112 +614,6 @@ static void invoke_subroutine( struct brw_wm_compile *c, } } -/* Workaround for using brw_wm_emit.c's emit functions, which expect - * destination regs to be uniquely written. Moves arguments out to - * temporaries as necessary for instructions which use their destination as - * a temporary. 
- */ -static void -unalias3(struct brw_wm_compile *c, - void (*func)(struct brw_compile *c, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1, - const struct brw_reg *arg2), - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1, - const struct brw_reg *arg2) -{ - struct brw_compile *p = &c->func; - struct brw_reg tmp_arg0[4], tmp_arg1[4], tmp_arg2[4]; - int i, j; - int mark = mark_tmps(c); - - for (j = 0; j < 4; j++) { - tmp_arg0[j] = arg0[j]; - tmp_arg1[j] = arg1[j]; - tmp_arg2[j] = arg2[j]; - } - - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - for (j = 0; j < 4; j++) { - if (arg0[j].file == dst[i].file && - dst[i].nr == arg0[j].nr) { - tmp_arg0[j] = alloc_tmp(c); - brw_MOV(p, tmp_arg0[j], arg0[j]); - } - if (arg1[j].file == dst[i].file && - dst[i].nr == arg1[j].nr) { - tmp_arg1[j] = alloc_tmp(c); - brw_MOV(p, tmp_arg1[j], arg1[j]); - } - if (arg2[j].file == dst[i].file && - dst[i].nr == arg2[j].nr) { - tmp_arg2[j] = alloc_tmp(c); - brw_MOV(p, tmp_arg2[j], arg2[j]); - } - } - } - } - - func(p, dst, mask, tmp_arg0, tmp_arg1, tmp_arg2); - - release_tmps(c, mark); -} - -/* Workaround for using brw_wm_emit.c's emit functions, which expect - * destination regs to be uniquely written. Moves arguments out to - * temporaries as necessary for instructions which use their destination as - * a temporary. 
- */ -static void -unalias2(struct brw_wm_compile *c, - void (*func)(struct brw_compile *c, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1), - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1) -{ - struct brw_compile *p = &c->func; - struct brw_reg tmp_arg0[4], tmp_arg1[4]; - int i, j; - int mark = mark_tmps(c); - - for (j = 0; j < 4; j++) { - tmp_arg0[j] = arg0[j]; - tmp_arg1[j] = arg1[j]; - } - - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - for (j = 0; j < 4; j++) { - if (arg0[j].file == dst[i].file && - dst[i].nr == arg0[j].nr) { - tmp_arg0[j] = alloc_tmp(c); - brw_MOV(p, tmp_arg0[j], arg0[j]); - } - if (arg1[j].file == dst[i].file && - dst[i].nr == arg1[j].nr) { - tmp_arg1[j] = alloc_tmp(c); - brw_MOV(p, tmp_arg1[j], arg1[j]); - } - } - } - } - - func(p, dst, mask, tmp_arg0, tmp_arg1); - - release_tmps(c, mark); -} - static void emit_arl(struct brw_wm_compile *c, const struct prog_instruction *inst) { @@ -1813,14 +1707,29 @@ static void get_argument_regs(struct brw_wm_compile *c, const struct prog_instruction *inst, int index, + struct brw_reg *dst, struct brw_reg *regs, int mask) { - int i; + struct brw_compile *p = &c->func; + int i, j; for (i = 0; i < 4; i++) { - if (mask & (1 << i)) + if (mask & (1 << i)) { regs[i] = get_src_reg(c, inst, index, i); + + /* Unalias destination registers from our sources. 
*/ + if (regs[i].file == BRW_GENERAL_REGISTER_FILE) { + for (j = 0; j < 4; j++) { + if (memcmp(&regs[i], &dst[j], sizeof(regs[0])) == 0) { + struct brw_reg tmp = alloc_tmp(c); + brw_MOV(p, tmp, regs[i]); + regs[i] = tmp; + break; + } + } + } + } } } @@ -1845,6 +1754,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) int dst_flags; struct brw_reg args[3][4], dst[4]; int j; + int mark = mark_tmps( c ); c->cur_inst = i; @@ -1866,7 +1776,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) } } for (j = 0; j < brw_wm_nr_args(inst->Opcode); j++) - get_argument_regs(c, inst, j, args[j], WRITEMASK_XYZW); + get_argument_regs(c, inst, j, dst, args[j], WRITEMASK_XYZW); dst_flags = inst->DstReg.WriteMask; if (inst->SaturateMode == SATURATE_ZERO_ONE) @@ -1920,8 +1830,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]); break; case OPCODE_LRP: - unalias3(c, emit_lrp, - dst, dst_flags, args[0], args[1], args[2]); + emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]); break; case OPCODE_TRUNC: emit_alu1(p, brw_RNDZ, dst, dst_flags, args[0]); @@ -1960,11 +1869,14 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) case OPCODE_LG2: emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]); break; + case OPCODE_CMP: + emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]); + break; case OPCODE_MIN: - unalias2(c, emit_min, dst, dst_flags, args[0], args[1]); + emit_min(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_MAX: - unalias2(c, emit_max, dst, dst_flags, args[0], args[1]); + emit_max(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_DDX: case OPCODE_DDY: @@ -2103,11 +2015,13 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) /* patch all the BREAK/CONT instructions from last BGNLOOP */ while (inst0 > loop_inst[loop_depth]) { inst0--; - if
(inst0->header.opcode == BRW_OPCODE_BREAK) { + if (inst0->header.opcode == BRW_OPCODE_BREAK && + inst0->bits3.if_else.jump_count == 0) { inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); inst0->bits3.if_else.pop_count = 0; } - else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) { + else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && + inst0->bits3.if_else.jump_count == 0) { inst0->bits3.if_else.jump_count = br * (inst1 - inst0); inst0->bits3.if_else.pop_count = 0; } @@ -2119,6 +2033,9 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) inst->Opcode); } + /* Release temporaries containing any unaliased source regs. */ + release_tmps( c, mark ); + if (inst->CondUpdate) brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); else diff --git a/shared/intel_blit.c b/shared/intel_blit.c index f2769aa..4ad42a7 100644 --- a/shared/intel_blit.c +++ b/shared/intel_blit.c @@ -119,8 +119,6 @@ intelEmitCopyBlit(struct intel_context *intel, break; } while (pass < 2); - intel_prepare_render(intel); - if (pass >= 2) { drm_intel_gem_bo_map_gtt(dst_buffer); drm_intel_gem_bo_map_gtt(src_buffer); diff --git a/shared/intel_buffers.c b/shared/intel_buffers.c index b106930..0480770 100644 --- a/shared/intel_buffers.c +++ b/shared/intel_buffers.c @@ -226,7 +226,7 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb) * only changes with _NEW_STENCIL (which seems sensible). So flag it * here since this is the _NEW_BUFFERS path. 
*/ - ctx->NewState |= (_NEW_DEPTH | _NEW_STENCIL); + intel->NewGLState |= (_NEW_DEPTH | _NEW_STENCIL); } intel->vtbl.set_draw_region(intel, colorRegions, depthRegion, @@ -236,7 +236,7 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb) #ifdef I915 intelCalcViewport(ctx); #else - ctx->NewState |= _NEW_VIEWPORT; + intel->NewGLState |= _NEW_VIEWPORT; #endif /* Set state we know depends on drawable parameters: */ @@ -256,7 +256,7 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb) if (ctx->Driver.FrontFace) ctx->Driver.FrontFace(ctx, ctx->Polygon.FrontFace); else - ctx->NewState |= _NEW_POLYGON; + intel->NewGLState |= _NEW_POLYGON; } diff --git a/shared/intel_context.c b/shared/intel_context.c index d6a1ba6..0a7dcb8 100644 --- a/shared/intel_context.c +++ b/shared/intel_context.c @@ -63,7 +63,7 @@ int INTEL_DEBUG = (0); #endif -#define DRIVER_DATE "20091221 DEVELOPMENT" +#define DRIVER_DATE "20100328 2010Q1" #define DRIVER_DATE_GEM "GEM " DRIVER_DATE @@ -880,12 +880,12 @@ intelMakeCurrent(__DRIcontext * driContextPriv, struct gl_framebuffer *fb = driDrawPriv->driverPrivate; struct gl_framebuffer *readFb = driReadPriv->driverPrivate; - _mesa_make_current(&intel->ctx, fb, readFb); intel->driReadDrawable = driReadPriv; intel->driDrawable = driDrawPriv; driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1; driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1; intel_prepare_render(intel); + _mesa_make_current(&intel->ctx, fb, readFb); } else { _mesa_make_current(NULL, NULL, NULL); diff --git a/shared/intel_fbo.c b/shared/intel_fbo.c index a429f8d..ba3bb8f 100644 --- a/shared/intel_fbo.c +++ b/shared/intel_fbo.c @@ -104,7 +104,6 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, struct intel_context *intel = intel_context(ctx); struct intel_renderbuffer *irb = intel_renderbuffer(rb); int cpp; - GLuint pitch; ASSERT(rb->Name != 0); @@ -176,15 +175,11 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, 
struct gl_renderbuffer *rb, /* allocate new memory region/renderbuffer */ - /* Choose a pitch to match hardware requirements: - */ - pitch = ((cpp * width + 63) & ~63) / cpp; - /* alloc hardware renderbuffer */ - DBG("Allocating %d x %d Intel RBO (pitch %d)\n", width, height, pitch); + DBG("Allocating %d x %d Intel RBO\n", width, height); irb->region = intel_region_alloc(intel, I915_TILING_NONE, cpp, - width, height, pitch, GL_TRUE); + width, height, GL_TRUE); if (!irb->region) return GL_FALSE; /* out of memory? */ diff --git a/shared/intel_mipmap_tree.c b/shared/intel_mipmap_tree.c index 4f14946..5b6b4b2 100644 --- a/shared/intel_mipmap_tree.c +++ b/shared/intel_mipmap_tree.c @@ -146,8 +146,8 @@ intel_miptree_create(struct intel_context *intel, mt->cpp, mt->pitch, mt->total_height, - mt->pitch, expect_accelerated_upload); + mt->pitch = mt->region->pitch; if (!mt->region) { free(mt); @@ -177,20 +177,11 @@ intel_miptree_create_for_region(struct intel_context *intel, I915_TILING_NONE); if (!mt) return mt; -#if 0 - if (mt->pitch != region->pitch) { - fprintf(stderr, - "region pitch (%d) doesn't match mipmap tree pitch (%d)\n", - region->pitch, mt->pitch); - free(mt); - return NULL; - } -#else + /* The mipmap tree pitch is aligned to 64 bytes to make sure render * to texture works, but we don't need that for texturing from a * pixmap. Just override it here. 
*/ mt->pitch = region->pitch; -#endif intel_region_reference(&mt->region, region); @@ -520,12 +511,15 @@ intel_miptree_image_copy(struct intel_context *intel, width = ALIGN(width, align_w); } + intel_prepare_render(intel); + for (i = 0; i < depth; i++) { intel_miptree_get_image_offset(src, level, face, i, &src_x, &src_y); intel_miptree_get_image_offset(dst, level, face, i, &dst_x, &dst_y); success = intel_region_copy(intel, dst->region, 0, dst_x, dst_y, - src->region, 0, src_x, src_y, width, height, + src->region, 0, src_x, src_y, + width, height, GL_FALSE, GL_COPY); if (!success) { GLubyte *src_ptr, *dst_ptr; diff --git a/shared/intel_pixel_copy.c b/shared/intel_pixel_copy.c index f4f3fd6..56faf07 100644 --- a/shared/intel_pixel_copy.c +++ b/shared/intel_pixel_copy.c @@ -108,14 +108,15 @@ do_blit_copypixels(GLcontext * ctx, GLint dstx, GLint dsty, GLenum type) { struct intel_context *intel = intel_context(ctx); - struct intel_region *dst = intel_drawbuf_region(intel); - struct intel_region *src = copypix_src_region(intel, type); + struct intel_region *dst; + struct intel_region *src; struct gl_framebuffer *fb = ctx->DrawBuffer; struct gl_framebuffer *read_fb = ctx->ReadBuffer; GLint orig_dstx; GLint orig_dsty; GLint orig_srcx; GLint orig_srcy; + GLboolean flip = GL_FALSE; if (type == GL_DEPTH || type == GL_STENCIL) { if (INTEL_DEBUG & DEBUG_FALLBACKS) @@ -133,15 +134,16 @@ do_blit_copypixels(GLcontext * ctx, ctx->Pixel.ZoomX != 1.0F || ctx->Pixel.ZoomY != 1.0F) return GL_FALSE; + intel_prepare_render(intel); + + dst = intel_drawbuf_region(intel); + src = copypix_src_region(intel, type); + if (!src || !dst) return GL_FALSE; intelFlush(&intel->ctx); - intel_prepare_render(intel); - - /* XXX: We fail to handle different inversion between read and draw framebuffer. */ - /* Clip to destination buffer. 
*/ orig_dstx = dstx; orig_dsty = dsty; @@ -164,23 +166,23 @@ do_blit_copypixels(GLcontext * ctx, dstx += srcx - orig_srcx; dsty += srcy - orig_srcy; - /* Convert from GL to hardware coordinates: */ + /* Flip dest Y if it's a window system framebuffer. */ if (fb->Name == 0) { - /* copypixels to a system framebuffer */ + /* copypixels to a window system framebuffer */ dsty = fb->Height - dsty - height; - } else { - /* copypixels to a user framebuffer object */ - dsty = dsty; + flip = !flip; } - /* Flip source Y if it's a system framebuffer. */ - if (read_fb->Name == 0) - srcy = fb->Height - srcy - height; + /* Flip source Y if it's a window system framebuffer. */ + if (read_fb->Name == 0) { + srcy = read_fb->Height - srcy - height; + flip = !flip; + } if (!intel_region_copy(intel, dst, 0, dstx, dsty, src, 0, srcx, srcy, - width, height, + width, height, flip, ctx->Color.ColorLogicOpEnabled ? ctx->Color.LogicOp : GL_COPY)) { DBG("%s: blit failure\n", __FUNCTION__); diff --git a/shared/intel_reg.h b/shared/intel_reg.h index d19f1ba..36d8180 100644 --- a/shared/intel_reg.h +++ b/shared/intel_reg.h @@ -70,8 +70,10 @@ /** @{ * 915 definitions + * + * 915 documents say that bits 31:28 and 1 are "undefined, must be zero." 
*/ -#define S0_VB_OFFSET_MASK 0xffffffc0 +#define S0_VB_OFFSET_MASK 0x0ffffffc #define S0_AUTO_CACHE_INV_DISABLE (1<<0) /** @} */ diff --git a/shared/intel_regions.c b/shared/intel_regions.c index f042bcb..1172de9 100644 --- a/shared/intel_regions.c +++ b/shared/intel_regions.c @@ -164,7 +164,6 @@ intel_region_alloc_internal(struct intel_context *intel, /* Default to no tiling */ region->tiling = I915_TILING_NONE; - region->bit_6_swizzle = I915_BIT_6_SWIZZLE_NONE; _DBG("%s <-- %p\n", __FUNCTION__, region); return region; @@ -173,7 +172,7 @@ intel_region_alloc_internal(struct intel_context *intel, struct intel_region * intel_region_alloc(struct intel_context *intel, uint32_t tiling, - GLuint cpp, GLuint width, GLuint height, GLuint pitch, + GLuint cpp, GLuint width, GLuint height, GLboolean expect_accelerated_upload) { dri_bo *buffer; @@ -187,19 +186,10 @@ intel_region_alloc(struct intel_context *intel, buffer = drm_intel_bo_alloc_tiled(intel->bufmgr, "region", width, height, cpp, &tiling, &aligned_pitch, flags); - /* We've already chosen a pitch as part of miptree layout. It had - * better be the same. 
- */ - assert(aligned_pitch == pitch * cpp); region = intel_region_alloc_internal(intel, cpp, width, height, - pitch, buffer); - - if (tiling != I915_TILING_NONE) { - assert(((pitch * cpp) & 127) == 0); - drm_intel_bo_set_tiling(buffer, &tiling, pitch * cpp); - drm_intel_bo_get_tiling(buffer, &region->tiling, &region->bit_6_swizzle); - } + aligned_pitch / cpp, buffer); + region->tiling = tiling; return region; } @@ -213,6 +203,7 @@ intel_region_alloc_for_handle(struct intel_context *intel, struct intel_region *region, *dummy; dri_bo *buffer; int ret; + uint32_t bit_6_swizzle; region = _mesa_HashLookup(intel->intelScreen->named_regions, handle); if (region != NULL) { @@ -236,7 +227,7 @@ intel_region_alloc_for_handle(struct intel_context *intel, return region; ret = dri_bo_get_tiling(region->buffer, &region->tiling, - &region->bit_6_swizzle); + &bit_6_swizzle); if (ret != 0) { fprintf(stderr, "Couldn't get tiling of buffer %d (%s): %s\n", handle, name, strerror(-ret)); @@ -316,7 +307,7 @@ _mesa_copy_rect(GLubyte * dst, dst += dst_x * cpp; src += src_x * cpp; dst += dst_y * dst_pitch; - src += src_y * dst_pitch; + src += src_y * src_pitch; width *= cpp; if (width == dst_pitch && width == src_pitch) @@ -380,8 +371,11 @@ intel_region_copy(struct intel_context *intel, struct intel_region *src, GLuint src_offset, GLuint srcx, GLuint srcy, GLuint width, GLuint height, + GLboolean flip, GLenum logicop) { + uint32_t src_pitch = src->pitch; + _DBG("%s\n", __FUNCTION__); if (intel == NULL) @@ -397,9 +391,12 @@ intel_region_copy(struct intel_context *intel, assert(src->cpp == dst->cpp); + if (flip) + src_pitch = -src_pitch; + return intelEmitCopyBlit(intel, dst->cpp, - src->pitch, src->buffer, src_offset, src->tiling, + src_pitch, src->buffer, src_offset, src->tiling, dst->pitch, dst->buffer, dst_offset, dst->tiling, srcx, srcy, dstx, dsty, width, height, logicop); diff --git a/shared/intel_regions.h b/shared/intel_regions.h index 7ee6a98..2459c9a 100644 --- a/shared/intel_regions.h +++
b/shared/intel_regions.h @@ -65,7 +65,6 @@ struct intel_region GLuint draw_x, draw_y; /**< Offset of drawing within the region */ uint32_t tiling; /**< Which tiling mode the region is in */ - uint32_t bit_6_swizzle; /**< GEM flag for address swizzling requirement */ struct intel_buffer_object *pbo; /* zero-copy uploads */ uint32_t name; /**< Global name for the bo */ @@ -79,7 +78,7 @@ struct intel_region struct intel_region *intel_region_alloc(struct intel_context *intel, uint32_t tiling, GLuint cpp, GLuint width, - GLuint height, GLuint pitch, + GLuint height, GLboolean expect_accelerated_upload); struct intel_region * @@ -122,6 +121,7 @@ intel_region_copy(struct intel_context *intel, struct intel_region *src, GLuint src_offset, GLuint srcx, GLuint srcy, GLuint width, GLuint height, + GLboolean flip, GLenum logicop); /* Helpers for zerocopy uploads, particularly texture image uploads: diff --git a/shared/intel_span.c b/shared/intel_span.c index fb5c01b..377f3a8 100644 --- a/shared/intel_span.c +++ b/shared/intel_span.c @@ -48,11 +48,11 @@ intel_set_span_functions(struct intel_context *intel, #define LOCAL_VARS \ struct intel_renderbuffer *irb = intel_renderbuffer(rb); \ - const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1; \ - const GLint yBias = ctx->DrawBuffer->Name ? 0 : irb->Base.Height - 1;\ + const GLint yScale = rb->Name ? 1 : -1; \ + const GLint yBias = rb->Name ? 0 : rb->Height - 1; \ int minx = 0, miny = 0; \ - int maxx = ctx->DrawBuffer->Width; \ - int maxy = ctx->DrawBuffer->Height; \ + int maxx = rb->Width; \ + int maxy = rb->Height; \ int pitch = irb->region->pitch * irb->region->cpp; \ void *buf = irb->region->buffer->virtual; \ GLuint p; \ @@ -108,11 +108,11 @@ intel_set_span_functions(struct intel_context *intel, #define LOCAL_DEPTH_VARS \ struct intel_renderbuffer *irb = intel_renderbuffer(rb); \ - const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1; \ - const GLint yBias = ctx->DrawBuffer->Name ? 
0 : irb->Base.Height - 1;\ + const GLint yScale = rb->Name ? 1 : -1; \ + const GLint yBias = rb->Name ? 0 : rb->Height - 1; \ int minx = 0, miny = 0; \ - int maxx = ctx->DrawBuffer->Width; \ - int maxy = ctx->DrawBuffer->Height; \ + int maxx = rb->Width; \ + int maxy = rb->Height; \ int pitch = irb->region->pitch * irb->region->cpp; \ void *buf = irb->region->buffer->virtual; \ (void)buf; (void)pitch; /* unused for non-gttmap. */ \ |