| author    | Luc Verhaegen <libv@skynet.be>           | 2010-03-16 20:19:03 +0100 |
|-----------|------------------------------------------|---------------------------|
| committer | Luc Verhaegen <libv@skynet.be>           | 2010-03-16 20:19:03 +0100 |
| commit    | 54a8e9cc3988d908b5b846a752679127cacefd3b | (patch)                   |
| tree      | ee289172f13f246903151b12db7808b7cb0651e8 | /i965                     |
| parent    | 6b263d1e2c599ddcc0ce4c84425dbc1ac8de020c | (diff)                    |
Import i915 and i965 dri drivers from mesa 7.8-rc1.
Diffstat (limited to 'i965')
66 files changed, 3143 insertions, 891 deletions
diff --git a/i965/Makefile.am b/i965/Makefile.am index 8a61dab..d9c82d5 100644 --- a/i965/Makefile.am +++ b/i965/Makefile.am @@ -27,7 +27,6 @@ i965_dri_la_SOURCES = \ ../shared/intel_pixel_draw.c \ ../shared/intel_pixel_read.c \ ../shared/intel_state.c \ - ../shared/intel_swapbuffers.c \ ../shared/intel_syncobj.c \ ../shared/intel_tex.c \ ../shared/intel_tex_copy.c \ @@ -88,4 +87,15 @@ i965_dri_la_SOURCES = \ brw_wm_pass2.c \ brw_wm_sampler_state.c \ brw_wm_state.c \ - brw_wm_surface_state.c + brw_wm_surface_state.c \ + gen6_cc.c \ + gen6_clip_state.c \ + gen6_depthstencil.c \ + gen6_gs_state.c \ + gen6_sampler_state.c \ + gen6_scissor_state.c \ + gen6_sf_state.c \ + gen6_urb.c \ + gen6_viewport_state.c \ + gen6_vs_state.c \ + gen6_wm_state.c diff --git a/i965/brw_cc.c b/i965/brw_cc.c index bac1c3a..fa2d394 100644 --- a/i965/brw_cc.c +++ b/i965/brw_cc.c @@ -34,9 +34,7 @@ #include "brw_state.h" #include "brw_defines.h" #include "brw_util.h" -#include "intel_fbo.h" #include "main/macros.h" -#include "main/enums.h" static void prepare_cc_vp( struct brw_context *brw ) { @@ -295,8 +293,7 @@ cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key) bo = brw_upload_cache(&brw->cache, BRW_CC_UNIT, key, sizeof(*key), &brw->cc.vp_bo, 1, - &cc, sizeof(cc), - NULL, NULL); + &cc, sizeof(cc)); /* Emit CC viewport relocation */ dri_bo_emit_reloc(bo, diff --git a/i965/brw_clip.c b/i965/brw_clip.c index dbd10a5..d3275c7 100644 --- a/i965/brw_clip.c +++ b/i965/brw_clip.c @@ -50,6 +50,7 @@ static void compile_clip_prog( struct brw_context *brw, struct brw_clip_prog_key *key ) { + struct intel_context *intel = &brw->intel; struct brw_clip_compile c; const GLuint *program; GLuint program_size; @@ -65,14 +66,13 @@ static void compile_clip_prog( struct brw_context *brw, c.func.single_program_flow = 1; c.key = *key; - c.need_ff_sync = BRW_IS_IGDNG(brw); /* Need to locate the two positions present in vertex + header. * These are currently hardcoded: */ c.header_position_offset = ATTR_SIZE; - if (BRW_IS_IGDNG(brw)) + if (intel->is_ironlake) delta = 3 * REG_SIZE; else delta = REG_SIZE; @@ -85,7 +85,7 @@ static void compile_clip_prog( struct brw_context *brw, c.nr_attrs = brw_count_bits(c.key.attrs); - if (BRW_IS_IGDNG(brw)) + if (intel->is_ironlake) c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */ else c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */ @@ -130,20 +130,22 @@ static void compile_clip_prog( struct brw_context *brw, /* Upload */ dri_bo_unreference(brw->clip.prog_bo); - brw->clip.prog_bo = brw_upload_cache( &brw->cache, - BRW_CLIP_PROG, - &c.key, sizeof(c.key), - NULL, 0, - program, program_size, - &c.prog_data, - &brw->clip.prog_data ); + brw->clip.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, + BRW_CLIP_PROG, + &c.key, sizeof(c.key), + NULL, 0, + program, program_size, + &c.prog_data, + sizeof(c.prog_data), + &brw->clip.prog_data); } /* Calculate interpolants for triangle and line rasterization. 
*/ static void upload_clip_prog(struct brw_context *brw) { - GLcontext *ctx = &brw->intel.ctx; + struct intel_context *intel = &brw->intel; + GLcontext *ctx = &intel->ctx; struct brw_clip_prog_key key; memset(&key, 0, sizeof(key)); @@ -160,7 +162,7 @@ static void upload_clip_prog(struct brw_context *brw) /* _NEW_TRANSFORM */ key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled); - if (BRW_IS_IGDNG(brw)) + if (intel->is_ironlake) key.clip_mode = BRW_CLIPMODE_KERNEL_CLIP; else key.clip_mode = BRW_CLIPMODE_NORMAL; diff --git a/i965/brw_clip.h b/i965/brw_clip.h index 1c68255..d71bac7 100644 --- a/i965/brw_clip.h +++ b/i965/brw_clip.h @@ -118,7 +118,6 @@ struct brw_clip_compile { GLuint header_position_offset; GLuint offset[VERT_ATTRIB_MAX]; - GLboolean need_ff_sync; }; #define ATTR_SIZE (4*4) diff --git a/i965/brw_clip_line.c b/i965/brw_clip_line.c index fa9648f..ceb62a3 100644 --- a/i965/brw_clip_line.c +++ b/i965/brw_clip_line.c @@ -39,13 +39,13 @@ #include "brw_defines.h" #include "brw_context.h" #include "brw_eu.h" -#include "brw_util.h" #include "brw_clip.h" static void brw_clip_line_alloc_regs( struct brw_clip_compile *c ) { + struct intel_context *intel = &c->func.brw->intel; GLuint i = 0,j; /* Register usage is static, precompute here: @@ -85,7 +85,7 @@ static void brw_clip_line_alloc_regs( struct brw_clip_compile *c ) i++; } - if (c->need_ff_sync) { + if (intel->needs_ff_sync) { c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD); i++; } @@ -126,6 +126,7 @@ static void brw_clip_line_alloc_regs( struct brw_clip_compile *c ) static void clip_and_emit_line( struct brw_clip_compile *c ) { struct brw_compile *p = &c->func; + struct brw_context *brw = p->brw; struct brw_indirect vtx0 = brw_indirect(0, 0); struct brw_indirect vtx1 = brw_indirect(1, 0); struct brw_indirect newvtx0 = brw_indirect(2, 0); @@ -152,7 +153,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) brw_clip_init_clipmask(c); /* -ve rhw workaround */ - if (BRW_IS_965(p->brw)) { + if (brw->has_negative_rhw_bug) { brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<20)); @@ -189,7 +190,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) * Both can be negative on GM965/G965 due to RHW workaround * if so, this object should be rejected. 
*/ - if (BRW_IS_965(p->brw)) { + if (brw->has_negative_rhw_bug) { brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE, c->reg.dp0, brw_imm_f(0.0)); is_neg2 = brw_IF(p, BRW_EXECUTE_1); { @@ -214,7 +215,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) /* If both are positive, do nothing */ /* Only on GM965/G965 */ - if (BRW_IS_965(p->brw)) { + if (brw->has_negative_rhw_bug) { brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0)); is_neg2 = brw_IF(p, BRW_EXECUTE_1); } @@ -229,7 +230,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) brw_set_predicate_control(p, BRW_PREDICATE_NONE); } - if (BRW_IS_965(p->brw)) { + if (brw->has_negative_rhw_bug) { brw_ENDIF(p, is_neg2); } } diff --git a/i965/brw_clip_point.c b/i965/brw_clip_point.c index 8458f61..7f47634 100644 --- a/i965/brw_clip_point.c +++ b/i965/brw_clip_point.c @@ -39,7 +39,6 @@ #include "brw_defines.h" #include "brw_context.h" #include "brw_eu.h" -#include "brw_util.h" #include "brw_clip.h" diff --git a/i965/brw_clip_state.c b/i965/brw_clip_state.c index 234b374..424c9a1 100644 --- a/i965/brw_clip_state.c +++ b/i965/brw_clip_state.c @@ -32,7 +32,6 @@ #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" -#include "main/macros.h" struct brw_clip_unit_key { unsigned int total_grf; @@ -74,6 +73,7 @@ static dri_bo * clip_unit_create_from_key(struct brw_context *brw, struct brw_clip_unit_key *key) { + struct intel_context *intel = &brw->intel; struct brw_clip_unit_state clip; dri_bo *bo; @@ -105,7 +105,7 @@ clip_unit_create_from_key(struct brw_context *brw, /* Although up to 16 concurrent Clip threads are allowed on IGDNG, * only 2 threads can output VUEs at a time. */ - if (BRW_IS_IGDNG(brw)) + if (intel->is_ironlake) clip.thread4.max_threads = 16 - 1; else clip.thread4.max_threads = 2 - 1; @@ -130,7 +130,7 @@ clip_unit_create_from_key(struct brw_context *brw, clip.clip5.api_mode = BRW_CLIP_API_OGL; clip.clip5.clip_mode = key->clip_mode; - if (BRW_IS_G4X(brw)) + if (intel->is_g4x) clip.clip5.negative_w_clip_test = 1; clip.clip6.clipper_viewport_state_ptr = 0; @@ -142,8 +142,7 @@ clip_unit_create_from_key(struct brw_context *brw, bo = brw_upload_cache(&brw->cache, BRW_CLIP_UNIT, key, sizeof(*key), &brw->clip.prog_bo, 1, - &clip, sizeof(clip), - NULL, NULL); + &clip, sizeof(clip)); /* Emit clip program relocation */ assert(brw->clip.prog_bo); diff --git a/i965/brw_clip_tri.c b/i965/brw_clip_tri.c index cf79224..815211a 100644 --- a/i965/brw_clip_tri.c +++ b/i965/brw_clip_tri.c @@ -39,7 +39,6 @@ #include "brw_defines.h" #include "brw_context.h" #include "brw_eu.h" -#include "brw_util.h" #include "brw_clip.h" static void release_tmps( struct brw_clip_compile *c ) @@ -51,6 +50,7 @@ static void release_tmps( struct brw_clip_compile *c ) void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, GLuint nr_verts ) { + struct intel_context *intel = &c->func.brw->intel; GLuint i = 0,j; /* Register usage is static, precompute here: @@ -78,7 +78,7 @@ void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, for (j = 0; j < 3; j++) { GLuint delta = c->nr_attrs*16 + 32; - if (BRW_IS_IGDNG(c->func.brw)) + if (intel->is_ironlake) delta = c->nr_attrs * 16 + 32 * 3; brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0)); @@ -119,7 +119,7 @@ void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, i++; } - if (c->need_ff_sync) { + if (intel->needs_ff_sync) { c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD); i++; } @@ -571,6 +571,7 @@ void 
brw_emit_tri_clip( struct brw_clip_compile *c ) { struct brw_instruction *neg_rhw; struct brw_compile *p = &c->func; + struct brw_context *brw = p->brw; brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6); brw_clip_tri_init_vertices(c); brw_clip_init_clipmask(c); @@ -578,7 +579,7 @@ void brw_emit_tri_clip( struct brw_clip_compile *c ) /* if -ve rhw workaround bit is set, do cliptest */ - if (BRW_IS_965(p->brw)) { + if (brw->has_negative_rhw_bug) { brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<20)); diff --git a/i965/brw_clip_unfilled.c b/i965/brw_clip_unfilled.c index ad1bfa4..f36d22f 100644 --- a/i965/brw_clip_unfilled.c +++ b/i965/brw_clip_unfilled.c @@ -39,7 +39,6 @@ #include "brw_defines.h" #include "brw_context.h" #include "brw_eu.h" -#include "brw_util.h" #include "brw_clip.h" diff --git a/i965/brw_clip_util.c b/i965/brw_clip_util.c index 5a73abd..14bc889 100644 --- a/i965/brw_clip_util.c +++ b/i965/brw_clip_util.c @@ -40,7 +40,6 @@ #include "brw_defines.h" #include "brw_context.h" #include "brw_eu.h" -#include "brw_util.h" #include "brw_clip.h" @@ -135,6 +134,7 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c, GLboolean force_edgeflag) { struct brw_compile *p = &c->func; + struct intel_context *intel = &p->brw->intel; struct brw_reg tmp = get_tmp(c); GLuint i; @@ -142,7 +142,7 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c, */ /* * After CLIP stage, only first 256 bits of the VUE are read - * back on IGDNG, so needn't change it + * back on Ironlake, so needn't change it */ brw_copy_indirect_to_indirect(p, dest_ptr, v0_ptr, 1); @@ -151,7 +151,7 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c, for (i = 0; i < c->nr_attrs; i++) { GLuint delta = i*16 + 32; - if (BRW_IS_IGDNG(p->brw)) + if (intel->is_ironlake) delta = i * 16 + 32 * 3; if (delta == c->offset[VERT_RESULT_EDGE]) { @@ -185,7 +185,7 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c, if (i & 1) { GLuint delta = i*16 + 32; - if (BRW_IS_IGDNG(p->brw)) + if (intel->is_ironlake) delta = i * 16 + 32 * 3; brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0)); @@ -359,7 +359,9 @@ void brw_clip_init_clipmask( struct brw_clip_compile *c ) void brw_clip_ff_sync(struct brw_clip_compile *c) { - if (c->need_ff_sync) { + struct intel_context *intel = &c->func.brw->intel; + + if (intel->needs_ff_sync) { struct brw_compile *p = &c->func; struct brw_instruction *need_ff_sync; @@ -388,7 +390,9 @@ void brw_clip_ff_sync(struct brw_clip_compile *c) void brw_clip_init_ff_sync(struct brw_clip_compile *c) { - if (c->need_ff_sync) { + struct intel_context *intel = &c->func.brw->intel; + + if (intel->needs_ff_sync) { struct brw_compile *p = &c->func; brw_MOV(p, c->reg.ff_sync, brw_imm_ud(0)); diff --git a/i965/brw_context.c b/i965/brw_context.c index aaa2d80..a512896 100644 --- a/i965/brw_context.c +++ b/i965/brw_context.c @@ -33,7 +33,6 @@ #include "main/imports.h" #include "main/api_noop.h" #include "main/macros.h" -/* #include "main/vtxfmt.h" */ #include "main/simple_list.h" #include "shader/shader_api.h" @@ -41,16 +40,9 @@ #include "brw_defines.h" #include "brw_draw.h" #include "brw_state.h" -#include "brw_vs.h" -#include "intel_tex.h" -#include "intel_blit.h" -#include "intel_batchbuffer.h" -#include "intel_pixel.h" #include "intel_span.h" #include "tnl/t_pipeline.h" -#include "utils.h" - /*************************************** * Mesa's Driver Functions @@ -77,7 +69,7 @@ static void brwInitDriverFunctions( struct dd_function_table 
*functions ) } GLboolean brwCreateContext( const __GLcontextModes *mesaVis, - __DRIcontextPrivate *driContextPriv, + __DRIcontext *driContextPriv, void *sharedContextPrivate) { struct dd_function_table functions; @@ -86,7 +78,7 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis, GLcontext *ctx = &intel->ctx; if (!brw) { - _mesa_printf("%s: failed to alloc context\n", __FUNCTION__); + printf("%s: failed to alloc context\n", __FUNCTION__); return GL_FALSE; } @@ -95,7 +87,7 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis, if (!intelInitContext( intel, mesaVis, driContextPriv, sharedContextPrivate, &functions )) { - _mesa_printf("%s: failed to init intel context\n", __FUNCTION__); + printf("%s: failed to init intel context\n", __FUNCTION__); FREE(brw); return GL_FALSE; } @@ -111,6 +103,9 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis, ctx->Const.MaxTextureUnits = MIN2(ctx->Const.MaxTextureCoordUnits, ctx->Const.MaxTextureImageUnits); ctx->Const.MaxVertexTextureImageUnits = 0; /* no vertex shader textures */ + ctx->Const.MaxCombinedTextureImageUnits = + ctx->Const.MaxVertexTextureImageUnits + + ctx->Const.MaxTextureImageUnits; /* Mesa limits textures to 4kx4k; it would be nice to fix that someday */ @@ -155,6 +150,38 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis, MIN2(ctx->Const.FragmentProgram.MaxNativeParameters, ctx->Const.FragmentProgram.MaxEnvParams); + if (intel->is_ironlake || intel->is_g4x || intel->gen >= 6) { + brw->CMD_VF_STATISTICS = CMD_VF_STATISTICS_GM45; + brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_GM45; + brw->has_surface_tile_offset = GL_TRUE; + brw->has_compr4 = GL_TRUE; + brw->has_aa_line_parameters = GL_TRUE; + } else { + brw->CMD_VF_STATISTICS = CMD_VF_STATISTICS_965; + brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_965; + } + + /* WM maximum threads is number of EUs times number of threads per EU. */ + if (intel->is_ironlake) { + brw->urb.size = 1024; + brw->vs_max_threads = 72; + brw->wm_max_threads = 12 * 6; + } else if (intel->is_g4x) { + brw->urb.size = 384; + brw->vs_max_threads = 32; + brw->wm_max_threads = 10 * 5; + } else if (intel->gen < 6) { + brw->urb.size = 256; + brw->vs_max_threads = 16; + brw->wm_max_threads = 8 * 4; + brw->has_negative_rhw_bug = GL_TRUE; + } + + if (INTEL_DEBUG & DEBUG_SINGLE_THREAD) { + brw->vs_max_threads = 1; + brw->wm_max_threads = 1; + } + brw_init_state( brw ); brw->state.dirty.mesa = ~0; diff --git a/i965/brw_context.h b/i965/brw_context.h index fded47a..d6fc37e 100644 --- a/i965/brw_context.h +++ b/i965/brw_context.h @@ -131,7 +131,6 @@ struct brw_context; #define BRW_NEW_WM_INPUT_DIMENSIONS 0x100 #define BRW_NEW_PSP 0x800 #define BRW_NEW_WM_SURFACES 0x1000 -#define BRW_NEW_FENCE 0x2000 #define BRW_NEW_INDICES 0x4000 #define BRW_NEW_VERTICES 0x8000 /** @@ -172,8 +171,8 @@ struct brw_fragment_program { GLuint id; /**< serial no. 
to identify frag progs, never re-used */ GLboolean isGLSL; /**< really, any IF/LOOP/CONT/BREAK instructions */ - dri_bo *const_buffer; /** Program constant buffer/surface */ GLboolean use_const_buffer; + dri_bo *const_buffer; /** Program constant buffer/surface */ /** for debugging, which texture units are referenced */ GLbitfield tex_units_used; @@ -283,6 +282,9 @@ struct brw_vs_ouput_sizes { enum brw_cache_id { + BRW_BLEND_STATE, + BRW_DEPTH_STENCIL_STATE, + BRW_COLOR_CALC_STATE, BRW_CC_VP, BRW_CC_UNIT, BRW_WM_PROG, @@ -291,7 +293,7 @@ enum brw_cache_id { BRW_WM_UNIT, BRW_SF_PROG, BRW_SF_VP, - BRW_SF_UNIT, + BRW_SF_UNIT, /* scissor state on gen6 */ BRW_VS_UNIT, BRW_VS_PROG, BRW_GS_UNIT, @@ -332,7 +334,6 @@ struct brw_cache { struct brw_cache_item **items; GLuint size, n_items; - GLuint aux_size[BRW_MAX_CACHE]; char *name[BRW_MAX_CACHE]; /* Record of the last BOs chosen for each cache_id. Used to set @@ -356,6 +357,9 @@ struct brw_tracked_state { /* Flags for brw->state.cache. */ +#define CACHE_NEW_BLEND_STATE (1<<BRW_BLEND_STATE) +#define CACHE_NEW_DEPTH_STENCIL_STATE (1<<BRW_DEPTH_STENCIL_STATE) +#define CACHE_NEW_COLOR_CALC_STATE (1<<BRW_COLOR_CALC_STATE) #define CACHE_NEW_CC_VP (1<<BRW_CC_VP) #define CACHE_NEW_CC_UNIT (1<<BRW_CC_UNIT) #define CACHE_NEW_WM_PROG (1<<BRW_WM_PROG) @@ -438,8 +442,11 @@ struct brw_context GLuint primitive; GLboolean emit_state_always; - GLboolean no_batch_wrap; - + GLboolean has_surface_tile_offset; + GLboolean has_compr4; + GLboolean has_negative_rhw_bug; + GLboolean has_aa_line_parameters; +; struct { struct brw_state_flags dirty; @@ -515,6 +522,12 @@ struct brw_context */ GLuint next_free_page; + /* hw-dependent 3DSTATE_VF_STATISTICS opcode */ + uint32_t CMD_VF_STATISTICS; + /* hw-dependent 3DSTATE_PIPELINE_SELECT opcode */ + uint32_t CMD_PIPELINE_SELECT; + int vs_max_threads; + int wm_max_threads; /* BRW_NEW_URB_ALLOCATIONS: */ @@ -531,7 +544,8 @@ struct brw_context GLuint nr_sf_entries; GLuint nr_cs_entries; -/* GLuint vs_size; */ + /* gen6 */ + GLuint vs_size; /* GLuint gs_size; */ /* GLuint clip_size; */ /* GLuint sf_size; */ @@ -542,6 +556,7 @@ struct brw_context GLuint clip_start; GLuint sf_start; GLuint cs_start; + GLuint size; /* Hardware URB size, in KB. */ } urb; @@ -564,15 +579,11 @@ struct brw_context GLfloat *last_buf; GLuint last_bufsz; - /** - * Whether we should create a new bo instead of reusing the old one - * (if we just dispatch the batch pointing at the old one. 
- */ - GLboolean need_new_bo; } curbe; struct { struct brw_vs_prog_data *prog_data; + int8_t *constant_map; /* variable array following prog_data */ dri_bo *prog_bo; dri_bo *state_bo; @@ -639,9 +650,16 @@ struct brw_context struct { + /* gen4 */ dri_bo *prog_bo; - dri_bo *state_bo; dri_bo *vp_bo; + + /* gen6 */ + dri_bo *blend_state_bo; + dri_bo *depth_stencil_state_bo; + dri_bo *color_calc_state_bo; + + dri_bo *state_bo; } cc; struct { @@ -669,7 +687,7 @@ void brwInitVtbl( struct brw_context *brw ); * brw_context.c */ GLboolean brwCreateContext( const __GLcontextModes *mesaVis, - __DRIcontextPrivate *driContextPriv, + __DRIcontext *driContextPriv, void *sharedContextPrivate); /*====================================================================== diff --git a/i965/brw_curbe.c b/i965/brw_curbe.c index aadcfbe..4e78b08 100644 --- a/i965/brw_curbe.c +++ b/i965/brw_curbe.c @@ -114,13 +114,13 @@ static void calculate_curbe_offsets( struct brw_context *brw ) brw->curbe.total_size = reg; if (0) - _mesa_printf("curbe wm %d+%d clip %d+%d vs %d+%d\n", - brw->curbe.wm_start, - brw->curbe.wm_size, - brw->curbe.clip_start, - brw->curbe.clip_size, - brw->curbe.vs_start, - brw->curbe.vs_size ); + printf("curbe wm %d+%d clip %d+%d vs %d+%d\n", + brw->curbe.wm_start, + brw->curbe.wm_size, + brw->curbe.clip_start, + brw->curbe.clip_size, + brw->curbe.vs_start, + brw->curbe.vs_size ); brw->state.dirty.brw |= BRW_NEW_CURBE_OFFSETS; } @@ -198,7 +198,7 @@ static void prepare_constant_buffer(struct brw_context *brw) return; } - buf = (GLfloat *) _mesa_calloc(bufsz); + buf = (GLfloat *) calloc(1, bufsz); /* fragment shader constants */ if (brw->curbe.wm_size) { @@ -256,25 +256,36 @@ static void prepare_constant_buffer(struct brw_context *brw) */ _mesa_load_state_parameters(ctx, vp->program.Base.Parameters); - /* XXX just use a memcpy here */ - for (i = 0; i < nr; i++) { - const GLfloat *value = vp->program.Base.Parameters->ParameterValues[i]; - buf[offset + i * 4 + 0] = value[0]; - buf[offset + i * 4 + 1] = value[1]; - buf[offset + i * 4 + 2] = value[2]; - buf[offset + i * 4 + 3] = value[3]; + if (vp->use_const_buffer) { + /* Load the subset of push constants that will get used when + * we also have a pull constant buffer. + */ + for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { + if (brw->vs.constant_map[i] != -1) { + assert(brw->vs.constant_map[i] <= nr); + memcpy(buf + offset + brw->vs.constant_map[i] * 4, + vp->program.Base.Parameters->ParameterValues[i], + 4 * sizeof(float)); + } + } + } else { + for (i = 0; i < nr; i++) { + memcpy(buf + offset + i * 4, + vp->program.Base.Parameters->ParameterValues[i], + 4 * sizeof(float)); + } } } if (0) { for (i = 0; i < sz*16; i+=4) - _mesa_printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4, - buf[i+0], buf[i+1], buf[i+2], buf[i+3]); + printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4, + buf[i+0], buf[i+1], buf[i+2], buf[i+3]); - _mesa_printf("last_buf %p buf %p sz %d/%d cmp %d\n", - brw->curbe.last_buf, buf, - bufsz, brw->curbe.last_bufsz, - brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1); + printf("last_buf %p buf %p sz %d/%d cmp %d\n", + brw->curbe.last_buf, buf, + bufsz, brw->curbe.last_bufsz, + brw->curbe.last_buf ? 
memcmp(buf, brw->curbe.last_buf, bufsz) : -1); } if (brw->curbe.curbe_bo != NULL && @@ -282,20 +293,20 @@ static void prepare_constant_buffer(struct brw_context *brw) bufsz == brw->curbe.last_bufsz && memcmp(buf, brw->curbe.last_buf, bufsz) == 0) { /* constants have not changed */ - _mesa_free(buf); + free(buf); } else { /* constants have changed */ if (brw->curbe.last_buf) - _mesa_free(brw->curbe.last_buf); + free(brw->curbe.last_buf); brw->curbe.last_buf = buf; brw->curbe.last_bufsz = bufsz; if (brw->curbe.curbe_bo != NULL && - (brw->curbe.need_new_bo || - brw->curbe.curbe_next_offset + bufsz > brw->curbe.curbe_bo->size)) + brw->curbe.curbe_next_offset + bufsz > brw->curbe.curbe_bo->size) { + drm_intel_gem_bo_unmap_gtt(brw->curbe.curbe_bo); dri_bo_unreference(brw->curbe.curbe_bo); brw->curbe.curbe_bo = NULL; } @@ -307,6 +318,7 @@ static void prepare_constant_buffer(struct brw_context *brw) brw->curbe.curbe_bo = dri_bo_alloc(brw->intel.bufmgr, "CURBE", 4096, 1 << 6); brw->curbe.curbe_next_offset = 0; + drm_intel_gem_bo_map_gtt(brw->curbe.curbe_bo); } brw->curbe.curbe_offset = brw->curbe.curbe_next_offset; @@ -315,7 +327,9 @@ static void prepare_constant_buffer(struct brw_context *brw) /* Copy data to the buffer: */ - dri_bo_subdata(brw->curbe.curbe_bo, brw->curbe.curbe_offset, bufsz, buf); + memcpy(brw->curbe.curbe_bo->virtual + brw->curbe.curbe_offset, + buf, + bufsz); } brw_add_validated_bo(brw, brw->curbe.curbe_bo); @@ -340,7 +354,7 @@ static void emit_constant_buffer(struct brw_context *brw) struct intel_context *intel = &brw->intel; GLuint sz = brw->curbe.total_size; - BEGIN_BATCH(2, IGNORE_CLIPRECTS); + BEGIN_BATCH(2); if (sz == 0) { OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2)); OUT_BATCH(0); diff --git a/i965/brw_defines.h b/i965/brw_defines.h index c19510b..bb1b5f5 100644 --- a/i965/brw_defines.h +++ b/i965/brw_defines.h @@ -530,6 +530,7 @@ #define BRW_OPCODE_POP 47 #define BRW_OPCODE_WAIT 48 #define BRW_OPCODE_SEND 49 +#define BRW_OPCODE_MATH 56 #define BRW_OPCODE_ADD 64 #define BRW_OPCODE_MUL 65 #define BRW_OPCODE_AVG 66 @@ -727,7 +728,8 @@ #define BRW_MATH_FUNCTION_SIN 6 /* was 7 */ #define BRW_MATH_FUNCTION_COS 7 /* was 8 */ #define BRW_MATH_FUNCTION_SINCOS 8 /* was 6 */ -#define BRW_MATH_FUNCTION_TAN 9 +#define BRW_MATH_FUNCTION_TAN 9 /* gen4 */ +#define BRW_MATH_FUNCTION_FDIV 9 /* gen6+ */ #define BRW_MATH_FUNCTION_POW 10 #define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11 #define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT 12 @@ -778,17 +780,33 @@ #define CMD_PIPELINED_STATE_POINTERS 0x7800 #define CMD_BINDING_TABLE_PTRS 0x7801 +# define GEN6_BINDING_TABLE_MODIFY_VS (1 << 8) +# define GEN6_BINDING_TABLE_MODIFY_GS (1 << 9) +# define GEN6_BINDING_TABLE_MODIFY_PS (1 << 10) + +#define CMD_3D_SAMPLER_STATE_POINTERS 0x7802 /* SNB+ */ +# define PS_SAMPLER_STATE_CHANGE (1 << 12) +# define GS_SAMPLER_STATE_CHANGE (1 << 9) +# define VS_SAMPLER_STATE_CHANGE (1 << 8) +/* DW1: VS */ +/* DW2: GS */ +/* DW3: PS */ #define CMD_VERTEX_BUFFER 0x7808 # define BRW_VB0_INDEX_SHIFT 27 +# define GEN6_VB0_INDEX_SHIFT 26 # define BRW_VB0_ACCESS_VERTEXDATA (0 << 26) # define BRW_VB0_ACCESS_INSTANCEDATA (1 << 26) +# define GEN6_VB0_ACCESS_VERTEXDATA (0 << 20) +# define GEN6_VB0_ACCESS_INSTANCEDATA (1 << 20) # define BRW_VB0_PITCH_SHIFT 0 #define CMD_VERTEX_ELEMENT 0x7809 # define BRW_VE0_INDEX_SHIFT 27 +# define GEN6_VE0_INDEX_SHIFT 26 # define BRW_VE0_FORMAT_SHIFT 16 # define BRW_VE0_VALID (1 << 26) +# define GEN6_VE0_VALID (1 << 25) # define BRW_VE0_SRC_OFFSET_SHIFT 0 # define 
BRW_VE1_COMPONENT_NOSTORE 0 # define BRW_VE1_COMPONENT_STORE_SRC 1 @@ -805,8 +823,219 @@ # define BRW_VE1_DST_OFFSET_SHIFT 0 #define CMD_INDEX_BUFFER 0x780a -#define CMD_VF_STATISTICS_965 0x780b +#define CMD_VF_STATISTICS_965 0x780b #define CMD_VF_STATISTICS_GM45 0x680b +#define CMD_3D_CC_STATE_POINTERS 0x780e /* GEN6+ */ + +#define CMD_URB 0x7805 /* GEN6+ */ +# define GEN6_URB_VS_SIZE_SHIFT 16 +# define GEN6_URB_VS_ENTRIES_SHIFT 0 +# define GEN6_URB_GS_SIZE_SHIFT 8 +# define GEN6_URB_GS_ENTRIES_SHIFT 0 + +#define CMD_VIEWPORT_STATE_POINTERS 0x780d /* GEN6+ */ +# define GEN6_CC_VIEWPORT_MODIFY (1 << 12) +# define GEN6_SF_VIEWPORT_MODIFY (1 << 11) +# define GEN6_CLIP_VIEWPORT_MODIFY (1 << 10) + +#define CMD_3D_SCISSOR_STATE_POINTERS 0x780f /* GEN6+ */ + +#define CMD_3D_VS_STATE 0x7810 /* GEN6+ */ +/* DW2 */ +# define GEN6_VS_SPF_MODE (1 << 31) +# define GEN6_VS_VECTOR_MASK_ENABLE (1 << 30) +# define GEN6_VS_SAMPLER_COUNT_SHIFT 27 +# define GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 +/* DW4 */ +# define GEN6_VS_DISPATCH_START_GRF_SHIFT 20 +# define GEN6_VS_URB_READ_LENGTH_SHIFT 11 +# define GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT 4 +/* DW5 */ +# define GEN6_VS_MAX_THREADS_SHIFT 25 +# define GEN6_VS_STATISTICS_ENABLE (1 << 10) +# define GEN6_VS_CACHE_DISABLE (1 << 1) +# define GEN6_VS_ENABLE (1 << 0) + +#define CMD_3D_GS_STATE 0x7811 /* GEN6+ */ +/* DW2 */ +# define GEN6_GS_SPF_MODE (1 << 31) +# define GEN6_GS_VECTOR_MASK_ENABLE (1 << 30) +# define GEN6_GS_SAMPLER_COUNT_SHIFT 27 +# define GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 +/* DW4 */ +# define GEN6_GS_URB_READ_LENGTH_SHIFT 11 +# define GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT 4 +# define GEN6_GS_DISPATCH_START_GRF_SHIFT 0 +/* DW5 */ +# define GEN6_GS_MAX_THREADS_SHIFT 25 +# define GEN6_GS_STATISTICS_ENABLE (1 << 10) +# define GEN6_GS_SO_STATISTICS_ENABLE (1 << 9) +# define GEN6_GS_RENDERING_ENABLE (1 << 8) +/* DW6 */ +# define GEN6_GS_ENABLE (1 << 15) + +#define CMD_3D_CLIP_STATE 0x7812 /* GEN6+ */ +/* DW1 */ +# define GEN6_CLIP_STATISTICS_ENABLE (1 << 10) +/* DW2 */ +# define GEN6_CLIP_ENABLE (1 << 31) +# define GEN6_CLIP_API_OGL (0 << 30) +# define GEN6_CLIP_API_D3D (1 << 30) +# define GEN6_CLIP_XY_TEST (1 << 28) +# define GEN6_CLIP_Z_TEST (1 << 27) +# define GEN6_CLIP_GB_TEST (1 << 26) +# define GEN6_CLIP_MODE_NORMAL (0 << 13) +# define GEN6_CLIP_MODE_REJECT_ALL (3 << 13) +# define GEN6_CLIP_MODE_ACCEPT_ALL (4 << 13) +# define GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE (1 << 9) +# define GEN6_CLIP_BARYCENTRIC_ENABLE (1 << 8) +# define GEN6_CLIP_TRI_PROVOKE_SHIFT 4 +# define GEN6_CLIP_LINE_PROVOKE_SHIFT 2 +# define GEN6_CLIP_TRIFAN_PROVOKE_SHIFT 0 +/* DW3 */ +# define GEN6_CLIP_MIN_POINT_WIDTH_SHIFT 17 +# define GEN6_CLIP_MAX_POINT_WIDTH_SHIFT 6 + +#define CMD_3D_SF_STATE 0x7813 /* GEN6+ */ +/* DW1 */ +# define GEN6_SF_NUM_OUTPUTS_SHIFT 22 +# define GEN6_SF_SWIZZLE_ENABLE (1 << 21) +# define GEN6_SF_POINT_SPRITE_LOWERLEFT (1 << 20) +# define GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT 11 +# define GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT 4 +/* DW2 */ +# define GEN6_SF_LEGACY_GLOBAL_DEPTH_BIAS (1 << 11) +# define GEN6_SF_STATISTICS_ENABLE (1 << 10) +# define GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID (1 << 9) +# define GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME (1 << 8) +# define GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT (1 << 7) +# define GEN6_SF_FRONT_SOLID (0 << 5) +# define GEN6_SF_FRONT_WIREFRAME (1 << 5) +# define GEN6_SF_FRONT_POINT (2 << 5) +# define GEN6_SF_BACK_SOLID (0 << 3) +# define GEN6_SF_BACK_WIREFRAME (1 << 3) +# define GEN6_SF_BACK_POINT (2 << 3) +# define 
GEN6_SF_VIEWPORT_TRANSFORM_ENABLE (1 << 1) +# define GEN6_SF_WINDING_CCW (1 << 0) +/* DW3 */ +# define GEN6_SF_LINE_AA_ENABLE (1 << 31) +# define GEN6_SF_CULL_BOTH (0 << 29) +# define GEN6_SF_CULL_NONE (1 << 29) +# define GEN6_SF_CULL_FRONT (2 << 29) +# define GEN6_SF_CULL_BACK (3 << 29) +# define GEN6_SF_LINE_WIDTH_SHIFT 18 /* U3.7 */ +# define GEN6_SF_LINE_END_CAP_WIDTH_0_5 (0 << 16) +# define GEN6_SF_LINE_END_CAP_WIDTH_1_0 (1 << 16) +# define GEN6_SF_LINE_END_CAP_WIDTH_2_0 (2 << 16) +# define GEN6_SF_LINE_END_CAP_WIDTH_4_0 (3 << 16) +# define GEN6_SF_SCISSOR_ENABLE (1 << 11) +# define GEN6_SF_MSRAST_OFF_PIXEL (0 << 8) +# define GEN6_SF_MSRAST_OFF_PATTERN (1 << 8) +# define GEN6_SF_MSRAST_ON_PIXEL (2 << 8) +# define GEN6_SF_MSRAST_ON_PATTERN (3 << 8) +/* DW4 */ +# define GEN6_SF_TRI_PROVOKE_SHIFT 29 +# define GEN6_SF_LINE_PROVOKE_SHIFT 27 +# define GEN6_SF_TRIFAN_PROVOKE_SHIFT 25 +# define GEN6_SF_LINE_AA_MODE_MANHATTAN (0 << 14) +# define GEN6_SF_LINE_AA_MODE_TRUE (1 << 14) +# define GEN6_SF_VERTEX_SUBPIXEL_8BITS (0 << 12) +# define GEN6_SF_VERTEX_SUBPIXEL_4BITS (1 << 12) +# define GEN6_SF_USE_STATE_POINT_WIDTH (1 << 11) +# define GEN6_SF_POINT_WIDTH_SHIFT 0 /* U8.3 */ +/* DW5: depth offset constant */ +/* DW6: depth offset scale */ +/* DW7: depth offset clamp */ +/* DW8 */ +# define ATTRIBUTE_1_OVERRIDE_W (1 << 31) +# define ATTRIBUTE_1_OVERRIDE_Z (1 << 30) +# define ATTRIBUTE_1_OVERRIDE_Y (1 << 29) +# define ATTRIBUTE_1_OVERRIDE_X (1 << 28) +# define ATTRIBUTE_1_CONST_SOURCE_SHIFT 25 +# define ATTRIBUTE_1_SWIZZLE_SHIFT 22 +# define ATTRIBUTE_1_SOURCE_SHIFT 16 +# define ATTRIBUTE_0_OVERRIDE_W (1 << 15) +# define ATTRIBUTE_0_OVERRIDE_Z (1 << 14) +# define ATTRIBUTE_0_OVERRIDE_Y (1 << 13) +# define ATTRIBUTE_0_OVERRIDE_X (1 << 12) +# define ATTRIBUTE_0_CONST_SOURCE_SHIFT 9 +# define ATTRIBUTE_0_SWIZZLE_SHIFT 6 +# define ATTRIBUTE_0_SOURCE_SHIFT 0 +/* DW16: Point sprite texture coordinate enables */ +/* DW17: Constant interpolation enables */ +/* DW18: attr 0-7 wrap shortest enables */ +/* DW19: attr 8-16 wrap shortest enables */ + +#define CMD_3D_WM_STATE 0x7814 /* GEN6+ */ +/* DW1: kernel pointer */ +/* DW2 */ +# define GEN6_WM_SPF_MODE (1 << 31) +# define GEN6_WM_VECTOR_MASK_ENABLE (1 << 30) +# define GEN6_WM_SAMPLER_COUNT_SHIFT 27 +# define GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 +/* DW3: scratch space */ +/* DW4 */ +# define GEN6_WM_STATISTICS_ENABLE (1 << 31) +# define GEN6_WM_DEPTH_CLEAR (1 << 30) +# define GEN6_WM_DEPTH_RESOLVE (1 << 28) +# define GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE (1 << 27) +# define GEN6_WM_DISPATCH_START_GRF_SHIFT_0 16 +# define GEN6_WM_DISPATCH_START_GRF_SHIFT_1 8 +# define GEN6_WM_DISPATCH_START_GRF_SHIFT_2 0 +/* DW5 */ +# define GEN6_WM_MAX_THREADS_SHIFT 25 +# define GEN6_WM_KILL_ENABLE (1 << 22) +# define GEN6_WM_COMPUTED_DEPTH (1 << 21) +# define GEN6_WM_USES_SOURCE_DEPTH (1 << 20) +# define GEN6_WM_DISPATCH_ENABLE (1 << 19) +# define GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5 (0 << 16) +# define GEN6_WM_LINE_END_CAP_AA_WIDTH_1_0 (1 << 16) +# define GEN6_WM_LINE_END_CAP_AA_WIDTH_2_0 (2 << 16) +# define GEN6_WM_LINE_END_CAP_AA_WIDTH_4_0 (3 << 16) +# define GEN6_WM_LINE_AA_WIDTH_0_5 (0 << 14) +# define GEN6_WM_LINE_AA_WIDTH_1_0 (1 << 14) +# define GEN6_WM_LINE_AA_WIDTH_2_0 (2 << 14) +# define GEN6_WM_LINE_AA_WIDTH_4_0 (3 << 14) +# define GEN6_WM_POLYGON_STIPPLE_ENABLE (1 << 13) +# define GEN6_WM_LINE_STIPPLE_ENABLE (1 << 12) +# define GEN6_WM_OMASK_TO_RENDER_TARGET (1 << 9) +# define GEN6_WM_USES_SOURCE_W (1 << 8) +# define GEN6_WM_DUAL_SOURCE_BLEND_ENABLE (1 << 
7) +# define GEN6_WM_32_DISPATCH_ENABLE (1 << 2) +# define GEN6_WM_16_DISPATCH_ENABLE (1 << 1) +# define GEN6_WM_8_DISPATCH_ENABLE (1 << 0) +/* DW6 */ +# define GEN6_WM_NUM_SF_OUTPUTS_SHIFT 20 +# define GEN6_WM_POSOFFSET_NONE (0 << 18) +# define GEN6_WM_POSOFFSET_CENTROID (2 << 18) +# define GEN6_WM_POSOFFSET_SAMPLE (3 << 18) +# define GEN6_WM_POSITION_ZW_PIXEL (0 << 16) +# define GEN6_WM_POSITION_ZW_CENTROID (2 << 16) +# define GEN6_WM_POSITION_ZW_SAMPLE (3 << 16) +# define GEN6_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 15) +# define GEN6_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 14) +# define GEN6_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 << 13) +# define GEN6_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 12) +# define GEN6_WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 11) +# define GEN6_WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 10) +# define GEN6_WM_POINT_RASTRULE_UPPER_RIGHT (1 << 9) +# define GEN6_WM_MSRAST_OFF_PIXEL (0 << 1) +# define GEN6_WM_MSRAST_OFF_PATTERN (1 << 1) +# define GEN6_WM_MSRAST_ON_PIXEL (2 << 1) +# define GEN6_WM_MSRAST_ON_PATTERN (3 << 1) +# define GEN6_WM_MSDISPMODE_PERPIXEL (1 << 0) +/* DW7: kernel 1 pointer */ +/* DW8: kernel 2 pointer */ + +#define CMD_3D_CONSTANT_VS_STATE 0x7815 /* GEN6+ */ +#define CMD_3D_CONSTANT_GS_STATE 0x7816 /* GEN6+ */ +#define CMD_3D_CONSTANT_PS_STATE 0x7817 /* GEN6+ */ +# define GEN6_CONSTANT_BUFFER_3_ENABLE (1 << 15) +# define GEN6_CONSTANT_BUFFER_2_ENABLE (1 << 14) +# define GEN6_CONSTANT_BUFFER_1_ENABLE (1 << 13) +# define GEN6_CONSTANT_BUFFER_0_ENABLE (1 << 12) + +#define CMD_3D_SAMPLE_MASK 0x7818 /* GEN6+ */ #define CMD_DRAW_RECT 0x7900 #define CMD_BLEND_CONSTANT_COLOR 0x7901 @@ -818,6 +1047,25 @@ #define CMD_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909 #define CMD_AA_LINE_PARAMETERS 0x790a +#define CMD_GS_SVB_INDEX 0x790b /* CTG+ */ +/* DW1 */ +# define SVB_INDEX_SHIFT 29 +# define SVB_LOAD_INTERNAL_VERTEX_COUNT (1 << 0) /* SNB+ */ +/* DW2: SVB index */ +/* DW3: SVB maximum index */ + +#define CMD_3D_MULTISAMPLE 0x790d /* SNB+ */ +/* DW1 */ +# define MS_PIXEL_LOCATION_CENTER (0 << 4) +# define MS_PIXEL_LOCATION_UPPER_LEFT (1 << 4) +# define MS_NUMSAMPLES_1 (0 << 1) +# define MS_NUMSAMPLES_4 (2 << 1) +# define MS_NUMSAMPLES_8 (3 << 1) + +#define CMD_3D_CLEAR_PARAMS 0x7910 /* ILK+ */ +# define DEPTH_CLEAR_VALID (1 << 15) +/* DW1: depth clear value */ + #define CMD_PIPE_CONTROL 0x7a00 #define CMD_3D_PRIM 0x7b00 @@ -832,12 +1080,4 @@ #include "intel_chipset.h" -#define BRW_IS_G4X(brw) (IS_G4X((brw)->intel.intelScreen->deviceID)) -#define BRW_IS_IGDNG(brw) (IS_IGDNG((brw)->intel.intelScreen->deviceID)) -#define BRW_IS_965(brw) (!(BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))) -#define CMD_PIPELINE_SELECT(brw) ((BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? CMD_PIPELINE_SELECT_GM45 : CMD_PIPELINE_SELECT_965) -#define CMD_VF_STATISTICS(brw) ((BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? CMD_VF_STATISTICS_GM45 : CMD_VF_STATISTICS_965) -#define URB_SIZES(brw) (BRW_IS_IGDNG(brw) ? 1024 : \ - (BRW_IS_G4X(brw) ? 384 : 256)) /* 512 bit units */ - #endif diff --git a/i965/brw_disasm.c b/i965/brw_disasm.c index 9fef230..a8f6b99 100644 --- a/i965/brw_disasm.c +++ b/i965/brw_disasm.c @@ -239,7 +239,7 @@ char *imm_encoding[8] = { [2] = "UW", [3] = "W", [5] = "VF", - [5] = "V", + [6] = "V", [7] = "F" }; @@ -365,6 +365,7 @@ static int format (FILE *f, char *format, ...) 
va_start (args, format); vsnprintf (buf, sizeof (buf) - 1, format, args); + va_end (args); string (f, buf); return 0; } diff --git a/i965/brw_draw.c b/i965/brw_draw.c index 8bcb608..e348d46 100644 --- a/i965/brw_draw.c +++ b/i965/brw_draw.c @@ -39,10 +39,8 @@ #include "brw_defines.h" #include "brw_context.h" #include "brw_state.h" -#include "brw_fallback.h" #include "intel_batchbuffer.h" -#include "intel_buffer_objects.h" #define FILE_DEBUG_FLAG DEBUG_BATCH @@ -84,7 +82,7 @@ static GLuint brw_set_prim(struct brw_context *brw, GLenum prim) GLcontext *ctx = &brw->intel.ctx; if (INTEL_DEBUG & DEBUG_PRIMS) - _mesa_printf("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim)); + printf("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim)); /* Slight optimization to avoid the GS program when not needed: */ @@ -127,7 +125,7 @@ static void brw_emit_prim(struct brw_context *brw, struct intel_context *intel = &brw->intel; if (INTEL_DEBUG & DEBUG_PRIMS) - _mesa_printf("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode), + printf("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode), prim->start, prim->count); prim_packet.header.opcode = CMD_3D_PRIM; @@ -145,7 +143,7 @@ static void brw_emit_prim(struct brw_context *brw, prim_packet.base_vert_location = prim->basevertex; /* Can't wrap here, since we rely on the validated state. */ - brw->no_batch_wrap = GL_TRUE; + intel->no_batch_wrap = GL_TRUE; /* If we're set to always flush, do it before and after the primitive emit. * We want to catch both missed flushes that hurt instruction/state cache @@ -157,13 +155,13 @@ static void brw_emit_prim(struct brw_context *brw, } if (prim_packet.verts_per_instance) { intel_batchbuffer_data( brw->intel.batch, &prim_packet, - sizeof(prim_packet), LOOP_CLIPRECTS); + sizeof(prim_packet)); } if (intel->always_flush_cache) { intel_batchbuffer_emit_mi_flush(intel->batch); } - brw->no_batch_wrap = GL_FALSE; + intel->no_batch_wrap = GL_FALSE; } static void brw_merge_inputs( struct brw_context *brw, @@ -339,12 +337,7 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, * so can't access it earlier. */ - LOCK_HARDWARE(intel); - - if (!intel->constant_cliprect && intel->driDrawable->numClipRects == 0) { - UNLOCK_HARDWARE(intel); - return GL_TRUE; - } + intel_prepare_render(intel); for (i = 0; i < nr_prims; i++) { uint32_t hw_prim; @@ -356,8 +349,7 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, * an upper bound of how much we might emit in a single * brw_try_draw_prims(). 
*/ - intel_batchbuffer_require_space(intel->batch, intel->batch->size / 4, - LOOP_CLIPRECTS); + intel_batchbuffer_require_space(intel->batch, intel->batch->size / 4); hw_prim = brw_set_prim(brw, prim[i].mode); @@ -404,7 +396,6 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, if (intel->always_flush_batch) intel_batchbuffer_flush(intel->batch); out: - UNLOCK_HARDWARE(intel); brw_state_cache_check_size(brw); diff --git a/i965/brw_draw_upload.c b/i965/brw_draw_upload.c index ee684f6..71a4357 100644 --- a/i965/brw_draw_upload.c +++ b/i965/brw_draw_upload.c @@ -29,19 +29,15 @@ #include "main/glheader.h" #include "main/bufferobj.h" #include "main/context.h" -#include "main/state.h" -/* #include "main/api_validate.h" */ #include "main/enums.h" #include "brw_draw.h" #include "brw_defines.h" #include "brw_context.h" #include "brw_state.h" -#include "brw_fallback.h" #include "intel_batchbuffer.h" #include "intel_buffer_objects.h" -#include "intel_tex.h" static GLuint double_types[5] = { 0, @@ -59,6 +55,14 @@ static GLuint float_types[5] = { BRW_SURFACEFORMAT_R32G32B32A32_FLOAT }; +static GLuint half_float_types[5] = { + 0, + BRW_SURFACEFORMAT_R16_FLOAT, + BRW_SURFACEFORMAT_R16G16_FLOAT, + 0, /* can't seem to render this one */ + BRW_SURFACEFORMAT_R16G16B16A16_FLOAT +}; + static GLuint uint_types_norm[5] = { 0, BRW_SURFACEFORMAT_R32_UNORM, @@ -165,13 +169,14 @@ static GLuint get_surface_type( GLenum type, GLuint size, GLenum format, GLboolean normalized ) { if (INTEL_DEBUG & DEBUG_VERTS) - _mesa_printf("type %s size %d normalized %d\n", + printf("type %s size %d normalized %d\n", _mesa_lookup_enum_by_nr(type), size, normalized); if (normalized) { switch (type) { case GL_DOUBLE: return double_types[size]; case GL_FLOAT: return float_types[size]; + case GL_HALF_FLOAT: return half_float_types[size]; case GL_INT: return int_types_norm[size]; case GL_SHORT: return short_types_norm[size]; case GL_BYTE: return byte_types_norm[size]; @@ -194,6 +199,7 @@ static GLuint get_surface_type( GLenum type, GLuint size, switch (type) { case GL_DOUBLE: return double_types[size]; case GL_FLOAT: return float_types[size]; + case GL_HALF_FLOAT: return half_float_types[size]; case GL_INT: return int_types_scale[size]; case GL_SHORT: return short_types_scale[size]; case GL_BYTE: return byte_types_scale[size]; @@ -211,6 +217,7 @@ static GLuint get_size( GLenum type ) switch (type) { case GL_DOUBLE: return sizeof(GLdouble); case GL_FLOAT: return sizeof(GLfloat); + case GL_HALF_FLOAT: return sizeof(GLhalfARB); case GL_INT: return sizeof(GLint); case GL_SHORT: return sizeof(GLshort); case GL_BYTE: return sizeof(GLbyte); @@ -243,14 +250,6 @@ static void wrap_buffers( struct brw_context *brw, dri_bo_unreference(brw->vb.upload.bo); brw->vb.upload.bo = dri_bo_alloc(brw->intel.bufmgr, "temporary VBO", size, 1); - - /* Set the internal VBO\ to no-backing-store. We only use them as a - * temporary within a brw_try_draw_prims while the lock is held. 
- */ - /* DON'T DO THIS AS IF WE HAVE TO RE-ORG MEMORY WE NEED SOMEWHERE WITH - FAKE TO PUSH THIS STUFF */ -// if (!brw->intel.ttm) -// dri_bo_fake_disable_backing_store(brw->vb.upload.bo, NULL, NULL); } static void get_space( struct brw_context *brw, @@ -277,7 +276,6 @@ copy_array_to_vbo_array( struct brw_context *brw, struct brw_vertex_element *element, GLuint dst_stride) { - struct intel_context *intel = &brw->intel; GLuint size = element->count * dst_stride; get_space(brw, size, &element->bo, &element->offset); @@ -290,52 +288,26 @@ copy_array_to_vbo_array( struct brw_context *brw, } if (dst_stride == element->glarray->StrideB) { - if (intel->intelScreen->kernel_exec_fencing) { - drm_intel_gem_bo_map_gtt(element->bo); - memcpy((char *)element->bo->virtual + element->offset, - element->glarray->Ptr, size); - drm_intel_gem_bo_unmap_gtt(element->bo); - } else { - dri_bo_subdata(element->bo, - element->offset, - size, - element->glarray->Ptr); - } + drm_intel_gem_bo_map_gtt(element->bo); + memcpy((char *)element->bo->virtual + element->offset, + element->glarray->Ptr, size); + drm_intel_gem_bo_unmap_gtt(element->bo); } else { char *dest; const unsigned char *src = element->glarray->Ptr; int i; - if (intel->intelScreen->kernel_exec_fencing) { - drm_intel_gem_bo_map_gtt(element->bo); - dest = element->bo->virtual; - dest += element->offset; - - for (i = 0; i < element->count; i++) { - memcpy(dest, src, dst_stride); - src += element->glarray->StrideB; - dest += dst_stride; - } - - drm_intel_gem_bo_unmap_gtt(element->bo); - } else { - void *data; - - data = _mesa_malloc(dst_stride * element->count); - dest = data; - for (i = 0; i < element->count; i++) { - memcpy(dest, src, dst_stride); - src += element->glarray->StrideB; - dest += dst_stride; - } - - dri_bo_subdata(element->bo, - element->offset, - size, - data); + drm_intel_gem_bo_map_gtt(element->bo); + dest = element->bo->virtual; + dest += element->offset; - _mesa_free(data); + for (i = 0; i < element->count; i++) { + memcpy(dest, src, dst_stride); + src += element->glarray->StrideB; + dest += dst_stride; } + + drm_intel_gem_bo_unmap_gtt(element->bo); } } @@ -356,7 +328,7 @@ static void brw_prepare_vertices(struct brw_context *brw) /* First build an array of pointers to ve's in vb.inputs_read */ if (0) - _mesa_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index); + printf("%s %d..%d\n", __FUNCTION__, min_index, max_index); /* Accumulate the list of enabled arrays. */ brw->vb.nr_enabled = 0; @@ -502,12 +474,19 @@ static void brw_emit_vertices(struct brw_context *brw) * a VE loads from them. 
*/ if (brw->vb.nr_enabled == 0) { - BEGIN_BATCH(3, IGNORE_CLIPRECTS); + BEGIN_BATCH(3); OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | 1); - OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) | - BRW_VE0_VALID | - (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) | - (0 << BRW_VE0_SRC_OFFSET_SHIFT)); + if (IS_GEN6(intel->intelScreen->deviceID)) { + OUT_BATCH((0 << GEN6_VE0_INDEX_SHIFT) | + GEN6_VE0_VALID | + (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) | + (0 << BRW_VE0_SRC_OFFSET_SHIFT)); + } else { + OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) | + BRW_VE0_VALID | + (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) | + (0 << BRW_VE0_SRC_OFFSET_SHIFT)); + } OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | @@ -522,20 +501,28 @@ static void brw_emit_vertices(struct brw_context *brw) * are interleaved or from the same VBO. TBD if this makes a * performance difference. */ - BEGIN_BATCH(1 + brw->vb.nr_enabled * 4, IGNORE_CLIPRECTS); + BEGIN_BATCH(1 + brw->vb.nr_enabled * 4); OUT_BATCH((CMD_VERTEX_BUFFER << 16) | ((1 + brw->vb.nr_enabled * 4) - 2)); for (i = 0; i < brw->vb.nr_enabled; i++) { struct brw_vertex_element *input = brw->vb.enabled[i]; + uint32_t dw0; + + if (intel->gen >= 6) { + dw0 = GEN6_VB0_ACCESS_VERTEXDATA | + (i << GEN6_VB0_INDEX_SHIFT); + } else { + dw0 = BRW_VB0_ACCESS_VERTEXDATA | + (i << BRW_VB0_INDEX_SHIFT); + } - OUT_BATCH((i << BRW_VB0_INDEX_SHIFT) | - BRW_VB0_ACCESS_VERTEXDATA | + OUT_BATCH(dw0 | (input->stride << BRW_VB0_PITCH_SHIFT)); OUT_RELOC(input->bo, I915_GEM_DOMAIN_VERTEX, 0, input->offset); - if (BRW_IS_IGDNG(brw)) { + if (intel->is_ironlake || intel->gen >= 6) { OUT_RELOC(input->bo, I915_GEM_DOMAIN_VERTEX, 0, input->bo->size - 1); @@ -545,7 +532,7 @@ static void brw_emit_vertices(struct brw_context *brw) } ADVANCE_BATCH(); - BEGIN_BATCH(1 + brw->vb.nr_enabled * 2, IGNORE_CLIPRECTS); + BEGIN_BATCH(1 + brw->vb.nr_enabled * 2); OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + brw->vb.nr_enabled * 2) - 2)); for (i = 0; i < brw->vb.nr_enabled; i++) { struct brw_vertex_element *input = brw->vb.enabled[i]; @@ -566,12 +553,19 @@ static void brw_emit_vertices(struct brw_context *brw) break; } - OUT_BATCH((i << BRW_VE0_INDEX_SHIFT) | - BRW_VE0_VALID | - (format << BRW_VE0_FORMAT_SHIFT) | - (0 << BRW_VE0_SRC_OFFSET_SHIFT)); + if (IS_GEN6(intel->intelScreen->deviceID)) { + OUT_BATCH((i << GEN6_VE0_INDEX_SHIFT) | + GEN6_VE0_VALID | + (format << BRW_VE0_FORMAT_SHIFT) | + (0 << BRW_VE0_SRC_OFFSET_SHIFT)); + } else { + OUT_BATCH((i << BRW_VE0_INDEX_SHIFT) | + BRW_VE0_VALID | + (format << BRW_VE0_FORMAT_SHIFT) | + (0 << BRW_VE0_SRC_OFFSET_SHIFT)); + } - if (BRW_IS_IGDNG(brw)) + if (intel->is_ironlake || intel->gen >= 6) OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) | (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | @@ -625,13 +619,9 @@ static void brw_prepare_indices(struct brw_context *brw) /* Straight upload */ - if (intel->intelScreen->kernel_exec_fencing) { - drm_intel_gem_bo_map_gtt(bo); - memcpy((char *)bo->virtual + offset, index_buffer->ptr, ib_size); - drm_intel_gem_bo_unmap_gtt(bo); - } else { - dri_bo_subdata(bo, offset, ib_size, index_buffer->ptr); - } + drm_intel_gem_bo_map_gtt(bo); + memcpy((char *)bo->virtual + offset, index_buffer->ptr, ib_size); + drm_intel_gem_bo_unmap_gtt(bo); } else { offset = (GLuint) (unsigned long) index_buffer->ptr; brw->ib.start_vertex_offset = 0; @@ -712,7 
+702,7 @@ static void brw_emit_index_buffer(struct brw_context *brw) ib.header.bits.index_format = get_index_type(index_buffer->type); ib.header.bits.cut_index_enable = 0; - BEGIN_BATCH(4, IGNORE_CLIPRECTS); + BEGIN_BATCH(4); OUT_BATCH( ib.header.dword ); OUT_RELOC(brw->ib.bo, I915_GEM_DOMAIN_VERTEX, 0, diff --git a/i965/brw_eu.c b/i965/brw_eu.c index 1df5613..4e7c122 100644 --- a/i965/brw_eu.c +++ b/i965/brw_eu.c @@ -237,7 +237,7 @@ brw_resolve_cals(struct brw_compile *c) struct brw_glsl_call *call, *next; for (call = c->first_call; call; call = next) { next = call->next; - _mesa_free(call); + free(call); } c->first_call = NULL; } @@ -247,7 +247,7 @@ brw_resolve_cals(struct brw_compile *c) struct brw_glsl_label *label, *next; for (label = c->first_label; label; label = next) { next = label->next; - _mesa_free(label); + free(label); } c->first_label = NULL; } diff --git a/i965/brw_eu_debug.c b/i965/brw_eu_debug.c index 29f3f6d..99453af 100644 --- a/i965/brw_eu_debug.c +++ b/i965/brw_eu_debug.c @@ -54,9 +54,9 @@ void brw_print_reg( struct brw_reg hwreg ) "f" }; - _mesa_printf("%s%s", - hwreg.abs ? "abs/" : "", - hwreg.negate ? "-" : ""); + printf("%s%s", + hwreg.abs ? "abs/" : "", + hwreg.negate ? "-" : ""); if (hwreg.file == BRW_GENERAL_REGISTER_FILE && hwreg.nr % 2 == 0 && @@ -66,7 +66,7 @@ void brw_print_reg( struct brw_reg hwreg ) hwreg.hstride == BRW_HORIZONTAL_STRIDE_1 && hwreg.type == BRW_REGISTER_TYPE_F) { /* vector register */ - _mesa_printf("vec%d", hwreg.nr); + printf("vec%d", hwreg.nr); } else if (hwreg.file == BRW_GENERAL_REGISTER_FILE && hwreg.vstride == BRW_VERTICAL_STRIDE_0 && @@ -74,13 +74,13 @@ void brw_print_reg( struct brw_reg hwreg ) hwreg.hstride == BRW_HORIZONTAL_STRIDE_0 && hwreg.type == BRW_REGISTER_TYPE_F) { /* "scalar" register */ - _mesa_printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4); + printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4); } else if (hwreg.file == BRW_IMMEDIATE_VALUE) { - _mesa_printf("imm %f", hwreg.dw1.f); + printf("imm %f", hwreg.dw1.f); } else { - _mesa_printf("%s%d.%d<%d;%d,%d>:%s", + printf("%s%d.%d<%d;%d,%d>:%s", file[hwreg.file], hwreg.nr, hwreg.subnr / type_sz(hwreg.type), diff --git a/i965/brw_eu_emit.c b/i965/brw_eu_emit.c index 7ceabba..f69d529 100644 --- a/i965/brw_eu_emit.c +++ b/i965/brw_eu_emit.c @@ -102,8 +102,6 @@ static void brw_set_dest( struct brw_instruction *insn, static void brw_set_src0( struct brw_instruction *insn, struct brw_reg reg ) { - assert(reg.file != BRW_MESSAGE_REGISTER_FILE); - if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE) assert(reg.nr < 128); @@ -199,7 +197,7 @@ void brw_set_src1( struct brw_instruction *insn, * in the future: */ assert (reg.address_mode == BRW_ADDRESS_DIRECT); - //assert (reg.file == BRW_GENERAL_REGISTER_FILE); + /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */ if (insn->header.access_mode == BRW_ALIGN_1) { insn->bits3.da1.src1_subreg_nr = reg.subnr; @@ -252,9 +250,10 @@ static void brw_set_math_message( struct brw_context *brw, GLboolean saturate, GLuint dataType ) { + struct intel_context *intel = &brw->intel; brw_set_src1(insn, brw_imm_d(0)); - if (BRW_IS_IGDNG(brw)) { + if (intel->is_ironlake) { insn->bits3.math_igdng.function = function; insn->bits3.math_igdng.int_type = integer_type; insn->bits3.math_igdng.precision = low_precision; @@ -319,9 +318,10 @@ static void brw_set_urb_message( struct brw_context *brw, GLuint offset, GLuint swizzle_control ) { + struct intel_context *intel = &brw->intel; brw_set_src1(insn, brw_imm_d(0)); - if (BRW_IS_IGDNG(brw)) { + if (intel->is_ironlake || 
intel->gen >= 6) { insn->bits3.urb_igdng.opcode = 0; /* ? */ insn->bits3.urb_igdng.offset = offset; insn->bits3.urb_igdng.swizzle_control = swizzle_control; @@ -332,8 +332,16 @@ static void brw_set_urb_message( struct brw_context *brw, insn->bits3.urb_igdng.response_length = response_length; insn->bits3.urb_igdng.msg_length = msg_length; insn->bits3.urb_igdng.end_of_thread = end_of_thread; - insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB; - insn->bits2.send_igdng.end_of_thread = end_of_thread; + if (intel->gen >= 6) { + /* For SNB, the SFID bits moved to the condmod bits, and + * EOT stayed in bits3 above. Does the EOT bit setting + * below on Ironlake even do anything? + */ + insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB; + } else { + insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB; + insn->bits2.send_igdng.end_of_thread = end_of_thread; + } } else { insn->bits3.urb.opcode = 0; /* ? */ insn->bits3.urb.offset = offset; @@ -358,9 +366,10 @@ static void brw_set_dp_write_message( struct brw_context *brw, GLuint response_length, GLuint end_of_thread ) { + struct intel_context *intel = &brw->intel; brw_set_src1(insn, brw_imm_d(0)); - if (BRW_IS_IGDNG(brw)) { + if (intel->is_ironlake) { insn->bits3.dp_write_igdng.binding_table_index = binding_table_index; insn->bits3.dp_write_igdng.msg_control = msg_control; insn->bits3.dp_write_igdng.pixel_scoreboard_clear = pixel_scoreboard_clear; @@ -395,9 +404,10 @@ static void brw_set_dp_read_message( struct brw_context *brw, GLuint response_length, GLuint end_of_thread ) { + struct intel_context *intel = &brw->intel; brw_set_src1(insn, brw_imm_d(0)); - if (BRW_IS_IGDNG(brw)) { + if (intel->is_ironlake) { insn->bits3.dp_read_igdng.binding_table_index = binding_table_index; insn->bits3.dp_read_igdng.msg_control = msg_control; insn->bits3.dp_read_igdng.msg_type = msg_type; @@ -433,10 +443,11 @@ static void brw_set_sampler_message(struct brw_context *brw, GLuint header_present, GLuint simd_mode) { + struct intel_context *intel = &brw->intel; assert(eot == 0); brw_set_src1(insn, brw_imm_d(0)); - if (BRW_IS_IGDNG(brw)) { + if (intel->is_ironlake) { insn->bits3.sampler_igdng.binding_table_index = binding_table_index; insn->bits3.sampler_igdng.sampler = sampler; insn->bits3.sampler_igdng.msg_type = msg_type; @@ -447,7 +458,7 @@ static void brw_set_sampler_message(struct brw_context *brw, insn->bits3.sampler_igdng.end_of_thread = eot; insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_SAMPLER; insn->bits2.send_igdng.end_of_thread = eot; - } else if (BRW_IS_G4X(brw)) { + } else if (intel->is_g4x) { insn->bits3.sampler_g4x.binding_table_index = binding_table_index; insn->bits3.sampler_g4x.sampler = sampler; insn->bits3.sampler_g4x.msg_type = msg_type; @@ -648,10 +659,11 @@ struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size) struct brw_instruction *brw_ELSE(struct brw_compile *p, struct brw_instruction *if_insn) { + struct intel_context *intel = &p->brw->intel; struct brw_instruction *insn; GLuint br = 1; - if (BRW_IS_IGDNG(p->brw)) + if (intel->is_ironlake) br = 2; if (p->single_program_flow) { @@ -690,9 +702,10 @@ struct brw_instruction *brw_ELSE(struct brw_compile *p, void brw_ENDIF(struct brw_compile *p, struct brw_instruction *patch_insn) { + struct intel_context *intel = &p->brw->intel; GLuint br = 1; - if (BRW_IS_IGDNG(p->brw)) + if (intel->is_ironlake) br = 2; if (p->single_program_flow) { @@ -803,10 +816,11 @@ struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size) struct brw_instruction 
*brw_WHILE(struct brw_compile *p, struct brw_instruction *do_insn) { + struct intel_context *intel = &p->brw->intel; struct brw_instruction *insn; GLuint br = 1; - if (BRW_IS_IGDNG(p->brw)) + if (intel->is_ironlake) br = 2; if (p->single_program_flow) @@ -846,14 +860,15 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p, void brw_land_fwd_jump(struct brw_compile *p, struct brw_instruction *jmp_insn) { + struct intel_context *intel = &p->brw->intel; struct brw_instruction *landing = &p->store[p->nr_insn]; GLuint jmpi = 1; - if (BRW_IS_IGDNG(p->brw)) + if (intel->is_ironlake) jmpi = 2; assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI); - assert(jmp_insn->bits1.da1.src1_reg_file = BRW_IMMEDIATE_VALUE); + assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE); jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1); } @@ -908,26 +923,40 @@ void brw_math( struct brw_compile *p, GLuint data_type, GLuint precision ) { - struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); - GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1; - GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1; + struct intel_context *intel = &p->brw->intel; - /* Example code doesn't set predicate_control for send - * instructions. - */ - insn->header.predicate_control = 0; - insn->header.destreg__conditionalmod = msg_reg_nr; + if (intel->gen >= 6) { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH); - brw_set_dest(insn, dest); - brw_set_src0(insn, src); - brw_set_math_message(p->brw, - insn, - msg_length, response_length, - function, - BRW_MATH_INTEGER_UNSIGNED, - precision, - saturate, - data_type); + /* Math is the same ISA format as other opcodes, except that CondModifier + * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. + */ + insn->header.destreg__conditionalmod = function; + + brw_set_dest(insn, dest); + brw_set_src0(insn, src); + brw_set_src1(insn, brw_null_reg()); + } else { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1; + GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1; + /* Example code doesn't set predicate_control for send + * instructions. + */ + insn->header.predicate_control = 0; + insn->header.destreg__conditionalmod = msg_reg_nr; + + brw_set_dest(insn, dest); + brw_set_src0(insn, src); + brw_set_math_message(p->brw, + insn, + msg_length, response_length, + function, + BRW_MATH_INTEGER_UNSIGNED, + precision, + saturate, + data_type); + } } /** @@ -1263,7 +1292,7 @@ void brw_SAMPLE(struct brw_compile *p, GLboolean need_stall = 0; if (writemask == 0) { - /*_mesa_printf("%s: zero writemask??\n", __FUNCTION__); */ + /*printf("%s: zero writemask??\n", __FUNCTION__); */ return; } @@ -1295,7 +1324,7 @@ void brw_SAMPLE(struct brw_compile *p, if (newmask != writemask) { need_stall = 1; - /* _mesa_printf("need stall %x %x\n", newmask , writemask); */ + /* printf("need stall %x %x\n", newmask , writemask); */ } else { struct brw_reg m1 = brw_message_reg(msg_reg_nr); @@ -1368,7 +1397,18 @@ void brw_urb_WRITE(struct brw_compile *p, GLuint offset, GLuint swizzle) { - struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + struct intel_context *intel = &p->brw->intel; + struct brw_instruction *insn; + + /* Sandybridge doesn't have the implied move for SENDs, + * and the first message register index comes from src0. 
+ */ + if (intel->gen >= 6) { + brw_MOV(p, brw_message_reg(msg_reg_nr), src0); + src0 = brw_message_reg(msg_reg_nr); + } + + insn = next_insn(p, BRW_OPCODE_SEND); assert(msg_length < BRW_MAX_MRF); @@ -1376,7 +1416,8 @@ void brw_urb_WRITE(struct brw_compile *p, brw_set_src0(insn, src0); brw_set_src1(insn, brw_imm_d(0)); - insn->header.destreg__conditionalmod = msg_reg_nr; + if (intel->gen < 6) + insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_urb_message(p->brw, insn, diff --git a/i965/brw_fallback.c b/i965/brw_fallback.c index 562a178..ba401c2 100644 --- a/i965/brw_fallback.c +++ b/i965/brw_fallback.c @@ -36,18 +36,13 @@ #include "swrast/swrast.h" #include "tnl/tnl.h" #include "brw_context.h" -#include "brw_fallback.h" -#include "intel_chipset.h" #include "intel_fbo.h" #include "intel_regions.h" -#include "glapi/glapi.h" - #define FILE_DEBUG_FLAG DEBUG_FALLBACKS static GLboolean do_check_fallback(struct brw_context *brw) { - struct intel_context *intel = &brw->intel; GLcontext *ctx = &brw->intel.ctx; GLuint i; @@ -86,8 +81,7 @@ static GLboolean do_check_fallback(struct brw_context *brw) } /* _NEW_BUFFERS */ - if (IS_965(intel->intelScreen->deviceID) && - !IS_G4X(intel->intelScreen->deviceID)) { + if (!brw->has_surface_tile_offset) { for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i]; struct intel_renderbuffer *irb = intel_renderbuffer(rb); diff --git a/i965/brw_gs.c b/i965/brw_gs.c index 610b6c3..7261b31 100644 --- a/i965/brw_gs.c +++ b/i965/brw_gs.c @@ -47,6 +47,7 @@ static void compile_gs_prog( struct brw_context *brw, struct brw_gs_prog_key *key ) { + struct intel_context *intel = &brw->intel; struct brw_gs_compile c; const GLuint *program; GLuint program_size; @@ -54,13 +55,12 @@ static void compile_gs_prog( struct brw_context *brw, memset(&c, 0, sizeof(c)); c.key = *key; - c.need_ff_sync = BRW_IS_IGDNG(brw); /* Need to locate the two positions present in vertex + header. * These are currently hardcoded: */ c.nr_attrs = brw_count_bits(c.key.attrs); - if (BRW_IS_IGDNG(brw)) + if (intel->is_ironlake) c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */ else c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? 
*/ @@ -125,12 +125,13 @@ static void compile_gs_prog( struct brw_context *brw, /* Upload */ dri_bo_unreference(brw->gs.prog_bo); - brw->gs.prog_bo = brw_upload_cache( &brw->cache, BRW_GS_PROG, - &c.key, sizeof(c.key), - NULL, 0, - program, program_size, - &c.prog_data, - &brw->gs.prog_data ); + brw->gs.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_GS_PROG, + &c.key, sizeof(c.key), + NULL, 0, + program, program_size, + &c.prog_data, + sizeof(c.prog_data), + &brw->gs.prog_data); } static const GLenum gs_prim[GL_POLYGON+1] = { diff --git a/i965/brw_gs.h b/i965/brw_gs.h index 010c1c2..813b8d4 100644 --- a/i965/brw_gs.h +++ b/i965/brw_gs.h @@ -63,7 +63,6 @@ struct brw_gs_compile { GLuint nr_attrs; GLuint nr_regs; GLuint nr_bytes; - GLboolean need_ff_sync; }; #define ATTR_SIZE (4*4) diff --git a/i965/brw_gs_emit.c b/i965/brw_gs_emit.c index 0fc5b02..dd7b057 100644 --- a/i965/brw_gs_emit.c +++ b/i965/brw_gs_emit.c @@ -40,7 +40,6 @@ #include "brw_defines.h" #include "brw_context.h" #include "brw_eu.h" -#include "brw_util.h" #include "brw_gs.h" static void brw_gs_alloc_regs( struct brw_gs_compile *c, @@ -122,12 +121,14 @@ static void brw_gs_ff_sync(struct brw_gs_compile *c, int num_prim) void brw_gs_quads( struct brw_gs_compile *c, struct brw_gs_prog_key *key ) { + struct intel_context *intel = &c->func.brw->intel; + brw_gs_alloc_regs(c, 4); /* Use polygons for correct edgeflag behaviour. Note that vertex 3 * is the PV for quads, but vertex 0 for polygons: */ - if (c->need_ff_sync) + if (intel->needs_ff_sync) brw_gs_ff_sync(c, 1); if (key->pv_first) { brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START)); @@ -145,9 +146,11 @@ void brw_gs_quads( struct brw_gs_compile *c, struct brw_gs_prog_key *key ) void brw_gs_quad_strip( struct brw_gs_compile *c, struct brw_gs_prog_key *key ) { + struct intel_context *intel = &c->func.brw->intel; + brw_gs_alloc_regs(c, 4); - if (c->need_ff_sync) + if (intel->needs_ff_sync) brw_gs_ff_sync(c, 1); if (key->pv_first) { brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START)); @@ -165,9 +168,11 @@ void brw_gs_quad_strip( struct brw_gs_compile *c, struct brw_gs_prog_key *key ) void brw_gs_tris( struct brw_gs_compile *c ) { + struct intel_context *intel = &c->func.brw->intel; + brw_gs_alloc_regs(c, 3); - if (c->need_ff_sync) + if (intel->needs_ff_sync) brw_gs_ff_sync(c, 1); brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_TRILIST << 2) | R02_PRIM_START)); brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_TRILIST << 2)); @@ -176,9 +181,11 @@ void brw_gs_tris( struct brw_gs_compile *c ) void brw_gs_lines( struct brw_gs_compile *c ) { + struct intel_context *intel = &c->func.brw->intel; + brw_gs_alloc_regs(c, 2); - if (c->need_ff_sync) + if (intel->needs_ff_sync) brw_gs_ff_sync(c, 1); brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_START)); brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_END)); @@ -186,9 +193,11 @@ void brw_gs_lines( struct brw_gs_compile *c ) void brw_gs_points( struct brw_gs_compile *c ) { + struct intel_context *intel = &c->func.brw->intel; + brw_gs_alloc_regs(c, 1); - if (c->need_ff_sync) + if (intel->needs_ff_sync) brw_gs_ff_sync(c, 1); brw_gs_emit_vue(c, c->reg.vertex[0], 1, ((_3DPRIM_POINTLIST << 2) | R02_PRIM_START | R02_PRIM_END)); } diff --git a/i965/brw_gs_state.c b/i965/brw_gs_state.c index ed9d2ff..d8ad5ce 100644 --- a/i965/brw_gs_state.c +++ b/i965/brw_gs_state.c @@ -34,7 +34,6 @@ #include "brw_context.h" #include 
"brw_state.h" #include "brw_defines.h" -#include "main/macros.h" struct brw_gs_unit_key { unsigned int total_grf; @@ -72,6 +71,7 @@ gs_unit_populate_key(struct brw_context *brw, struct brw_gs_unit_key *key) static dri_bo * gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key) { + struct intel_context *intel = &brw->intel; struct brw_gs_unit_state gs; dri_bo *bo; @@ -98,7 +98,7 @@ gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key) else gs.thread4.max_threads = 0; - if (BRW_IS_IGDNG(brw)) + if (intel->is_ironlake) gs.thread4.rendering_enable = 1; if (INTEL_DEBUG & DEBUG_STATS) @@ -107,8 +107,7 @@ gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key) bo = brw_upload_cache(&brw->cache, BRW_GS_UNIT, key, sizeof(*key), &brw->gs.prog_bo, 1, - &gs, sizeof(gs), - NULL, NULL); + &gs, sizeof(gs)); if (key->prog_active) { /* Emit GS program relocation */ diff --git a/i965/brw_misc_state.c b/i965/brw_misc_state.c index 4b0d598..d030ed4 100644 --- a/i965/brw_misc_state.c +++ b/i965/brw_misc_state.c @@ -78,10 +78,7 @@ static void upload_drawing_rect(struct brw_context *brw) struct intel_context *intel = &brw->intel; GLcontext *ctx = &intel->ctx; - if (!intel->constant_cliprect) - return; - - BEGIN_BATCH(4, NO_LOOP_CLIPRECTS); + BEGIN_BATCH(4); OUT_BATCH(_3DSTATE_DRAWRECT_INFO_I965); OUT_BATCH(0); /* xmin, ymin */ OUT_BATCH(((ctx->DrawBuffer->Width - 1) & 0xffff) | @@ -116,7 +113,7 @@ static void upload_binding_table_pointers(struct brw_context *brw) { struct intel_context *intel = &brw->intel; - BEGIN_BATCH(6, IGNORE_CLIPRECTS); + BEGIN_BATCH(6); OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2)); if (brw->vs.bind_bo != NULL) OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* vs */ @@ -139,6 +136,41 @@ const struct brw_tracked_state brw_binding_table_pointers = { .emit = upload_binding_table_pointers, }; +/** + * Upload the binding table pointers, which point each stage's array of surface + * state pointers. + * + * The binding table pointers are relative to the surface state base address, + * which is 0. + */ +static void upload_gen6_binding_table_pointers(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + + BEGIN_BATCH(4); + OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | + GEN6_BINDING_TABLE_MODIFY_VS | + GEN6_BINDING_TABLE_MODIFY_GS | + GEN6_BINDING_TABLE_MODIFY_PS | + (4 - 2)); + if (brw->vs.bind_bo != NULL) + OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* vs */ + else + OUT_BATCH(0); + OUT_BATCH(0); /* gs */ + OUT_RELOC(brw->wm.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* wm/ps */ + ADVANCE_BATCH(); +} + +const struct brw_tracked_state gen6_binding_table_pointers = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_BATCH, + .cache = CACHE_NEW_SURF_BIND, + }, + .prepare = prepare_binding_table_pointers, + .emit = upload_gen6_binding_table_pointers, +}; /** * Upload pointers to the per-stage state. @@ -150,7 +182,7 @@ static void upload_pipelined_state_pointers(struct brw_context *brw ) { struct intel_context *intel = &brw->intel; - BEGIN_BATCH(7, IGNORE_CLIPRECTS); + BEGIN_BATCH(7); OUT_BATCH(CMD_PIPELINED_STATE_POINTERS << 16 | (7 - 2)); OUT_RELOC(brw->vs.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); if (brw->gs.prog_active) @@ -212,10 +244,17 @@ static void emit_depthbuffer(struct brw_context *brw) { struct intel_context *intel = &brw->intel; struct intel_region *region = brw->state.depth_region; - unsigned int len = (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? 
6 : 5; + unsigned int len; + + if (intel->gen >= 6) + len = 7; + else if (intel->is_g4x || intel->is_ironlake) + len = 6; + else + len = 5; if (region == NULL) { - BEGIN_BATCH(len, IGNORE_CLIPRECTS); + BEGIN_BATCH(len); OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2)); OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) | (BRW_SURFACE_NULL << 29)); @@ -223,9 +262,12 @@ static void emit_depthbuffer(struct brw_context *brw) OUT_BATCH(0); OUT_BATCH(0); - if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) + if (intel->is_g4x || intel->is_ironlake || intel->gen >= 6) OUT_BATCH(0); + if (intel->gen >= 6) + OUT_BATCH(0); + ADVANCE_BATCH(); } else { unsigned int format; @@ -246,8 +288,10 @@ static void emit_depthbuffer(struct brw_context *brw) } assert(region->tiling != I915_TILING_X); + if (IS_GEN6(intel->intelScreen->deviceID)) + assert(region->tiling != I915_TILING_NONE); - BEGIN_BATCH(len, IGNORE_CLIPRECTS); + BEGIN_BATCH(len); OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2)); OUT_BATCH(((region->pitch * region->cpp) - 1) | (format << 18) | @@ -262,9 +306,20 @@ static void emit_depthbuffer(struct brw_context *brw) ((region->height - 1) << 19)); OUT_BATCH(0); - if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) + if (intel->is_g4x || intel->is_ironlake || intel->gen >= 6) OUT_BATCH(0); + if (intel->gen >= 6) + OUT_BATCH(0); + + ADVANCE_BATCH(); + } + + /* Initialize it for safety. */ + if (intel->gen >= 6) { + BEGIN_BATCH(2); + OUT_BATCH(CMD_3D_CLEAR_PARAMS << 16 | (2 - 2)); + OUT_BATCH(0); ADVANCE_BATCH(); } } @@ -330,7 +385,7 @@ const struct brw_tracked_state brw_polygon_stipple = { static void upload_polygon_stipple_offset(struct brw_context *brw) { - __DRIdrawablePrivate *dPriv = brw->intel.driDrawable; + GLcontext *ctx = &brw->intel.ctx; struct brw_polygon_stipple_offset bpso; memset(&bpso, 0, sizeof(bpso)); @@ -346,8 +401,8 @@ static void upload_polygon_stipple_offset(struct brw_context *brw) * worry about. 
*/ if (brw->intel.ctx.DrawBuffer->Name == 0) { - bpso.bits0.x_offset = (32 - (dPriv->x & 31)) & 31; - bpso.bits0.y_offset = (32 - ((dPriv->y + dPriv->h) & 31)) & 31; + bpso.bits0.x_offset = 0; + bpso.bits0.y_offset = (32 - (ctx->DrawBuffer->Height & 31)) & 31; } else { bpso.bits0.y_offset = 0; @@ -374,8 +429,8 @@ const struct brw_tracked_state brw_polygon_stipple_offset = { static void upload_aa_line_parameters(struct brw_context *brw) { struct brw_aa_line_parameters balp; - - if (BRW_IS_965(brw)) + + if (!brw->has_aa_line_parameters) return; /* use legacy aa line coverage computation */ @@ -438,18 +493,20 @@ const struct brw_tracked_state brw_line_stipple = { static void upload_invarient_state( struct brw_context *brw ) { + struct intel_context *intel = &brw->intel; + { /* 0x61040000 Pipeline Select */ /* PipelineSelect : 0 */ struct brw_pipeline_select ps; memset(&ps, 0, sizeof(ps)); - ps.header.opcode = CMD_PIPELINE_SELECT(brw); + ps.header.opcode = brw->CMD_PIPELINE_SELECT; ps.header.pipeline_select = 0; BRW_BATCH_STRUCT(brw, &ps); } - { + if (intel->gen < 6) { struct brw_global_depth_offset_clamp gdo; memset(&gdo, 0, sizeof(gdo)); @@ -462,6 +519,32 @@ static void upload_invarient_state( struct brw_context *brw ) BRW_BATCH_STRUCT(brw, &gdo); } + intel_batchbuffer_emit_mi_flush(intel->batch); + + if (intel->gen >= 6) { + int i; + + BEGIN_BATCH(3); + OUT_BATCH(CMD_3D_MULTISAMPLE << 16 | (3 - 2)); + OUT_BATCH(MS_PIXEL_LOCATION_CENTER | + MS_NUMSAMPLES_1); + OUT_BATCH(0); /* positions for 4/8-sample */ + ADVANCE_BATCH(); + + BEGIN_BATCH(2); + OUT_BATCH(CMD_3D_SAMPLE_MASK << 16 | (2 - 2)); + OUT_BATCH(1); + ADVANCE_BATCH(); + + for (i = 0; i < 4; i++) { + BEGIN_BATCH(4); + OUT_BATCH(CMD_GS_SVB_INDEX << 16 | (4 - 2)); + OUT_BATCH(i << SVB_INDEX_SHIFT); + OUT_BATCH(0); + OUT_BATCH(0xffffffff); + ADVANCE_BATCH(); + } + } /* 0x61020000 State Instruction Pointer */ { @@ -480,7 +563,7 @@ static void upload_invarient_state( struct brw_context *brw ) struct brw_vf_statistics vfs; memset(&vfs, 0, sizeof(vfs)); - vfs.opcode = CMD_VF_STATISTICS(brw); + vfs.opcode = brw->CMD_VF_STATISTICS; if (INTEL_DEBUG & DEBUG_STATS) vfs.statistics_enable = 1; @@ -512,8 +595,21 @@ static void upload_state_base_address( struct brw_context *brw ) /* Output the structure (brw_state_base_address) directly to the * batchbuffer, so we can emit relocations inline. 
*/ - if (BRW_IS_IGDNG(brw)) { - BEGIN_BATCH(8, IGNORE_CLIPRECTS); + if (intel->gen >= 6) { + BEGIN_BATCH(10); + OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2)); + OUT_BATCH(1); /* General state base address */ + OUT_BATCH(1); /* Surface state base address */ + OUT_BATCH(1); /* Dynamic state base address */ + OUT_BATCH(1); /* Indirect object base address */ + OUT_BATCH(1); /* Instruction base address */ + OUT_BATCH(1); /* General state upper bound */ + OUT_BATCH(1); /* Dynamic state upper bound */ + OUT_BATCH(1); /* Indirect object upper bound */ + OUT_BATCH(1); /* Instruction access upper bound */ + ADVANCE_BATCH(); + } else if (intel->is_ironlake) { + BEGIN_BATCH(8); OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2)); OUT_BATCH(1); /* General state base address */ OUT_BATCH(1); /* Surface state base address */ @@ -524,7 +620,7 @@ static void upload_state_base_address( struct brw_context *brw ) OUT_BATCH(1); /* Instruction access upper bound */ ADVANCE_BATCH(); } else { - BEGIN_BATCH(6, IGNORE_CLIPRECTS); + BEGIN_BATCH(6); OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2)); OUT_BATCH(1); /* General state base address */ OUT_BATCH(1); /* Surface state base address */ diff --git a/i965/brw_program.c b/i965/brw_program.c index bac6918..c78f7b3 100644 --- a/i965/brw_program.c +++ b/i965/brw_program.c @@ -37,7 +37,6 @@ #include "tnl/tnl.h" #include "brw_context.h" -#include "brw_util.h" #include "brw_wm.h" static void brwBindProgram( GLcontext *ctx, @@ -112,9 +111,10 @@ static GLboolean brwIsProgramNative( GLcontext *ctx, return GL_TRUE; } -static void brwProgramStringNotify( GLcontext *ctx, - GLenum target, - struct gl_program *prog ) + +static GLboolean brwProgramStringNotify( GLcontext *ctx, + GLenum target, + struct gl_program *prog ) { struct brw_context *brw = brw_context(ctx); @@ -151,6 +151,9 @@ static void brwProgramStringNotify( GLcontext *ctx, */ _tnl_program_string(ctx, target, prog); } + + /* XXX check if program is legal, within limits */ + return GL_TRUE; } void brwInitFragProgFuncs( struct dd_function_table *functions ) diff --git a/i965/brw_queryobj.c b/i965/brw_queryobj.c index a195bc3..6cce7e5 100644 --- a/i965/brw_queryobj.c +++ b/i965/brw_queryobj.c @@ -73,7 +73,7 @@ brw_new_query_object(GLcontext *ctx, GLuint id) { struct brw_query_object *query; - query = _mesa_calloc(sizeof(struct brw_query_object)); + query = calloc(1, sizeof(struct brw_query_object)); query->Base.Id = id; query->Base.Result = 0; @@ -89,7 +89,7 @@ brw_delete_query(GLcontext *ctx, struct gl_query_object *q) struct brw_query_object *query = (struct brw_query_object *)q; dri_bo_unreference(query->bo); - _mesa_free(query); + free(query); } static void @@ -188,7 +188,7 @@ brw_emit_query_begin(struct brw_context *brw) if (brw->query.active || is_empty_list(&brw->query.active_head)) return; - BEGIN_BATCH(4, IGNORE_CLIPRECTS); + BEGIN_BATCH(4); OUT_BATCH(_3DSTATE_PIPE_CONTROL | PIPE_CONTROL_DEPTH_STALL | PIPE_CONTROL_WRITE_DEPTH_COUNT); @@ -227,7 +227,7 @@ brw_emit_query_end(struct brw_context *brw) if (!brw->query.active) return; - BEGIN_BATCH(4, IGNORE_CLIPRECTS); + BEGIN_BATCH(4); OUT_BATCH(_3DSTATE_PIPE_CONTROL | PIPE_CONTROL_DEPTH_STALL | PIPE_CONTROL_WRITE_DEPTH_COUNT); diff --git a/i965/brw_sf.c b/i965/brw_sf.c index 968890f..8e6839b 100644 --- a/i965/brw_sf.c +++ b/i965/brw_sf.c @@ -117,12 +117,13 @@ static void compile_sf_prog( struct brw_context *brw, /* Upload */ dri_bo_unreference(brw->sf.prog_bo); - brw->sf.prog_bo = brw_upload_cache( &brw->cache, BRW_SF_PROG, - &c.key, sizeof(c.key), - NULL, 0, - 
program, program_size, - &c.prog_data, - &brw->sf.prog_data ); + brw->sf.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_SF_PROG, + &c.key, sizeof(c.key), + NULL, 0, + program, program_size, + &c.prog_data, + sizeof(c.prog_data), + &brw->sf.prog_data); } /* Calculate interpolants for triangle and line rasterization. diff --git a/i965/brw_sf_emit.c b/i965/brw_sf_emit.c index 3eae41e..bb08055 100644 --- a/i965/brw_sf_emit.c +++ b/i965/brw_sf_emit.c @@ -149,6 +149,7 @@ static void copy_colors( struct brw_sf_compile *c, static void do_flatshade_triangle( struct brw_sf_compile *c ) { struct brw_compile *p = &c->func; + struct intel_context *intel = &p->brw->intel; struct brw_reg ip = brw_ip_reg(); GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS); GLuint jmpi = 1; @@ -161,7 +162,7 @@ static void do_flatshade_triangle( struct brw_sf_compile *c ) if (c->key.primitive == SF_UNFILLED_TRIS) return; - if (BRW_IS_IGDNG(p->brw)) + if (intel->is_ironlake) jmpi = 2; brw_push_insn_state(p); @@ -187,6 +188,7 @@ static void do_flatshade_triangle( struct brw_sf_compile *c ) static void do_flatshade_line( struct brw_sf_compile *c ) { struct brw_compile *p = &c->func; + struct intel_context *intel = &p->brw->intel; struct brw_reg ip = brw_ip_reg(); GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS); GLuint jmpi = 1; @@ -199,7 +201,7 @@ static void do_flatshade_line( struct brw_sf_compile *c ) if (c->key.primitive == SF_UNFILLED_TRIS) return; - if (BRW_IS_IGDNG(p->brw)) + if (intel->is_ironlake) jmpi = 2; brw_push_insn_state(p); diff --git a/i965/brw_sf_state.c b/i965/brw_sf_state.c index bb69435..847c886 100644 --- a/i965/brw_sf_state.c +++ b/i965/brw_sf_state.c @@ -35,7 +35,6 @@ #include "brw_state.h" #include "brw_defines.h" #include "main/macros.h" -#include "intel_fbo.h" static void upload_sf_vp(struct brw_context *brw) { @@ -70,9 +69,9 @@ static void upload_sf_vp(struct brw_context *brw) * for DrawBuffer->_[XY]{min,max} */ - /* The scissor only needs to handle the intersection of drawable and - * scissor rect. Clipping to the boundaries of static shared buffers - * for front/back/depth is covered by looping over cliprects in brw_draw.c. + /* The scissor only needs to handle the intersection of drawable + * and scissor rect, since there are no longer cliprects for shared + * buffers with DRI2. * * Note that the hardware's coordinates are inclusive, while Mesa's min is * inclusive but max is exclusive. @@ -165,6 +164,7 @@ static dri_bo * sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, dri_bo **reloc_bufs) { + struct intel_context *intel = &brw->intel; struct brw_sf_unit_state sf; dri_bo *bo; int chipset_max_threads; @@ -177,7 +177,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, sf.thread3.dispatch_grf_start_reg = 3; - if (BRW_IS_IGDNG(brw)) + if (intel->is_ironlake) sf.thread3.urb_entry_read_offset = 3; else sf.thread3.urb_entry_read_offset = 1; @@ -187,10 +187,10 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, sf.thread4.nr_urb_entries = key->nr_urb_entries; sf.thread4.urb_entry_allocation_size = key->sfsize - 1; - /* Each SF thread produces 1 PUE, and there can be up to 24(Pre-IGDNG) or - * 48(IGDNG) threads + /* Each SF thread produces 1 PUE, and there can be up to 24 (Pre-Ironlake) or + * 48 (Ironlake) threads. 
*/ - if (BRW_IS_IGDNG(brw)) + if (intel->is_ironlake) chipset_max_threads = 48; else chipset_max_threads = 24; @@ -308,8 +308,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, bo = brw_upload_cache(&brw->cache, BRW_SF_UNIT, key, sizeof(*key), reloc_bufs, 2, - &sf, sizeof(sf), - NULL, NULL); + &sf, sizeof(sf)); /* STATE_PREFETCH command description describes this state as being * something loaded through the GPE (L2 ISC), so it's INSTRUCTION domain. diff --git a/i965/brw_state.h b/i965/brw_state.h index b129b1f..f790cfa 100644 --- a/i965/brw_state.h +++ b/i965/brw_state.h @@ -35,7 +35,7 @@ #include "brw_context.h" -static inline void +static INLINE void brw_add_validated_bo(struct brw_context *brw, dri_bo *bo) { assert(brw->state.validated_bo_count < ARRAY_SIZE(brw->state.validated_bos)); @@ -90,6 +90,23 @@ const struct brw_tracked_state brw_drawing_rect; const struct brw_tracked_state brw_indices; const struct brw_tracked_state brw_vertices; const struct brw_tracked_state brw_index_buffer; +const struct brw_tracked_state gen6_binding_table_pointers; +const struct brw_tracked_state gen6_blend_state; +const struct brw_tracked_state gen6_cc_state_pointers; +const struct brw_tracked_state gen6_cc_vp; +const struct brw_tracked_state gen6_clip_state; +const struct brw_tracked_state gen6_clip_vp; +const struct brw_tracked_state gen6_color_calc_state; +const struct brw_tracked_state gen6_depth_stencil_state; +const struct brw_tracked_state gen6_gs_state; +const struct brw_tracked_state gen6_sampler_state; +const struct brw_tracked_state gen6_scissor_state; +const struct brw_tracked_state gen6_sf_state; +const struct brw_tracked_state gen6_sf_vp; +const struct brw_tracked_state gen6_urb; +const struct brw_tracked_state gen6_viewport_state; +const struct brw_tracked_state gen6_vs_state; +const struct brw_tracked_state gen6_wm_state; /** * Use same key for WM and VS surfaces. 
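Each of the gen6_* declarations added to brw_state.h above is a tracked-state atom of the same shape as the existing gen4 ones: a set of dirty flags plus prepare/emit hooks that brw_validate_state()/brw_upload_state() walk in list order (see the gen6_binding_table_pointers atom earlier in this patch and the gen4_atoms/gen6_atoms arrays later in it). A minimal editorial sketch of that shape and of the dirty-bit test that decides whether an atom is re-run follows; the example_* names are illustrative stand-ins, only the field layout is paraphrased from how the atoms are declared and consumed in this diff.

/* Editorial sketch, not part of the patch: minimal mirrors of the
 * brw_state_flags / brw_tracked_state shapes implied by the atom
 * definitions in this diff. */
struct example_state_flags {
   unsigned mesa;    /* _NEW_* flags from core Mesa */
   unsigned brw;     /* BRW_NEW_* driver-internal flags */
   unsigned cache;   /* CACHE_NEW_* flags for cached state BOs */
};

struct example_tracked_state {
   struct example_state_flags dirty;
   void (*prepare)(void *brw);   /* validate/reference BOs before emitting */
   void (*emit)(void *brw);      /* write the packets into the batch */
};

/* An atom is re-run whenever any of its dirty bits intersect the currently
 * dirty state; this is the test the prepare/emit loops in
 * brw_state_upload.c use to pick which atoms to execute. */
static int example_check_state(const struct example_state_flags *a,
                               const struct example_state_flags *b)
{
   return (a->mesa & b->mesa) || (a->brw & b->brw) || (a->cache & b->cache);
}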
@@ -124,16 +141,26 @@ dri_bo *brw_cache_data(struct brw_cache *cache, dri_bo **reloc_bufs, GLuint nr_reloc_bufs); -dri_bo *brw_upload_cache( struct brw_cache *cache, - enum brw_cache_id cache_id, - const void *key, - GLuint key_sz, - dri_bo **reloc_bufs, - GLuint nr_reloc_bufs, - const void *data, - GLuint data_sz, - const void *aux, - void *aux_return ); +drm_intel_bo *brw_upload_cache(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_sz, + dri_bo **reloc_bufs, + GLuint nr_reloc_bufs, + const void *data, + GLuint data_sz); + +drm_intel_bo *brw_upload_cache_with_auxdata(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_sz, + dri_bo **reloc_bufs, + GLuint nr_reloc_bufs, + const void *data, + GLuint data_sz, + const void *aux, + GLuint aux_sz, + void *aux_return); dri_bo *brw_search_cache( struct brw_cache *cache, enum brw_cache_id cache_id, @@ -151,7 +178,7 @@ void brw_state_cache_bo_delete(struct brw_cache *cache, dri_bo *bo); /*********************************************************************** * brw_state_batch.c */ -#define BRW_BATCH_STRUCT(brw, s) intel_batchbuffer_data( brw->intel.batch, (s), sizeof(*(s)), IGNORE_CLIPRECTS) +#define BRW_BATCH_STRUCT(brw, s) intel_batchbuffer_data( brw->intel.batch, (s), sizeof(*(s))) #define BRW_CACHED_BATCH_STRUCT(brw, s) brw_cached_batch_struct( brw, (s), sizeof(*(s)) ) GLboolean brw_cached_batch_struct( struct brw_context *brw, diff --git a/i965/brw_state_batch.c b/i965/brw_state_batch.c index 7821898..3901941 100644 --- a/i965/brw_state_batch.c +++ b/i965/brw_state_batch.c @@ -48,7 +48,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw, struct header *newheader = (struct header *)data; if (brw->emit_state_always) { - intel_batchbuffer_data(brw->intel.batch, data, sz, IGNORE_CLIPRECTS); + intel_batchbuffer_data(brw->intel.batch, data, sz); return GL_TRUE; } @@ -57,8 +57,8 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw, if (item->sz == sz && memcmp(item->header, newheader, sz) == 0) return GL_FALSE; if (item->sz != sz) { - _mesa_free(item->header); - item->header = _mesa_malloc(sz); + free(item->header); + item->header = malloc(sz); item->sz = sz; } goto emit; @@ -68,14 +68,14 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw, assert(!item); item = CALLOC_STRUCT(brw_cached_batch_item); - item->header = _mesa_malloc(sz); + item->header = malloc(sz); item->sz = sz; item->next = brw->cached_batch_items; brw->cached_batch_items = item; emit: memcpy(item->header, newheader, sz); - intel_batchbuffer_data(brw->intel.batch, data, sz, IGNORE_CLIPRECTS); + intel_batchbuffer_data(brw->intel.batch, data, sz); return GL_TRUE; } diff --git a/i965/brw_state_cache.c b/i965/brw_state_cache.c index e4c9ba7..c08cb45 100644 --- a/i965/brw_state_cache.c +++ b/i965/brw_state_cache.c @@ -59,37 +59,27 @@ #include "main/imports.h" #include "brw_state.h" #include "intel_batchbuffer.h" - -/* XXX: Fixme - have to include these to get the sizes of the prog_key - * structs: - */ #include "brw_wm.h" -#include "brw_vs.h" -#include "brw_clip.h" -#include "brw_sf.h" -#include "brw_gs.h" static GLuint -hash_key(const void *key, GLuint key_size, - dri_bo **reloc_bufs, GLuint nr_reloc_bufs) +hash_key(struct brw_cache_item *item) { - GLuint *ikey = (GLuint *)key; - GLuint hash = 0, i; + GLuint *ikey = (GLuint *)item->key; + GLuint hash = item->cache_id, i; - assert(key_size % 4 == 0); + assert(item->key_size % 4 == 0); /* I'm sure this can be improved on: */ - for (i = 0; 
i < key_size/4; i++) { + for (i = 0; i < item->key_size/4; i++) { hash ^= ikey[i]; hash = (hash << 5) | (hash >> 27); } /* Include the BO pointers as key data as well */ - ikey = (GLuint *)reloc_bufs; - key_size = nr_reloc_bufs * sizeof(dri_bo *); - for (i = 0; i < key_size/4; i++) { + ikey = (GLuint *)item->reloc_bufs; + for (i = 0; i < item->nr_reloc_bufs * sizeof(drm_intel_bo *) / 4; i++) { hash ^= ikey[i]; hash = (hash << 5) | (hash >> 27); } @@ -114,11 +104,22 @@ update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id, cache->brw->state.dirty.cache |= 1 << cache_id; } +static int +brw_cache_item_equals(const struct brw_cache_item *a, + const struct brw_cache_item *b) +{ + return a->cache_id == b->cache_id && + a->hash == b->hash && + a->key_size == b->key_size && + (memcmp(a->key, b->key, a->key_size) == 0) && + a->nr_reloc_bufs == b->nr_reloc_bufs && + (memcmp(a->reloc_bufs, b->reloc_bufs, + a->nr_reloc_bufs * sizeof(dri_bo *)) == 0); +} static struct brw_cache_item * -search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, - GLuint hash, const void *key, GLuint key_size, - dri_bo **reloc_bufs, GLuint nr_reloc_bufs) +search_cache(struct brw_cache *cache, GLuint hash, + struct brw_cache_item *lookup) { struct brw_cache_item *c; @@ -133,13 +134,7 @@ search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, #endif for (c = cache->items[hash % cache->size]; c; c = c->next) { - if (c->cache_id == cache_id && - c->hash == hash && - c->key_size == key_size && - memcmp(c->key, key, key_size) == 0 && - c->nr_reloc_bufs == nr_reloc_bufs && - memcmp(c->reloc_bufs, reloc_bufs, - nr_reloc_bufs * sizeof(dri_bo *)) == 0) + if (brw_cache_item_equals(lookup, c)) return c; } @@ -155,7 +150,7 @@ rehash(struct brw_cache *cache) GLuint size, i; size = cache->size * 3; - items = (struct brw_cache_item**) _mesa_calloc(size * sizeof(*items)); + items = (struct brw_cache_item**) calloc(1, size * sizeof(*items)); for (i = 0; i < cache->size; i++) for (c = cache->items[i]; c; c = next) { @@ -182,10 +177,18 @@ brw_search_cache(struct brw_cache *cache, void *aux_return) { struct brw_cache_item *item; - GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs); + struct brw_cache_item lookup; + GLuint hash; - item = search_cache(cache, cache_id, hash, key, key_size, - reloc_bufs, nr_reloc_bufs); + lookup.cache_id = cache_id; + lookup.key = key; + lookup.key_size = key_size; + lookup.reloc_bufs = reloc_bufs; + lookup.nr_reloc_bufs = nr_reloc_bufs; + hash = hash_key(&lookup); + lookup.hash = hash; + + item = search_cache(cache, hash, &lookup); if (item == NULL) return NULL; @@ -200,48 +203,52 @@ brw_search_cache(struct brw_cache *cache, } -dri_bo * -brw_upload_cache( struct brw_cache *cache, - enum brw_cache_id cache_id, - const void *key, - GLuint key_size, - dri_bo **reloc_bufs, - GLuint nr_reloc_bufs, - const void *data, - GLuint data_size, - const void *aux, - void *aux_return ) +drm_intel_bo * +brw_upload_cache_with_auxdata(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_size, + dri_bo **reloc_bufs, + GLuint nr_reloc_bufs, + const void *data, + GLuint data_size, + const void *aux, + GLuint aux_size, + void *aux_return) { struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item); - GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs); + GLuint hash; GLuint relocs_size = nr_reloc_bufs * sizeof(dri_bo *); - GLuint aux_size = cache->aux_size[cache_id]; void *tmp; dri_bo *bo; int i; + item->cache_id = cache_id; + item->key = 
key; + item->key_size = key_size; + item->reloc_bufs = reloc_bufs; + item->nr_reloc_bufs = nr_reloc_bufs; + hash = hash_key(item); + item->hash = hash; + /* Create the buffer object to contain the data */ bo = dri_bo_alloc(cache->brw->intel.bufmgr, cache->name[cache_id], data_size, 1 << 6); /* Set up the memory containing the key, aux_data, and reloc_bufs */ - tmp = _mesa_malloc(key_size + aux_size + relocs_size); + tmp = malloc(key_size + aux_size + relocs_size); memcpy(tmp, key, key_size); - memcpy(tmp + key_size, aux, cache->aux_size[cache_id]); + memcpy(tmp + key_size, aux, aux_size); memcpy(tmp + key_size + aux_size, reloc_bufs, relocs_size); for (i = 0; i < nr_reloc_bufs; i++) { if (reloc_bufs[i] != NULL) dri_bo_reference(reloc_bufs[i]); } - item->cache_id = cache_id; item->key = tmp; - item->hash = hash; - item->key_size = key_size; item->reloc_bufs = tmp + key_size + aux_size; - item->nr_reloc_bufs = nr_reloc_bufs; item->bo = bo; dri_bo_reference(bo); @@ -255,12 +262,11 @@ brw_upload_cache( struct brw_cache *cache, cache->n_items++; if (aux_return) { - assert(cache->aux_size[cache_id]); *(void **)aux_return = (void *)((char *)item->key + item->key_size); } if (INTEL_DEBUG & DEBUG_STATE) - _mesa_printf("upload %s: %d bytes to cache id %d\n", + printf("upload %s: %d bytes to cache id %d\n", cache->name[cache_id], data_size, cache_id); @@ -272,6 +278,23 @@ brw_upload_cache( struct brw_cache *cache, return bo; } +drm_intel_bo * +brw_upload_cache(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_size, + dri_bo **reloc_bufs, + GLuint nr_reloc_bufs, + const void *data, + GLuint data_size) +{ + return brw_upload_cache_with_auxdata(cache, cache_id, + key, key_size, + reloc_bufs, nr_reloc_bufs, + data, data_size, + NULL, 0, + NULL); +} /** * Wrapper around brw_cache_data_sz using the cache_id's canonical key size. 
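The reworked hash_key() above now hashes a whole brw_cache_item: the cache id seeds the hash so that different cache ids sharing one table cannot collide on identical key bytes, and the key words plus the relocation BO pointers are folded in with a rotate-xor step. A self-contained sketch of that hashing scheme, assuming nothing beyond the code shown above (example_hash is an illustrative name, not a driver function):

#include <stdint.h>
#include <stddef.h>

/* Editorial sketch of the rotate-xor hash used by brw_state_cache.c after
 * this patch: seed with the cache id, then fold in the key words and the
 * relocation-BO pointer bytes one 32-bit word at a time. */
static uint32_t example_hash(uint32_t cache_id,
                             const uint32_t *key, size_t key_size,
                             void **reloc_bufs, size_t nr_reloc_bufs)
{
   uint32_t hash = cache_id;
   size_t i;

   /* key_size is in bytes and must be a multiple of 4, as asserted in the
    * real code. */
   for (i = 0; i < key_size / 4; i++) {
      hash ^= key[i];
      hash = (hash << 5) | (hash >> 27);
   }

   /* The BO pointers are part of the key as well, hashed as raw words. */
   for (i = 0; i < nr_reloc_bufs * sizeof(void *) / 4; i++) {
      hash ^= ((uint32_t *)reloc_bufs)[i];
      hash = (hash << 5) | (hash >> 27);
   }

   return hash;
}

Since the cache id also participates in brw_cache_item_equals(), lookups for one cache id never match an item uploaded under another, even when the key bytes and relocation BOs are identical.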
@@ -292,11 +315,18 @@ brw_cache_data(struct brw_cache *cache, GLuint nr_reloc_bufs) { dri_bo *bo; - struct brw_cache_item *item; - GLuint hash = hash_key(data, data_size, reloc_bufs, nr_reloc_bufs); - - item = search_cache(cache, cache_id, hash, data, data_size, - reloc_bufs, nr_reloc_bufs); + struct brw_cache_item *item, lookup; + GLuint hash; + + lookup.cache_id = cache_id; + lookup.key = data; + lookup.key_size = data_size; + lookup.reloc_bufs = reloc_bufs; + lookup.nr_reloc_bufs = nr_reloc_bufs; + hash = hash_key(&lookup); + lookup.hash = hash; + + item = search_cache(cache, hash, &lookup); if (item) { update_cache_last(cache, cache_id, item->bo); dri_bo_reference(item->bo); @@ -306,8 +336,7 @@ brw_cache_data(struct brw_cache *cache, bo = brw_upload_cache(cache, cache_id, data, data_size, reloc_bufs, nr_reloc_bufs, - data, data_size, - NULL, NULL); + data, data_size); return bo; } @@ -321,11 +350,9 @@ enum pool_type { static void brw_init_cache_id(struct brw_cache *cache, const char *name, - enum brw_cache_id id, - GLuint aux_size) + enum brw_cache_id id) { cache->name[id] = strdup(name); - cache->aux_size[id] = aux_size; } @@ -339,82 +366,31 @@ brw_init_non_surface_cache(struct brw_context *brw) cache->size = 7; cache->n_items = 0; cache->items = (struct brw_cache_item **) - _mesa_calloc(cache->size * sizeof(struct brw_cache_item)); - - brw_init_cache_id(cache, - "CC_VP", - BRW_CC_VP, - 0); - - brw_init_cache_id(cache, - "CC_UNIT", - BRW_CC_UNIT, - 0); - - brw_init_cache_id(cache, - "WM_PROG", - BRW_WM_PROG, - sizeof(struct brw_wm_prog_data)); - - brw_init_cache_id(cache, - "SAMPLER_DEFAULT_COLOR", - BRW_SAMPLER_DEFAULT_COLOR, - 0); - - brw_init_cache_id(cache, - "SAMPLER", - BRW_SAMPLER, - 0); - - brw_init_cache_id(cache, - "WM_UNIT", - BRW_WM_UNIT, - 0); - - brw_init_cache_id(cache, - "SF_PROG", - BRW_SF_PROG, - sizeof(struct brw_sf_prog_data)); - - brw_init_cache_id(cache, - "SF_VP", - BRW_SF_VP, - 0); - - brw_init_cache_id(cache, - "SF_UNIT", - BRW_SF_UNIT, - 0); - - brw_init_cache_id(cache, - "VS_UNIT", - BRW_VS_UNIT, - 0); - - brw_init_cache_id(cache, - "VS_PROG", - BRW_VS_PROG, - sizeof(struct brw_vs_prog_data)); - - brw_init_cache_id(cache, - "CLIP_UNIT", - BRW_CLIP_UNIT, - 0); - - brw_init_cache_id(cache, - "CLIP_PROG", - BRW_CLIP_PROG, - sizeof(struct brw_clip_prog_data)); - - brw_init_cache_id(cache, - "GS_UNIT", - BRW_GS_UNIT, - 0); - - brw_init_cache_id(cache, - "GS_PROG", - BRW_GS_PROG, - sizeof(struct brw_gs_prog_data)); + calloc(1, cache->size * sizeof(struct brw_cache_item)); + + brw_init_cache_id(cache, "CC_VP", BRW_CC_VP); + brw_init_cache_id(cache, "CC_UNIT", BRW_CC_UNIT); + brw_init_cache_id(cache, "WM_PROG", BRW_WM_PROG); + brw_init_cache_id(cache, "SAMPLER_DEFAULT_COLOR", BRW_SAMPLER_DEFAULT_COLOR); + brw_init_cache_id(cache, "SAMPLER", BRW_SAMPLER); + brw_init_cache_id(cache, "WM_UNIT", BRW_WM_UNIT); + brw_init_cache_id(cache, "SF_PROG", BRW_SF_PROG); + brw_init_cache_id(cache, "SF_VP", BRW_SF_VP); + + brw_init_cache_id(cache, "SF_UNIT", BRW_SF_UNIT); + + brw_init_cache_id(cache, "VS_UNIT", BRW_VS_UNIT); + + brw_init_cache_id(cache, "VS_PROG", BRW_VS_PROG); + + brw_init_cache_id(cache, "CLIP_UNIT", BRW_CLIP_UNIT); + + brw_init_cache_id(cache, "CLIP_PROG", BRW_CLIP_PROG); + + brw_init_cache_id(cache, "GS_UNIT", BRW_GS_UNIT); + + brw_init_cache_id(cache, "GS_PROG", BRW_GS_PROG); + brw_init_cache_id(cache, "BLEND_STATE", BRW_BLEND_STATE); } @@ -428,17 +404,10 @@ brw_init_surface_cache(struct brw_context *brw) cache->size = 7; cache->n_items = 0; cache->items = 
(struct brw_cache_item **) - _mesa_calloc(cache->size * sizeof(struct brw_cache_item)); - - brw_init_cache_id(cache, - "SS_SURFACE", - BRW_SS_SURFACE, - 0); + calloc(1, cache->size * sizeof(struct brw_cache_item)); - brw_init_cache_id(cache, - "SS_SURF_BIND", - BRW_SS_SURF_BIND, - 0); + brw_init_cache_id(cache, "SS_SURFACE", BRW_SS_SURFACE); + brw_init_cache_id(cache, "SS_SURF_BIND", BRW_SS_SURF_BIND); } @@ -457,7 +426,7 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) GLuint i; if (INTEL_DEBUG & DEBUG_STATE) - _mesa_printf("%s\n", __FUNCTION__); + printf("%s\n", __FUNCTION__); for (i = 0; i < cache->size; i++) { for (c = cache->items[i]; c; c = next) { @@ -476,7 +445,7 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) cache->n_items = 0; if (brw->curbe.last_buf) { - _mesa_free(brw->curbe.last_buf); + free(brw->curbe.last_buf); brw->curbe.last_buf = NULL; } @@ -497,7 +466,7 @@ brw_state_cache_bo_delete(struct brw_cache *cache, dri_bo *bo) GLuint i; if (INTEL_DEBUG & DEBUG_STATE) - _mesa_printf("%s\n", __FUNCTION__); + printf("%s\n", __FUNCTION__); for (i = 0; i < cache->size; i++) { for (prev = &cache->items[i]; *prev;) { @@ -525,7 +494,7 @@ void brw_state_cache_check_size(struct brw_context *brw) { if (INTEL_DEBUG & DEBUG_STATE) - _mesa_printf("%s (n_items=%d)\n", __FUNCTION__, brw->cache.n_items); + printf("%s (n_items=%d)\n", __FUNCTION__, brw->cache.n_items); /* un-tuned guess. We've got around 20 state objects for a total of around * 32k, so 1000 of them is around 1.5MB. @@ -544,7 +513,7 @@ brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache) GLuint i; if (INTEL_DEBUG & DEBUG_STATE) - _mesa_printf("%s\n", __FUNCTION__); + printf("%s\n", __FUNCTION__); brw_clear_cache(brw, cache); for (i = 0; i < BRW_MAX_CACHE; i++) { diff --git a/i965/brw_state_dump.c b/i965/brw_state_dump.c index e94fa7d..020ac52 100644 --- a/i965/brw_state_dump.c +++ b/i965/brw_state_dump.c @@ -28,7 +28,6 @@ #include "main/mtypes.h" #include "brw_context.h" -#include "brw_state.h" #include "brw_defines.h" /** diff --git a/i965/brw_state_upload.c b/i965/brw_state_upload.c index af8dfb4..9e54f29 100644 --- a/i965/brw_state_upload.c +++ b/i965/brw_state_upload.c @@ -35,6 +35,7 @@ #include "brw_state.h" #include "intel_batchbuffer.h" #include "intel_buffers.h" +#include "intel_chipset.h" /* This is used to initialize brw->state.atoms[]. We could use this * list directly except for a single atom, brw_constant_buffer, which @@ -42,7 +43,7 @@ * current fragment and vertex programs, and so cannot be a static * value. 
*/ -const struct brw_tracked_state *atoms[] = +static const struct brw_tracked_state *gen4_atoms[] = { &brw_check_fallback, @@ -101,6 +102,63 @@ const struct brw_tracked_state *atoms[] = &brw_constant_buffer }; +const struct brw_tracked_state *gen6_atoms[] = +{ + &brw_check_fallback, + + &brw_wm_input_sizes, + &brw_vs_prog, + &brw_gs_prog, + &brw_wm_prog, + + &gen6_clip_vp, + &gen6_sf_vp, + &gen6_cc_vp, + + /* Command packets: */ + &brw_invarient_state, + + &gen6_viewport_state, /* must do after *_vp stages */ + + &gen6_urb, + &gen6_blend_state, /* must do before cc unit */ + &gen6_color_calc_state, /* must do before cc unit */ + &gen6_depth_stencil_state, /* must do before cc unit */ + &gen6_cc_state_pointers, + + &brw_vs_surfaces, /* must do before unit */ + &brw_wm_constant_surface, /* must do before wm surfaces/bind bo */ + &brw_wm_surfaces, /* must do before samplers and unit */ + + &brw_wm_samplers, + &gen6_sampler_state, + + &gen6_vs_state, + &gen6_gs_state, + &gen6_clip_state, + &gen6_sf_state, + &gen6_wm_state, + + &gen6_scissor_state, + + &brw_state_base_address, + + &gen6_binding_table_pointers, + + &brw_depthbuffer, + + &brw_polygon_stipple, + &brw_polygon_stipple_offset, + + &brw_line_stipple, + &brw_aa_line_parameters, + + &brw_drawing_rect, + + &brw_indices, + &brw_index_buffer, + &brw_vertices, +}; void brw_init_state( struct brw_context *brw ) { @@ -208,7 +266,6 @@ static struct dirty_bit_map brw_bits[] = { DEFINE_BIT(BRW_NEW_CONTEXT), DEFINE_BIT(BRW_NEW_WM_INPUT_DIMENSIONS), DEFINE_BIT(BRW_NEW_PSP), - DEFINE_BIT(BRW_NEW_FENCE), DEFINE_BIT(BRW_NEW_INDICES), DEFINE_BIT(BRW_NEW_INDEX_BUFFER), DEFINE_BIT(BRW_NEW_VERTICES), @@ -218,6 +275,7 @@ static struct dirty_bit_map brw_bits[] = { }; static struct dirty_bit_map cache_bits[] = { + DEFINE_BIT(CACHE_NEW_BLEND_STATE), DEFINE_BIT(CACHE_NEW_CC_VP), DEFINE_BIT(CACHE_NEW_CC_UNIT), DEFINE_BIT(CACHE_NEW_WM_PROG), @@ -277,6 +335,8 @@ void brw_validate_state( struct brw_context *brw ) struct intel_context *intel = &brw->intel; struct brw_state_flags *state = &brw->state.dirty; GLuint i; + const struct brw_tracked_state **atoms; + int num_atoms; brw_clear_validated_bos(brw); @@ -285,6 +345,14 @@ void brw_validate_state( struct brw_context *brw ) brw_add_validated_bo(brw, intel->batch->buf); + if (IS_GEN6(intel->intelScreen->deviceID)) { + atoms = gen6_atoms; + num_atoms = ARRAY_SIZE(gen6_atoms); + } else { + atoms = gen4_atoms; + num_atoms = ARRAY_SIZE(gen4_atoms); + } + if (brw->emit_state_always) { state->mesa |= ~0; state->brw |= ~0; @@ -312,7 +380,7 @@ void brw_validate_state( struct brw_context *brw ) brw->intel.Fallback = GL_FALSE; /* boolean, not bitfield */ /* do prepare stage for all atoms */ - for (i = 0; i < Elements(atoms); i++) { + for (i = 0; i < num_atoms; i++) { const struct brw_tracked_state *atom = atoms[i]; if (brw->intel.Fallback) @@ -344,9 +412,20 @@ void brw_validate_state( struct brw_context *brw ) void brw_upload_state(struct brw_context *brw) { + struct intel_context *intel = &brw->intel; struct brw_state_flags *state = &brw->state.dirty; int i; static int dirty_count = 0; + const struct brw_tracked_state **atoms; + int num_atoms; + + if (IS_GEN6(intel->intelScreen->deviceID)) { + atoms = gen6_atoms; + num_atoms = ARRAY_SIZE(gen6_atoms); + } else { + atoms = gen4_atoms; + num_atoms = ARRAY_SIZE(gen4_atoms); + } brw_clear_validated_bos(brw); @@ -356,10 +435,10 @@ void brw_upload_state(struct brw_context *brw) * state atoms are ordered correctly in the list. 
*/ struct brw_state_flags examined, prev; - _mesa_memset(&examined, 0, sizeof(examined)); + memset(&examined, 0, sizeof(examined)); prev = *state; - for (i = 0; i < Elements(atoms); i++) { + for (i = 0; i < num_atoms; i++) { const struct brw_tracked_state *atom = atoms[i]; struct brw_state_flags generated; @@ -388,7 +467,7 @@ void brw_upload_state(struct brw_context *brw) } } else { - for (i = 0; i < Elements(atoms); i++) { + for (i = 0; i < num_atoms; i++) { const struct brw_tracked_state *atom = atoms[i]; if (brw->intel.Fallback) diff --git a/i965/brw_structs.h b/i965/brw_structs.h index 66d4127..3c2adfc 100644 --- a/i965/brw_structs.h +++ b/i965/brw_structs.h @@ -658,7 +658,105 @@ struct brw_clip_unit_state GLfloat viewport_ymax; }; +struct gen6_blend_state +{ + struct { + GLuint dest_blend_factor:5; + GLuint source_blend_factor:5; + GLuint pad3:1; + GLuint blend_func:3; + GLuint pad2:1; + GLuint ia_dest_blend_factor:5; + GLuint ia_source_blend_factor:5; + GLuint pad1:1; + GLuint ia_blend_func:3; + GLuint pad0:1; + GLuint ia_blend_enable:1; + GLuint blend_enable:1; + } blend0; + + struct { + GLuint post_blend_clamp_enable:1; + GLuint pre_blend_clamp_enable:1; + GLuint clamp_range:2; + GLuint pad0:4; + GLuint x_dither_offset:2; + GLuint y_dither_offset:2; + GLuint dither_enable:1; + GLuint alpha_test_func:3; + GLuint alpha_test_enable:1; + GLuint pad1:1; + GLuint logic_op_func:4; + GLuint logic_op_enable:1; + GLuint pad2:1; + GLuint write_disable_b:1; + GLuint write_disable_g:1; + GLuint write_disable_r:1; + GLuint write_disable_a:1; + GLuint pad3:1; + GLuint alpha_to_coverage_dither:1; + GLuint alpha_to_one:1; + GLuint alpha_to_coverage:1; + } blend1; +}; + +struct gen6_color_calc_state +{ + struct { + GLuint alpha_test_format:1; + GLuint pad0:14; + GLuint round_disable:1; + GLuint bf_stencil_ref:8; + GLuint stencil_ref:8; + } cc0; + union { + GLfloat alpha_ref_f; + struct { + GLuint ui:8; + GLuint pad0:24; + } alpha_ref_fi; + } cc1; + + GLfloat constant_r; + GLfloat constant_g; + GLfloat constant_b; + GLfloat constant_a; +}; + +struct gen6_depth_stencil_state +{ + struct { + GLuint pad0:3; + GLuint bf_stencil_pass_depth_pass_op:3; + GLuint bf_stencil_pass_depth_fail_op:3; + GLuint bf_stencil_fail_op:3; + GLuint bf_stencil_func:3; + GLuint bf_stencil_enable:1; + GLuint pad1:2; + GLuint stencil_write_enable:1; + GLuint stencil_pass_depth_pass_op:3; + GLuint stencil_pass_depth_fail_op:3; + GLuint stencil_fail_op:3; + GLuint stencil_func:3; + GLuint stencil_enable:1; + } ds0; + + struct { + GLuint bf_stencil_write_mask:8; + GLuint bf_stencil_test_mask:8; + GLuint stencil_write_mask:8; + GLuint stencil_test_mask:8; + } ds1; + + struct { + GLuint pad0:25; + GLuint depth_write_enable:1; + GLuint depth_test_func:3; + GLuint pad1:1; + GLuint depth_test_enable:1; + } ds2; +}; struct brw_cc_unit_state { @@ -752,8 +850,6 @@ struct brw_cc_unit_state } cc7; }; - - struct brw_sf_unit_state { struct thread0 thread0; @@ -813,6 +909,11 @@ struct brw_sf_unit_state }; +struct gen6_scissor_state +{ + GLuint ymin, xmin; + GLuint ymax, xmax; +}; struct brw_gs_unit_state { @@ -1043,6 +1144,15 @@ struct brw_sf_viewport } scissor; }; +struct gen6_sf_viewport { + GLfloat m00; + GLfloat m11; + GLfloat m22; + GLfloat m30; + GLfloat m31; + GLfloat m32; +}; + /* Documented in the subsystem/shared-functions/sampler chapter... 
*/ struct brw_surface_state diff --git a/i965/brw_tex_layout.c b/i965/brw_tex_layout.c index e59e52e..09edfd8 100644 --- a/i965/brw_tex_layout.c +++ b/i965/brw_tex_layout.c @@ -36,7 +36,6 @@ #include "intel_tex_layout.h" #include "intel_context.h" #include "main/macros.h" -#include "intel_chipset.h" #define FILE_DEBUG_FLAG DEBUG_MIPTREE @@ -49,7 +48,7 @@ GLboolean brw_miptree_layout(struct intel_context *intel, switch (mt->target) { case GL_TEXTURE_CUBE_MAP: - if (IS_IGDNG(intel->intelScreen->deviceID)) { + if (intel->is_ironlake) { GLuint align_h = 2, align_w = 4; GLuint level; GLuint x = 0; diff --git a/i965/brw_urb.c b/i965/brw_urb.c index 8c6f435..4f6b900 100644 --- a/i965/brw_urb.c +++ b/i965/brw_urb.c @@ -105,7 +105,8 @@ static GLboolean check_urb_layout( struct brw_context *brw ) brw->urb.sf_start = brw->urb.clip_start + brw->urb.nr_clip_entries * brw->urb.vsize; brw->urb.cs_start = brw->urb.sf_start + brw->urb.nr_sf_entries * brw->urb.sfsize; - return brw->urb.cs_start + brw->urb.nr_cs_entries * brw->urb.csize <= URB_SIZES(brw); + return brw->urb.cs_start + brw->urb.nr_cs_entries * + brw->urb.csize <= brw->urb.size; } /* Most minimal update, forces re-emit of URB fence packet after GS @@ -113,6 +114,7 @@ static GLboolean check_urb_layout( struct brw_context *brw ) */ static void recalculate_urb_fence( struct brw_context *brw ) { + struct intel_context *intel = &brw->intel; GLuint csize = brw->curbe.total_size; GLuint vsize = brw->vs.prog_data->urb_entry_size; GLuint sfsize = brw->sf.prog_data->urb_entry_size; @@ -146,7 +148,7 @@ static void recalculate_urb_fence( struct brw_context *brw ) brw->urb.constrained = 0; - if (BRW_IS_IGDNG(brw)) { + if (intel->is_ironlake) { brw->urb.nr_vs_entries = 128; brw->urb.nr_sf_entries = 48; if (check_urb_layout(brw)) { @@ -156,7 +158,7 @@ static void recalculate_urb_fence( struct brw_context *brw ) brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries; brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries; } - } else if (BRW_IS_G4X(brw)) { + } else if (intel->is_g4x) { brw->urb.nr_vs_entries = 64; if (check_urb_layout(brw)) { goto done; @@ -184,23 +186,23 @@ static void recalculate_urb_fence( struct brw_context *brw ) * entries and the values for minimum nr of entries * provided above. */ - _mesa_printf("couldn't calculate URB layout!\n"); + printf("couldn't calculate URB layout!\n"); exit(1); } if (INTEL_DEBUG & (DEBUG_URB|DEBUG_FALLBACKS)) - _mesa_printf("URB CONSTRAINED\n"); + printf("URB CONSTRAINED\n"); } done: if (INTEL_DEBUG & DEBUG_URB) - _mesa_printf("URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n", + printf("URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. 
%d\n", brw->urb.vs_start, brw->urb.gs_start, brw->urb.clip_start, brw->urb.sf_start, brw->urb.cs_start, - URB_SIZES(brw)); + brw->urb.size); brw->state.dirty.brw |= BRW_NEW_URB_FENCE; } @@ -244,7 +246,7 @@ void brw_upload_urb_fence(struct brw_context *brw) uf.bits0.gs_fence = brw->urb.clip_start; uf.bits0.clp_fence = brw->urb.sf_start; uf.bits1.sf_fence = brw->urb.cs_start; - uf.bits1.cs_fence = URB_SIZES(brw); + uf.bits1.cs_fence = brw->urb.size; BRW_BATCH_STRUCT(brw, &uf); } diff --git a/i965/brw_vs.c b/i965/brw_vs.c index fd055e2..44b085e 100644 --- a/i965/brw_vs.c +++ b/i965/brw_vs.c @@ -35,6 +35,7 @@ #include "brw_util.h" #include "brw_state.h" #include "shader/prog_print.h" +#include "shader/prog_parameter.h" @@ -42,9 +43,11 @@ static void do_vs_prog( struct brw_context *brw, struct brw_vertex_program *vp, struct brw_vs_prog_key *key ) { + GLcontext *ctx = &brw->intel.ctx; GLuint program_size; const GLuint *program; struct brw_vs_compile c; + int aux_size; memset(&c, 0, sizeof(c)); memcpy(&c.key, key, sizeof(*key)); @@ -73,13 +76,27 @@ static void do_vs_prog( struct brw_context *brw, */ program = brw_get_program(&c.func, &program_size); + /* We upload from &c.prog_data including the constant_map assuming + * they're packed together. It would be nice to have a + * compile-time assert macro here. + */ + assert(c.constant_map == (int8_t *)&c.prog_data + + sizeof(c.prog_data)); + assert(ctx->Const.VertexProgram.MaxNativeParameters == + ARRAY_SIZE(c.constant_map)); + + aux_size = sizeof(c.prog_data); + if (c.vp->use_const_buffer) + aux_size += c.vp->program.Base.Parameters->NumParameters; + dri_bo_unreference(brw->vs.prog_bo); - brw->vs.prog_bo = brw_upload_cache( &brw->cache, BRW_VS_PROG, - &c.key, sizeof(c.key), - NULL, 0, - program, program_size, - &c.prog_data, - &brw->vs.prog_data ); + brw->vs.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_VS_PROG, + &c.key, sizeof(c.key), + NULL, 0, + program, program_size, + &c.prog_data, + aux_size, + &brw->vs.prog_data); } @@ -109,6 +126,8 @@ static void brw_upload_vs_prog(struct brw_context *brw) &brw->vs.prog_data); if (brw->vs.prog_bo == NULL) do_vs_prog(brw, vp, &key); + brw->vs.constant_map = ((int8_t *)brw->vs.prog_data + + sizeof(*brw->vs.prog_data)); } diff --git a/i965/brw_vs.h b/i965/brw_vs.h index 4a59136..95e0501 100644 --- a/i965/brw_vs.h +++ b/i965/brw_vs.h @@ -51,6 +51,7 @@ struct brw_vs_compile { struct brw_compile func; struct brw_vs_prog_key key; struct brw_vs_prog_data prog_data; + int8_t constant_map[1024]; struct brw_vertex_program *vp; @@ -81,6 +82,8 @@ struct brw_vs_compile { GLint index; struct brw_reg reg; } current_const[3]; + + GLboolean needs_stack; }; void brw_vs_emit( struct brw_vs_compile *c ); diff --git a/i965/brw_vs_emit.c b/i965/brw_vs_emit.c index 27aac8b..a7c4b58 100644 --- a/i965/brw_vs_emit.c +++ b/i965/brw_vs_emit.c @@ -67,6 +67,7 @@ static void release_tmps( struct brw_vs_compile *c ) */ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) { + struct intel_context *intel = &c->func.brw->intel; GLuint i, reg = 0, mrf; int attributes_in_vue; @@ -103,9 +104,47 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) /* Vertex program parameters from curbe: */ if (c->vp->use_const_buffer) { - /* get constants from a real constant buffer */ - c->prog_data.curb_read_length = 0; - c->prog_data.nr_params = 4; /* XXX 0 causes a bug elsewhere... 
*/ + int max_constant = BRW_MAX_GRF - 20 - c->vp->program.Base.NumTemporaries; + int constant = 0; + + /* We've got more constants than we can load with the push + * mechanism. This is often correlated with reladdr loads where + * we should probably be using a pull mechanism anyway to avoid + * excessive reading. However, the pull mechanism is slow in + * general. So, we try to allocate as many non-reladdr-loaded + * constants through the push buffer as we can before giving up. + */ + memset(c->constant_map, -1, c->vp->program.Base.Parameters->NumParameters); + for (i = 0; + i < c->vp->program.Base.NumInstructions && constant < max_constant; + i++) { + struct prog_instruction *inst = &c->vp->program.Base.Instructions[i]; + int arg; + + for (arg = 0; arg < 3 && constant < max_constant; arg++) { + if ((inst->SrcReg[arg].File != PROGRAM_STATE_VAR && + inst->SrcReg[arg].File != PROGRAM_CONSTANT && + inst->SrcReg[arg].File != PROGRAM_UNIFORM && + inst->SrcReg[arg].File != PROGRAM_ENV_PARAM && + inst->SrcReg[arg].File != PROGRAM_LOCAL_PARAM) || + inst->SrcReg[arg].RelAddr) + continue; + + if (c->constant_map[inst->SrcReg[arg].Index] == -1) { + c->constant_map[inst->SrcReg[arg].Index] = constant++; + } + } + } + + for (i = 0; i < constant; i++) { + c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, + (i%2) * 4), + 0, 4, 1); + } + reg += (constant + 1) / 2; + c->prog_data.curb_read_length = reg - 1; + /* XXX 0 causes a bug elsewhere... */ + c->prog_data.nr_params = MAX2(constant * 4, 4); } else { /* use a section of the GRF for constants */ @@ -141,10 +180,12 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) c->first_output = reg; c->first_overflow_output = 0; - if (BRW_IS_IGDNG(c->func.brw)) - mrf = 8; + if (intel->gen >= 6) + mrf = 6; + else if (intel->is_ironlake) + mrf = 8; else - mrf = 4; + mrf = 4; for (i = 0; i < VERT_RESULT_MAX; i++) { if (c->prog_data.outputs_written & BITFIELD64_BIT(i)) { @@ -213,8 +254,10 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) } } - c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0); - reg += 2; + if (c->needs_stack) { + c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0); + reg += 2; + } /* Some opcodes need an internal temporary: */ @@ -238,17 +281,19 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) */ attributes_in_vue = MAX2(c->nr_outputs, c->nr_inputs); - if (BRW_IS_IGDNG(c->func.brw)) - c->prog_data.urb_entry_size = (attributes_in_vue + 6 + 3) / 4; + if (intel->gen >= 6) + c->prog_data.urb_entry_size = (attributes_in_vue + 4 + 7) / 8; + else if (intel->is_ironlake) + c->prog_data.urb_entry_size = (attributes_in_vue + 6 + 3) / 4; else - c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 3) / 4; + c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 3) / 4; c->prog_data.total_grf = reg; if (INTEL_DEBUG & DEBUG_VS) { - _mesa_printf("%s NumAddrRegs %d\n", __FUNCTION__, c->vp->program.Base.NumAddressRegs); - _mesa_printf("%s NumTemps %d\n", __FUNCTION__, c->vp->program.Base.NumTemporaries); - _mesa_printf("%s reg = %d\n", __FUNCTION__, reg); + printf("%s NumAddrRegs %d\n", __FUNCTION__, c->vp->program.Base.NumAddressRegs); + printf("%s NumTemps %d\n", __FUNCTION__, c->vp->program.Base.NumTemporaries); + printf("%s reg = %d\n", __FUNCTION__, reg); } } @@ -438,9 +483,11 @@ static void emit_math1( struct brw_vs_compile *c, * whether that turns out to be a simulator bug or not: */ struct brw_compile *p = &c->func; + struct intel_context *intel = &p->brw->intel; struct brw_reg tmp = dst; - 
GLboolean need_tmp = (dst.dw1.bits.writemask != 0xf || - dst.file != BRW_GENERAL_REGISTER_FILE); + GLboolean need_tmp = (intel->gen < 6 && + (dst.dw1.bits.writemask != 0xf || + dst.file != BRW_GENERAL_REGISTER_FILE)); if (need_tmp) tmp = get_tmp(c); @@ -469,9 +516,11 @@ static void emit_math2( struct brw_vs_compile *c, GLuint precision) { struct brw_compile *p = &c->func; + struct intel_context *intel = &p->brw->intel; struct brw_reg tmp = dst; - GLboolean need_tmp = (dst.dw1.bits.writemask != 0xf || - dst.file != BRW_GENERAL_REGISTER_FILE); + GLboolean need_tmp = (intel->gen < 6 && + (dst.dw1.bits.writemask != 0xf || + dst.file != BRW_GENERAL_REGISTER_FILE)); if (need_tmp) tmp = get_tmp(c); @@ -761,15 +810,14 @@ get_constant(struct brw_vs_compile *c, { const struct prog_src_register *src = &inst->SrcReg[argIndex]; struct brw_compile *p = &c->func; - struct brw_reg const_reg; - struct brw_reg const2_reg; - const GLboolean relAddr = src->RelAddr; + struct brw_reg const_reg = c->current_const[argIndex].reg; assert(argIndex < 3); - if (c->current_const[argIndex].index != src->Index || relAddr) { + if (c->current_const[argIndex].index != src->Index) { struct brw_reg addrReg = c->regs[PROGRAM_ADDRESS][0]; + /* Keep track of the last constant loaded in this slot, for reuse. */ c->current_const[argIndex].index = src->Index; #if 0 @@ -778,48 +826,74 @@ get_constant(struct brw_vs_compile *c, #endif /* need to fetch the constant now */ brw_dp_READ_4_vs(p, - c->current_const[argIndex].reg,/* writeback dest */ + const_reg, /* writeback dest */ 0, /* oword */ - relAddr, /* relative indexing? */ + 0, /* relative indexing? */ addrReg, /* address register */ 16 * src->Index, /* byte offset */ SURF_INDEX_VERT_CONST_BUFFER /* binding table index */ ); - - if (relAddr) { - /* second read */ - const2_reg = get_tmp(c); - - /* use upper half of address reg for second read */ - addrReg = stride(addrReg, 0, 4, 0); - addrReg.subnr = 16; - - brw_dp_READ_4_vs(p, - const2_reg, /* writeback dest */ - 1, /* oword */ - relAddr, /* relative indexing? */ - addrReg, /* address register */ - 16 * src->Index, /* byte offset */ - SURF_INDEX_VERT_CONST_BUFFER - ); - } } - const_reg = c->current_const[argIndex].reg; + /* replicate lower four floats into upper half (to get XYZWXYZW) */ + const_reg = stride(const_reg, 0, 4, 0); + const_reg.subnr = 0; - if (relAddr) { - /* merge the two Owords into the constant register */ - /* const_reg[7..4] = const2_reg[7..4] */ - brw_MOV(p, - suboffset(stride(const_reg, 0, 4, 1), 4), - suboffset(stride(const2_reg, 0, 4, 1), 4)); - release_tmp(c, const2_reg); - } - else { - /* replicate lower four floats into upper half (to get XYZWXYZW) */ - const_reg = stride(const_reg, 0, 4, 0); - const_reg.subnr = 0; - } + return const_reg; +} + +static struct brw_reg +get_reladdr_constant(struct brw_vs_compile *c, + const struct prog_instruction *inst, + GLuint argIndex) +{ + const struct prog_src_register *src = &inst->SrcReg[argIndex]; + struct brw_compile *p = &c->func; + struct brw_reg const_reg = c->current_const[argIndex].reg; + struct brw_reg const2_reg; + struct brw_reg addrReg = c->regs[PROGRAM_ADDRESS][0]; + + assert(argIndex < 3); + + /* Can't reuse a reladdr constant load. */ + c->current_const[argIndex].index = -1; + + #if 0 + printf(" fetch const[a0.x+%d] for arg %d into reg %d\n", + src->Index, argIndex, c->current_const[argIndex].reg.nr); +#endif + + /* fetch the first vec4 */ + brw_dp_READ_4_vs(p, + const_reg, /* writeback dest */ + 0, /* oword */ + 1, /* relative indexing? 
*/ + addrReg, /* address register */ + 16 * src->Index, /* byte offset */ + SURF_INDEX_VERT_CONST_BUFFER /* binding table index */ + ); + /* second vec4 */ + const2_reg = get_tmp(c); + + /* use upper half of address reg for second read */ + addrReg = stride(addrReg, 0, 4, 0); + addrReg.subnr = 16; + + brw_dp_READ_4_vs(p, + const2_reg, /* writeback dest */ + 1, /* oword */ + 1, /* relative indexing? */ + addrReg, /* address register */ + 16 * src->Index, /* byte offset */ + SURF_INDEX_VERT_CONST_BUFFER + ); + + /* merge the two Owords into the constant register */ + /* const_reg[7..4] = const2_reg[7..4] */ + brw_MOV(p, + suboffset(stride(const_reg, 0, 4, 1), 4), + suboffset(stride(const2_reg, 0, 4, 1), 4)); + release_tmp(c, const2_reg); return const_reg; } @@ -927,7 +1001,13 @@ get_src_reg( struct brw_vs_compile *c, case PROGRAM_ENV_PARAM: case PROGRAM_LOCAL_PARAM: if (c->vp->use_const_buffer) { - return get_constant(c, inst, argIndex); + if (!relAddr && c->constant_map[index] != -1) { + assert(c->regs[PROGRAM_STATE_VAR][c->constant_map[index]].nr != 0); + return c->regs[PROGRAM_STATE_VAR][c->constant_map[index]]; + } else if (relAddr) + return get_reladdr_constant(c, inst, argIndex); + else + return get_constant(c, inst, argIndex); } else if (relAddr) { return deref(c, c->regs[PROGRAM_STATE_VAR][0], index); @@ -1113,11 +1193,13 @@ static void emit_swz( struct brw_vs_compile *c, static void emit_vertex_write( struct brw_vs_compile *c) { struct brw_compile *p = &c->func; + struct brw_context *brw = p->brw; + struct intel_context *intel = &brw->intel; struct brw_reg m0 = brw_message_reg(0); struct brw_reg pos = c->regs[PROGRAM_OUTPUT][VERT_RESULT_HPOS]; struct brw_reg ndc; int eot; - GLuint len_vertext_header = 2; + GLuint len_vertex_header = 2; if (c->key.copy_edgeflag) { brw_MOV(p, @@ -1125,18 +1207,20 @@ static void emit_vertex_write( struct brw_vs_compile *c) get_reg(c, PROGRAM_INPUT, VERT_ATTRIB_EDGEFLAG)); } - /* Build ndc coords */ - ndc = get_tmp(c); - /* ndc = 1.0 / pos.w */ - emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL); - /* ndc.xyz = pos * ndc */ - brw_MUL(p, brw_writemask(ndc, WRITEMASK_XYZ), pos, ndc); + if (intel->gen < 6) { + /* Build ndc coords */ + ndc = get_tmp(c); + /* ndc = 1.0 / pos.w */ + emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL); + /* ndc.xyz = pos * ndc */ + brw_MUL(p, brw_writemask(ndc, WRITEMASK_XYZ), pos, ndc); + } /* Update the header for point size, user clipping flags, and -ve rhw * workaround. */ if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) || - c->key.nr_userclip || BRW_IS_965(p->brw)) + c->key.nr_userclip || brw->has_negative_rhw_bug) { struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); GLuint i; @@ -1167,7 +1251,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) * Later, clipping will detect ucp[6] and ensure the primitive is * clipped against all fixed planes. */ - if (BRW_IS_965(p->brw)) { + if (brw->has_negative_rhw_bug) { brw_CMP(p, vec8(brw_null_reg()), BRW_CONDITIONAL_L, @@ -1193,21 +1277,41 @@ static void emit_vertex_write( struct brw_vs_compile *c) * of zeros followed by two sets of NDC coordinates: */ brw_set_access_mode(p, BRW_ALIGN_1); - brw_MOV(p, offset(m0, 2), ndc); - - if (BRW_IS_IGDNG(p->brw)) { - /* There are 20 DWs (D0-D19) in VUE vertex header on IGDNG */ - brw_MOV(p, offset(m0, 3), pos); /* a portion of vertex header */ - /* m4, m5 contain the distances from vertex to the user clip planeXXX. 
- * Seems it is useless for us. - * m6 is used for aligning, so that the remainder of vertex element is - * reg-aligned. - */ - brw_MOV(p, offset(m0, 7), pos); /* the remainder of vertex element */ - len_vertext_header = 6; + + if (intel->gen >= 6) { + /* There are 16 DWs (D0-D15) in VUE header on Sandybridge: + * dword 0-3 (m1) of the header is indices, point width, clip flags. + * dword 4-7 (m2) is the 4D space position + * dword 8-15 (m3,m4) of the vertex header is the user clip distance. + * m5 is the first vertex data we fill, which is the vertex position. + */ + brw_MOV(p, offset(m0, 2), pos); + brw_MOV(p, offset(m0, 5), pos); + len_vertex_header = 4; + } else if (intel->is_ironlake) { + /* There are 20 DWs (D0-D19) in VUE header on Ironlake: + * dword 0-3 (m1) of the header is indices, point width, clip flags. + * dword 4-7 (m2) is the ndc position (set above) + * dword 8-11 (m3) of the vertex header is the 4D space position + * dword 12-19 (m4,m5) of the vertex header is the user clip distance. + * m6 is a pad so that the vertex element data is aligned + * m7 is the first vertex data we fill, which is the vertex position. + */ + brw_MOV(p, offset(m0, 2), ndc); + brw_MOV(p, offset(m0, 3), pos); + brw_MOV(p, offset(m0, 7), pos); + len_vertex_header = 6; } else { - brw_MOV(p, offset(m0, 3), pos); - len_vertext_header = 2; + /* There are 8 dwords in VUE header pre-Ironlake: + * dword 0-3 (m1) is indices, point width, clip flags. + * dword 4-7 (m2) is ndc position (set above) + * + * dword 8-11 (m3) is the first vertex data, which we always have be the + * vertex position. + */ + brw_MOV(p, offset(m0, 2), ndc); + brw_MOV(p, offset(m0, 3), pos); + len_vertex_header = 2; } eot = (c->first_overflow_output == 0); @@ -1218,7 +1322,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) c->r0, /* src */ 0, /* allocate */ 1, /* used */ - MIN2(c->nr_outputs + 1 + len_vertext_header, (BRW_MAX_MRF-1)), /* msg len */ + MIN2(c->nr_outputs + 1 + len_vertex_header, (BRW_MAX_MRF-1)), /* msg len */ 0, /* response len */ eot, /* eot */ eot, /* writes complete */ @@ -1359,29 +1463,32 @@ void brw_vs_emit(struct brw_vs_compile *c ) #define MAX_LOOP_DEPTH 32 struct brw_compile *p = &c->func; struct brw_context *brw = p->brw; + struct intel_context *intel = &brw->intel; const GLuint nr_insns = c->vp->program.Base.NumInstructions; GLuint insn, if_depth = 0, loop_depth = 0; GLuint end_offset = 0; struct brw_instruction *end_inst, *last_inst; - struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH]; + struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH] = { 0 }; const struct brw_indirect stack_index = brw_indirect(0, 0); GLuint index; GLuint file; if (INTEL_DEBUG & DEBUG_VS) { - _mesa_printf("vs-mesa:\n"); + printf("vs-mesa:\n"); _mesa_print_program(&c->vp->program.Base); - _mesa_printf("\n"); + printf("\n"); } brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_set_access_mode(p, BRW_ALIGN_16); - - /* Message registers can't be read, so copy the output into GRF register - if they are used in source registers */ + for (insn = 0; insn < nr_insns; insn++) { GLuint i; struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn]; + + /* Message registers can't be read, so copy the output into GRF + * register if they are used in source registers + */ for (i = 0; i < 3; i++) { struct prog_src_register *src = &inst->SrcReg[i]; GLuint index = src->Index; @@ -1389,12 +1496,23 @@ void brw_vs_emit(struct brw_vs_compile *c ) if (file == PROGRAM_OUTPUT && index 
!= VERT_RESULT_HPOS) c->output_regs[index].used_in_src = GL_TRUE; } + + switch (inst->Opcode) { + case OPCODE_CAL: + case OPCODE_RET: + c->needs_stack = GL_TRUE; + break; + default: + break; + } } /* Static register allocation */ brw_vs_alloc_regs(c); - brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack)); + + if (c->needs_stack) + brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack)); for (insn = 0; insn < nr_insns; insn++) { @@ -1592,7 +1710,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) loop_depth--; - if (BRW_IS_IGDNG(brw)) + if (intel->is_ironlake) br = 2; inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]); @@ -1708,9 +1826,9 @@ void brw_vs_emit(struct brw_vs_compile *c ) if (INTEL_DEBUG & DEBUG_VS) { int i; - _mesa_printf("vs-native:\n"); + printf("vs-native:\n"); for (i = 0; i < p->nr_insn; i++) brw_disasm(stderr, &p->store[i]); - _mesa_printf("\n"); + printf("\n"); } } diff --git a/i965/brw_vs_state.c b/i965/brw_vs_state.c index 7285466..fd9f2fe 100644 --- a/i965/brw_vs_state.c +++ b/i965/brw_vs_state.c @@ -82,9 +82,9 @@ vs_unit_populate_key(struct brw_context *brw, struct brw_vs_unit_key *key) static dri_bo * vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) { + struct intel_context *intel = &brw->intel; struct brw_vs_unit_state vs; dri_bo *bo; - int chipset_max_threads; memset(&vs, 0, sizeof(vs)); @@ -98,7 +98,7 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) */ vs.thread1.single_program_flow = 0; - if (BRW_IS_IGDNG(brw)) + if (intel->is_ironlake) vs.thread1.binding_table_entry_count = 0; /* hardware requirement */ else vs.thread1.binding_table_entry_count = key->nr_surfaces; @@ -109,7 +109,7 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) vs.thread3.urb_entry_read_offset = 0; vs.thread3.const_urb_entry_read_offset = key->curbe_offset * 2; - if (BRW_IS_IGDNG(brw)) { + if (intel->is_ironlake) { switch (key->nr_urb_entries) { case 8: case 12: @@ -135,7 +135,7 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) case 32: break; case 64: - assert(BRW_IS_G4X(brw)); + assert(intel->is_g4x); break; default: assert(0); @@ -145,17 +145,8 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) vs.thread4.urb_entry_allocation_size = key->urb_size - 1; - if (BRW_IS_IGDNG(brw)) - chipset_max_threads = 72; - else if (BRW_IS_G4X(brw)) - chipset_max_threads = 32; - else - chipset_max_threads = 16; vs.thread4.max_threads = CLAMP(key->nr_urb_entries / 2, - 1, chipset_max_threads) - 1; - - if (INTEL_DEBUG & DEBUG_SINGLE_THREAD) - vs.thread4.max_threads = 0; + 1, brw->vs_max_threads) - 1; /* No samplers for ARB_vp programs: */ @@ -173,8 +164,7 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) bo = brw_upload_cache(&brw->cache, BRW_VS_UNIT, key, sizeof(*key), &brw->vs.prog_bo, 1, - &vs, sizeof(vs), - NULL, NULL); + &vs, sizeof(vs)); /* Emit VS program relocation */ dri_bo_emit_reloc(bo, diff --git a/i965/brw_vs_surface_state.c b/i965/brw_vs_surface_state.c index 3bc9840..4007b5a 100644 --- a/i965/brw_vs_surface_state.c +++ b/i965/brw_vs_surface_state.c @@ -35,7 +35,6 @@ #include "brw_context.h" #include "brw_state.h" -#include "brw_defines.h" /* Creates a new VS constant buffer reflecting the current VS program's * constants, if needed by the VS program. 
@@ -68,13 +67,13 @@ brw_vs_update_constant_buffer(struct brw_context *brw) */ _mesa_load_state_parameters(&brw->intel.ctx, vp->program.Base.Parameters); - intel_bo_map_gtt_preferred(intel, const_buffer, GL_TRUE); + drm_intel_gem_bo_map_gtt(const_buffer); for (i = 0; i < params->NumParameters; i++) { memcpy(const_buffer->virtual + i * 4 * sizeof(float), params->ParameterValues[i], 4 * sizeof(float)); } - intel_bo_unmap_gtt_preferred(intel, const_buffer); + drm_intel_gem_bo_unmap_gtt(const_buffer); return const_buffer; } @@ -105,7 +104,7 @@ brw_update_vs_constant_surface( GLcontext *ctx, /* If there's no constant buffer, then no surface BO is needed to point at * it. */ - if (vp->const_buffer == 0) { + if (vp->const_buffer == NULL) { drm_intel_bo_unreference(brw->vs.surf_bo[surf]); brw->vs.surf_bo[surf] = NULL; return; @@ -133,7 +132,7 @@ brw_update_vs_constant_surface( GLcontext *ctx, brw->vs.surf_bo[surf] = brw_search_cache(&brw->surface_cache, BRW_SS_SURFACE, &key, sizeof(key), - &key.bo, key.bo ? 1 : 0, + &key.bo, 1, NULL); if (brw->vs.surf_bo[surf] == NULL) { brw->vs.surf_bo[surf] = brw_create_constant_surface(brw, &key); @@ -156,7 +155,7 @@ brw_vs_get_binding_table(struct brw_context *brw) if (bind_bo == NULL) { GLuint data_size = BRW_VS_MAX_SURF * sizeof(GLuint); - uint32_t *data = malloc(data_size); + uint32_t data[BRW_VS_MAX_SURF]; int i; for (i = 0; i < BRW_VS_MAX_SURF; i++) @@ -168,8 +167,7 @@ brw_vs_get_binding_table(struct brw_context *brw) bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND, NULL, 0, brw->vs.surf_bo, BRW_VS_MAX_SURF, - data, data_size, - NULL, NULL); + data, data_size); /* Emit binding table relocations to surface state */ for (i = 0; i < BRW_VS_MAX_SURF; i++) { @@ -182,8 +180,6 @@ brw_vs_get_binding_table(struct brw_context *brw) I915_GEM_DOMAIN_INSTRUCTION, 0); } } - - free(data); } return bind_bo; diff --git a/i965/brw_vtbl.c b/i965/brw_vtbl.c index 34aaea3..96a44bf 100644 --- a/i965/brw_vtbl.c +++ b/i965/brw_vtbl.c @@ -44,7 +44,6 @@ #include "brw_state.h" #include "brw_draw.h" #include "brw_state.h" -#include "brw_fallback.h" #include "brw_vs.h" #include "brw_wm.h" @@ -68,11 +67,11 @@ static void brw_destroy_context( struct intel_context *intel ) brw_draw_destroy( brw ); brw_clear_validated_bos(brw); if (brw->wm.compile_data) { - _mesa_free(brw->wm.compile_data->instruction); - _mesa_free(brw->wm.compile_data->vreg); - _mesa_free(brw->wm.compile_data->refs); - _mesa_free(brw->wm.compile_data->prog_instructions); - _mesa_free(brw->wm.compile_data); + free(brw->wm.compile_data->instruction); + free(brw->wm.compile_data->vreg); + free(brw->wm.compile_data->refs); + free(brw->wm.compile_data->prog_instructions); + free(brw->wm.compile_data); } for (i = 0; i < brw->state.nr_color_regions; i++) @@ -103,6 +102,9 @@ static void brw_destroy_context( struct intel_context *intel ) dri_bo_release(&brw->cc.prog_bo); dri_bo_release(&brw->cc.state_bo); dri_bo_release(&brw->cc.vp_bo); + dri_bo_release(&brw->cc.blend_state_bo); + dri_bo_release(&brw->cc.depth_stencil_state_bo); + dri_bo_release(&brw->cc.color_calc_state_bo); } @@ -140,6 +142,12 @@ static void brw_finish_batch(struct intel_context *intel) { struct brw_context *brw = brw_context(&intel->ctx); brw_emit_query_end(brw); + + if (brw->curbe.curbe_bo) { + drm_intel_gem_bo_unmap_gtt(brw->curbe.curbe_bo); + drm_intel_bo_unreference(brw->curbe.curbe_bo); + brw->curbe.curbe_bo = NULL; + } } @@ -150,11 +158,6 @@ static void brw_new_batch( struct intel_context *intel ) { struct brw_context *brw = 
brw_context(&intel->ctx); - /* Check that we didn't just wrap our batchbuffer at a bad time. */ - assert(!brw->no_batch_wrap); - - brw->curbe.need_new_bo = GL_TRUE; - /* Mark all context state as needing to be re-emitted. * This is probably not as severe as on 915, since almost all of our state * is just in referenced buffers. @@ -175,12 +178,6 @@ static void brw_new_batch( struct intel_context *intel ) } } - -static void brw_note_fence( struct intel_context *intel, GLuint fence ) -{ - brw_context(&intel->ctx)->state.dirty.brw |= BRW_NEW_FENCE; -} - static void brw_invalidate_state( struct intel_context *intel, GLuint new_state ) { /* nothing */ @@ -196,7 +193,6 @@ void brwInitVtbl( struct brw_context *brw ) brw->intel.vtbl.update_texture_state = 0; brw->intel.vtbl.invalidate_state = brw_invalidate_state; - brw->intel.vtbl.note_fence = brw_note_fence; brw->intel.vtbl.new_batch = brw_new_batch; brw->intel.vtbl.finish_batch = brw_finish_batch; brw->intel.vtbl.destroy = brw_destroy_context; diff --git a/i965/brw_wm.c b/i965/brw_wm.c index 6895f64..991e1b9 100644 --- a/i965/brw_wm.c +++ b/i965/brw_wm.c @@ -30,7 +30,6 @@ */ #include "brw_context.h" -#include "brw_util.h" #include "brw_wm.h" #include "brw_state.h" @@ -152,11 +151,11 @@ static void do_wm_prog( struct brw_context *brw, */ return; } - c->instruction = _mesa_calloc(BRW_WM_MAX_INSN * sizeof(*c->instruction)); - c->prog_instructions = _mesa_calloc(BRW_WM_MAX_INSN * + c->instruction = calloc(1, BRW_WM_MAX_INSN * sizeof(*c->instruction)); + c->prog_instructions = calloc(1, BRW_WM_MAX_INSN * sizeof(*c->prog_instructions)); - c->vreg = _mesa_calloc(BRW_WM_MAX_VREG * sizeof(*c->vreg)); - c->refs = _mesa_calloc(BRW_WM_MAX_REF * sizeof(*c->refs)); + c->vreg = calloc(1, BRW_WM_MAX_VREG * sizeof(*c->vreg)); + c->refs = calloc(1, BRW_WM_MAX_REF * sizeof(*c->refs)); } else { void *instruction = c->instruction; void *prog_instructions = c->prog_instructions; @@ -199,12 +198,13 @@ static void do_wm_prog( struct brw_context *brw, program = brw_get_program(&c->func, &program_size); dri_bo_unreference(brw->wm.prog_bo); - brw->wm.prog_bo = brw_upload_cache( &brw->cache, BRW_WM_PROG, - &c->key, sizeof(c->key), - NULL, 0, - program, program_size, - &c->prog_data, - &brw->wm.prog_data ); + brw->wm.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_WM_PROG, + &c->key, sizeof(c->key), + NULL, 0, + program, program_size, + &c->prog_data, + sizeof(c->prog_data), + &brw->wm.prog_data); } @@ -336,11 +336,7 @@ static void brw_wm_populate_key( struct brw_context *brw, * drawable height in order to invert the Y axis. 
*/ if (fp->program.Base.InputsRead & FRAG_BIT_WPOS) { - if (brw->intel.driDrawable != NULL) { - key->origin_x = brw->intel.driDrawable->x; - key->origin_y = brw->intel.driDrawable->y; - key->drawable_height = brw->intel.driDrawable->h; - } + key->drawable_height = ctx->DrawBuffer->Height; } key->nr_color_regions = brw->state.nr_color_regions; diff --git a/i965/brw_wm.h b/i965/brw_wm.h index 9dcb6e1..88d84ee 100644 --- a/i965/brw_wm.h +++ b/i965/brw_wm.h @@ -76,10 +76,9 @@ struct brw_wm_prog_key { GLushort tex_swizzles[BRW_MAX_TEX_UNIT]; - GLuint program_string_id:32; - GLushort origin_x, origin_y; GLushort drawable_height; GLbitfield64 vp_outputs_written; + GLuint program_string_id:32; }; diff --git a/i965/brw_wm_debug.c b/i965/brw_wm_debug.c index 2208210..a78cc8b 100644 --- a/i965/brw_wm_debug.c +++ b/i965/brw_wm_debug.c @@ -41,21 +41,21 @@ void brw_wm_print_value( struct brw_wm_compile *c, if (c->state >= PASS2_DONE) brw_print_reg(value->hw_reg); else if( value == &c->undef_value ) - _mesa_printf("undef"); + printf("undef"); else if( value - c->vreg >= 0 && value - c->vreg < BRW_WM_MAX_VREG) - _mesa_printf("r%d", value - c->vreg); + printf("r%d", value - c->vreg); else if (value - c->creg >= 0 && value - c->creg < BRW_WM_MAX_PARAM) - _mesa_printf("c%d", value - c->creg); + printf("c%d", value - c->creg); else if (value - c->payload.input_interp >= 0 && value - c->payload.input_interp < FRAG_ATTRIB_MAX) - _mesa_printf("i%d", value - c->payload.input_interp); + printf("i%d", value - c->payload.input_interp); else if (value - c->payload.depth >= 0 && value - c->payload.depth < FRAG_ATTRIB_MAX) - _mesa_printf("d%d", value - c->payload.depth); + printf("d%d", value - c->payload.depth); else - _mesa_printf("?"); + printf("?"); } void brw_wm_print_ref( struct brw_wm_compile *c, @@ -64,16 +64,16 @@ void brw_wm_print_ref( struct brw_wm_compile *c, struct brw_reg hw_reg = ref->hw_reg; if (ref->unspill_reg) - _mesa_printf("UNSPILL(%x)/", ref->value->spill_slot); + printf("UNSPILL(%x)/", ref->value->spill_slot); if (c->state >= PASS2_DONE) brw_print_reg(ref->hw_reg); else { - _mesa_printf("%s", hw_reg.negate ? "-" : ""); - _mesa_printf("%s", hw_reg.abs ? "abs/" : ""); + printf("%s", hw_reg.negate ? "-" : ""); + printf("%s", hw_reg.abs ? "abs/" : ""); brw_wm_print_value(c, ref->value); if ((hw_reg.nr&1) || hw_reg.subnr) { - _mesa_printf("->%d.%d", (hw_reg.nr&1), hw_reg.subnr); + printf("->%d.%d", (hw_reg.nr&1), hw_reg.subnr); } } } @@ -84,22 +84,22 @@ void brw_wm_print_insn( struct brw_wm_compile *c, GLuint i, arg; GLuint nr_args = brw_wm_nr_args(inst->opcode); - _mesa_printf("["); + printf("["); for (i = 0; i < 4; i++) { if (inst->dst[i]) { brw_wm_print_value(c, inst->dst[i]); if (inst->dst[i]->spill_slot) - _mesa_printf("/SPILL(%x)",inst->dst[i]->spill_slot); + printf("/SPILL(%x)",inst->dst[i]->spill_slot); } else - _mesa_printf("#"); + printf("#"); if (i < 3) - _mesa_printf(","); + printf(","); } - _mesa_printf("]"); + printf("]"); if (inst->writemask != WRITEMASK_XYZW) - _mesa_printf(".%s%s%s%s", + printf(".%s%s%s%s", GET_BIT(inst->writemask, 0) ? "x" : "", GET_BIT(inst->writemask, 1) ? "y" : "", GET_BIT(inst->writemask, 2) ? 
"z" : "", @@ -107,58 +107,58 @@ void brw_wm_print_insn( struct brw_wm_compile *c, switch (inst->opcode) { case WM_PIXELXY: - _mesa_printf(" = PIXELXY"); + printf(" = PIXELXY"); break; case WM_DELTAXY: - _mesa_printf(" = DELTAXY"); + printf(" = DELTAXY"); break; case WM_PIXELW: - _mesa_printf(" = PIXELW"); + printf(" = PIXELW"); break; case WM_WPOSXY: - _mesa_printf(" = WPOSXY"); + printf(" = WPOSXY"); break; case WM_PINTERP: - _mesa_printf(" = PINTERP"); + printf(" = PINTERP"); break; case WM_LINTERP: - _mesa_printf(" = LINTERP"); + printf(" = LINTERP"); break; case WM_CINTERP: - _mesa_printf(" = CINTERP"); + printf(" = CINTERP"); break; case WM_FB_WRITE: - _mesa_printf(" = FB_WRITE"); + printf(" = FB_WRITE"); break; case WM_FRONTFACING: - _mesa_printf(" = FRONTFACING"); + printf(" = FRONTFACING"); break; default: - _mesa_printf(" = %s", _mesa_opcode_string(inst->opcode)); + printf(" = %s", _mesa_opcode_string(inst->opcode)); break; } if (inst->saturate) - _mesa_printf("_SAT"); + printf("_SAT"); for (arg = 0; arg < nr_args; arg++) { - _mesa_printf(" ["); + printf(" ["); for (i = 0; i < 4; i++) { if (inst->src[arg][i]) { brw_wm_print_ref(c, inst->src[arg][i]); } else - _mesa_printf("%%"); + printf("%%"); if (i < 3) - _mesa_printf(","); + printf(","); else - _mesa_printf("]"); + printf("]"); } } - _mesa_printf("\n"); + printf("\n"); } void brw_wm_print_program( struct brw_wm_compile *c, @@ -166,9 +166,9 @@ void brw_wm_print_program( struct brw_wm_compile *c, { GLuint insn; - _mesa_printf("%s:\n", stage); + printf("%s:\n", stage); for (insn = 0; insn < c->nr_insns; insn++) brw_wm_print_insn(c, &c->instruction[insn]); - _mesa_printf("\n"); + printf("\n"); } diff --git a/i965/brw_wm_emit.c b/i965/brw_wm_emit.c index 5390fd2..9315bca 100644 --- a/i965/brw_wm_emit.c +++ b/i965/brw_wm_emit.c @@ -138,19 +138,43 @@ void emit_wpos_xy(struct brw_wm_compile *c, * X and Y channels. */ if (mask & WRITEMASK_X) { - /* X' = X - origin */ - brw_ADD(p, - dst[0], - retype(arg0[0], BRW_REGISTER_TYPE_W), - brw_imm_d(0 - c->key.origin_x)); + if (c->fp->program.PixelCenterInteger) { + /* X' = X */ + brw_MOV(p, + dst[0], + retype(arg0[0], BRW_REGISTER_TYPE_W)); + } else { + /* X' = X + 0.5 */ + brw_ADD(p, + dst[0], + retype(arg0[0], BRW_REGISTER_TYPE_W), + brw_imm_f(0.5)); + } } if (mask & WRITEMASK_Y) { - /* Y' = height - (Y - origin_y) = height + origin_y - Y */ - brw_ADD(p, - dst[1], - negate(retype(arg0[1], BRW_REGISTER_TYPE_W)), - brw_imm_d(c->key.origin_y + c->key.drawable_height - 1)); + if (c->fp->program.OriginUpperLeft) { + if (c->fp->program.PixelCenterInteger) { + /* Y' = Y */ + brw_MOV(p, + dst[1], + retype(arg0[1], BRW_REGISTER_TYPE_W)); + } else { + /* Y' = Y + 0.5 */ + brw_ADD(p, + dst[1], + retype(arg0[1], BRW_REGISTER_TYPE_W), + brw_imm_f(0.5)); + } + } else { + float center_offset = c->fp->program.PixelCenterInteger ? 
0.0 : 0.5; + + /* Y' = (height - 1) - Y + center */ + brw_ADD(p, + dst[1], + negate(retype(arg0[1], BRW_REGISTER_TYPE_W)), + brw_imm_f(c->key.drawable_height - 1 + center_offset)); + } } } @@ -692,7 +716,7 @@ void emit_xpd(struct brw_compile *p, { GLuint i; - assert(!(mask & WRITEMASK_W) == WRITEMASK_X); + assert((mask & WRITEMASK_W) != WRITEMASK_W); for (i = 0 ; i < 3; i++) { if (mask & (1<<i)) { @@ -830,6 +854,7 @@ void emit_tex(struct brw_wm_compile *c, GLboolean shadow) { struct brw_compile *p = &c->func; + struct intel_context *intel = &p->brw->intel; struct brw_reg dst_retyped; GLuint cur_mrf = 2, response_length; GLuint i, nr_texcoords; @@ -873,7 +898,7 @@ void emit_tex(struct brw_wm_compile *c, } /* Pre-Ironlake, the 8-wide sampler always took u,v,r. */ - if (!BRW_IS_IGDNG(p->brw) && c->dispatch_width == 8) + if (!intel->is_ironlake && c->dispatch_width == 8) nr_texcoords = 3; /* For shadow comparisons, we have to supply u,v,r. */ @@ -891,7 +916,7 @@ void emit_tex(struct brw_wm_compile *c, /* Fill in the shadow comparison reference value. */ if (shadow) { - if (BRW_IS_IGDNG(p->brw)) { + if (intel->is_ironlake) { /* Fill in the cube map array index value. */ brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0)); cur_mrf += mrf_per_channel; @@ -904,7 +929,7 @@ void emit_tex(struct brw_wm_compile *c, cur_mrf += mrf_per_channel; } - if (BRW_IS_IGDNG(p->brw)) { + if (intel->is_ironlake) { if (shadow) msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_IGDNG; else @@ -944,6 +969,7 @@ void emit_txb(struct brw_wm_compile *c, GLuint sampler) { struct brw_compile *p = &c->func; + struct intel_context *intel = &p->brw->intel; GLuint msgLength; GLuint msg_type; GLuint mrf_per_channel; @@ -955,8 +981,8 @@ void emit_txb(struct brw_wm_compile *c, * undefined, and trust the execution mask to keep the undefined pixels * from mattering. */ - if (c->dispatch_width == 16 || !BRW_IS_IGDNG(p->brw)) { - if (BRW_IS_IGDNG(p->brw)) + if (c->dispatch_width == 16 || !intel->is_ironlake) { + if (intel->is_ironlake) msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG; else msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; @@ -1084,7 +1110,7 @@ static void emit_kil_nv( struct brw_wm_compile *c ) brw_push_insn_state(p); brw_set_mask_control(p, BRW_MASK_DISABLE); - brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK + brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); /* IMASK */ brw_AND(p, r0uw, c->emit_mask_reg, r0uw); brw_pop_insn_state(p); } @@ -1174,7 +1200,7 @@ void emit_fb_write(struct brw_wm_compile *c, brw_push_insn_state(p); for (channel = 0; channel < 4; channel++) { - if (c->dispatch_width == 16 && (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))) { + if (c->dispatch_width == 16 && brw->has_compr4) { /* By setting the high bit of the MRF register number, we indicate * that we want COMPR4 mode - instead of doing the usual destination * + 1 for the second half we get destination + 4. @@ -1596,10 +1622,10 @@ void brw_wm_emit( struct brw_wm_compile *c ) break; default: - _mesa_printf("Unsupported opcode %i (%s) in fragment shader\n", - inst->opcode, inst->opcode < MAX_OPCODE ? - _mesa_opcode_string(inst->opcode) : - "unknown"); + printf("Unsupported opcode %i (%s) in fragment shader\n", + inst->opcode, inst->opcode < MAX_OPCODE ? 
+ _mesa_opcode_string(inst->opcode) : + "unknown"); } for (i = 0; i < 4; i++) @@ -1612,9 +1638,9 @@ void brw_wm_emit( struct brw_wm_compile *c ) if (INTEL_DEBUG & DEBUG_WM) { int i; - _mesa_printf("wm-native:\n"); + printf("wm-native:\n"); for (i = 0; i < p->nr_insn; i++) brw_disasm(stderr, &p->store[i]); - _mesa_printf("\n"); + printf("\n"); } } diff --git a/i965/brw_wm_fp.c b/i965/brw_wm_fp.c index 7d03179..d73c391 100644 --- a/i965/brw_wm_fp.c +++ b/i965/brw_wm_fp.c @@ -138,7 +138,6 @@ static struct prog_dst_register dst_reg(GLuint file, GLuint idx) reg.CondMask = COND_TR; reg.CondSwizzle = 0; reg.CondSrc = 0; - reg.pad = 0; return reg; } @@ -160,7 +159,7 @@ static struct prog_dst_register get_temp( struct brw_wm_compile *c ) int bit = _mesa_ffs( ~c->fp_temp ); if (!bit) { - _mesa_printf("%s: out of temporaries\n", __FILE__); + printf("%s: out of temporaries\n", __FILE__); exit(1); } @@ -1035,7 +1034,7 @@ static void print_insns( const struct prog_instruction *insn, { GLuint i; for (i = 0; i < nr; i++, insn++) { - _mesa_printf("%3d: ", i); + printf("%3d: ", i); if (insn->Opcode < MAX_OPCODE) _mesa_print_instruction(insn); else if (insn->Opcode < MAX_WM_OPCODE) { @@ -1046,7 +1045,7 @@ static void print_insns( const struct prog_instruction *insn, 3); } else - _mesa_printf("965 Opcode %d\n", insn->Opcode); + printf("965 Opcode %d\n", insn->Opcode); } } @@ -1061,9 +1060,9 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) GLuint insn; if (INTEL_DEBUG & DEBUG_WM) { - _mesa_printf("pre-fp:\n"); + printf("pre-fp:\n"); _mesa_print_program(&fp->program.Base); - _mesa_printf("\n"); + printf("\n"); } c->pixel_xy = src_undef(); @@ -1169,9 +1168,9 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) } if (INTEL_DEBUG & DEBUG_WM) { - _mesa_printf("pass_fp:\n"); + printf("pass_fp:\n"); print_insns( c->prog_instructions, c->nr_fp_insns ); - _mesa_printf("\n"); + printf("\n"); } } diff --git a/i965/brw_wm_glsl.c b/i965/brw_wm_glsl.c index e8c2cb6..562608e 100644 --- a/i965/brw_wm_glsl.c +++ b/i965/brw_wm_glsl.c @@ -743,7 +743,7 @@ static void emit_kil(struct brw_wm_compile *c) struct brw_reg depth = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); brw_push_insn_state(p); brw_set_mask_control(p, BRW_MASK_DISABLE); - brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK + brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); /* IMASK */ brw_AND(p, depth, c->emit_mask_reg, depth); brw_pop_insn_state(p); } @@ -1826,6 +1826,7 @@ get_argument_regs(struct brw_wm_compile *c, static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) { + struct intel_context *intel = &brw->intel; #define MAX_IF_DEPTH 32 #define MAX_LOOP_DEPTH 32 struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH]; @@ -1848,7 +1849,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) c->cur_inst = i; #if 0 - _mesa_printf("Inst %d: ", i); + printf("Inst %d: ", i); _mesa_print_instruction(inst); #endif @@ -1876,10 +1877,6 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) else brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE); - dst_flags = inst->DstReg.WriteMask; - if (inst->SaturateMode == SATURATE_ZERO_ONE) - dst_flags |= SATURATE; - switch (inst->Opcode) { case WM_PIXELXY: emit_pixel_xy(c, dst, dst_flags); @@ -2043,6 +2040,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) if_inst[if_depth++] = brw_IF(p, BRW_EXECUTE_8); break; case OPCODE_ELSE: + assert(if_depth > 0); if_inst[if_depth-1] = brw_ELSE(p, 
if_inst[if_depth-1]); break; case OPCODE_ENDIF: @@ -2096,9 +2094,10 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) struct brw_instruction *inst0, *inst1; GLuint br = 1; - if (BRW_IS_IGDNG(brw)) + if (intel->is_ironlake) br = 2; - + + assert(loop_depth > 0); loop_depth--; inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]); /* patch all the BREAK/CONT instructions from last BGNLOOP */ @@ -2116,7 +2115,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) } break; default: - _mesa_printf("unsupported IR in fragment shader %d\n", + printf("unsupported IR in fragment shader %d\n", inst->Opcode); } @@ -2128,10 +2127,10 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) post_wm_emit(c); if (INTEL_DEBUG & DEBUG_WM) { - _mesa_printf("wm-native:\n"); + printf("wm-native:\n"); for (i = 0; i < p->nr_insn; i++) brw_disasm(stderr, &p->store[i]); - _mesa_printf("\n"); + printf("\n"); } } @@ -2142,7 +2141,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) { if (INTEL_DEBUG & DEBUG_WM) { - _mesa_printf("brw_wm_glsl_emit:\n"); + printf("brw_wm_glsl_emit:\n"); } /* initial instruction translation/simplification */ diff --git a/i965/brw_wm_pass0.c b/i965/brw_wm_pass0.c index ff4c082..60bd92e 100644 --- a/i965/brw_wm_pass0.c +++ b/i965/brw_wm_pass0.c @@ -105,7 +105,7 @@ static const struct brw_wm_ref *get_param_ref( struct brw_wm_compile *c, GLuint i = c->prog_data.nr_params++; if (i >= BRW_WM_MAX_PARAM) { - _mesa_printf("%s: out of params\n", __FUNCTION__); + printf("%s: out of params\n", __FUNCTION__); c->prog_data.error = 1; return NULL; } @@ -154,7 +154,7 @@ static const struct brw_wm_ref *get_const_ref( struct brw_wm_compile *c, return c->constref[i].ref; } else { - _mesa_printf("%s: out of constrefs\n", __FUNCTION__); + printf("%s: out of constrefs\n", __FUNCTION__); c->prog_data.error = 1; return NULL; } diff --git a/i965/brw_wm_sampler_state.c b/i965/brw_wm_sampler_state.c index aa2e519..d7650af 100644 --- a/i965/brw_wm_sampler_state.c +++ b/i965/brw_wm_sampler_state.c @@ -89,7 +89,6 @@ struct wm_sampler_key { float max_aniso; GLenum minfilter, magfilter; GLenum comparemode, comparefunc; - dri_bo *sdc_bo; /** If target is cubemap, take context setting. 
*/ @@ -105,7 +104,7 @@ static void brw_update_sampler_state(struct wm_sampler_entry *key, dri_bo *sdc_bo, struct brw_sampler_state *sampler) { - _mesa_memset(sampler, 0, sizeof(*sampler)); + memset(sampler, 0, sizeof(*sampler)); switch (key->minfilter) { case GL_NEAREST: @@ -230,7 +229,7 @@ brw_wm_sampler_populate_key(struct brw_context *brw, GLcontext *ctx = &brw->intel.ctx; int unit; - memset(key, 0, sizeof(*key)); + key->sampler_count = 0; for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) { if (ctx->Texture.Unit[unit]._ReallyEnabled) { @@ -241,6 +240,8 @@ brw_wm_sampler_populate_key(struct brw_context *brw, struct gl_texture_image *firstImage = texObj->Image[0][intelObj->firstLevel]; + memset(entry, 0, sizeof(*entry)); + entry->tex_target = texObj->Target; entry->seamless_cube_map = (texObj->Target == GL_TEXTURE_CUBE_MAP) @@ -262,10 +263,10 @@ brw_wm_sampler_populate_key(struct brw_context *brw, dri_bo_unreference(brw->wm.sdc_bo[unit]); if (firstImage->_BaseFormat == GL_DEPTH_COMPONENT) { float bordercolor[4] = { - texObj->BorderColor[0], - texObj->BorderColor[0], - texObj->BorderColor[0], - texObj->BorderColor[0] + texObj->BorderColor.f[0], + texObj->BorderColor.f[0], + texObj->BorderColor.f[0], + texObj->BorderColor.f[0] }; /* GL specs that border color for depth textures is taken from the * R channel, while the hardware uses A. Spam R into all the @@ -274,7 +275,7 @@ brw_wm_sampler_populate_key(struct brw_context *brw, brw->wm.sdc_bo[unit] = upload_default_color(brw, bordercolor); } else { brw->wm.sdc_bo[unit] = upload_default_color(brw, - texObj->BorderColor); + texObj->BorderColor.f); } key->sampler_count = unit + 1; } @@ -289,7 +290,7 @@ static void upload_wm_samplers( struct brw_context *brw ) { GLcontext *ctx = &brw->intel.ctx; struct wm_sampler_key key; - int i; + int i, sampler_key_size; brw_wm_sampler_populate_key(brw, &key); @@ -303,8 +304,11 @@ static void upload_wm_samplers( struct brw_context *brw ) if (brw->wm.sampler_count == 0) return; + /* Only include the populated portion of the key in the search. */ + sampler_key_size = offsetof(struct wm_sampler_key, + sampler[key.sampler_count]); brw->wm.sampler_bo = brw_search_cache(&brw->cache, BRW_SAMPLER, - &key, sizeof(key), + &key, sampler_key_size, brw->wm.sdc_bo, key.sampler_count, NULL); @@ -324,10 +328,9 @@ static void upload_wm_samplers( struct brw_context *brw ) } brw->wm.sampler_bo = brw_upload_cache(&brw->cache, BRW_SAMPLER, - &key, sizeof(key), + &key, sampler_key_size, brw->wm.sdc_bo, key.sampler_count, - &sampler, sizeof(sampler), - NULL, NULL); + &sampler, sizeof(sampler)); /* Emit SDC relocations */ for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { diff --git a/i965/brw_wm_state.c b/i965/brw_wm_state.c index f89ed9b..a7f80db 100644 --- a/i965/brw_wm_state.c +++ b/i965/brw_wm_state.c @@ -49,8 +49,6 @@ struct brw_wm_unit_key { unsigned int curbe_offset; unsigned int urb_size; - unsigned int max_threads; - unsigned int nr_surfaces, sampler_count; GLboolean uses_depth, computes_depth, uses_kill, is_glsl; GLboolean polygon_stipple, stats_wm, line_stipple, offset_enable; @@ -67,18 +65,6 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) memset(key, 0, sizeof(*key)); - if (INTEL_DEBUG & DEBUG_SINGLE_THREAD) - key->max_threads = 1; - else { - /* WM maximum threads is number of EUs times number of threads per EU. 
*/ - if (BRW_IS_IGDNG(brw)) - key->max_threads = 12 * 6; - else if (BRW_IS_G4X(brw)) - key->max_threads = 10 * 5; - else - key->max_threads = 8 * 4; - } - /* CACHE_NEW_WM_PROG */ key->total_grf = brw->wm.prog_data->total_grf; key->urb_entry_read_length = brw->wm.prog_data->urb_read_length; @@ -140,6 +126,7 @@ static dri_bo * wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, dri_bo **reloc_bufs) { + struct intel_context *intel = &brw->intel; struct brw_wm_unit_state wm; dri_bo *bo; @@ -150,7 +137,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, wm.thread1.depth_coef_urb_read_offset = 1; wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; - if (BRW_IS_IGDNG(brw)) + if (intel->is_ironlake) wm.thread1.binding_table_entry_count = 0; /* hardware requirement */ else wm.thread1.binding_table_entry_count = key->nr_surfaces; @@ -170,7 +157,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, wm.thread3.const_urb_entry_read_length = key->curb_entry_read_length; wm.thread3.const_urb_entry_read_offset = key->curbe_offset * 2; - if (BRW_IS_IGDNG(brw)) + if (intel->is_ironlake) wm.wm4.sampler_count = 0; /* hardware requirement */ else wm.wm4.sampler_count = (key->sampler_count + 1) / 4; @@ -191,7 +178,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, else wm.wm5.enable_16_pix = 1; - wm.wm5.max_threads = key->max_threads - 1; + wm.wm5.max_threads = brw->wm_max_threads - 1; wm.wm5.thread_dispatch_enable = 1; /* AKA: color_write */ wm.wm5.legacy_line_rast = 0; wm.wm5.legacy_global_depth_bias = 0; @@ -223,8 +210,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, bo = brw_upload_cache(&brw->cache, BRW_WM_UNIT, key, sizeof(*key), reloc_bufs, 3, - &wm, sizeof(wm), - NULL, NULL); + &wm, sizeof(wm)); /* Emit WM program relocation */ dri_bo_emit_reloc(bo, @@ -268,7 +254,7 @@ static void upload_wm_unit( struct brw_context *brw ) */ assert(key.total_scratch <= 12 * 1024); if (key.total_scratch) { - GLuint total = key.total_scratch * key.max_threads; + GLuint total = key.total_scratch * brw->wm_max_threads; if (brw->wm.scratch_bo && total > brw->wm.scratch_bo->size) { dri_bo_unreference(brw->wm.scratch_bo); diff --git a/i965/brw_wm_surface_state.c b/i965/brw_wm_surface_state.c index 8335e5a..ce0bf0b 100644 --- a/i965/brw_wm_surface_state.c +++ b/i965/brw_wm_surface_state.c @@ -207,33 +207,14 @@ brw_create_texture_surface( struct brw_context *brw, surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; surf.ss0.surface_type = translate_tex_target(key->target); - if (key->bo) { - surf.ss0.surface_format = translate_tex_format(key->format, - key->internal_format, - key->depthmode); - } - else { - switch (key->depth) { - case 32: - surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; - break; - default: - case 24: - surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8X8_UNORM; - break; - case 16: - surf.ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM; - break; - } - } + surf.ss0.surface_format = translate_tex_format(key->format, + key->internal_format, + key->depthmode); /* This is ok for all textures with channel width 8bit or less: */ /* surf.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */ - if (key->bo) - surf.ss1.base_addr = key->bo->offset; /* reloc */ - else - surf.ss1.base_addr = key->offset; + surf.ss1.base_addr = key->bo->offset; /* reloc */ surf.ss2.mip_count = key->last_level - key->first_level; surf.ss2.width = 
key->width - 1; @@ -255,18 +236,14 @@ brw_create_texture_surface( struct brw_context *brw, bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE, key, sizeof(*key), - &key->bo, key->bo ? 1 : 0, - &surf, sizeof(surf), - NULL, NULL); - - if (key->bo) { - /* Emit relocation to surface contents */ - dri_bo_emit_reloc(bo, - I915_GEM_DOMAIN_SAMPLER, 0, - 0, - offsetof(struct brw_surface_state, ss1), - key->bo); - } + &key->bo, 1, + &surf, sizeof(surf)); + + /* Emit relocation to surface contents */ + drm_intel_bo_emit_reloc(bo, offsetof(struct brw_surface_state, ss1), + key->bo, 0, + I915_GEM_DOMAIN_SAMPLER, 0); + return bo; } @@ -282,19 +259,12 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) memset(&key, 0, sizeof(key)); - if (intelObj->imageOverride) { - key.pitch = intelObj->pitchOverride / intelObj->mt->cpp; - key.depth = intelObj->depthOverride; - key.bo = NULL; - key.offset = intelObj->textureOffset; - } else { - key.format = firstImage->TexFormat; - key.internal_format = firstImage->InternalFormat; - key.pitch = intelObj->mt->pitch; - key.depth = firstImage->Depth; - key.bo = intelObj->mt->region->buffer; - key.offset = 0; - } + key.format = firstImage->TexFormat; + key.internal_format = firstImage->InternalFormat; + key.pitch = intelObj->mt->pitch; + key.depth = firstImage->Depth; + key.bo = intelObj->mt->region->buffer; + key.offset = 0; key.target = tObj->Target; key.depthmode = tObj->DepthMode; @@ -309,7 +279,7 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache, BRW_SS_SURFACE, &key, sizeof(key), - &key.bo, key.bo ? 1 : 0, + &key.bo, 1, NULL); if (brw->wm.surf_bo[surf] == NULL) { brw->wm.surf_bo[surf] = brw_create_texture_surface(brw, &key); @@ -337,10 +307,7 @@ brw_create_constant_surface( struct brw_context *brw, surf.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; assert(key->bo); - if (key->bo) - surf.ss1.base_addr = key->bo->offset; /* reloc */ - else - surf.ss1.base_addr = key->offset; + surf.ss1.base_addr = key->bo->offset; /* reloc */ surf.ss2.width = w & 0x7f; /* bits 6:0 of size or width */ surf.ss2.height = (w >> 7) & 0x1fff; /* bits 19:7 of size or width */ @@ -350,21 +317,16 @@ brw_create_constant_surface( struct brw_context *brw, bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE, key, sizeof(*key), - &key->bo, key->bo ? 1 : 0, - &surf, sizeof(surf), - NULL, NULL); - - if (key->bo) { - /* Emit relocation to surface contents. Section 5.1.1 of the gen4 - * bspec ("Data Cache") says that the data cache does not exist as - * a separate cache and is just the sampler cache. - */ - dri_bo_emit_reloc(bo, - I915_GEM_DOMAIN_SAMPLER, 0, - 0, - offsetof(struct brw_surface_state, ss1), - key->bo); - } + &key->bo, 1, + &surf, sizeof(surf)); + + /* Emit relocation to surface contents. Section 5.1.1 of the gen4 + * bspec ("Data Cache") says that the data cache does not exist as + * a separate cache and is just the sampler cache. + */ + drm_intel_bo_emit_reloc(bo, offsetof(struct brw_surface_state, ss1), + key->bo, 0, + I915_GEM_DOMAIN_SAMPLER, 0); return bo; } @@ -422,7 +384,7 @@ brw_update_wm_constant_surface( GLcontext *ctx, /* If there's no constant buffer, then no surface BO is needed to point at * it. 
*/ - if (fp->const_buffer == 0) { + if (fp->const_buffer == NULL) { drm_intel_bo_unreference(brw->wm.surf_bo[surf]); brw->wm.surf_bo[surf] = NULL; return; @@ -450,7 +412,7 @@ brw_update_wm_constant_surface( GLcontext *ctx, brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache, BRW_SS_SURFACE, &key, sizeof(key), - &key.bo, key.bo ? 1 : 0, + &key.bo, 1, NULL); if (brw->wm.surf_bo[surf] == NULL) { brw->wm.surf_bo[surf] = brw_create_constant_surface(brw, &key); @@ -511,7 +473,8 @@ brw_update_renderbuffer_surface(struct brw_context *brw, struct gl_renderbuffer *rb, unsigned int unit) { - GLcontext *ctx = &brw->intel.ctx; + struct intel_context *intel = &brw->intel; + GLcontext *ctx = &intel->ctx; dri_bo *region_bo = NULL; struct intel_renderbuffer *irb = intel_renderbuffer(rb); struct intel_region *region = irb ? irb->region : NULL; @@ -522,7 +485,8 @@ brw_update_renderbuffer_surface(struct brw_context *brw, GLubyte color_mask[4]; GLboolean color_blend; uint32_t tiling; - uint32_t draw_offset; + uint32_t draw_x; + uint32_t draw_y; } key; memset(&key, 0, sizeof(key)); @@ -564,7 +528,8 @@ brw_update_renderbuffer_surface(struct brw_context *brw, } key.pitch = region->pitch; key.cpp = region->cpp; - key.draw_offset = region->draw_offset; /* cur 3d or cube face offset */ + key.draw_x = region->draw_x; + key.draw_y = region->draw_y; } else { key.surface_type = BRW_SURFACE_NULL; key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; @@ -572,20 +537,24 @@ brw_update_renderbuffer_surface(struct brw_context *brw, key.width = 1; key.height = 1; key.cpp = 4; - key.draw_offset = 0; + key.draw_x = 0; + key.draw_y = 0; } - /* _NEW_COLOR */ - memcpy(key.color_mask, ctx->Color.ColorMask, - sizeof(key.color_mask)); - /* As mentioned above, disable writes to the alpha component when the - * renderbuffer is XRGB. - */ - if (ctx->DrawBuffer->Visual.alphaBits == 0) - key.color_mask[3] = GL_FALSE; + if (intel->gen < 6) { + /* _NEW_COLOR */ + memcpy(key.color_mask, ctx->Color.ColorMask[unit], + sizeof(key.color_mask)); - key.color_blend = (!ctx->Color._LogicOpEnabled && - ctx->Color.BlendEnabled); + /* As mentioned above, disable writes to the alpha component when the + * renderbuffer is XRGB. + */ + if (ctx->DrawBuffer->Visual.alphaBits == 0) + key.color_mask[3] = GL_FALSE; + + key.color_blend = (!ctx->Color._LogicOpEnabled && + (ctx->Color.BlendEnabled & (1 << unit))); + } dri_bo_unreference(brw->wm.surf_bo[unit]); brw->wm.surf_bo[unit] = brw_search_cache(&brw->surface_cache, @@ -602,25 +571,32 @@ brw_update_renderbuffer_surface(struct brw_context *brw, surf.ss0.surface_format = key.surface_format; surf.ss0.surface_type = key.surface_type; if (key.tiling == I915_TILING_NONE) { - surf.ss1.base_addr = key.draw_offset; + surf.ss1.base_addr = (key.draw_x + key.draw_y * key.pitch) * key.cpp; } else { - uint32_t tile_offset = key.draw_offset % 4096; - - surf.ss1.base_addr = key.draw_offset - tile_offset; - - assert(BRW_IS_G4X(brw) || tile_offset == 0); - if (BRW_IS_G4X(brw)) { - if (key.tiling == I915_TILING_X) { - /* Note that the low bits of these fields are missing, so - * there's the possibility of getting in trouble. 
- */ - surf.ss5.x_offset = (tile_offset % 512) / key.cpp / 4; - surf.ss5.y_offset = tile_offset / 512 / 2; - } else { - surf.ss5.x_offset = (tile_offset % 128) / key.cpp / 4; - surf.ss5.y_offset = tile_offset / 128 / 2; - } + uint32_t tile_base, tile_x, tile_y; + uint32_t pitch = key.pitch * key.cpp; + + if (key.tiling == I915_TILING_X) { + tile_x = key.draw_x % (512 / key.cpp); + tile_y = key.draw_y % 8; + tile_base = ((key.draw_y / 8) * (8 * pitch)); + tile_base += (key.draw_x - tile_x) / (512 / key.cpp) * 4096; + } else { + /* Y */ + tile_x = key.draw_x % (128 / key.cpp); + tile_y = key.draw_y % 32; + tile_base = ((key.draw_y / 32) * (32 * pitch)); + tile_base += (key.draw_x - tile_x) / (128 / key.cpp) * 4096; } + assert(intel->is_g4x || (tile_x == 0 && tile_y == 0)); + assert(tile_x % 4 == 0); + assert(tile_y % 2 == 0); + /* Note that the low bits of these fields are missing, so + * there's the possibility of getting in trouble. + */ + surf.ss1.base_addr = tile_base; + surf.ss5.x_offset = tile_x / 4; + surf.ss5.y_offset = tile_y / 2; } if (region_bo != NULL) surf.ss1.base_addr += region_bo->offset; /* reloc */ @@ -630,20 +606,21 @@ brw_update_renderbuffer_surface(struct brw_context *brw, brw_set_surface_tiling(&surf, key.tiling); surf.ss3.pitch = (key.pitch * key.cpp) - 1; - /* _NEW_COLOR */ - surf.ss0.color_blend = key.color_blend; - surf.ss0.writedisable_red = !key.color_mask[0]; - surf.ss0.writedisable_green = !key.color_mask[1]; - surf.ss0.writedisable_blue = !key.color_mask[2]; - surf.ss0.writedisable_alpha = !key.color_mask[3]; + if (intel->gen < 6) { + /* _NEW_COLOR */ + surf.ss0.color_blend = key.color_blend; + surf.ss0.writedisable_red = !key.color_mask[0]; + surf.ss0.writedisable_green = !key.color_mask[1]; + surf.ss0.writedisable_blue = !key.color_mask[2]; + surf.ss0.writedisable_alpha = !key.color_mask[3]; + } /* Key size will never match key size for textures, so we're safe. */ brw->wm.surf_bo[unit] = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE, &key, sizeof(key), ®ion_bo, 1, - &surf, sizeof(surf), - NULL, NULL); + &surf, sizeof(surf)); if (region_bo != NULL) { /* We might sample from it, and we might render to it, so flag * them both. We might be able to figure out from other state @@ -690,8 +667,7 @@ brw_wm_get_binding_table(struct brw_context *brw) bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND, NULL, 0, brw->wm.surf_bo, brw->wm.nr_surfaces, - data, data_size, - NULL, NULL); + data, data_size); /* Emit binding table relocations to surface state */ for (i = 0; i < BRW_WM_MAX_SURF; i++) { diff --git a/i965/gen6_cc.c b/i965/gen6_cc.c new file mode 100644 index 0000000..f7acad6 --- /dev/null +++ b/i965/gen6_cc.c @@ -0,0 +1,296 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "brw_util.h" +#include "intel_batchbuffer.h" +#include "main/macros.h" + +struct gen6_blend_state_key { + GLboolean color_blend, alpha_enabled; + GLboolean dither; + + GLenum logic_op; + + GLenum blend_eq_rgb, blend_eq_a; + GLenum blend_src_rgb, blend_src_a; + GLenum blend_dst_rgb, blend_dst_a; + + GLenum alpha_func; +}; + +static void +blend_state_populate_key(struct brw_context *brw, + struct gen6_blend_state_key *key) +{ + GLcontext *ctx = &brw->intel.ctx; + + memset(key, 0, sizeof(*key)); + + /* _NEW_COLOR */ + if (ctx->Color._LogicOpEnabled) + key->logic_op = ctx->Color.LogicOp; + else + key->logic_op = GL_COPY; + + /* _NEW_COLOR */ + key->color_blend = ctx->Color.BlendEnabled; + if (key->color_blend) { + key->blend_eq_rgb = ctx->Color.BlendEquationRGB; + key->blend_eq_a = ctx->Color.BlendEquationA; + key->blend_src_rgb = ctx->Color.BlendSrcRGB; + key->blend_dst_rgb = ctx->Color.BlendDstRGB; + key->blend_src_a = ctx->Color.BlendSrcA; + key->blend_dst_a = ctx->Color.BlendDstA; + } + + /* _NEW_COLOR */ + key->alpha_enabled = ctx->Color.AlphaEnabled; + if (key->alpha_enabled) { + key->alpha_func = ctx->Color.AlphaFunc; + } + + /* _NEW_COLOR */ + key->dither = ctx->Color.DitherFlag; +} + +/** + * Creates the state cache entry for the given CC unit key. 
+ */ +static drm_intel_bo * +blend_state_create_from_key(struct brw_context *brw, + struct gen6_blend_state_key *key) +{ + struct gen6_blend_state blend; + drm_intel_bo *bo; + + memset(&blend, 0, sizeof(blend)); + + if (key->logic_op != GL_COPY) { + blend.blend1.logic_op_enable = 1; + blend.blend1.logic_op_func = intel_translate_logic_op(key->logic_op); + } else if (key->color_blend) { + GLenum eqRGB = key->blend_eq_rgb; + GLenum eqA = key->blend_eq_a; + GLenum srcRGB = key->blend_src_rgb; + GLenum dstRGB = key->blend_dst_rgb; + GLenum srcA = key->blend_src_a; + GLenum dstA = key->blend_dst_a; + + if (eqRGB == GL_MIN || eqRGB == GL_MAX) { + srcRGB = dstRGB = GL_ONE; + } + + if (eqA == GL_MIN || eqA == GL_MAX) { + srcA = dstA = GL_ONE; + } + + blend.blend0.dest_blend_factor = brw_translate_blend_factor(dstRGB); + blend.blend0.source_blend_factor = brw_translate_blend_factor(srcRGB); + blend.blend0.blend_func = brw_translate_blend_equation(eqRGB); + + blend.blend0.ia_dest_blend_factor = brw_translate_blend_factor(dstA); + blend.blend0.ia_source_blend_factor = brw_translate_blend_factor(srcA); + blend.blend0.ia_blend_func = brw_translate_blend_equation(eqA); + + blend.blend0.blend_enable = 1; + blend.blend0.ia_blend_enable = (srcA != srcRGB || + dstA != dstRGB || + eqA != eqRGB); + } + + if (key->alpha_enabled) { + blend.blend1.alpha_test_enable = 1; + blend.blend1.alpha_test_func = intel_translate_compare_func(key->alpha_func); + + } + + if (key->dither) { + blend.blend1.dither_enable = 1; + blend.blend1.y_dither_offset = 0; + blend.blend1.x_dither_offset = 0; + } + + bo = brw_upload_cache(&brw->cache, BRW_BLEND_STATE, + key, sizeof(*key), + NULL, 0, + &blend, sizeof(blend)); + + return bo; +} + +static void +prepare_blend_state(struct brw_context *brw) +{ + struct gen6_blend_state_key key; + + blend_state_populate_key(brw, &key); + + drm_intel_bo_unreference(brw->cc.blend_state_bo); + brw->cc.blend_state_bo = brw_search_cache(&brw->cache, BRW_BLEND_STATE, + &key, sizeof(key), + NULL, 0, + NULL); + + if (brw->cc.blend_state_bo == NULL) + brw->cc.blend_state_bo = blend_state_create_from_key(brw, &key); +} + +const struct brw_tracked_state gen6_blend_state = { + .dirty = { + .mesa = _NEW_COLOR, + .brw = 0, + .cache = 0, + }, + .prepare = prepare_blend_state, +}; + +struct gen6_color_calc_state_key { + GLubyte blend_constant_color[4]; + GLclampf alpha_ref; + GLubyte stencil_ref[2]; +}; + +static void +color_calc_state_populate_key(struct brw_context *brw, + struct gen6_color_calc_state_key *key) +{ + GLcontext *ctx = &brw->intel.ctx; + + memset(key, 0, sizeof(*key)); + + /* _NEW_STENCIL */ + if (ctx->Stencil._Enabled) { + const unsigned back = ctx->Stencil._BackFace; + + key->stencil_ref[0] = ctx->Stencil.Ref[0]; + if (ctx->Stencil._TestTwoSide) + key->stencil_ref[1] = ctx->Stencil.Ref[back]; + } + + /* _NEW_COLOR */ + if (ctx->Color.AlphaEnabled) + key->alpha_ref = ctx->Color.AlphaRef; + + key->blend_constant_color[0] = ctx->Color.BlendColor[0]; + key->blend_constant_color[1] = ctx->Color.BlendColor[1]; + key->blend_constant_color[2] = ctx->Color.BlendColor[2]; + key->blend_constant_color[3] = ctx->Color.BlendColor[3]; +} + +/** + * Creates the state cache entry for the given CC state key. 
+ */ +static drm_intel_bo * +color_calc_state_create_from_key(struct brw_context *brw, + struct gen6_color_calc_state_key *key) +{ + struct gen6_color_calc_state cc; + drm_intel_bo *bo; + + memset(&cc, 0, sizeof(cc)); + + cc.cc0.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8; + UNCLAMPED_FLOAT_TO_UBYTE(cc.cc1.alpha_ref_fi.ui, key->alpha_ref); + + cc.cc0.stencil_ref = key->stencil_ref[0]; + cc.cc0.bf_stencil_ref = key->stencil_ref[1]; + + cc.constant_r = key->blend_constant_color[0]; + cc.constant_g = key->blend_constant_color[1]; + cc.constant_b = key->blend_constant_color[2]; + cc.constant_a = key->blend_constant_color[3]; + + bo = brw_upload_cache(&brw->cache, BRW_COLOR_CALC_STATE, + key, sizeof(*key), + NULL, 0, + &cc, sizeof(cc)); + + return bo; +} + +static void +prepare_color_calc_state(struct brw_context *brw) +{ + struct gen6_color_calc_state_key key; + + color_calc_state_populate_key(brw, &key); + + drm_intel_bo_unreference(brw->cc.state_bo); + brw->cc.state_bo = brw_search_cache(&brw->cache, BRW_COLOR_CALC_STATE, + &key, sizeof(key), + NULL, 0, + NULL); + + if (brw->cc.state_bo == NULL) + brw->cc.state_bo = color_calc_state_create_from_key(brw, &key); +} + +const struct brw_tracked_state gen6_color_calc_state = { + .dirty = { + .mesa = _NEW_COLOR, + .brw = 0, + .cache = 0, + }, + .prepare = prepare_color_calc_state, +}; + +static void upload_cc_state_pointers(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + + BEGIN_BATCH(4); + OUT_BATCH(CMD_3D_CC_STATE_POINTERS << 16 | (4 - 2)); + OUT_RELOC(brw->cc.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); + OUT_RELOC(brw->cc.blend_state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); + OUT_RELOC(brw->cc.depth_stencil_state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); + ADVANCE_BATCH(); + + intel_batchbuffer_emit_mi_flush(intel->batch); +} + + +static void prepare_cc_state_pointers(struct brw_context *brw) +{ + brw_add_validated_bo(brw, brw->cc.state_bo); + brw_add_validated_bo(brw, brw->cc.blend_state_bo); + brw_add_validated_bo(brw, brw->cc.depth_stencil_state_bo); +} + +const struct brw_tracked_state gen6_cc_state_pointers = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_BATCH, + .cache = (CACHE_NEW_BLEND_STATE | + CACHE_NEW_COLOR_CALC_STATE | + CACHE_NEW_DEPTH_STENCIL_STATE) + }, + .prepare = prepare_cc_state_pointers, + .emit = upload_cc_state_pointers, +}; diff --git a/i965/gen6_clip_state.c b/i965/gen6_clip_state.c new file mode 100644 index 0000000..06f8145 --- /dev/null +++ b/i965/gen6_clip_state.c @@ -0,0 +1,75 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "intel_batchbuffer.h" + +static void +upload_clip_state(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + GLcontext *ctx = &intel->ctx; + uint32_t depth_clamp = 0; + uint32_t provoking; + + if (!ctx->Transform.DepthClamp) + depth_clamp = GEN6_CLIP_Z_TEST; + + if (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION) { + provoking = 0; + } else { + provoking = + (2 << GEN6_CLIP_TRI_PROVOKE_SHIFT) | + (2 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT) | + (1 << GEN6_CLIP_LINE_PROVOKE_SHIFT); + } + + BEGIN_BATCH(4); + OUT_BATCH(CMD_3D_CLIP_STATE << 16 | (4 - 2)); + OUT_BATCH(GEN6_CLIP_STATISTICS_ENABLE); + OUT_BATCH(GEN6_CLIP_ENABLE | + GEN6_CLIP_API_OGL | + GEN6_CLIP_MODE_REJECT_ALL | /* XXX: debug: get VS working */ + GEN6_CLIP_XY_TEST | + depth_clamp | + provoking); + OUT_BATCH(0); + ADVANCE_BATCH(); + + intel_batchbuffer_emit_mi_flush(intel->batch); +} + +const struct brw_tracked_state gen6_clip_state = { + .dirty = { + .mesa = _NEW_TRANSFORM, + .brw = BRW_NEW_CONTEXT, + .cache = 0 + }, + .emit = upload_clip_state, +}; diff --git a/i965/gen6_depthstencil.c b/i965/gen6_depthstencil.c new file mode 100644 index 0000000..4924f0f --- /dev/null +++ b/i965/gen6_depthstencil.c @@ -0,0 +1,165 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +#include "brw_context.h" +#include "brw_state.h" + +struct brw_depth_stencil_state_key { + GLenum depth_func; + GLboolean depth_test, depth_write; + GLboolean stencil, stencil_two_side; + GLenum stencil_func[2], stencil_fail_op[2]; + GLenum stencil_pass_depth_fail_op[2], stencil_pass_depth_pass_op[2]; + GLubyte stencil_write_mask[2], stencil_test_mask[2]; +}; + +static void +depth_stencil_state_populate_key(struct brw_context *brw, + struct brw_depth_stencil_state_key *key) +{ + GLcontext *ctx = &brw->intel.ctx; + const unsigned back = ctx->Stencil._BackFace; + + memset(key, 0, sizeof(*key)); + + /* _NEW_STENCIL */ + key->stencil = ctx->Stencil._Enabled; + key->stencil_two_side = ctx->Stencil._TestTwoSide; + + if (key->stencil) { + key->stencil_func[0] = ctx->Stencil.Function[0]; + key->stencil_fail_op[0] = ctx->Stencil.FailFunc[0]; + key->stencil_pass_depth_fail_op[0] = ctx->Stencil.ZFailFunc[0]; + key->stencil_pass_depth_pass_op[0] = ctx->Stencil.ZPassFunc[0]; + key->stencil_write_mask[0] = ctx->Stencil.WriteMask[0]; + key->stencil_test_mask[0] = ctx->Stencil.ValueMask[0]; + } + if (key->stencil_two_side) { + key->stencil_func[1] = ctx->Stencil.Function[back]; + key->stencil_fail_op[1] = ctx->Stencil.FailFunc[back]; + key->stencil_pass_depth_fail_op[1] = ctx->Stencil.ZFailFunc[back]; + key->stencil_pass_depth_pass_op[1] = ctx->Stencil.ZPassFunc[back]; + key->stencil_write_mask[1] = ctx->Stencil.WriteMask[back]; + key->stencil_test_mask[1] = ctx->Stencil.ValueMask[back]; + } + + key->depth_test = ctx->Depth.Test; + if (key->depth_test) { + key->depth_func = ctx->Depth.Func; + key->depth_write = ctx->Depth.Mask; + } +} + +/** + * Creates the state cache entry for the given DEPTH_STENCIL_STATE state key. 
+ */ +static dri_bo * +depth_stencil_state_create_from_key(struct brw_context *brw, + struct brw_depth_stencil_state_key *key) +{ + struct gen6_depth_stencil_state ds; + dri_bo *bo; + + memset(&ds, 0, sizeof(ds)); + + /* _NEW_STENCIL */ + if (key->stencil) { + ds.ds0.stencil_enable = 1; + ds.ds0.stencil_func = + intel_translate_compare_func(key->stencil_func[0]); + ds.ds0.stencil_fail_op = + intel_translate_stencil_op(key->stencil_fail_op[0]); + ds.ds0.stencil_pass_depth_fail_op = + intel_translate_stencil_op(key->stencil_pass_depth_fail_op[0]); + ds.ds0.stencil_pass_depth_pass_op = + intel_translate_stencil_op(key->stencil_pass_depth_pass_op[0]); + ds.ds1.stencil_write_mask = key->stencil_write_mask[0]; + ds.ds1.stencil_test_mask = key->stencil_test_mask[0]; + + if (key->stencil_two_side) { + ds.ds0.bf_stencil_enable = 1; + ds.ds0.bf_stencil_func = + intel_translate_compare_func(key->stencil_func[1]); + ds.ds0.bf_stencil_fail_op = + intel_translate_stencil_op(key->stencil_fail_op[1]); + ds.ds0.bf_stencil_pass_depth_fail_op = + intel_translate_stencil_op(key->stencil_pass_depth_fail_op[1]); + ds.ds0.bf_stencil_pass_depth_pass_op = + intel_translate_stencil_op(key->stencil_pass_depth_pass_op[1]); + ds.ds1.bf_stencil_write_mask = key->stencil_write_mask[1]; + ds.ds1.bf_stencil_test_mask = key->stencil_test_mask[1]; + } + + /* Not really sure about this: + */ + if (key->stencil_write_mask[0] || + (key->stencil_two_side && key->stencil_write_mask[1])) + ds.ds0.stencil_write_enable = 1; + } + + /* _NEW_DEPTH */ + if (key->depth_test) { + ds.ds2.depth_test_enable = 1; + ds.ds2.depth_test_func = intel_translate_compare_func(key->depth_func); + ds.ds2.depth_write_enable = key->depth_write; + } + + bo = brw_upload_cache(&brw->cache, BRW_DEPTH_STENCIL_STATE, + key, sizeof(*key), + NULL, 0, + &ds, sizeof(ds)); + + return bo; +} + +static void +prepare_depth_stencil_state(struct brw_context *brw) +{ + struct brw_depth_stencil_state_key key; + + depth_stencil_state_populate_key(brw, &key); + + dri_bo_unreference(brw->cc.depth_stencil_state_bo); + brw->cc.depth_stencil_state_bo = brw_search_cache(&brw->cache, + BRW_DEPTH_STENCIL_STATE, + &key, sizeof(key), + NULL, 0, + NULL); + + if (brw->cc.depth_stencil_state_bo == NULL) + brw->cc.depth_stencil_state_bo = + depth_stencil_state_create_from_key(brw, &key); +} + +const struct brw_tracked_state gen6_depth_stencil_state = { + .dirty = { + .mesa = _NEW_DEPTH | _NEW_STENCIL, + .brw = 0, + .cache = 0, + }, + .prepare = prepare_depth_stencil_state, +}; diff --git a/i965/gen6_gs_state.c b/i965/gen6_gs_state.c new file mode 100644 index 0000000..161e7b8 --- /dev/null +++ b/i965/gen6_gs_state.c @@ -0,0 +1,91 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "intel_batchbuffer.h" + +static void +upload_gs_state(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + + /* Disable all the constant buffers. */ + BEGIN_BATCH(5); + OUT_BATCH(CMD_3D_CONSTANT_GS_STATE << 16 | (5 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + + intel_batchbuffer_emit_mi_flush(intel->batch); + + if (brw->gs.prog_bo) { + BEGIN_BATCH(7); + OUT_BATCH(CMD_3D_GS_STATE << 16 | (7 - 2)); + OUT_RELOC(brw->gs.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_BATCH((0 << GEN6_GS_SAMPLER_COUNT_SHIFT) | + (0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); + OUT_BATCH(0); /* scratch space base offset */ + OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) | + (brw->gs.prog_data->urb_read_length << GEN6_GS_URB_READ_LENGTH_SHIFT) | + (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT)); + OUT_BATCH((0 << GEN6_GS_MAX_THREADS_SHIFT) | + GEN6_GS_STATISTICS_ENABLE | + GEN6_GS_RENDERING_ENABLE); + OUT_BATCH(GEN6_GS_ENABLE); + ADVANCE_BATCH(); + } else { + BEGIN_BATCH(7); + OUT_BATCH(CMD_3D_GS_STATE << 16 | (7 - 2)); + OUT_BATCH(0); /* prog_bo */ + OUT_BATCH((0 << GEN6_GS_SAMPLER_COUNT_SHIFT) | + (0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); + OUT_BATCH(0); /* scratch space base offset */ + OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) | + (0 << GEN6_GS_URB_READ_LENGTH_SHIFT) | + (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT)); + OUT_BATCH((0 << GEN6_GS_MAX_THREADS_SHIFT) | + GEN6_GS_STATISTICS_ENABLE | + GEN6_GS_RENDERING_ENABLE); + OUT_BATCH(0); + ADVANCE_BATCH(); + } +} + +const struct brw_tracked_state gen6_gs_state = { + .dirty = { + .mesa = _NEW_TRANSFORM, + .brw = (BRW_NEW_CURBE_OFFSETS | + BRW_NEW_URB_FENCE | + BRW_NEW_CONTEXT), + .cache = CACHE_NEW_GS_PROG + }, + .emit = upload_gs_state, +}; diff --git a/i965/gen6_sampler_state.c b/i965/gen6_sampler_state.c new file mode 100644 index 0000000..ab8e751 --- /dev/null +++ b/i965/gen6_sampler_state.c @@ -0,0 +1,71 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "intel_batchbuffer.h" + +static void +upload_sampler_state_pointers(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + + BEGIN_BATCH(4); + OUT_BATCH(CMD_3D_SAMPLER_STATE_POINTERS << 16 | + VS_SAMPLER_STATE_CHANGE | + GS_SAMPLER_STATE_CHANGE | + PS_SAMPLER_STATE_CHANGE | + (4 - 2)); + OUT_BATCH(0); /* VS */ + OUT_BATCH(0); /* GS */ + if (brw->wm.sampler_bo) + OUT_RELOC(brw->wm.sampler_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + else + OUT_BATCH(0); + + ADVANCE_BATCH(); + + intel_batchbuffer_emit_mi_flush(intel->batch); +} + + +static void +prepare_sampler_state_pointers(struct brw_context *brw) +{ + brw_add_validated_bo(brw, brw->wm.sampler_bo); +} + +const struct brw_tracked_state gen6_sampler_state = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_BATCH, + .cache = CACHE_NEW_SAMPLER + }, + .prepare = prepare_sampler_state_pointers, + .emit = upload_sampler_state_pointers, +}; diff --git a/i965/gen6_scissor_state.c b/i965/gen6_scissor_state.c new file mode 100644 index 0000000..2e21e5f --- /dev/null +++ b/i965/gen6_scissor_state.c @@ -0,0 +1,105 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "intel_batchbuffer.h" + +static void +prepare_scissor_state(struct brw_context *brw) +{ + GLcontext *ctx = &brw->intel.ctx; + const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0); + struct gen6_scissor_state scissor; + + /* _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT */ + + /* The scissor only needs to handle the intersection of drawable and + * scissor rect. Clipping to the boundaries of static shared buffers + * for front/back/depth is covered by looping over cliprects in brw_draw.c. + * + * Note that the hardware's coordinates are inclusive, while Mesa's min is + * inclusive but max is exclusive. 
+ */ + if (render_to_fbo) { + /* texmemory: Y=0=bottom */ + scissor.xmin = ctx->DrawBuffer->_Xmin; + scissor.xmax = ctx->DrawBuffer->_Xmax - 1; + scissor.ymin = ctx->DrawBuffer->_Ymin; + scissor.ymax = ctx->DrawBuffer->_Ymax - 1; + } + else { + /* memory: Y=0=top */ + scissor.xmin = ctx->DrawBuffer->_Xmin; + scissor.xmax = ctx->DrawBuffer->_Xmax - 1; + scissor.ymin = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymax; + scissor.ymax = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymin - 1; + } + + drm_intel_bo_unreference(brw->sf.state_bo); + brw->sf.state_bo = brw_cache_data(&brw->cache, BRW_SF_UNIT, + &scissor, sizeof(scissor), + NULL, 0); +} + +const struct brw_tracked_state gen6_scissor_state = { + .dirty = { + .mesa = _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT, + .brw = 0, + .cache = 0, + }, + .prepare = prepare_scissor_state, +}; + +static void upload_scissor_state_pointers(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + + BEGIN_BATCH(2); + OUT_BATCH(CMD_3D_SCISSOR_STATE_POINTERS << 16 | (2 - 2)); + OUT_RELOC(brw->sf.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + ADVANCE_BATCH(); + + intel_batchbuffer_emit_mi_flush(intel->batch); +} + + +static void prepare_scissor_state_pointers(struct brw_context *brw) +{ + brw_add_validated_bo(brw, brw->sf.state_bo); +} + +const struct brw_tracked_state gen6_scissor_state_pointers = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_BATCH, + .cache = CACHE_NEW_SF_UNIT + }, + .prepare = prepare_scissor_state_pointers, + .emit = upload_scissor_state_pointers, +}; diff --git a/i965/gen6_sf_state.c b/i965/gen6_sf_state.c new file mode 100644 index 0000000..8d96b44 --- /dev/null +++ b/i965/gen6_sf_state.c @@ -0,0 +1,187 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "brw_util.h" +#include "main/macros.h" +#include "intel_batchbuffer.h" + +static uint32_t +get_attr_override(struct brw_context *brw, int attr) +{ + uint32_t attr_override; + int attr_index = 0, i; + + /* Find the source index (0 = first attribute after the 4D position) + * for this output attribute. attr is currently a VERT_RESULT_* but should + * be FRAG_ATTRIB_*. 
+ */ + for (i = 0; i < attr; i++) { + if (brw->vs.prog_data->outputs_written & BITFIELD64_BIT(i)) + attr_index++; + } + attr_override = attr_index; + + return attr_index; +} + +static void +upload_sf_state(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + GLcontext *ctx = &intel->ctx; + /* CACHE_NEW_VS_PROG */ + uint32_t num_inputs = brw_count_bits(brw->vs.prog_data->outputs_written); + /* This should probably be FS inputs read */ + uint32_t num_outputs = brw_count_bits(brw->vs.prog_data->outputs_written); + uint32_t dw1, dw2, dw3, dw4; + int i; + /* _NEW_BUFFER */ + GLboolean render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0; + int attr = 0; + + dw1 = + num_outputs << GEN6_SF_NUM_OUTPUTS_SHIFT | + (num_inputs + 1) / 2 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT | + 3 << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT; + dw2 = GEN6_SF_VIEWPORT_TRANSFORM_ENABLE | + GEN6_SF_STATISTICS_ENABLE; + dw3 = 0; + dw4 = 0; + + /* _NEW_POLYGON */ + if ((ctx->Polygon.FrontFace == GL_CCW) ^ render_to_fbo) + dw2 |= GEN6_SF_WINDING_CCW; + + /* _NEW_SCISSOR */ + if (ctx->Scissor.Enabled) + dw3 |= GEN6_SF_SCISSOR_ENABLE; + + /* _NEW_POLYGON */ + if (ctx->Polygon.CullFlag) { + switch (ctx->Polygon.CullFaceMode) { + case GL_FRONT: + dw3 |= GEN6_SF_CULL_BOTH; + break; + case GL_BACK: + dw3 |= GEN6_SF_CULL_BACK; + break; + case GL_FRONT_AND_BACK: + dw3 |= GEN6_SF_CULL_BOTH; + break; + default: + assert(0); + break; + } + } else { + dw3 |= GEN6_SF_CULL_NONE; + } + + /* _NEW_LINE */ + dw3 |= U_FIXED(CLAMP(ctx->Line.Width, 0.0, 7.99), 7) << + GEN6_SF_LINE_WIDTH_SHIFT; + if (ctx->Line.SmoothFlag) { + dw3 |= GEN6_SF_LINE_AA_ENABLE; + dw3 |= GEN6_SF_LINE_AA_MODE_TRUE; + dw3 |= GEN6_SF_LINE_END_CAP_WIDTH_1_0; + } + + /* _NEW_POINT */ + if (ctx->Point._Attenuated) + dw4 |= GEN6_SF_USE_STATE_POINT_WIDTH; + + dw4 |= U_FIXED(CLAMP(ctx->Point.Size, 0.125, 225.875), 3) << + GEN6_SF_POINT_WIDTH_SHIFT; + if (render_to_fbo) + dw1 |= GEN6_SF_POINT_SPRITE_LOWERLEFT; + + /* _NEW_LIGHT */ + if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) { + dw4 |= + (2 << GEN6_SF_TRI_PROVOKE_SHIFT) | + (2 << GEN6_SF_TRIFAN_PROVOKE_SHIFT) | + (1 << GEN6_SF_LINE_PROVOKE_SHIFT); + } else { + dw4 |= + (1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT); + } + + BEGIN_BATCH(20); + OUT_BATCH(CMD_3D_SF_STATE << 16 | (20 - 2)); + OUT_BATCH(dw1); + OUT_BATCH(dw2); + OUT_BATCH(dw3); + OUT_BATCH(dw4); + OUT_BATCH_F(ctx->Polygon.OffsetUnits * 2); /* constant. 
copied from gen4 */ + OUT_BATCH_F(ctx->Polygon.OffsetFactor); /* scale */ + OUT_BATCH_F(0.0); /* XXX: global depth offset clamp */ + for (i = 0; i < 8; i++) { + uint32_t attr_overrides = 0; + + /* These should be generating FS inputs read instead of VS + * outputs written + */ + for (; attr < 64; attr++) { + if (brw->vs.prog_data->outputs_written & BITFIELD64_BIT(attr)) { + attr_overrides |= get_attr_override(brw, attr); + attr++; + break; + } + } + + for (; attr < 64; attr++) { + if (brw->vs.prog_data->outputs_written & BITFIELD64_BIT(attr)) { + attr_overrides |= get_attr_override(brw, attr) << 16; + attr++; + break; + } + } + OUT_BATCH(attr_overrides); + } + OUT_BATCH(0); /* point sprite texcoord bitmask */ + OUT_BATCH(0); /* constant interp bitmask */ + OUT_BATCH(0); /* wrapshortest enables 0-7 */ + OUT_BATCH(0); /* wrapshortest enables 8-15 */ + ADVANCE_BATCH(); + + intel_batchbuffer_emit_mi_flush(intel->batch); +} + +const struct brw_tracked_state gen6_sf_state = { + .dirty = { + .mesa = (_NEW_LIGHT | + _NEW_POLYGON | + _NEW_LINE | + _NEW_SCISSOR | + _NEW_BUFFERS), + .brw = BRW_NEW_CONTEXT, + .cache = CACHE_NEW_VS_PROG + }, + .emit = upload_sf_state, +}; diff --git a/i965/gen6_urb.c b/i965/gen6_urb.c new file mode 100644 index 0000000..5445e40 --- /dev/null +++ b/i965/gen6_urb.c @@ -0,0 +1,83 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +#include "main/macros.h" +#include "intel_batchbuffer.h" +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" + +static void +prepare_urb( struct brw_context *brw ) +{ + brw->urb.nr_vs_entries = 24; + if (brw->gs.prog_bo) + brw->urb.nr_gs_entries = 4; + else + brw->urb.nr_gs_entries = 0; + /* CACHE_NEW_VS_PROG */ + brw->urb.vs_size = MIN2(brw->vs.prog_data->urb_entry_size, 1); + + /* Check that the number of URB rows (8 floats each) allocated is less + * than the URB space. 
+ */ + assert((brw->urb.nr_vs_entries + + brw->urb.nr_gs_entries) * brw->urb.vs_size * 8 < 64 * 1024); +} + +static void +upload_urb(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + + assert(brw->urb.nr_vs_entries % 4 == 0); + assert(brw->urb.nr_gs_entries % 4 == 0); + /* GS requirement */ + assert(!brw->gs.prog_bo || brw->urb.vs_size < 5); + + intel_batchbuffer_emit_mi_flush(intel->batch); + + BEGIN_BATCH(3); + OUT_BATCH(CMD_URB << 16 | (3 - 2)); + OUT_BATCH(((brw->urb.vs_size - 1) << GEN6_URB_VS_SIZE_SHIFT) | + ((brw->urb.nr_vs_entries) << GEN6_URB_VS_ENTRIES_SHIFT)); + OUT_BATCH(((brw->urb.vs_size - 1) << GEN6_URB_GS_SIZE_SHIFT) | + ((brw->urb.nr_gs_entries) << GEN6_URB_GS_ENTRIES_SHIFT)); + ADVANCE_BATCH(); + + intel_batchbuffer_emit_mi_flush(intel->batch); +} + +const struct brw_tracked_state gen6_urb = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_CONTEXT, + .cache = CACHE_NEW_VS_PROG, + }, + .prepare = prepare_urb, + .emit = upload_urb, +}; diff --git a/i965/gen6_viewport_state.c b/i965/gen6_viewport_state.c new file mode 100644 index 0000000..0c2aa42 --- /dev/null +++ b/i965/gen6_viewport_state.c @@ -0,0 +1,173 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "intel_batchbuffer.h" +#include "main/macros.h" + +/* The clip VP defines the guardband region where expensive clipping is skipped + * and fragments are allowed to be generated and clipped out cheaply by the SF. + * + * By setting it to NDC bounds of [-1,1], we don't do GB clipping. It's + * supposed to cause seams to become visible in apps due to shared edges taking + * different clip/no clip paths depending on whether the rest of the prim ends + * up in the guardband or not. 
+ */ +static void +prepare_clip_vp(struct brw_context *brw) +{ + struct brw_clipper_viewport vp; + + vp.xmin = -1.0; + vp.xmax = 1.0; + vp.ymin = -1.0; + vp.ymax = 1.0; + + drm_intel_bo_unreference(brw->clip.vp_bo); + brw->clip.vp_bo = brw_cache_data(&brw->cache, BRW_CLIP_VP, + &vp, sizeof(vp), + NULL, 0); +} + +const struct brw_tracked_state gen6_clip_vp = { + .dirty = { + .mesa = _NEW_VIEWPORT, /* XXX: not really, but we need nonzero */ + .brw = 0, + .cache = 0, + }, + .prepare = prepare_clip_vp, +}; + +static void +prepare_sf_vp(struct brw_context *brw) +{ + GLcontext *ctx = &brw->intel.ctx; + const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF; + struct brw_sf_viewport sfv; + GLfloat y_scale, y_bias; + const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0); + const GLfloat *v = ctx->Viewport._WindowMap.m; + + memset(&sfv, 0, sizeof(sfv)); + + /* _NEW_BUFFERS */ + if (render_to_fbo) { + y_scale = 1.0; + y_bias = 0; + } else { + y_scale = -1.0; + y_bias = ctx->DrawBuffer->Height; + } + + /* _NEW_VIEWPORT */ + sfv.viewport.m00 = v[MAT_SX]; + sfv.viewport.m11 = v[MAT_SY] * y_scale; + sfv.viewport.m22 = v[MAT_SZ] * depth_scale; + sfv.viewport.m30 = v[MAT_TX]; + sfv.viewport.m31 = v[MAT_TY] * y_scale + y_bias; + sfv.viewport.m32 = v[MAT_TZ] * depth_scale; + + drm_intel_bo_unreference(brw->sf.vp_bo); + brw->sf.vp_bo = brw_cache_data(&brw->cache, BRW_SF_VP, + &sfv, sizeof(sfv), + NULL, 0); +} + +const struct brw_tracked_state gen6_sf_vp = { + .dirty = { + .mesa = _NEW_VIEWPORT | _NEW_BUFFERS, + .brw = 0, + .cache = 0, + }, + .prepare = prepare_sf_vp, +}; + +static void +prepare_cc_vp(struct brw_context *brw) +{ + GLcontext *ctx = &brw->intel.ctx; + struct brw_cc_viewport ccv; + + /* _NEW_TRANSOFORM */ + if (ctx->Transform.DepthClamp) { + /* _NEW_VIEWPORT */ + ccv.min_depth = MIN2(ctx->Viewport.Near, ctx->Viewport.Far); + ccv.max_depth = MAX2(ctx->Viewport.Near, ctx->Viewport.Far); + } else { + ccv.min_depth = 0.0; + ccv.max_depth = 1.0; + } + + drm_intel_bo_unreference(brw->cc.vp_bo); + brw->cc.vp_bo = brw_cache_data(&brw->cache, BRW_CC_VP, &ccv, sizeof(ccv), + NULL, 0); +} + +const struct brw_tracked_state gen6_cc_vp = { + .dirty = { + .mesa = _NEW_VIEWPORT | _NEW_TRANSFORM, + .brw = 0, + .cache = 0, + }, + .prepare = prepare_cc_vp, +}; + +static void prepare_viewport_state_pointers(struct brw_context *brw) +{ + brw_add_validated_bo(brw, brw->sf.state_bo); +} + +static void upload_viewport_state_pointers(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + + BEGIN_BATCH(4); + OUT_BATCH(CMD_VIEWPORT_STATE_POINTERS << 16 | (4 - 2) | + GEN6_CC_VIEWPORT_MODIFY | + GEN6_SF_VIEWPORT_MODIFY | + GEN6_CLIP_VIEWPORT_MODIFY); + OUT_RELOC(brw->clip.vp_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_RELOC(brw->sf.vp_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_RELOC(brw->cc.vp_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + ADVANCE_BATCH(); + + intel_batchbuffer_emit_mi_flush(intel->batch); +} + +const struct brw_tracked_state gen6_viewport_state = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_BATCH, + .cache = (CACHE_NEW_CLIP_VP | + CACHE_NEW_SF_VP | + CACHE_NEW_CC_VP) + }, + .prepare = prepare_viewport_state_pointers, + .emit = upload_viewport_state_pointers, +}; diff --git a/i965/gen6_vs_state.c b/i965/gen6_vs_state.c new file mode 100644 index 0000000..fe597df --- /dev/null +++ b/i965/gen6_vs_state.c @@ -0,0 +1,119 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and 
associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "brw_util.h" +#include "shader/prog_parameter.h" +#include "shader/prog_statevars.h" +#include "intel_batchbuffer.h" + +static void +upload_vs_state(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + GLcontext *ctx = &intel->ctx; + const struct brw_vertex_program *vp = + brw_vertex_program_const(brw->vertex_program); + unsigned int nr_params = vp->program.Base.Parameters->NumParameters; + drm_intel_bo *constant_bo; + int i; + + if (vp->use_const_buffer || nr_params == 0) { + /* Disable the push constant buffers. */ + BEGIN_BATCH(5); + OUT_BATCH(CMD_3D_CONSTANT_VS_STATE << 16 | (5 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } else { + if (brw->vertex_program->IsNVProgram) + _mesa_load_tracked_matrices(ctx); + + /* Updates the ParamaterValues[i] pointers for all parameters of the + * basic type of PROGRAM_STATE_VAR. 
+ */ + _mesa_load_state_parameters(ctx, vp->program.Base.Parameters); + + constant_bo = drm_intel_bo_alloc(intel->bufmgr, "VS constant_bo", + nr_params * 4 * sizeof(float), + 4096); + drm_intel_gem_bo_map_gtt(constant_bo); + for (i = 0; i < nr_params; i++) { + memcpy((char *)constant_bo->virtual + i * 4 * sizeof(float), + vp->program.Base.Parameters->ParameterValues[i], + 4 * sizeof(float)); + } + drm_intel_gem_bo_unmap_gtt(constant_bo); + + BEGIN_BATCH(5); + OUT_BATCH(CMD_3D_CONSTANT_VS_STATE << 16 | + GEN6_CONSTANT_BUFFER_0_ENABLE | + (5 - 2)); + OUT_RELOC(constant_bo, + I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */ + ALIGN(nr_params, 2) / 2 - 1); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + + drm_intel_bo_unreference(constant_bo); + } + + intel_batchbuffer_emit_mi_flush(intel->batch); + + BEGIN_BATCH(6); + OUT_BATCH(CMD_3D_VS_STATE << 16 | (6 - 2)); + OUT_RELOC(brw->vs.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) | + (brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); + OUT_BATCH(0); /* scratch space base offset */ + OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) | + (brw->vs.prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) | + (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT)); + OUT_BATCH((0 << GEN6_VS_MAX_THREADS_SHIFT) | + GEN6_VS_STATISTICS_ENABLE); + ADVANCE_BATCH(); + + intel_batchbuffer_emit_mi_flush(intel->batch); +} + +const struct brw_tracked_state gen6_vs_state = { + .dirty = { + .mesa = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS, + .brw = (BRW_NEW_CURBE_OFFSETS | + BRW_NEW_NR_VS_SURFACES | + BRW_NEW_URB_FENCE | + BRW_NEW_CONTEXT), + .cache = CACHE_NEW_VS_PROG + }, + .emit = upload_vs_state, +}; diff --git a/i965/gen6_wm_state.c b/i965/gen6_wm_state.c new file mode 100644 index 0000000..1eb17ca --- /dev/null +++ b/i965/gen6_wm_state.c @@ -0,0 +1,160 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "brw_util.h" +#include "shader/prog_parameter.h" +#include "shader/prog_statevars.h" +#include "intel_batchbuffer.h" + +static void +upload_wm_state(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + GLcontext *ctx = &intel->ctx; + const struct brw_fragment_program *fp = + brw_fragment_program_const(brw->fragment_program); + unsigned int nr_params = fp->program.Base.Parameters->NumParameters; + drm_intel_bo *constant_bo; + int i; + uint32_t dw2, dw4, dw5, dw6; + + if (fp->use_const_buffer || nr_params == 0) { + /* Disable the push constant buffers. */ + BEGIN_BATCH(5); + OUT_BATCH(CMD_3D_CONSTANT_PS_STATE << 16 | (5 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } else { + /* Updates the ParamaterValues[i] pointers for all parameters of the + * basic type of PROGRAM_STATE_VAR. + */ + _mesa_load_state_parameters(ctx, fp->program.Base.Parameters); + + constant_bo = drm_intel_bo_alloc(intel->bufmgr, "WM constant_bo", + nr_params * 4 * sizeof(float), + 4096); + drm_intel_gem_bo_map_gtt(constant_bo); + for (i = 0; i < nr_params; i++) { + memcpy((char *)constant_bo->virtual + i * 4 * sizeof(float), + fp->program.Base.Parameters->ParameterValues[i], + 4 * sizeof(float)); + } + drm_intel_gem_bo_unmap_gtt(constant_bo); + + BEGIN_BATCH(5); + OUT_BATCH(CMD_3D_CONSTANT_PS_STATE << 16 | + GEN6_CONSTANT_BUFFER_0_ENABLE | + (5 - 2)); + OUT_RELOC(constant_bo, + I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */ + ALIGN(nr_params, 2) / 2 - 1); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + + drm_intel_bo_unreference(constant_bo); + } + + intel_batchbuffer_emit_mi_flush(intel->batch); + + dw2 = dw4 = dw5 = dw6 = 0; + dw4 |= GEN6_WM_STATISTICS_ENABLE; + dw5 |= GEN6_WM_LINE_AA_WIDTH_1_0; + dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5; + + /* BRW_NEW_NR_SURFACES */ + dw2 |= brw->wm.nr_surfaces << GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT; + + /* CACHE_NEW_SAMPLER */ + dw2 |= (ALIGN(brw->wm.sampler_count, 4) / 4) << GEN6_WM_SAMPLER_COUNT_SHIFT; + dw4 |= (1 << GEN6_WM_DISPATCH_START_GRF_SHIFT_0); + + dw5 |= (40 - 1) << GEN6_WM_MAX_THREADS_SHIFT; + dw5 |= GEN6_WM_DISPATCH_ENABLE; + + /* BRW_NEW_FRAGMENT_PROGRAM */ + if (fp->isGLSL) + dw5 |= GEN6_WM_8_DISPATCH_ENABLE; + else + dw5 |= GEN6_WM_16_DISPATCH_ENABLE; + + /* _NEW_LINE */ + if (ctx->Line.StippleFlag) + dw5 |= GEN6_WM_LINE_STIPPLE_ENABLE; + + /* _NEW_POLYGONSTIPPLE */ + if (ctx->Polygon.StippleFlag) + dw5 |= GEN6_WM_POLYGON_STIPPLE_ENABLE; + + /* BRW_NEW_FRAGMENT_PROGRAM */ + if (fp->program.Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) + dw5 |= GEN6_WM_USES_SOURCE_DEPTH | GEN6_WM_USES_SOURCE_W; + if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) + dw5 |= GEN6_WM_COMPUTED_DEPTH; + + /* _NEW_COLOR */ + if (fp->program.UsesKill || ctx->Color.AlphaEnabled) + dw5 |= GEN6_WM_KILL_ENABLE; + + /* This should probably be FS inputs read */ + dw6 |= brw_count_bits(brw->vs.prog_data->outputs_written) << + GEN6_WM_NUM_SF_OUTPUTS_SHIFT; + + BEGIN_BATCH(9); + OUT_BATCH(CMD_3D_WM_STATE << 16 | (9 - 2)); + OUT_RELOC(brw->wm.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_BATCH(dw2); + OUT_BATCH(0); /* scratch space base offset */ + OUT_BATCH(dw4); + OUT_BATCH(dw5); + OUT_BATCH(dw6); + OUT_BATCH(0); /* kernel 1 pointer */ + OUT_BATCH(0); /* kernel 2 pointer */ + ADVANCE_BATCH(); + + 
intel_batchbuffer_emit_mi_flush(intel->batch); +} + +const struct brw_tracked_state gen6_wm_state = { + .dirty = { + .mesa = _NEW_LINE | _NEW_POLYGONSTIPPLE | _NEW_COLOR, + .brw = (BRW_NEW_CURBE_OFFSETS | + BRW_NEW_FRAGMENT_PROGRAM | + BRW_NEW_NR_WM_SURFACES | + BRW_NEW_URB_FENCE | + BRW_NEW_BATCH), + .cache = CACHE_NEW_SAMPLER + }, + .emit = upload_wm_state, +};
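
For reference: the gen6 blend, color-calc and depth/stencil atoms above all share one prepare-time pattern — fill a plain key struct from GL state, look it up in the BRW state cache, and only pack and upload a new state object on a miss. The standalone sketch below restates that flow; the linear cache, blend_key and the packed words are hypothetical stand-ins for brw_cache/brw_upload_cache and struct gen6_blend_state, not the driver's actual API.

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

/* Hypothetical stand-ins for gen6_blend_state_key and the BRW state cache;
 * only the control flow mirrors prepare_blend_state() in the diff above. */
struct blend_key {
    uint32_t logic_op;
    uint32_t src_factor, dst_factor;
    uint32_t blend_enable, dither;
};

struct cache_entry {
    struct blend_key key;
    void *payload;                    /* the driver would hold a GPU buffer object */
    struct cache_entry *next;
};

static struct cache_entry *cache_head;

static void *search_cache(const struct blend_key *key)
{
    for (struct cache_entry *e = cache_head; e; e = e->next)
        if (memcmp(&e->key, key, sizeof(*key)) == 0)
            return e->payload;
    return NULL;
}

static void *upload_cache(const struct blend_key *key, const void *data, size_t size)
{
    struct cache_entry *e = malloc(sizeof(*e));
    e->key = *key;
    e->payload = malloc(size);
    memcpy(e->payload, data, size);
    e->next = cache_head;
    cache_head = e;
    return e->payload;
}

/* prepare_blend_state() analogue: derive key, search, create on a miss. */
static void *prepare_blend_state(const struct blend_key *key)
{
    void *bo = search_cache(key);
    if (bo == NULL) {
        uint32_t packed[2] = { 0, 0 };        /* stand-in for struct gen6_blend_state */
        packed[0] = key->blend_enable | (key->src_factor << 1) | (key->dst_factor << 6);
        packed[1] = key->dither << 31;
        bo = upload_cache(key, packed, sizeof(packed));
    }
    return bo;
}

int main(void)
{
    struct blend_key k = { .logic_op = 0x1503 /* GL_COPY */, .src_factor = 1,
                           .dst_factor = 0, .blend_enable = 1, .dither = 0 };
    void *a = prepare_blend_state(&k);
    void *b = prepare_blend_state(&k);        /* second call is a cache hit */
    return (a == b) ? 0 : 1;
}

Repeated draws with unchanged color state then return the same object, which is what lets the driver avoid re-packing and re-uploading BLEND_STATE on every state flush.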
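
gen6_scissor_state.c converts Mesa's drawable/scissor intersection (min inclusive, max exclusive, Y up when rendering to an FBO) into the hardware's fully inclusive rectangle, flipping Y for window-system buffers whose origin is the top-left corner. A minimal restatement of that conversion, with made-up field names rather than the driver's structs:

#include <stdio.h>

struct scissor_rect { int xmin, xmax, ymin, ymax; };   /* inclusive, like the HW */

/* Mesa-style bounds: [xmin, xmax) x [ymin, ymax), Y = 0 at the bottom. */
static struct scissor_rect
to_hw_scissor(int xmin, int xmax, int ymin, int ymax,
              int fb_height, int render_to_fbo)
{
    struct scissor_rect r;
    r.xmin = xmin;
    r.xmax = xmax - 1;                      /* exclusive max -> inclusive max */
    if (render_to_fbo) {                    /* texture/FBO: Y = 0 = bottom, no flip */
        r.ymin = ymin;
        r.ymax = ymax - 1;
    } else {                                /* window: Y = 0 = top, flip */
        r.ymin = fb_height - ymax;
        r.ymax = fb_height - ymin - 1;
    }
    return r;
}

int main(void)
{
    /* 100x100 scissor at the bottom-left of a 480-high window. */
    struct scissor_rect r = to_hw_scissor(0, 100, 0, 100, 480, 0);
    printf("%d..%d x %d..%d\n", r.xmin, r.xmax, r.ymin, r.ymax);   /* 0..99 x 380..479 */
    return 0;
}

The "- 1" on the max edges is the exclusive-to-inclusive adjustment the comment in the diff refers to.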
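
3DSTATE_SF packs the line width and point size as unsigned fixed-point values (7 and 3 fractional bits respectively) after clamping them to the ranges seen in upload_sf_state(). A sketch of that packing, assuming Mesa's U_FIXED simply scales by 2^frac_bits and truncates:

#include <stdint.h>
#include <stdio.h>

/* Assumed behaviour of the U_FIXED/CLAMP helpers used in upload_sf_state():
 * clamp to a range, then scale by 2^frac_bits and truncate. */
static uint32_t u_fixed(float value, int frac_bits)
{
    return (uint32_t)(value * (float)(1 << frac_bits));
}

static float clampf(float v, float lo, float hi)
{
    return v < lo ? lo : (v > hi ? hi : v);
}

int main(void)
{
    float line_width = 2.5f;     /* GL line width */
    float point_size = 64.0f;    /* GL point size */

    /* Line width uses 7 fractional bits, point size 3, per the clamps above. */
    uint32_t lw_bits = u_fixed(clampf(line_width, 0.0f, 7.99f), 7);
    uint32_t ps_bits = u_fixed(clampf(point_size, 0.125f, 225.875f), 3);

    printf("line width bits: 0x%x (%.3f)\n", lw_bits, lw_bits / 128.0f);
    printf("point size bits: 0x%x (%.3f)\n", ps_bits, ps_bits / 8.0f);
    return 0;
}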