diff options
author | Luc Verhaegen <libv@skynet.be> | 2010-03-14 05:54:10 +0100 |
---|---|---|
committer | Luc Verhaegen <libv@skynet.be> | 2010-03-14 05:54:10 +0100 |
commit | 4cfe98db595fd3876e252bd23ed48bf82836f795 (patch) | |
tree | 4a05e56b9c3ebba1f42460ffe2e6cf6e7e08eafe | |
parent | da7796a7d40ca2cd4a03e99ddf38e3a7256a401f (diff) |
Import i915 and i965 dri drivers from mesa 7.6.0.7.6.0
106 files changed, 5429 insertions, 2809 deletions
diff --git a/configure.ac b/configure.ac index b73fe88..af47813 100644 --- a/configure.ac +++ b/configure.ac @@ -1,7 +1,7 @@ # Process this file with autoconf to produce a configure script AC_PREREQ(2.57) -AC_INIT([mesa-dri-i9xx], 7.5.0, [], mesa-dri-i9xx) +AC_INIT([mesa-dri-i9xx], 7.6.0, [], mesa-dri-i9xx) AM_INIT_AUTOMAKE([dist-bzip2]) @@ -16,9 +16,8 @@ AC_PROG_CC AC_HEADER_STDC PKG_CHECK_MODULES([DRM], [libdrm >= 2.4.3]) -# changes in struct gl_fragment_program. -PKG_CHECK_MODULES([DRI], [libmesadri >= 7.5.0 libmesadri < 7.6.0 - libmesadricommon >= 7.5.0 libmesadricommon < 7.6.0]) +PKG_CHECK_MODULES([DRI], [libmesadri >= 7.6.0 libmesadri < 7.7.0 + libmesadricommon >= 7.6.0 libmesadricommon < 7.7.0]) AC_OUTPUT([ Makefile diff --git a/i915/Makefile.am b/i915/Makefile.am index 2dc608c..451e9fd 100644 --- a/i915/Makefile.am +++ b/i915/Makefile.am @@ -20,6 +20,7 @@ i915_dri_la_SOURCES = \ ../shared/intel_batchbuffer.c \ ../shared/intel_clear.c \ ../shared/intel_extensions.c \ + ../shared/intel_generatemipmap.c \ ../shared/intel_mipmap_tree.c \ ../shared/intel_tex_layout.c \ ../shared/intel_tex_image.c \ @@ -32,7 +33,7 @@ i915_dri_la_SOURCES = \ ../shared/intel_pixel_bitmap.c \ ../shared/intel_pixel_copy.c \ ../shared/intel_pixel_draw.c \ - intel_pixel_read.c \ + ../shared/intel_pixel_read.c \ ../shared/intel_buffers.c \ ../shared/intel_blit.c \ ../shared/intel_swapbuffers.c \ @@ -51,5 +52,6 @@ i915_dri_la_SOURCES = \ ../shared/intel_screen.c \ ../shared/intel_span.c \ ../shared/intel_state.c \ + ../shared/intel_syncobj.c \ intel_tris.c \ ../shared/intel_fbo.c diff --git a/i915/i830_context.h b/i915/i830_context.h index 1bdb320..f73cbbf 100644 --- a/i915/i830_context.h +++ b/i915/i830_context.h @@ -40,6 +40,7 @@ #define I830_UPLOAD_BUFFERS 0x2 #define I830_UPLOAD_STIPPLE 0x4 #define I830_UPLOAD_INVARIENT 0x8 +#define I830_UPLOAD_RASTER_RULES 0x10 #define I830_UPLOAD_TEX(i) (0x10<<(i)) #define I830_UPLOAD_TEXBLEND(i) (0x100<<(i)) #define I830_UPLOAD_TEX_ALL (0x0f0) @@ -99,6 +100,11 @@ #define I830_TEXBLEND_SIZE 12 /* (4 args + op) * 2 + COLOR_FACTOR */ +enum { + I830_RASTER_RULES, + I830_RASTER_RULES_SIZE +}; + struct i830_texture_object { struct intel_texture_object intel; @@ -112,6 +118,7 @@ struct i830_hw_state GLuint Ctx[I830_CTX_SETUP_SIZE]; GLuint Buffer[I830_DEST_SETUP_SIZE]; GLuint Stipple[I830_STP_SETUP_SIZE]; + GLuint RasterRules[I830_RASTER_RULES_SIZE]; GLuint Tex[I830_TEX_UNITS][I830_TEX_SETUP_SIZE]; GLuint TexBlend[I830_TEX_UNITS][I830_TEXBLEND_SIZE]; GLuint TexBlendWordsUsed[I830_TEX_UNITS]; @@ -197,6 +204,7 @@ extern void i830InitStateFuncs(struct dd_function_table *functions); extern void i830EmitState(struct i830_context *i830); extern void i830InitState(struct i830_context *i830); +extern void i830_update_provoking_vertex(GLcontext *ctx); /* i830_metaops.c */ diff --git a/i915/i830_reg.h b/i915/i830_reg.h index d210c2d..ae13170 100644 --- a/i915/i830_reg.h +++ b/i915/i830_reg.h @@ -48,19 +48,6 @@ #define AA_LINE_ENABLE ((1<<1) | 1) #define AA_LINE_DISABLE (1<<1) -#define _3DSTATE_BUF_INFO_CMD (CMD_3D | (0x1d<<24) | (0x8e<<16) | 1) -/* Dword 1 */ -#define BUF_3D_ID_COLOR_BACK (0x3<<24) -#define BUF_3D_ID_DEPTH (0x7<<24) -#define BUF_3D_USE_FENCE (1<<23) -#define BUF_3D_TILED_SURFACE (1<<22) -#define BUF_3D_TILE_WALK_X 0 -#define BUF_3D_TILE_WALK_Y (1<<21) -#define BUF_3D_PITCH(x) (((x)/4)<<2) -/* Dword 2 */ -#define BUF_3D_ADDR(x) ((x) & ~0x3) - - #define _3DSTATE_COLOR_FACTOR_CMD (CMD_3D | (0x1d<<24) | (0x1<<16)) #define _3DSTATE_COLOR_FACTOR_N_CMD(stage) (CMD_3D | (0x1d<<24) | \ @@ -433,8 +420,11 @@ #define ENABLE_LINE_STRIP_PROVOKE_VRTX (1<<8) #define ENABLE_TRI_FAN_PROVOKE_VRTX (1<<5) #define ENABLE_TRI_STRIP_PROVOKE_VRTX (1<<2) +#define LINE_STRIP_PROVOKE_VRTX_MASK (3<<6) #define LINE_STRIP_PROVOKE_VRTX(x) ((x)<<6) +#define TRI_FAN_PROVOKE_VRTX_MASK (3<<3) #define TRI_FAN_PROVOKE_VRTX(x) ((x)<<3) +#define TRI_STRIP_PROVOKE_VRTX_MASK (3<<0) #define TRI_STRIP_PROVOKE_VRTX(x) (x) /* _3DSTATE_SCISSOR_ENABLE, p200 */ diff --git a/i915/i830_state.c b/i915/i830_state.c index 8ef6c91..645ebe3 100644 --- a/i915/i830_state.c +++ b/i915/i830_state.c @@ -1047,6 +1047,16 @@ i830_init_packets(struct i830_context *i830) TEXBIND_SET1(TEXCOORDSRC_VTXSET_1) | TEXBIND_SET0(TEXCOORDSRC_VTXSET_0)); + i830->state.RasterRules[I830_RASTER_RULES] = (_3DSTATE_RASTER_RULES_CMD | + ENABLE_POINT_RASTER_RULE | + OGL_POINT_RASTER_RULE | + ENABLE_LINE_STRIP_PROVOKE_VRTX | + ENABLE_TRI_FAN_PROVOKE_VRTX | + ENABLE_TRI_STRIP_PROVOKE_VRTX | + LINE_STRIP_PROVOKE_VRTX(1) | + TRI_FAN_PROVOKE_VRTX(2) | + TRI_STRIP_PROVOKE_VRTX(2)); + i830->state.Stipple[I830_STPREG_ST0] = _3DSTATE_STIPPLE; @@ -1058,6 +1068,27 @@ i830_init_packets(struct i830_context *i830) i830->state.Buffer[I830_DESTREG_SR2] = 0; } +void +i830_update_provoking_vertex(GLcontext * ctx) +{ + struct i830_context *i830 = i830_context(ctx); + + I830_STATECHANGE(i830, I830_UPLOAD_RASTER_RULES); + i830->state.RasterRules[I830_RASTER_RULES] &= ~(LINE_STRIP_PROVOKE_VRTX_MASK | + TRI_FAN_PROVOKE_VRTX_MASK | + TRI_STRIP_PROVOKE_VRTX_MASK); + + /* _NEW_LIGHT */ + if (ctx->Light.ProvokingVertex == GL_LAST_VERTEX_CONVENTION) { + i830->state.RasterRules[I830_RASTER_RULES] |= (LINE_STRIP_PROVOKE_VRTX(1) | + TRI_FAN_PROVOKE_VRTX(2) | + TRI_STRIP_PROVOKE_VRTX(2)); + } else { + i830->state.RasterRules[I830_RASTER_RULES] |= (LINE_STRIP_PROVOKE_VRTX(0) | + TRI_FAN_PROVOKE_VRTX(1) | + TRI_STRIP_PROVOKE_VRTX(0)); + } +} void i830InitStateFuncs(struct dd_function_table *functions) @@ -1101,6 +1132,7 @@ i830InitState(struct i830_context *i830) i830->current = &i830->state; i830->state.emitted = 0; i830->state.active = (I830_UPLOAD_INVARIENT | + I830_UPLOAD_RASTER_RULES | I830_UPLOAD_TEXBLEND(0) | I830_UPLOAD_STIPPLE | I830_UPLOAD_CTX | I830_UPLOAD_BUFFERS); diff --git a/i915/i830_texstate.c b/i915/i830_texstate.c index 753c25b..6f998fa 100644 --- a/i915/i830_texstate.c +++ b/i915/i830_texstate.c @@ -174,14 +174,16 @@ i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) state[I830_TEXREG_TM0LI] = (_3DSTATE_LOAD_STATE_IMMEDIATE_2 | (LOAD_TEXTURE_MAP0 << unit) | 4); -/* state[I830_TEXREG_TM0S0] = (TM0S0_USE_FENCE | */ -/* t->intel.TextureOffset); */ - - state[I830_TEXREG_TM0S1] = (((firstImage->Height - 1) << TM0S1_HEIGHT_SHIFT) | ((firstImage->Width - 1) << TM0S1_WIDTH_SHIFT) | format); + if (intelObj->mt->region->tiling != I915_TILING_NONE) { + state[I830_TEXREG_TM0S1] |= TM0S1_TILED_SURFACE; + if (intelObj->mt->region->tiling == I915_TILING_Y) + state[I830_TEXREG_TM0S1] |= TM0S1_TILE_WALK; + } + state[I830_TEXREG_TM0S2] = ((((pitch / 4) - 1) << TM0S2_PITCH_SHIFT) | TM0S2_CUBE_FACE_ENA_MASK); diff --git a/i915/i830_vtbl.c b/i915/i830_vtbl.c index 3bf02de..983f672 100644 --- a/i915/i830_vtbl.c +++ b/i915/i830_vtbl.c @@ -299,7 +299,7 @@ i830_emit_invarient_state(struct intel_context *intel) { BATCH_LOCALS; - BEGIN_BATCH(30, IGNORE_CLIPRECTS); + BEGIN_BATCH(29, IGNORE_CLIPRECTS); OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD); OUT_BATCH(0); @@ -351,15 +351,6 @@ i830_emit_invarient_state(struct intel_context *intel) OUT_BATCH(_3DSTATE_MAP_COORD_TRANSFORM); OUT_BATCH(DISABLE_TEX_TRANSFORM | TEXTURE_SET(3)); - OUT_BATCH(_3DSTATE_RASTER_RULES_CMD | - ENABLE_POINT_RASTER_RULE | - OGL_POINT_RASTER_RULE | - ENABLE_LINE_STRIP_PROVOKE_VRTX | - ENABLE_TRI_FAN_PROVOKE_VRTX | - ENABLE_TRI_STRIP_PROVOKE_VRTX | - LINE_STRIP_PROVOKE_VRTX(1) | - TRI_FAN_PROVOKE_VRTX(2) | TRI_STRIP_PROVOKE_VRTX(2)); - OUT_BATCH(_3DSTATE_VERTEX_TRANSFORM); OUT_BATCH(DISABLE_VIEWPORT_TRANSFORM | DISABLE_PERSPECTIVE_DIVIDE); @@ -394,6 +385,9 @@ get_state_size(struct i830_hw_state *state) if (dirty & I830_UPLOAD_INVARIENT) sz += 40 * sizeof(int); + if (dirty & I830_UPLOAD_RASTER_RULES) + sz += sizeof(state->RasterRules); + if (dirty & I830_UPLOAD_CTX) sz += sizeof(state->Ctx); @@ -486,6 +480,11 @@ i830_emit_state(struct intel_context *intel) i830_emit_invarient_state(intel); } + if (dirty & I830_UPLOAD_RASTER_RULES) { + DBG("I830_UPLOAD_RASTER_RULES:\n"); + emit(intel, state->RasterRules, sizeof(state->RasterRules)); + } + if (dirty & I830_UPLOAD_CTX) { DBG("I830_UPLOAD_CTX:\n"); emit(intel, state->Ctx, sizeof(state->Ctx)); @@ -552,7 +551,7 @@ i830_emit_state(struct intel_context *intel) if (state->tex_buffer[i]) { OUT_RELOC(state->tex_buffer[i], I915_GEM_DOMAIN_SAMPLER, 0, - state->tex_offset[i] | TM0S0_USE_FENCE); + state->tex_offset[i]); } else if (state == &i830->meta) { assert(i == 0); @@ -634,21 +633,11 @@ i830_state_draw_region(struct intel_context *intel, /* * Set stride/cpp values */ - if (color_region) { - state->Buffer[I830_DESTREG_CBUFADDR0] = _3DSTATE_BUF_INFO_CMD; - state->Buffer[I830_DESTREG_CBUFADDR1] = - (BUF_3D_ID_COLOR_BACK | - BUF_3D_PITCH(color_region->pitch * color_region->cpp) | - BUF_3D_USE_FENCE); - } + i915_set_buf_info_for_region(&state->Buffer[I830_DESTREG_CBUFADDR0], + color_region, BUF_3D_ID_COLOR_BACK); - if (depth_region) { - state->Buffer[I830_DESTREG_DBUFADDR0] = _3DSTATE_BUF_INFO_CMD; - state->Buffer[I830_DESTREG_DBUFADDR1] = - (BUF_3D_ID_DEPTH | - BUF_3D_PITCH(depth_region->pitch * depth_region->cpp) | - BUF_3D_USE_FENCE); - } + i915_set_buf_info_for_region(&state->Buffer[I830_DESTREG_DBUFADDR0], + depth_region, BUF_3D_ID_DEPTH); /* * Compute/set I830_DESTREG_DV1 value @@ -718,26 +707,6 @@ i830_set_draw_region(struct intel_context *intel, i830_state_draw_region(intel, &i830->state, color_regions[0], depth_region); } -#if 0 -static void -i830_update_color_z_regions(intelContextPtr intel, - const intelRegion * colorRegion, - const intelRegion * depthRegion) -{ - i830ContextPtr i830 = I830_CONTEXT(intel); - - i830->state.Buffer[I830_DESTREG_CBUFADDR1] = - (BUF_3D_ID_COLOR_BACK | BUF_3D_PITCH(colorRegion->pitch) | - BUF_3D_USE_FENCE); - i830->state.Buffer[I830_DESTREG_CBUFADDR2] = colorRegion->offset; - - i830->state.Buffer[I830_DESTREG_DBUFADDR1] = - (BUF_3D_ID_DEPTH | BUF_3D_PITCH(depthRegion->pitch) | BUF_3D_USE_FENCE); - i830->state.Buffer[I830_DESTREG_DBUFADDR2] = depthRegion->offset; -} -#endif - - /* This isn't really handled at the moment. */ static void @@ -768,9 +737,10 @@ i830_assert_not_dirty( struct intel_context *intel ) } static void -i830_note_unlock( struct intel_context *intel ) +i830_invalidate_state(struct intel_context *intel, GLuint new_state) { - /* nothing */ + if (new_state & _NEW_LIGHT) + i830_update_provoking_vertex(&intel->ctx); } void @@ -787,6 +757,6 @@ i830InitVtbl(struct i830_context *i830) i830->intel.vtbl.render_start = i830_render_start; i830->intel.vtbl.render_prevalidate = i830_render_prevalidate; i830->intel.vtbl.assert_not_dirty = i830_assert_not_dirty; - i830->intel.vtbl.note_unlock = i830_note_unlock; i830->intel.vtbl.finish_batch = intel_finish_vb; + i830->intel.vtbl.invalidate_state = i830_invalidate_state; } diff --git a/i915/i915_context.c b/i915/i915_context.c index 1f9f363..3ab7d68 100644 --- a/i915/i915_context.c +++ b/i915/i915_context.c @@ -27,6 +27,7 @@ #include "i915_context.h" #include "main/imports.h" +#include "main/macros.h" #include "intel_tex.h" #include "intel_tris.h" #include "tnl/t_context.h" @@ -73,8 +74,12 @@ i915InvalidateState(GLcontext * ctx, GLuint new_state) p->params_uptodate = 0; } - if (new_state & (_NEW_FOG | _NEW_HINT | _NEW_PROGRAM)) + if (new_state & (_NEW_FOG | _NEW_HINT | _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS)) i915_update_fog(ctx); + if (new_state & (_NEW_STENCIL | _NEW_BUFFERS | _NEW_POLYGON)) + i915_update_stencil(ctx); + if (new_state & (_NEW_LIGHT)) + i915_update_provoking_vertex(ctx); } @@ -162,6 +167,9 @@ i915CreateContext(const __GLcontextModes * mesaVis, ctx->Const.FragmentProgram.MaxNativeTexIndirections = I915_MAX_TEX_INDIRECT; ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; /* I don't think we have one */ + ctx->Const.FragmentProgram.MaxEnvParams = + MIN2(ctx->Const.FragmentProgram.MaxNativeParameters, + ctx->Const.FragmentProgram.MaxEnvParams); ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE; diff --git a/i915/i915_context.h b/i915/i915_context.h index 87bbf5f..8de4a9d 100644 --- a/i915/i915_context.h +++ b/i915/i915_context.h @@ -48,6 +48,7 @@ #define I915_UPLOAD_FOG 0x20 #define I915_UPLOAD_INVARIENT 0x40 #define I915_UPLOAD_DEFAULTS 0x80 +#define I915_UPLOAD_RASTER_RULES 0x100 #define I915_UPLOAD_TEX(i) (0x00010000<<(i)) #define I915_UPLOAD_TEX_ALL (0x00ff0000) #define I915_UPLOAD_TEX_0_SHIFT 16 @@ -82,7 +83,9 @@ #define I915_CTXREG_IAB 6 #define I915_CTXREG_BLENDCOLOR0 7 #define I915_CTXREG_BLENDCOLOR1 8 -#define I915_CTX_SETUP_SIZE 9 +#define I915_CTXREG_BF_STENCIL_OPS 9 +#define I915_CTXREG_BF_STENCIL_MASKS 10 +#define I915_CTX_SETUP_SIZE 11 #define I915_FOGREG_COLOR 0 #define I915_FOGREG_MODE0 1 @@ -110,6 +113,10 @@ #define I915_DEFREG_Z1 5 #define I915_DEF_SETUP_SIZE 6 +enum { + I915_RASTER_RULES, + I915_RASTER_RULES_SETUP_SIZE, +}; #define I915_MAX_CONSTANT 32 #define I915_CONSTANT_SIZE (2+(4*I915_MAX_CONSTANT)) @@ -206,6 +213,7 @@ struct i915_hw_state GLuint Stipple[I915_STP_SETUP_SIZE]; GLuint Fog[I915_FOG_SETUP_SIZE]; GLuint Defaults[I915_DEF_SETUP_SIZE]; + GLuint RasterRules[I915_RASTER_RULES_SETUP_SIZE]; GLuint Tex[I915_TEX_UNITS][I915_TEX_SETUP_SIZE]; GLuint Constant[I915_CONSTANT_SIZE]; GLuint ConstantSize; @@ -321,6 +329,8 @@ extern void i915_print_ureg(const char *msg, GLuint ureg); extern void i915InitStateFunctions(struct dd_function_table *functions); extern void i915InitState(struct i915_context *i915); extern void i915_update_fog(GLcontext * ctx); +extern void i915_update_stencil(GLcontext * ctx); +extern void i915_update_provoking_vertex(GLcontext *ctx); /*====================================================================== diff --git a/i915/i915_reg.h b/i915/i915_reg.h index 8891e11..b5fa7fd 100644 --- a/i915/i915_reg.h +++ b/i915/i915_reg.h @@ -86,27 +86,15 @@ #define BFM_ENABLE_STENCIL_WRITE_MASK (1<<16) #define BFM_STENCIL_TEST_MASK_SHIFT 8 #define BFM_STENCIL_TEST_MASK_MASK (0xff<<8) +#define BFM_STENCIL_TEST_MASK(x) (((x)&0xff) << 8) #define BFM_STENCIL_WRITE_MASK_SHIFT 0 #define BFM_STENCIL_WRITE_MASK_MASK (0xff<<0) +#define BFM_STENCIL_WRITE_MASK(x) ((x)&0xff) /* 3DSTATE_BIN_CONTROL p141 */ -/* p143 */ -#define _3DSTATE_BUF_INFO_CMD (CMD_3D | (0x1d<<24) | (0x8e<<16) | 1) -/* Dword 1 */ -#define BUF_3D_ID_COLOR_BACK (0x3<<24) -#define BUF_3D_ID_DEPTH (0x7<<24) -#define BUF_3D_USE_FENCE (1<<23) -#define BUF_3D_TILED_SURFACE (1<<22) -#define BUF_3D_TILE_WALK_X 0 -#define BUF_3D_TILE_WALK_Y (1<<21) -#define BUF_3D_PITCH(x) (((x)/4)<<2) -/* Dword 2 */ -#define BUF_3D_ADDR(x) ((x) & ~0x3) - - /* 3DSTATE_CHROMA_KEY */ /* 3DSTATE_CLEAR_PARAMETERS, p150 */ @@ -155,6 +143,7 @@ /* p161 */ #define _3DSTATE_DST_BUF_VARS_CMD (CMD_3D | (0x1d<<24) | (0x85<<16)) /* Dword 1 */ +#define CLASSIC_EARLY_DEPTH (1<<31) #define TEX_DEFAULT_COLOR_OGL (0<<30) #define TEX_DEFAULT_COLOR_D3D (1<<30) #define ZR_EARLY_DEPTH (1<<29) @@ -308,7 +297,9 @@ #define TEXKILL_4D (1<<9) #define ENABLE_LINE_STRIP_PROVOKE_VRTX (1<<8) #define ENABLE_TRI_FAN_PROVOKE_VRTX (1<<5) +#define LINE_STRIP_PROVOKE_VRTX_MASK (3 << 6) #define LINE_STRIP_PROVOKE_VRTX(x) ((x)<<6) +#define TRI_FAN_PROVOKE_VRTX_MASK (3 << 3) #define TRI_FAN_PROVOKE_VRTX(x) ((x)<<3) /* _3DSTATE_SCISSOR_ENABLE, p256 */ diff --git a/i915/i915_state.c b/i915/i915_state.c index 814fb59..b60efea 100644 --- a/i915/i915_state.c +++ b/i915/i915_state.c @@ -48,73 +48,119 @@ #define FILE_DEBUG_FLAG DEBUG_STATE -static void -i915StencilFuncSeparate(GLcontext * ctx, GLenum face, GLenum func, GLint ref, - GLuint mask) +void +i915_update_stencil(GLcontext * ctx) { struct i915_context *i915 = I915_CONTEXT(ctx); - int test = intel_translate_compare_func(func); + GLuint front_ref, front_writemask, front_mask; + GLenum front_func, front_fail, front_pass_z_fail, front_pass_z_pass; + GLuint back_ref, back_writemask, back_mask; + GLenum back_func, back_fail, back_pass_z_fail, back_pass_z_pass; - mask = mask & 0xff; - - DBG("%s : func: %s, ref : 0x%x, mask: 0x%x\n", __FUNCTION__, - _mesa_lookup_enum_by_nr(func), ref, mask); + I915_STATECHANGE(i915, I915_UPLOAD_CTX); + /* The 915 considers CW to be "front" for two-sided stencil, so choose + * appropriately. + */ + /* _NEW_POLYGON | _NEW_STENCIL */ + if (ctx->Polygon.FrontFace == GL_CW) { + front_ref = ctx->Stencil.Ref[0]; + front_mask = ctx->Stencil.ValueMask[0]; + front_writemask = ctx->Stencil.WriteMask[0]; + front_func = ctx->Stencil.Function[0]; + front_fail = ctx->Stencil.FailFunc[0]; + front_pass_z_fail = ctx->Stencil.ZFailFunc[0]; + front_pass_z_pass = ctx->Stencil.ZPassFunc[0]; + back_ref = ctx->Stencil.Ref[ctx->Stencil._BackFace]; + back_mask = ctx->Stencil.ValueMask[ctx->Stencil._BackFace]; + back_writemask = ctx->Stencil.WriteMask[ctx->Stencil._BackFace]; + back_func = ctx->Stencil.Function[ctx->Stencil._BackFace]; + back_fail = ctx->Stencil.FailFunc[ctx->Stencil._BackFace]; + back_pass_z_fail = ctx->Stencil.ZFailFunc[ctx->Stencil._BackFace]; + back_pass_z_pass = ctx->Stencil.ZPassFunc[ctx->Stencil._BackFace]; + } else { + front_ref = ctx->Stencil.Ref[ctx->Stencil._BackFace]; + front_mask = ctx->Stencil.ValueMask[ctx->Stencil._BackFace]; + front_writemask = ctx->Stencil.WriteMask[ctx->Stencil._BackFace]; + front_func = ctx->Stencil.Function[ctx->Stencil._BackFace]; + front_fail = ctx->Stencil.FailFunc[ctx->Stencil._BackFace]; + front_pass_z_fail = ctx->Stencil.ZFailFunc[ctx->Stencil._BackFace]; + front_pass_z_pass = ctx->Stencil.ZPassFunc[ctx->Stencil._BackFace]; + back_ref = ctx->Stencil.Ref[0]; + back_mask = ctx->Stencil.ValueMask[0]; + back_writemask = ctx->Stencil.WriteMask[0]; + back_func = ctx->Stencil.Function[0]; + back_fail = ctx->Stencil.FailFunc[0]; + back_pass_z_fail = ctx->Stencil.ZFailFunc[0]; + back_pass_z_pass = ctx->Stencil.ZPassFunc[0]; + } - I915_STATECHANGE(i915, I915_UPLOAD_CTX); - i915->state.Ctx[I915_CTXREG_STATE4] &= ~MODE4_ENABLE_STENCIL_TEST_MASK; + /* Set front state. */ + i915->state.Ctx[I915_CTXREG_STATE4] &= ~(MODE4_ENABLE_STENCIL_TEST_MASK | + MODE4_ENABLE_STENCIL_WRITE_MASK); i915->state.Ctx[I915_CTXREG_STATE4] |= (ENABLE_STENCIL_TEST_MASK | - STENCIL_TEST_MASK(mask)); + ENABLE_STENCIL_WRITE_MASK | + STENCIL_TEST_MASK(front_mask) | + STENCIL_WRITE_MASK(front_writemask)); i915->state.Ctx[I915_CTXREG_LIS5] &= ~(S5_STENCIL_REF_MASK | - S5_STENCIL_TEST_FUNC_MASK); + S5_STENCIL_TEST_FUNC_MASK | + S5_STENCIL_FAIL_MASK | + S5_STENCIL_PASS_Z_FAIL_MASK | + S5_STENCIL_PASS_Z_PASS_MASK); + + i915->state.Ctx[I915_CTXREG_LIS5] |= + (front_ref << S5_STENCIL_REF_SHIFT) | + (intel_translate_compare_func(front_func) << S5_STENCIL_TEST_FUNC_SHIFT) | + (intel_translate_stencil_op(front_fail) << S5_STENCIL_FAIL_SHIFT) | + (intel_translate_stencil_op(front_pass_z_fail) << + S5_STENCIL_PASS_Z_FAIL_SHIFT) | + (intel_translate_stencil_op(front_pass_z_pass) << + S5_STENCIL_PASS_Z_PASS_SHIFT); + + /* Set back state if different from front. */ + if (ctx->Stencil._TestTwoSide) { + i915->state.Ctx[I915_CTXREG_BF_STENCIL_OPS] &= + ~(BFO_STENCIL_REF_MASK | + BFO_STENCIL_TEST_MASK | + BFO_STENCIL_FAIL_MASK | + BFO_STENCIL_PASS_Z_FAIL_MASK | + BFO_STENCIL_PASS_Z_PASS_MASK); + i915->state.Ctx[I915_CTXREG_BF_STENCIL_OPS] |= BFO_STENCIL_TWO_SIDE | + (back_ref << BFO_STENCIL_REF_SHIFT) | + (intel_translate_compare_func(back_func) << BFO_STENCIL_TEST_SHIFT) | + (intel_translate_stencil_op(back_fail) << BFO_STENCIL_FAIL_SHIFT) | + (intel_translate_stencil_op(back_pass_z_fail) << + BFO_STENCIL_PASS_Z_FAIL_SHIFT) | + (intel_translate_stencil_op(back_pass_z_pass) << + BFO_STENCIL_PASS_Z_PASS_SHIFT); + + i915->state.Ctx[I915_CTXREG_BF_STENCIL_MASKS] &= + ~(BFM_STENCIL_TEST_MASK_MASK | + BFM_STENCIL_WRITE_MASK_MASK); + i915->state.Ctx[I915_CTXREG_BF_STENCIL_MASKS] |= + BFM_STENCIL_TEST_MASK(back_mask) | + BFM_STENCIL_WRITE_MASK(back_writemask); + } else { + i915->state.Ctx[I915_CTXREG_BF_STENCIL_OPS] &= ~BFO_STENCIL_TWO_SIDE; + } +} - i915->state.Ctx[I915_CTXREG_LIS5] |= ((ref << S5_STENCIL_REF_SHIFT) | - (test << - S5_STENCIL_TEST_FUNC_SHIFT)); +static void +i915StencilFuncSeparate(GLcontext * ctx, GLenum face, GLenum func, GLint ref, + GLuint mask) +{ } static void i915StencilMaskSeparate(GLcontext * ctx, GLenum face, GLuint mask) { - struct i915_context *i915 = I915_CONTEXT(ctx); - - DBG("%s : mask 0x%x\n", __FUNCTION__, mask); - - mask = mask & 0xff; - - I915_STATECHANGE(i915, I915_UPLOAD_CTX); - i915->state.Ctx[I915_CTXREG_STATE4] &= ~MODE4_ENABLE_STENCIL_WRITE_MASK; - i915->state.Ctx[I915_CTXREG_STATE4] |= (ENABLE_STENCIL_WRITE_MASK | - STENCIL_WRITE_MASK(mask)); } - static void i915StencilOpSeparate(GLcontext * ctx, GLenum face, GLenum fail, GLenum zfail, GLenum zpass) { - struct i915_context *i915 = I915_CONTEXT(ctx); - int fop = intel_translate_stencil_op(fail); - int dfop = intel_translate_stencil_op(zfail); - int dpop = intel_translate_stencil_op(zpass); - - - DBG("%s: fail : %s, zfail: %s, zpass : %s\n", __FUNCTION__, - _mesa_lookup_enum_by_nr(fail), - _mesa_lookup_enum_by_nr(zfail), _mesa_lookup_enum_by_nr(zpass)); - - I915_STATECHANGE(i915, I915_UPLOAD_CTX); - - i915->state.Ctx[I915_CTXREG_LIS5] &= ~(S5_STENCIL_FAIL_MASK | - S5_STENCIL_PASS_Z_FAIL_MASK | - S5_STENCIL_PASS_Z_PASS_MASK); - - i915->state.Ctx[I915_CTXREG_LIS5] |= ((fop << S5_STENCIL_FAIL_SHIFT) | - (dfop << - S5_STENCIL_PASS_Z_FAIL_SHIFT) | - (dpop << - S5_STENCIL_PASS_Z_PASS_SHIFT)); } static void @@ -945,6 +991,17 @@ i915_init_packets(struct i915_context *i915) _3DSTATE_CONST_BLEND_COLOR_CMD; i915->state.Ctx[I915_CTXREG_BLENDCOLOR1] = 0; + i915->state.Ctx[I915_CTXREG_BF_STENCIL_MASKS] = + _3DSTATE_BACKFACE_STENCIL_MASKS | + BFM_ENABLE_STENCIL_TEST_MASK | + BFM_ENABLE_STENCIL_WRITE_MASK | + (0xff << BFM_STENCIL_WRITE_MASK_SHIFT) | + (0xff << BFM_STENCIL_TEST_MASK_SHIFT); + i915->state.Ctx[I915_CTXREG_BF_STENCIL_OPS] = + _3DSTATE_BACKFACE_STENCIL_OPS | + BFO_ENABLE_STENCIL_REF | + BFO_ENABLE_STENCIL_FUNCS | + BFO_ENABLE_STENCIL_TWO_SIDE; } { @@ -976,6 +1033,13 @@ i915_init_packets(struct i915_context *i915) i915->state.Buffer[I915_DESTREG_SR2] = 0; } + i915->state.RasterRules[I915_RASTER_RULES] = _3DSTATE_RASTER_RULES_CMD | + ENABLE_POINT_RASTER_RULE | + OGL_POINT_RASTER_RULE | + ENABLE_LINE_STRIP_PROVOKE_VRTX | + ENABLE_TRI_FAN_PROVOKE_VRTX | + LINE_STRIP_PROVOKE_VRTX(1) | + TRI_FAN_PROVOKE_VRTX(2) | ENABLE_TEXKILL_3D_4D | TEXKILL_4D; #if 0 { @@ -996,7 +1060,33 @@ i915_init_packets(struct i915_context *i915) i915->state.active = (I915_UPLOAD_PROGRAM | I915_UPLOAD_STIPPLE | I915_UPLOAD_CTX | - I915_UPLOAD_BUFFERS | I915_UPLOAD_INVARIENT); + I915_UPLOAD_BUFFERS | + I915_UPLOAD_INVARIENT | + I915_UPLOAD_RASTER_RULES); +} + +void +i915_update_provoking_vertex(GLcontext * ctx) +{ + struct i915_context *i915 = I915_CONTEXT(ctx); + + I915_STATECHANGE(i915, I915_UPLOAD_CTX); + i915->state.Ctx[I915_CTXREG_LIS6] &= ~(S6_TRISTRIP_PV_MASK); + + I915_STATECHANGE(i915, I915_UPLOAD_RASTER_RULES); + i915->state.RasterRules[I915_RASTER_RULES] &= ~(LINE_STRIP_PROVOKE_VRTX_MASK | + TRI_FAN_PROVOKE_VRTX_MASK); + + /* _NEW_LIGHT */ + if (ctx->Light.ProvokingVertex == GL_LAST_VERTEX_CONVENTION) { + i915->state.RasterRules[I915_RASTER_RULES] |= (LINE_STRIP_PROVOKE_VRTX(1) | + TRI_FAN_PROVOKE_VRTX(2)); + i915->state.Ctx[I915_CTXREG_LIS6] |= (2 << S6_TRISTRIP_PV_SHIFT); + } else { + i915->state.RasterRules[I915_RASTER_RULES] |= (LINE_STRIP_PROVOKE_VRTX(0) | + TRI_FAN_PROVOKE_VRTX(1)); + i915->state.Ctx[I915_CTXREG_LIS6] |= (0 << S6_TRISTRIP_PV_SHIFT); + } } void diff --git a/i915/i915_tex_layout.c b/i915/i915_tex_layout.c index 7cc1c09..d9588e5 100644 --- a/i915/i915_tex_layout.c +++ b/i915/i915_tex_layout.c @@ -55,6 +55,17 @@ static GLint step_offsets[6][2] = { [FACE_NEG_Z] = {-1, 1}, }; + +static GLint bottom_offsets[6] = { + [FACE_POS_X] = 16 + 0 * 8, + [FACE_POS_Y] = 16 + 1 * 8, + [FACE_POS_Z] = 16 + 2 * 8, + [FACE_NEG_X] = 16 + 3 * 8, + [FACE_NEG_Y] = 16 + 4 * 8, + [FACE_NEG_Z] = 16 + 5 * 8, +}; + + /** * Cube texture map layout for i830M-GM915. * @@ -101,7 +112,8 @@ static GLint step_offsets[6][2] = { */ static void i915_miptree_layout_cube(struct intel_context *intel, - struct intel_mipmap_tree * mt) + struct intel_mipmap_tree * mt, + uint32_t tiling) { const GLuint dim = mt->width0; GLuint face; @@ -111,7 +123,7 @@ i915_miptree_layout_cube(struct intel_context *intel, assert(lvlWidth == lvlHeight); /* cubemap images are square */ /* double pitch for cube layouts */ - mt->pitch = intel_miptree_pitch_align (intel, mt, dim * 2); + mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, dim * 2); mt->total_height = dim * 4; for (level = mt->first_level; level <= mt->last_level; level++) { @@ -145,7 +157,8 @@ i915_miptree_layout_cube(struct intel_context *intel, static void i915_miptree_layout_3d(struct intel_context *intel, - struct intel_mipmap_tree * mt) + struct intel_mipmap_tree * mt, + uint32_t tiling) { GLuint width = mt->width0; GLuint height = mt->height0; @@ -154,7 +167,7 @@ i915_miptree_layout_3d(struct intel_context *intel, GLint level; /* Calculate the size of a single slice. */ - mt->pitch = intel_miptree_pitch_align (intel, mt, mt->width0); + mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, mt->width0); /* XXX: hardware expects/requires 9 levels at minimum. */ for (level = mt->first_level; level <= MAX2(8, mt->last_level); level++) { @@ -189,14 +202,15 @@ i915_miptree_layout_3d(struct intel_context *intel, static void i915_miptree_layout_2d(struct intel_context *intel, - struct intel_mipmap_tree * mt) + struct intel_mipmap_tree * mt, + uint32_t tiling) { GLuint width = mt->width0; GLuint height = mt->height0; GLuint img_height; GLint level; - mt->pitch = intel_miptree_pitch_align (intel, mt, mt->width0); + mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, mt->width0); mt->total_height = 0; for (level = mt->first_level; level <= mt->last_level; level++) { @@ -217,19 +231,20 @@ i915_miptree_layout_2d(struct intel_context *intel, } GLboolean -i915_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree * mt) +i915_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree * mt, + uint32_t tiling) { switch (mt->target) { case GL_TEXTURE_CUBE_MAP: - i915_miptree_layout_cube(intel, mt); + i915_miptree_layout_cube(intel, mt, tiling); break; case GL_TEXTURE_3D: - i915_miptree_layout_3d(intel, mt); + i915_miptree_layout_3d(intel, mt, tiling); break; case GL_TEXTURE_1D: case GL_TEXTURE_2D: case GL_TEXTURE_RECTANGLE_ARB: - i915_miptree_layout_2d(intel, mt); + i915_miptree_layout_2d(intel, mt, tiling); break; default: _mesa_problem(NULL, "Unexpected tex target in i915_miptree_layout()"); @@ -297,7 +312,7 @@ i915_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree * mt) * +---+ +---+ +---+ +---+ +---+ +---+ * * The bottom row continues with the remaining 2x2 then the 1x1 mip contents - * in order, with each of them aligned to a 4x4 block boundary. Thus, for + * in order, with each of them aligned to a 8x8 block boundary. Thus, for * 32x32 cube maps and smaller, the bottom row layout is going to dictate the * pitch of the tree. For a tree with 4x4 images, the pitch is at least * 14 * 8 = 112 texels, for 2x2 it is at least 12 * 8 texels, and for 1x1 @@ -306,7 +321,8 @@ i915_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree * mt) static void i945_miptree_layout_cube(struct intel_context *intel, - struct intel_mipmap_tree * mt) + struct intel_mipmap_tree * mt, + uint32_t tiling) { const GLuint dim = mt->width0; GLuint face; @@ -320,9 +336,9 @@ i945_miptree_layout_cube(struct intel_context *intel, * or the final row of 4x4, 2x2 and 1x1 faces below this. */ if (dim > 32) - mt->pitch = intel_miptree_pitch_align (intel, mt, dim * 2); + mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, dim * 2); else - mt->pitch = intel_miptree_pitch_align (intel, mt, 14 * 8); + mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, 14 * 8); if (dim >= 4) mt->total_height = dim * 4 + 4; @@ -375,10 +391,11 @@ i945_miptree_layout_cube(struct intel_context *intel, x = (face - 4) * 8; break; } + break; case 2: y = mt->total_height - 4; - x = 16 + face * 8; + x = bottom_offsets[face]; break; case 1: @@ -396,7 +413,8 @@ i945_miptree_layout_cube(struct intel_context *intel, static void i945_miptree_layout_3d(struct intel_context *intel, - struct intel_mipmap_tree * mt) + struct intel_mipmap_tree * mt, + uint32_t tiling) { GLuint width = mt->width0; GLuint height = mt->height0; @@ -405,7 +423,7 @@ i945_miptree_layout_3d(struct intel_context *intel, GLuint pack_y_pitch; GLuint level; - mt->pitch = intel_miptree_pitch_align (intel, mt, mt->width0); + mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, mt->width0); mt->total_height = 0; pack_y_pitch = MAX2(mt->height0, 2); @@ -450,22 +468,23 @@ i945_miptree_layout_3d(struct intel_context *intel, } GLboolean -i945_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree * mt) +i945_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree * mt, + uint32_t tiling) { switch (mt->target) { case GL_TEXTURE_CUBE_MAP: if (mt->compressed) - i945_miptree_layout_cube(intel, mt); + i945_miptree_layout_cube(intel, mt, tiling); else - i915_miptree_layout_cube(intel, mt); + i915_miptree_layout_cube(intel, mt, tiling); break; case GL_TEXTURE_3D: - i945_miptree_layout_3d(intel, mt); + i945_miptree_layout_3d(intel, mt, tiling); break; case GL_TEXTURE_1D: case GL_TEXTURE_2D: case GL_TEXTURE_RECTANGLE_ARB: - i945_miptree_layout_2d(intel, mt); + i945_miptree_layout_2d(intel, mt, tiling); break; default: _mesa_problem(NULL, "Unexpected tex target in i945_miptree_layout()"); diff --git a/i915/i915_texstate.c b/i915/i915_texstate.c index a37dd7f..32d4b30 100644 --- a/i915/i915_texstate.c +++ b/i915/i915_texstate.c @@ -185,8 +185,13 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) state[I915_TEXREG_MS3] = (((firstImage->Height - 1) << MS3_HEIGHT_SHIFT) | - ((firstImage->Width - 1) << MS3_WIDTH_SHIFT) | format | - MS3_USE_FENCE_REGS); + ((firstImage->Width - 1) << MS3_WIDTH_SHIFT) | format); + + if (intelObj->mt->region->tiling != I915_TILING_NONE) { + state[I915_TEXREG_MS3] |= MS3_TILED_SURFACE; + if (intelObj->mt->region->tiling == I915_TILING_Y) + state[I915_TEXREG_MS3] |= MS3_TILE_WALK; + } state[I915_TEXREG_MS4] = ((((pitch / 4) - 1) << MS4_PITCH_SHIFT) | MS4_CUBE_FACE_ENA_MASK | diff --git a/i915/i915_vtbl.c b/i915/i915_vtbl.c index 1150046..9a723d3 100644 --- a/i915/i915_vtbl.c +++ b/i915/i915_vtbl.c @@ -42,6 +42,7 @@ #include "intel_regions.h" #include "intel_tris.h" #include "intel_fbo.h" +#include "intel_chipset.h" #include "i915_reg.h" #include "i915_context.h" @@ -175,7 +176,7 @@ i915_emit_invarient_state(struct intel_context *intel) { BATCH_LOCALS; - BEGIN_BATCH(20, IGNORE_CLIPRECTS); + BEGIN_BATCH(17, IGNORE_CLIPRECTS); OUT_BATCH(_3DSTATE_AA_CMD | AA_LINE_ECAAR_WIDTH_ENABLE | @@ -199,14 +200,6 @@ i915_emit_invarient_state(struct intel_context *intel) CSB_TCB(3, 3) | CSB_TCB(4, 4) | CSB_TCB(5, 5) | CSB_TCB(6, 6) | CSB_TCB(7, 7)); - OUT_BATCH(_3DSTATE_RASTER_RULES_CMD | - ENABLE_POINT_RASTER_RULE | - OGL_POINT_RASTER_RULE | - ENABLE_LINE_STRIP_PROVOKE_VRTX | - ENABLE_TRI_FAN_PROVOKE_VRTX | - LINE_STRIP_PROVOKE_VRTX(1) | - TRI_FAN_PROVOKE_VRTX(2) | ENABLE_TEXKILL_3D_4D | TEXKILL_4D); - /* Need to initialize this to zero. */ OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | (0)); @@ -224,11 +217,6 @@ i915_emit_invarient_state(struct intel_context *intel) OUT_BATCH(_3DSTATE_LOAD_INDIRECT | 0); /* disable indirect state */ OUT_BATCH(0); - - /* Don't support twosided stencil yet */ - OUT_BATCH(_3DSTATE_BACKFACE_STENCIL_OPS | BFO_ENABLE_STENCIL_TWO_SIDE | 0); - OUT_BATCH(0); - ADVANCE_BATCH(); } @@ -262,6 +250,9 @@ get_state_size(struct i915_hw_state *state) if (dirty & I915_UPLOAD_INVARIENT) sz += 30 * 4; + if (dirty & I915_UPLOAD_RASTER_RULES) + sz += sizeof(state->RasterRules); + if (dirty & I915_UPLOAD_CTX) sz += sizeof(state->Ctx); @@ -370,6 +361,12 @@ i915_emit_state(struct intel_context *intel) i915_emit_invarient_state(intel); } + if (dirty & I915_UPLOAD_RASTER_RULES) { + if (INTEL_DEBUG & DEBUG_STATE) + fprintf(stderr, "I915_UPLOAD_RASTER_RULES:\n"); + emit(intel, state->RasterRules, sizeof(state->RasterRules)); + } + if (dirty & I915_UPLOAD_CTX) { if (INTEL_DEBUG & DEBUG_STATE) fprintf(stderr, "I915_UPLOAD_CTX:\n"); @@ -529,6 +526,23 @@ i915_destroy_context(struct intel_context *intel) _tnl_free_vertices(&intel->ctx); } +void +i915_set_buf_info_for_region(uint32_t *state, struct intel_region *region, + uint32_t buffer_id) +{ + state[0] = _3DSTATE_BUF_INFO_CMD; + state[1] = buffer_id; + + if (region != NULL) { + state[1] |= BUF_3D_PITCH(region->pitch * region->cpp); + + if (region->tiling != I915_TILING_NONE) { + state[1] |= BUF_3D_TILED_SURFACE; + if (region->tiling == I915_TILING_Y) + state[1] |= BUF_3D_TILE_WALK_Y; + } + } +} /** * Set the drawing regions for the color and depth/stencil buffers. @@ -562,21 +576,11 @@ i915_state_draw_region(struct intel_context *intel, /* * Set stride/cpp values */ - if (color_region) { - state->Buffer[I915_DESTREG_CBUFADDR0] = _3DSTATE_BUF_INFO_CMD; - state->Buffer[I915_DESTREG_CBUFADDR1] = - (BUF_3D_ID_COLOR_BACK | - BUF_3D_PITCH(color_region->pitch * color_region->cpp) | - BUF_3D_USE_FENCE); - } + i915_set_buf_info_for_region(&state->Buffer[I915_DESTREG_CBUFADDR0], + color_region, BUF_3D_ID_COLOR_BACK); - if (depth_region) { - state->Buffer[I915_DESTREG_DBUFADDR0] = _3DSTATE_BUF_INFO_CMD; - state->Buffer[I915_DESTREG_DBUFADDR1] = - (BUF_3D_ID_DEPTH | - BUF_3D_PITCH(depth_region->pitch * depth_region->cpp) | - BUF_3D_USE_FENCE); - } + i915_set_buf_info_for_region(&state->Buffer[I915_DESTREG_DBUFADDR0], + depth_region, BUF_3D_ID_DEPTH); /* * Compute/set I915_DESTREG_DV1 value @@ -604,6 +608,14 @@ i915_state_draw_region(struct intel_context *intel, } } + /* This isn't quite safe, thus being hidden behind an option. When changing + * the value of this bit, the pipeline needs to be MI_FLUSHed. And it + * can only be set when a depth buffer is already defined. + */ + if (IS_945(intel->intelScreen->deviceID) && intel->use_early_z && + depth_region->tiling != I915_TILING_NONE) + value |= CLASSIC_EARLY_DEPTH; + if (depth_region && depth_region->cpp == 4) { value |= DEPTH_FRMT_24_FIXED_8_OTHER; } @@ -676,13 +688,6 @@ i915_assert_not_dirty( struct intel_context *intel ) assert(!dirty); } -static void -i915_note_unlock( struct intel_context *intel ) -{ - /* nothing */ -} - - void i915InitVtbl(struct i915_context *i915) { @@ -697,6 +702,5 @@ i915InitVtbl(struct i915_context *i915) i915->intel.vtbl.update_texture_state = i915UpdateTextureState; i915->intel.vtbl.flush_cmd = i915_flush_cmd; i915->intel.vtbl.assert_not_dirty = i915_assert_not_dirty; - i915->intel.vtbl.note_unlock = i915_note_unlock; i915->intel.vtbl.finish_batch = intel_finish_vb; } diff --git a/i915/intel_tris.c b/i915/intel_tris.c index 1d39278..a905455 100644 --- a/i915/intel_tris.c +++ b/i915/intel_tris.c @@ -1255,11 +1255,9 @@ intel_meta_draw_poly(struct intel_context *intel, { union fi *vb; GLint i; - GLboolean was_locked = intel->locked; unsigned int saved_vertex_size = intel->vertex_size; - if (!was_locked) - LOCK_HARDWARE(intel); + LOCK_HARDWARE(intel); intel->vertex_size = 6; @@ -1283,8 +1281,7 @@ intel_meta_draw_poly(struct intel_context *intel, intel->vertex_size = saved_vertex_size; - if (!was_locked) - UNLOCK_HARDWARE(intel); + UNLOCK_HARDWARE(intel); } static void diff --git a/i965/Makefile.am b/i965/Makefile.am index 5a118af..8ada8b5 100644 --- a/i965/Makefile.am +++ b/i965/Makefile.am @@ -17,6 +17,7 @@ i965_dri_la_SOURCES = \ ../shared/intel_decode.c \ ../shared/intel_extensions.c \ ../shared/intel_fbo.c \ + ../shared/intel_generatemipmap.c \ ../shared/intel_mipmap_tree.c \ ../shared/intel_regions.c \ ../shared/intel_screen.c \ @@ -25,8 +26,10 @@ i965_dri_la_SOURCES = \ ../shared/intel_pixel_bitmap.c \ ../shared/intel_pixel_copy.c \ ../shared/intel_pixel_draw.c \ + ../shared/intel_pixel_read.c \ ../shared/intel_state.c \ ../shared/intel_swapbuffers.c \ + ../shared/intel_syncobj.c \ ../shared/intel_tex.c \ ../shared/intel_tex_copy.c \ ../shared/intel_tex_format.c \ @@ -44,6 +47,7 @@ i965_dri_la_SOURCES = \ brw_clip_util.c \ brw_context.c \ brw_curbe.c \ + brw_disasm.c \ brw_draw.c \ brw_draw_upload.c \ brw_eu.c \ @@ -72,6 +76,7 @@ i965_dri_la_SOURCES = \ brw_vs_constval.c \ brw_vs_emit.c \ brw_vs_state.c \ + brw_vs_surface_state.c \ brw_vtbl.c \ brw_wm.c \ brw_wm_debug.c \ diff --git a/i965/brw_clip.c b/i965/brw_clip.c index 5cffceb..20a927c 100644 --- a/i965/brw_clip.c +++ b/i965/brw_clip.c @@ -65,21 +65,31 @@ static void compile_clip_prog( struct brw_context *brw, c.func.single_program_flow = 1; c.key = *key; - + c.need_ff_sync = BRW_IS_IGDNG(brw); /* Need to locate the two positions present in vertex + header. * These are currently hardcoded: */ c.header_position_offset = ATTR_SIZE; - for (i = 0, delta = REG_SIZE; i < VERT_RESULT_MAX; i++) + if (BRW_IS_IGDNG(brw)) + delta = 3 * REG_SIZE; + else + delta = REG_SIZE; + + for (i = 0; i < VERT_RESULT_MAX; i++) if (c.key.attrs & (1<<i)) { c.offset[i] = delta; delta += ATTR_SIZE; } c.nr_attrs = brw_count_bits(c.key.attrs); - c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */ + + if (BRW_IS_IGDNG(brw)) + c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */ + else + c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */ + c.nr_bytes = c.nr_regs * REG_SIZE; c.prog_data.clip_mode = c.key.clip_mode; /* XXX */ @@ -148,11 +158,16 @@ static void upload_clip_prog(struct brw_context *brw) key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT); /* _NEW_TRANSFORM */ key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled); - key.clip_mode = BRW_CLIPMODE_NORMAL; + + if (BRW_IS_IGDNG(brw)) + key.clip_mode = BRW_CLIPMODE_KERNEL_CLIP; + else + key.clip_mode = BRW_CLIPMODE_NORMAL; /* _NEW_POLYGON */ if (key.primitive == GL_TRIANGLES) { - if (ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK) + if (ctx->Polygon.CullFlag && + ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK) key.clip_mode = BRW_CLIPMODE_REJECT_ALL; else { GLuint fill_front = CLIP_CULL; diff --git a/i965/brw_clip.h b/i965/brw_clip.h index e067478..957df44 100644 --- a/i965/brw_clip.h +++ b/i965/brw_clip.h @@ -100,6 +100,8 @@ struct brw_clip_compile { struct brw_reg fixed_planes; struct brw_reg plane_equation; + + struct brw_reg ff_sync; } reg; /* 3 different ways of expressing vertex size: @@ -117,6 +119,7 @@ struct brw_clip_compile { GLuint header_position_offset; GLuint offset[VERT_ATTRIB_MAX]; + GLboolean need_ff_sync; }; #define ATTR_SIZE (4*4) @@ -171,5 +174,6 @@ struct brw_reg get_tmp( struct brw_clip_compile *c ); void brw_clip_project_position(struct brw_clip_compile *c, struct brw_reg pos ); - +void brw_clip_ff_sync(struct brw_clip_compile *c); +void brw_clip_init_ff_sync(struct brw_clip_compile *c); #endif diff --git a/i965/brw_clip_line.c b/i965/brw_clip_line.c index d830e49..048ca62 100644 --- a/i965/brw_clip_line.c +++ b/i965/brw_clip_line.c @@ -85,6 +85,10 @@ static void brw_clip_line_alloc_regs( struct brw_clip_compile *c ) i++; } + if (c->need_ff_sync) { + c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD); + i++; + } c->first_tmp = i; c->last_tmp = i; @@ -130,7 +134,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) struct brw_instruction *plane_loop; struct brw_instruction *plane_active; struct brw_instruction *is_negative; - struct brw_instruction *is_neg2; + struct brw_instruction *is_neg2 = NULL; struct brw_instruction *not_culled; struct brw_reg v1_null_ud = retype(vec1(brw_null_reg()), BRW_REGISTER_TYPE_UD); @@ -148,7 +152,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) brw_clip_init_clipmask(c); /* -ve rhw workaround */ - if (!BRW_IS_G4X(p->brw)) { + if (BRW_IS_965(p->brw)) { brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<20)); @@ -185,7 +189,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) * Both can be negative on GM965/G965 due to RHW workaround * if so, this object should be rejected. */ - if (!BRW_IS_G4X(p->brw)) { + if (BRW_IS_965(p->brw)) { brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE, c->reg.dp0, brw_imm_f(0.0)); is_neg2 = brw_IF(p, BRW_EXECUTE_1); { @@ -210,7 +214,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) /* If both are positive, do nothing */ /* Only on GM965/G965 */ - if (!BRW_IS_G4X(p->brw)) { + if (BRW_IS_965(p->brw)) { brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0)); is_neg2 = brw_IF(p, BRW_EXECUTE_1); } @@ -225,7 +229,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) brw_set_predicate_control(p, BRW_PREDICATE_NONE); } - if (!BRW_IS_G4X(p->brw)) { + if (BRW_IS_965(p->brw)) { brw_ENDIF(p, is_neg2); } } @@ -263,6 +267,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) void brw_emit_line_clip( struct brw_clip_compile *c ) { brw_clip_line_alloc_regs(c); + brw_clip_init_ff_sync(c); if (c->key.do_flat_shading) brw_clip_copy_colors(c, 0, 1); diff --git a/i965/brw_clip_point.c b/i965/brw_clip_point.c index d17b199..8458f61 100644 --- a/i965/brw_clip_point.c +++ b/i965/brw_clip_point.c @@ -50,5 +50,7 @@ void brw_emit_point_clip( struct brw_clip_compile *c ) /* Send an empty message to kill the thread: */ brw_clip_tri_alloc_regs(c, 0); + brw_clip_init_ff_sync(c); + brw_clip_kill_thread(c); } diff --git a/i965/brw_clip_state.c b/i965/brw_clip_state.c index 9b0d7ea..5762c95 100644 --- a/i965/brw_clip_state.c +++ b/i965/brw_clip_state.c @@ -95,7 +95,14 @@ clip_unit_create_from_key(struct brw_context *brw, * even number. */ assert(key->nr_urb_entries % 2 == 0); - clip.thread4.max_threads = 2 - 1; + + /* Although up to 16 concurrent Clip threads are allowed on IGDNG, + * only 2 threads can output VUEs at a time. + */ + if (BRW_IS_IGDNG(brw)) + clip.thread4.max_threads = 16 - 1; + else + clip.thread4.max_threads = 2 - 1; } else { assert(key->nr_urb_entries >= 5); clip.thread4.max_threads = 1 - 1; diff --git a/i965/brw_clip_tri.c b/i965/brw_clip_tri.c index 7fd37bd..0efd772 100644 --- a/i965/brw_clip_tri.c +++ b/i965/brw_clip_tri.c @@ -77,6 +77,10 @@ void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, if (c->nr_attrs & 1) { for (j = 0; j < 3; j++) { GLuint delta = c->nr_attrs*16 + 32; + + if (BRW_IS_IGDNG(c->func.brw)) + delta = c->nr_attrs * 16 + 32 * 3; + brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0)); } } @@ -115,6 +119,11 @@ void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, i++; } + if (c->need_ff_sync) { + c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD); + i++; + } + c->first_tmp = i; c->last_tmp = i; @@ -559,10 +568,11 @@ void brw_emit_tri_clip( struct brw_clip_compile *c ) brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6); brw_clip_tri_init_vertices(c); brw_clip_init_clipmask(c); + brw_clip_init_ff_sync(c); /* if -ve rhw workaround bit is set, do cliptest */ - if (!BRW_IS_G4X(p->brw)) { + if (BRW_IS_965(p->brw)) { brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<20)); @@ -579,11 +589,12 @@ void brw_emit_tri_clip( struct brw_clip_compile *c ) if (c->key.do_flat_shading) brw_clip_tri_flat_shade(c); - if (c->key.clip_mode == BRW_CLIPMODE_NORMAL) + if ((c->key.clip_mode == BRW_CLIPMODE_NORMAL) || + (c->key.clip_mode == BRW_CLIPMODE_KERNEL_CLIP)) do_clip_tri(c); else maybe_do_clip_tri(c); - + brw_clip_tri_emit_polygon(c); /* Send an empty message to kill the thread: diff --git a/i965/brw_clip_unfilled.c b/i965/brw_clip_unfilled.c index d7ca517..ad1bfa4 100644 --- a/i965/brw_clip_unfilled.c +++ b/i965/brw_clip_unfilled.c @@ -453,6 +453,7 @@ void brw_emit_unfilled_clip( struct brw_clip_compile *c ) brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6); brw_clip_tri_init_vertices(c); + brw_clip_init_ff_sync(c); assert(c->offset[VERT_RESULT_EDGE]); diff --git a/i965/brw_clip_util.c b/i965/brw_clip_util.c index 9d3b0be..5a73abd 100644 --- a/i965/brw_clip_util.c +++ b/i965/brw_clip_util.c @@ -140,6 +140,10 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c, /* Just copy the vertex header: */ + /* + * After CLIP stage, only first 256 bits of the VUE are read + * back on IGDNG, so needn't change it + */ brw_copy_indirect_to_indirect(p, dest_ptr, v0_ptr, 1); /* Iterate over each attribute (could be done in pairs?) @@ -147,6 +151,9 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c, for (i = 0; i < c->nr_attrs; i++) { GLuint delta = i*16 + 32; + if (BRW_IS_IGDNG(p->brw)) + delta = i * 16 + 32 * 3; + if (delta == c->offset[VERT_RESULT_EDGE]) { if (force_edgeflag) brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(1)); @@ -177,6 +184,10 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c, if (i & 1) { GLuint delta = i*16 + 32; + + if (BRW_IS_IGDNG(p->brw)) + delta = i * 16 + 32 * 3; + brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0)); } @@ -202,6 +213,8 @@ void brw_clip_emit_vue(struct brw_clip_compile *c, struct brw_compile *p = &c->func; GLuint start = c->last_mrf; + brw_clip_ff_sync(c); + assert(!(allocate && eot)); /* Cycle through mrf regs - probably futile as we have to wait for @@ -252,6 +265,7 @@ void brw_clip_kill_thread(struct brw_clip_compile *c) { struct brw_compile *p = &c->func; + brw_clip_ff_sync(c); /* Send an empty message to kill the thread and release any * allocated urb entry: */ @@ -343,3 +357,40 @@ void brw_clip_init_clipmask( struct brw_clip_compile *c ) } } +void brw_clip_ff_sync(struct brw_clip_compile *c) +{ + if (c->need_ff_sync) { + struct brw_compile *p = &c->func; + struct brw_instruction *need_ff_sync; + + brw_set_conditionalmod(p, BRW_CONDITIONAL_Z); + brw_AND(p, brw_null_reg(), c->reg.ff_sync, brw_imm_ud(0x1)); + need_ff_sync = brw_IF(p, BRW_EXECUTE_1); + { + brw_OR(p, c->reg.ff_sync, c->reg.ff_sync, brw_imm_ud(0x1)); + brw_ff_sync(p, + c->reg.R0, + 0, + c->reg.R0, + 1, + 1, /* used */ + 1, /* msg length */ + 1, /* response length */ + 0, /* eot */ + 1, /* write compelete */ + 0, /* urb offset */ + BRW_URB_SWIZZLE_NONE); + } + brw_ENDIF(p, need_ff_sync); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } +} + +void brw_clip_init_ff_sync(struct brw_clip_compile *c) +{ + if (c->need_ff_sync) { + struct brw_compile *p = &c->func; + + brw_MOV(p, c->reg.ff_sync, brw_imm_ud(0)); + } +} diff --git a/i965/brw_context.c b/i965/brw_context.c index 4dbe551..c300c33 100644 --- a/i965/brw_context.c +++ b/i965/brw_context.c @@ -125,8 +125,34 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis, /* We want the GLSL compiler to emit code that uses condition codes */ ctx->Shader.EmitCondCodes = GL_TRUE; - -/* ctx->Const.MaxNativeVertexProgramTemps = 32; */ + ctx->Shader.EmitNVTempInitialization = GL_TRUE; + + ctx->Const.VertexProgram.MaxNativeInstructions = (16 * 1024); + ctx->Const.VertexProgram.MaxAluInstructions = 0; + ctx->Const.VertexProgram.MaxTexInstructions = 0; + ctx->Const.VertexProgram.MaxTexIndirections = 0; + ctx->Const.VertexProgram.MaxNativeAluInstructions = 0; + ctx->Const.VertexProgram.MaxNativeTexInstructions = 0; + ctx->Const.VertexProgram.MaxNativeTexIndirections = 0; + ctx->Const.VertexProgram.MaxNativeAttribs = 16; + ctx->Const.VertexProgram.MaxNativeTemps = 256; + ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; + ctx->Const.VertexProgram.MaxNativeParameters = 1024; + ctx->Const.VertexProgram.MaxEnvParams = + MIN2(ctx->Const.VertexProgram.MaxNativeParameters, + ctx->Const.VertexProgram.MaxEnvParams); + + ctx->Const.FragmentProgram.MaxNativeInstructions = (16 * 1024); + ctx->Const.FragmentProgram.MaxNativeAluInstructions = (16 * 1024); + ctx->Const.FragmentProgram.MaxNativeTexInstructions = (16 * 1024); + ctx->Const.FragmentProgram.MaxNativeTexIndirections = (16 * 1024); + ctx->Const.FragmentProgram.MaxNativeAttribs = 12; + ctx->Const.FragmentProgram.MaxNativeTemps = 256; + ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; + ctx->Const.FragmentProgram.MaxNativeParameters = 1024; + ctx->Const.FragmentProgram.MaxEnvParams = + MIN2(ctx->Const.FragmentProgram.MaxNativeParameters, + ctx->Const.FragmentProgram.MaxEnvParams); brw_init_state( brw ); diff --git a/i965/brw_context.h b/i965/brw_context.h index 577497b..a5209ac 100644 --- a/i965/brw_context.h +++ b/i965/brw_context.h @@ -129,8 +129,8 @@ struct brw_context; #define BRW_NEW_PRIMITIVE 0x40 #define BRW_NEW_CONTEXT 0x80 #define BRW_NEW_WM_INPUT_DIMENSIONS 0x100 -#define BRW_NEW_INPUT_VARYING 0x200 #define BRW_NEW_PSP 0x800 +#define BRW_NEW_WM_SURFACES 0x1000 #define BRW_NEW_FENCE 0x2000 #define BRW_NEW_INDICES 0x4000 #define BRW_NEW_VERTICES 0x8000 @@ -143,6 +143,7 @@ struct brw_context; #define BRW_NEW_DEPTH_BUFFER 0x20000 #define BRW_NEW_NR_WM_SURFACES 0x40000 #define BRW_NEW_NR_VS_SURFACES 0x80000 +#define BRW_NEW_INDEX_BUFFER 0x100000 struct brw_state_flags { /** State update flags signalled by mesa internals */ @@ -173,6 +174,9 @@ struct brw_fragment_program { dri_bo *const_buffer; /** Program constant buffer/surface */ GLboolean use_const_buffer; + + /** for debugging, which texture units are referenced */ + GLbitfield tex_units_used; }; @@ -386,6 +390,8 @@ struct brw_cached_batch_item { struct brw_vertex_element { const struct gl_client_array *glarray; + /** The corresponding Mesa vertex attribute */ + gl_vert_attrib attrib; /** Size of a complete element */ GLuint element_size; /** Number of uploaded elements for this input. */ @@ -401,7 +407,6 @@ struct brw_vertex_element { struct brw_vertex_info { - GLuint varying; /* varying:1[VERT_ATTRIB_MAX] */ GLuint sizes[ATTRIB_BIT_DWORDS * 2]; /* sizes:2[VERT_ATTRIB_MAX] */ }; @@ -439,9 +444,13 @@ struct brw_query_object { unsigned int count; }; + +/** + * brw_context is derived from intel_context. + */ struct brw_context { - struct intel_context intel; + struct intel_context intel; /**< base class, must be first field */ GLuint primitive; GLboolean emit_state_always; @@ -450,8 +459,6 @@ struct brw_context struct { struct brw_state_flags dirty; - struct brw_tracked_state **atoms; - GLuint nr_atoms; GLuint nr_color_regions; struct intel_region *color_regions[MAX_DRAW_BUFFERS]; @@ -471,12 +478,16 @@ struct brw_context int validated_bo_count; } state; - struct brw_cache cache; + struct brw_cache cache; /** non-surface items */ + struct brw_cache surface_cache; /* surface items */ struct brw_cached_batch_item *cached_batch_items; struct { struct brw_vertex_element inputs[VERT_ATTRIB_MAX]; + struct brw_vertex_element *enabled[VERT_ATTRIB_MAX]; + GLuint nr_enabled; + #define BRW_NR_UPLOAD_BUFS 17 #define BRW_UPLOAD_INIT_SIZE (128*1024) @@ -500,8 +511,15 @@ struct brw_context */ const struct _mesa_index_buffer *ib; + /* Updates to these fields are signaled by BRW_NEW_INDEX_BUFFER. */ dri_bo *bo; unsigned int offset; + unsigned int size; + /* Offset to index buffer index to use in CMD_3D_PRIM so that we can + * avoid re-uploading the IB packet over and over if we're actually + * referencing the same index buffer. + */ + unsigned int start_vertex_offset; } ib; /* Active vertex program: @@ -555,11 +573,6 @@ struct brw_context GLuint vs_size; GLuint total_size; - /* Dynamic tracker which changes to reflect the state referenced - * by active fp and vp program parameters: - */ - struct brw_tracked_state tracked_state; - dri_bo *curbe_bo; /** Offset within curbe_bo of space for current curbe entry */ GLuint curbe_offset; @@ -713,6 +726,8 @@ void brw_upload_urb_fence(struct brw_context *brw); */ void brw_upload_cs_urb_state(struct brw_context *brw); +/* brw_disasm.c */ +int brw_disasm (FILE *file, struct brw_instruction *inst); /*====================================================================== * Inline conversion functions. These are better-typed than the diff --git a/i965/brw_curbe.c b/i965/brw_curbe.c index 9197fed..4be6c77 100644 --- a/i965/brw_curbe.c +++ b/i965/brw_curbe.c @@ -36,6 +36,7 @@ #include "main/macros.h" #include "main/enums.h" #include "shader/prog_parameter.h" +#include "shader/prog_print.h" #include "shader/prog_statevars.h" #include "intel_batchbuffer.h" #include "intel_regions.h" @@ -188,13 +189,6 @@ static void prepare_constant_buffer(struct brw_context *brw) GLfloat *buf; GLuint i; - /* Update our own dependency flags. This works because this - * function will also be called whenever fp or vp changes. - */ - brw->curbe.tracked_state.dirty.mesa = (_NEW_TRANSFORM|_NEW_PROJECTION); - brw->curbe.tracked_state.dirty.mesa |= vp->program.Base.Parameters->StateFlags; - brw->curbe.tracked_state.dirty.mesa |= fp->program.Base.Parameters->StateFlags; - if (sz == 0) { if (brw->curbe.last_buf) { free(brw->curbe.last_buf); @@ -254,6 +248,12 @@ static void prepare_constant_buffer(struct brw_context *brw) GLuint offset = brw->curbe.vs_start * 16; GLuint nr = brw->vs.prog_data->nr_params / 4; + if (brw->vertex_program->IsNVProgram) + _mesa_load_tracked_matrices(ctx); + + /* Updates the ParamaterValues[i] pointers for all parameters of the + * basic type of PROGRAM_STATE_VAR. + */ _mesa_load_state_parameters(ctx, vp->program.Base.Parameters); /* XXX just use a memcpy here */ @@ -335,78 +335,11 @@ static void prepare_constant_buffer(struct brw_context *brw) */ } - -/** - * Copy Mesa program parameters into given constant buffer. - */ -static void -update_constant_buffer(struct brw_context *brw, - const struct gl_program_parameter_list *params, - dri_bo *const_buffer) -{ - struct intel_context *intel = &brw->intel; - const int size = params->NumParameters * 4 * sizeof(GLfloat); - - /* copy Mesa program constants into the buffer */ - if (const_buffer && size > 0) { - - assert(const_buffer); - assert(const_buffer->size >= size); - - if (intel->intelScreen->kernel_exec_fencing) { - drm_intel_gem_bo_map_gtt(const_buffer); - memcpy(const_buffer->virtual, params->ParameterValues, size); - drm_intel_gem_bo_unmap_gtt(const_buffer); - } - else { - dri_bo_subdata(const_buffer, 0, size, params->ParameterValues); - } - - if (0) { - int i; - for (i = 0; i < params->NumParameters; i++) { - float *p = params->ParameterValues[i]; - printf("%d: %f %f %f %f\n", i, p[0], p[1], p[2], p[3]); - } - } - } -} - - -/** Copy current vertex program's parameters into the constant buffer */ -static void -update_vertex_constant_buffer(struct brw_context *brw) -{ - struct brw_vertex_program *vp = - (struct brw_vertex_program *) brw->vertex_program; - if (0) { - printf("update VS constants in buffer %p\n", vp->const_buffer); - printf("program %u\n", vp->program.Base.Id); - } - if (vp->use_const_buffer) - update_constant_buffer(brw, vp->program.Base.Parameters, vp->const_buffer); -} - - -/** Copy current fragment program's parameters into the constant buffer */ -static void -update_fragment_constant_buffer(struct brw_context *brw) -{ - struct brw_fragment_program *fp = - (struct brw_fragment_program *) brw->fragment_program; - if (fp->use_const_buffer) - update_constant_buffer(brw, fp->program.Base.Parameters, fp->const_buffer); -} - - static void emit_constant_buffer(struct brw_context *brw) { struct intel_context *intel = &brw->intel; GLuint sz = brw->curbe.total_size; - update_vertex_constant_buffer(brw); - update_fragment_constant_buffer(brw); - BEGIN_BATCH(2, IGNORE_CLIPRECTS); if (sz == 0) { OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2)); @@ -428,7 +361,7 @@ static void emit_constant_buffer(struct brw_context *brw) */ const struct brw_tracked_state brw_constant_buffer = { .dirty = { - .mesa = (_NEW_TRANSFORM|_NEW_PROJECTION), /* plus fp and vp flags */ + .mesa = _NEW_PROGRAM_CONSTANTS, .brw = (BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_VERTEX_PROGRAM | BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */ diff --git a/i965/brw_defines.h b/i965/brw_defines.h index 98fc909..78d457a 100644 --- a/i965/brw_defines.h +++ b/i965/brw_defines.h @@ -139,6 +139,7 @@ #define BRW_CLIPMODE_CLIP_NON_REJECTED 2 #define BRW_CLIPMODE_REJECT_ALL 3 #define BRW_CLIPMODE_ACCEPT_ALL 4 +#define BRW_CLIPMODE_KERNEL_CLIP 5 #define BRW_CLIP_NDCSPACE 0 #define BRW_CLIP_SCREENSPACE 1 @@ -470,8 +471,9 @@ #define BRW_CONDITIONAL_GE 4 #define BRW_CONDITIONAL_L 5 #define BRW_CONDITIONAL_LE 6 -#define BRW_CONDITIONAL_C 7 +#define BRW_CONDITIONAL_R 7 #define BRW_CONDITIONAL_O 8 +#define BRW_CONDITIONAL_U 9 #define BRW_DEBUG_NONE 0 #define BRW_DEBUG_BREAKPOINT 1 @@ -511,6 +513,7 @@ #define BRW_OPCODE_RSL 11 #define BRW_OPCODE_ASR 12 #define BRW_OPCODE_CMP 16 +#define BRW_OPCODE_CMPN 17 #define BRW_OPCODE_JMPI 32 #define BRW_OPCODE_IF 34 #define BRW_OPCODE_IFF 35 @@ -670,6 +673,25 @@ #define BRW_SAMPLER_MESSAGE_SIMD8_LD 3 #define BRW_SAMPLER_MESSAGE_SIMD16_LD 3 +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_IGDNG 0 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_IGDNG 0 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG 0 +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG 1 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_BIAS_IGDNG 1 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG 1 +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_IGDNG 2 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_IGDNG 2 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD_IGDNG 2 +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG 3 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE_IGDNG 3 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG 3 + +/* for IGDNG only */ +#define BRW_SAMPLER_SIMD_MODE_SIMD4X2 0 +#define BRW_SAMPLER_SIMD_MODE_SIMD8 1 +#define BRW_SAMPLER_SIMD_MODE_SIMD16 2 +#define BRW_SAMPLER_SIMD_MODE_SIMD32_64 3 + #define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0 #define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1 #define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS 2 @@ -819,8 +841,11 @@ #include "intel_chipset.h" #define BRW_IS_G4X(brw) (IS_G4X((brw)->intel.intelScreen->deviceID)) -#define CMD_PIPELINE_SELECT(brw) (BRW_IS_G4X(brw) ? CMD_PIPELINE_SELECT_GM45 : CMD_PIPELINE_SELECT_965) -#define CMD_VF_STATISTICS(brw) (BRW_IS_G4X(brw) ? CMD_VF_STATISTICS_GM45 : CMD_VF_STATISTICS_965) -#define URB_SIZES(brw) (BRW_IS_G4X(brw) ? 384 : 256) /* 512 bit units */ +#define BRW_IS_IGDNG(brw) (IS_IGDNG((brw)->intel.intelScreen->deviceID)) +#define BRW_IS_965(brw) (!(BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))) +#define CMD_PIPELINE_SELECT(brw) ((BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? CMD_PIPELINE_SELECT_GM45 : CMD_PIPELINE_SELECT_965) +#define CMD_VF_STATISTICS(brw) ((BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? CMD_VF_STATISTICS_GM45 : CMD_VF_STATISTICS_965) +#define URB_SIZES(brw) (BRW_IS_IGDNG(brw) ? 1024 : \ + (BRW_IS_G4X(brw) ? 384 : 256)) /* 512 bit units */ #endif diff --git a/i965/brw_disasm.c b/i965/brw_disasm.c new file mode 100644 index 0000000..9fef230 --- /dev/null +++ b/i965/brw_disasm.c @@ -0,0 +1,903 @@ +/* + * Copyright © 2008 Keith Packard + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that copyright + * notice and this permission notice appear in supporting documentation, and + * that the name of the copyright holders not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. The copyright holders make no representations + * about the suitability of this software for any purpose. It is provided "as + * is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THIS SOFTWARE. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <getopt.h> +#include <unistd.h> +#include <stdarg.h> + +#include "main/mtypes.h" + +#include "brw_context.h" +#include "brw_defines.h" + +struct { + char *name; + int nsrc; + int ndst; +} opcode[128] = { + [BRW_OPCODE_MOV] = { .name = "mov", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_FRC] = { .name = "frc", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_RNDU] = { .name = "rndu", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_RNDD] = { .name = "rndd", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_RNDE] = { .name = "rnde", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_RNDZ] = { .name = "rndz", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_NOT] = { .name = "not", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 }, + + [BRW_OPCODE_MUL] = { .name = "mul", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_DPH] = { .name = "dph", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_DP3] = { .name = "dp3", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_DP2] = { .name = "dp2", .nsrc = 2, .ndst = 1 }, + + [BRW_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_SEL] = { .name = "sel", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_AND] = { .name = "and", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_OR] = { .name = "or", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_XOR] = { .name = "xor", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_SHR] = { .name = "shr", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_SHL] = { .name = "shl", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_ASR] = { .name = "asr", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_CMP] = { .name = "cmp", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_CMPN] = { .name = "cmpn", .nsrc = 2, .ndst = 1 }, + + [BRW_OPCODE_SEND] = { .name = "send", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 }, + [BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 }, + [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 1, .ndst = 01 }, + [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 }, + [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_PUSH] = { .name = "push", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_MRESTORE] = { .name = "mrest", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_POP] = { .name = "pop", .nsrc = 2, .ndst = 0 }, + [BRW_OPCODE_WAIT] = { .name = "wait", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_DO] = { .name = "do", .nsrc = 0, .ndst = 0 }, + [BRW_OPCODE_ENDIF] = { .name = "endif", .nsrc = 2, .ndst = 0 }, +}; + +char *conditional_modifier[16] = { + [BRW_CONDITIONAL_NONE] = "", + [BRW_CONDITIONAL_Z] = ".e", + [BRW_CONDITIONAL_NZ] = ".ne", + [BRW_CONDITIONAL_G] = ".g", + [BRW_CONDITIONAL_GE] = ".ge", + [BRW_CONDITIONAL_L] = ".l", + [BRW_CONDITIONAL_LE] = ".le", + [BRW_CONDITIONAL_R] = ".r", + [BRW_CONDITIONAL_O] = ".o", + [BRW_CONDITIONAL_U] = ".u", +}; + +char *negate[2] = { + [0] = "", + [1] = "-", +}; + +char *_abs[2] = { + [0] = "", + [1] = "(abs)", +}; + +char *vert_stride[16] = { + [0] = "0", + [1] = "1", + [2] = "2", + [3] = "4", + [4] = "8", + [5] = "16", + [6] = "32", + [15] = "VxH", +}; + +char *width[8] = { + [0] = "1", + [1] = "2", + [2] = "4", + [3] = "8", + [4] = "16", +}; + +char *horiz_stride[4] = { + [0] = "0", + [1] = "1", + [2] = "2", + [3] = "4" +}; + +char *chan_sel[4] = { + [0] = "x", + [1] = "y", + [2] = "z", + [3] = "w", +}; + +char *dest_condmod[16] = { +}; + +char *debug_ctrl[2] = { + [0] = "", + [1] = ".breakpoint" +}; + +char *saturate[2] = { + [0] = "", + [1] = ".sat" +}; + +char *exec_size[8] = { + [0] = "1", + [1] = "2", + [2] = "4", + [3] = "8", + [4] = "16", + [5] = "32" +}; + +char *pred_inv[2] = { + [0] = "+", + [1] = "-" +}; + +char *pred_ctrl_align16[16] = { + [1] = "", + [2] = ".x", + [3] = ".y", + [4] = ".z", + [5] = ".w", + [6] = ".any4h", + [7] = ".all4h", +}; + +char *pred_ctrl_align1[16] = { + [1] = "", + [2] = ".anyv", + [3] = ".allv", + [4] = ".any2h", + [5] = ".all2h", + [6] = ".any4h", + [7] = ".all4h", + [8] = ".any8h", + [9] = ".all8h", + [10] = ".any16h", + [11] = ".all16h", +}; + +char *thread_ctrl[4] = { + [0] = "", + [2] = "switch" +}; + +char *compr_ctrl[4] = { + [0] = "", + [1] = "sechalf", + [2] = "compr", +}; + +char *dep_ctrl[4] = { + [0] = "", + [1] = "NoDDClr", + [2] = "NoDDChk", + [3] = "NoDDClr,NoDDChk", +}; + +char *mask_ctrl[4] = { + [0] = "", + [1] = "nomask", +}; + +char *access_mode[2] = { + [0] = "align1", + [1] = "align16", +}; + +char *reg_encoding[8] = { + [0] = "UD", + [1] = "D", + [2] = "UW", + [3] = "W", + [4] = "UB", + [5] = "B", + [7] = "F" +}; + +char *imm_encoding[8] = { + [0] = "UD", + [1] = "D", + [2] = "UW", + [3] = "W", + [5] = "VF", + [5] = "V", + [7] = "F" +}; + +char *reg_file[4] = { + [0] = "A", + [1] = "g", + [2] = "m", + [3] = "imm", +}; + +char *writemask[16] = { + [0x0] = ".", + [0x1] = ".x", + [0x2] = ".y", + [0x3] = ".xy", + [0x4] = ".z", + [0x5] = ".xz", + [0x6] = ".yz", + [0x7] = ".xyz", + [0x8] = ".w", + [0x9] = ".xw", + [0xa] = ".yw", + [0xb] = ".xyw", + [0xc] = ".zw", + [0xd] = ".xzw", + [0xe] = ".yzw", + [0xf] = "", +}; + +char *end_of_thread[2] = { + [0] = "", + [1] = "EOT" +}; + +char *target_function[16] = { + [BRW_MESSAGE_TARGET_NULL] = "null", + [BRW_MESSAGE_TARGET_MATH] = "math", + [BRW_MESSAGE_TARGET_SAMPLER] = "sampler", + [BRW_MESSAGE_TARGET_GATEWAY] = "gateway", + [BRW_MESSAGE_TARGET_DATAPORT_READ] = "read", + [BRW_MESSAGE_TARGET_DATAPORT_WRITE] = "write", + [BRW_MESSAGE_TARGET_URB] = "urb", + [BRW_MESSAGE_TARGET_THREAD_SPAWNER] = "thread_spawner" +}; + +char *math_function[16] = { + [BRW_MATH_FUNCTION_INV] = "inv", + [BRW_MATH_FUNCTION_LOG] = "log", + [BRW_MATH_FUNCTION_EXP] = "exp", + [BRW_MATH_FUNCTION_SQRT] = "sqrt", + [BRW_MATH_FUNCTION_RSQ] = "rsq", + [BRW_MATH_FUNCTION_SIN] = "sin", + [BRW_MATH_FUNCTION_COS] = "cos", + [BRW_MATH_FUNCTION_SINCOS] = "sincos", + [BRW_MATH_FUNCTION_TAN] = "tan", + [BRW_MATH_FUNCTION_POW] = "pow", + [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER] = "intdivmod", + [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT] = "intmod", + [BRW_MATH_FUNCTION_INT_DIV_REMAINDER] = "intdiv", +}; + +char *math_saturate[2] = { + [0] = "", + [1] = "sat" +}; + +char *math_signed[2] = { + [0] = "", + [1] = "signed" +}; + +char *math_scalar[2] = { + [0] = "", + [1] = "scalar" +}; + +char *math_precision[2] = { + [0] = "", + [1] = "partial_precision" +}; + +char *urb_swizzle[4] = { + [BRW_URB_SWIZZLE_NONE] = "", + [BRW_URB_SWIZZLE_INTERLEAVE] = "interleave", + [BRW_URB_SWIZZLE_TRANSPOSE] = "transpose", +}; + +char *urb_allocate[2] = { + [0] = "", + [1] = "allocate" +}; + +char *urb_used[2] = { + [0] = "", + [1] = "used" +}; + +char *urb_complete[2] = { + [0] = "", + [1] = "complete" +}; + +char *sampler_target_format[4] = { + [0] = "F", + [2] = "UD", + [3] = "D" +}; + + +static int column; + +static int string (FILE *file, char *string) +{ + fputs (string, file); + column += strlen (string); + return 0; +} + +static int format (FILE *f, char *format, ...) +{ + char buf[1024]; + va_list args; + va_start (args, format); + + vsnprintf (buf, sizeof (buf) - 1, format, args); + string (f, buf); + return 0; +} + +static int newline (FILE *f) +{ + putc ('\n', f); + column = 0; + return 0; +} + +static int pad (FILE *f, int c) +{ + do + string (f, " "); + while (column < c); + return 0; +} + +static int control (FILE *file, char *name, char *ctrl[], GLuint id, int *space) +{ + if (!ctrl[id]) { + fprintf (file, "*** invalid %s value %d ", + name, id); + return 1; + } + if (ctrl[id][0]) + { + if (space && *space) + string (file, " "); + string (file, ctrl[id]); + if (space) + *space = 1; + } + return 0; +} + +static int print_opcode (FILE *file, int id) +{ + if (!opcode[id].name) { + format (file, "*** invalid opcode value %d ", id); + return 1; + } + string (file, opcode[id].name); + return 0; +} + +static int reg (FILE *file, GLuint _reg_file, GLuint _reg_nr) +{ + int err = 0; + if (_reg_file == BRW_ARCHITECTURE_REGISTER_FILE) { + switch (_reg_nr & 0xf0) { + case BRW_ARF_NULL: + string (file, "null"); + return -1; + case BRW_ARF_ADDRESS: + format (file, "a%d", _reg_nr & 0x0f); + break; + case BRW_ARF_ACCUMULATOR: + format (file, "acc%d", _reg_nr & 0x0f); + break; + case BRW_ARF_MASK: + format (file, "mask%d", _reg_nr & 0x0f); + break; + case BRW_ARF_MASK_STACK: + format (file, "msd%d", _reg_nr & 0x0f); + break; + case BRW_ARF_STATE: + format (file, "sr%d", _reg_nr & 0x0f); + break; + case BRW_ARF_CONTROL: + format (file, "cr%d", _reg_nr & 0x0f); + break; + case BRW_ARF_NOTIFICATION_COUNT: + format (file, "n%d", _reg_nr & 0x0f); + break; + case BRW_ARF_IP: + string (file, "ip"); + return -1; + break; + default: + format (file, "ARF%d", _reg_nr); + break; + } + } else { + err |= control (file, "src reg file", reg_file, _reg_file, NULL); + format (file, "%d", _reg_nr); + } + return err; +} + +static int dest (FILE *file, struct brw_instruction *inst) +{ + int err = 0; + + if (inst->header.access_mode == BRW_ALIGN_1) + { + if (inst->bits1.da1.dest_address_mode == BRW_ADDRESS_DIRECT) + { + err |= reg (file, inst->bits1.da1.dest_reg_file, inst->bits1.da1.dest_reg_nr); + if (err == -1) + return 0; + if (inst->bits1.da1.dest_subreg_nr) + format (file, ".%d", inst->bits1.da1.dest_subreg_nr); + format (file, "<%d>", inst->bits1.da1.dest_horiz_stride); + err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da1.dest_reg_type, NULL); + } + else + { + string (file, "g[a0"); + if (inst->bits1.ia1.dest_subreg_nr) + format (file, ".%d", inst->bits1.ia1.dest_subreg_nr); + if (inst->bits1.ia1.dest_indirect_offset) + format (file, " %d", inst->bits1.ia1.dest_indirect_offset); + string (file, "]"); + format (file, "<%d>", inst->bits1.ia1.dest_horiz_stride); + err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.ia1.dest_reg_type, NULL); + } + } + else + { + if (inst->bits1.da16.dest_address_mode == BRW_ADDRESS_DIRECT) + { + err |= reg (file, inst->bits1.da16.dest_reg_file, inst->bits1.da16.dest_reg_nr); + if (err == -1) + return 0; + if (inst->bits1.da16.dest_subreg_nr) + format (file, ".%d", inst->bits1.da16.dest_subreg_nr); + string (file, "<1>"); + err |= control (file, "writemask", writemask, inst->bits1.da16.dest_writemask, NULL); + err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da16.dest_reg_type, NULL); + } + else + { + err = 1; + string (file, "Indirect align16 address mode not supported"); + } + } + + return 0; +} + +static int src_align1_region (FILE *file, + GLuint _vert_stride, GLuint _width, GLuint _horiz_stride) +{ + int err = 0; + string (file, "<"); + err |= control (file, "vert stride", vert_stride, _vert_stride, NULL); + string (file, ","); + err |= control (file, "width", width, _width, NULL); + string (file, ","); + err |= control (file, "horiz_stride", horiz_stride, _horiz_stride, NULL); + string (file, ">"); + return err; +} + +static int src_da1 (FILE *file, GLuint type, GLuint _reg_file, + GLuint _vert_stride, GLuint _width, GLuint _horiz_stride, + GLuint reg_num, GLuint sub_reg_num, GLuint __abs, GLuint _negate) +{ + int err = 0; + err |= control (file, "negate", negate, _negate, NULL); + err |= control (file, "abs", _abs, __abs, NULL); + + err |= reg (file, _reg_file, reg_num); + if (err == -1) + return 0; + if (sub_reg_num) + format (file, ".%d", sub_reg_num); + src_align1_region (file, _vert_stride, _width, _horiz_stride); + err |= control (file, "src reg encoding", reg_encoding, type, NULL); + return err; +} + +static int src_ia1 (FILE *file, + GLuint type, + GLuint _reg_file, + GLint _addr_imm, + GLuint _addr_subreg_nr, + GLuint _negate, + GLuint __abs, + GLuint _addr_mode, + GLuint _horiz_stride, + GLuint _width, + GLuint _vert_stride) +{ + int err = 0; + err |= control (file, "negate", negate, _negate, NULL); + err |= control (file, "abs", _abs, __abs, NULL); + + string (file, "g[a0"); + if (_addr_subreg_nr) + format (file, ".%d", _addr_subreg_nr); + if (_addr_imm) + format (file, " %d", _addr_imm); + string (file, "]"); + src_align1_region (file, _vert_stride, _width, _horiz_stride); + err |= control (file, "src reg encoding", reg_encoding, type, NULL); + return err; +} + +static int src_da16 (FILE *file, + GLuint _reg_type, + GLuint _reg_file, + GLuint _vert_stride, + GLuint _reg_nr, + GLuint _subreg_nr, + GLuint __abs, + GLuint _negate, + GLuint swz_x, + GLuint swz_y, + GLuint swz_z, + GLuint swz_w) +{ + int err = 0; + err |= control (file, "negate", negate, _negate, NULL); + err |= control (file, "abs", _abs, __abs, NULL); + + err |= reg (file, _reg_file, _reg_nr); + if (err == -1) + return 0; + if (_subreg_nr) + format (file, ".%d", _subreg_nr); + string (file, "<"); + err |= control (file, "vert stride", vert_stride, _vert_stride, NULL); + string (file, ",1,1>"); + err |= control (file, "src da16 reg type", reg_encoding, _reg_type, NULL); + /* + * Three kinds of swizzle display: + * identity - nothing printed + * 1->all - print the single channel + * 1->1 - print the mapping + */ + if (swz_x == BRW_CHANNEL_X && + swz_y == BRW_CHANNEL_Y && + swz_z == BRW_CHANNEL_Z && + swz_w == BRW_CHANNEL_W) + { + ; + } + else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w) + { + string (file, "."); + err |= control (file, "channel select", chan_sel, swz_x, NULL); + } + else + { + string (file, "."); + err |= control (file, "channel select", chan_sel, swz_x, NULL); + err |= control (file, "channel select", chan_sel, swz_y, NULL); + err |= control (file, "channel select", chan_sel, swz_z, NULL); + err |= control (file, "channel select", chan_sel, swz_w, NULL); + } + return err; +} + + +static int imm (FILE *file, GLuint type, struct brw_instruction *inst) { + switch (type) { + case BRW_REGISTER_TYPE_UD: + format (file, "0x%08xUD", inst->bits3.ud); + break; + case BRW_REGISTER_TYPE_D: + format (file, "%dD", inst->bits3.d); + break; + case BRW_REGISTER_TYPE_UW: + format (file, "0x%04xUW", (uint16_t) inst->bits3.ud); + break; + case BRW_REGISTER_TYPE_W: + format (file, "%dW", (int16_t) inst->bits3.d); + break; + case BRW_REGISTER_TYPE_UB: + format (file, "0x%02xUB", (int8_t) inst->bits3.ud); + break; + case BRW_REGISTER_TYPE_VF: + format (file, "Vector Float"); + break; + case BRW_REGISTER_TYPE_V: + format (file, "0x%08xV", inst->bits3.ud); + break; + case BRW_REGISTER_TYPE_F: + format (file, "%-gF", inst->bits3.f); + } + return 0; +} + +static int src0 (FILE *file, struct brw_instruction *inst) +{ + if (inst->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE) + return imm (file, inst->bits1.da1.src0_reg_type, + inst); + else if (inst->header.access_mode == BRW_ALIGN_1) + { + if (inst->bits2.da1.src0_address_mode == BRW_ADDRESS_DIRECT) + { + return src_da1 (file, + inst->bits1.da1.src0_reg_type, + inst->bits1.da1.src0_reg_file, + inst->bits2.da1.src0_vert_stride, + inst->bits2.da1.src0_width, + inst->bits2.da1.src0_horiz_stride, + inst->bits2.da1.src0_reg_nr, + inst->bits2.da1.src0_subreg_nr, + inst->bits2.da1.src0_abs, + inst->bits2.da1.src0_negate); + } + else + { + return src_ia1 (file, + inst->bits1.ia1.src0_reg_type, + inst->bits1.ia1.src0_reg_file, + inst->bits2.ia1.src0_indirect_offset, + inst->bits2.ia1.src0_subreg_nr, + inst->bits2.ia1.src0_negate, + inst->bits2.ia1.src0_abs, + inst->bits2.ia1.src0_address_mode, + inst->bits2.ia1.src0_horiz_stride, + inst->bits2.ia1.src0_width, + inst->bits2.ia1.src0_vert_stride); + } + } + else + { + if (inst->bits2.da16.src0_address_mode == BRW_ADDRESS_DIRECT) + { + return src_da16 (file, + inst->bits1.da16.src0_reg_type, + inst->bits1.da16.src0_reg_file, + inst->bits2.da16.src0_vert_stride, + inst->bits2.da16.src0_reg_nr, + inst->bits2.da16.src0_subreg_nr, + inst->bits2.da16.src0_abs, + inst->bits2.da16.src0_negate, + inst->bits2.da16.src0_swz_x, + inst->bits2.da16.src0_swz_y, + inst->bits2.da16.src0_swz_z, + inst->bits2.da16.src0_swz_w); + } + else + { + string (file, "Indirect align16 address mode not supported"); + return 1; + } + } +} + +static int src1 (FILE *file, struct brw_instruction *inst) +{ + if (inst->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE) + return imm (file, inst->bits1.da1.src1_reg_type, + inst); + else if (inst->header.access_mode == BRW_ALIGN_1) + { + if (inst->bits3.da1.src1_address_mode == BRW_ADDRESS_DIRECT) + { + return src_da1 (file, + inst->bits1.da1.src1_reg_type, + inst->bits1.da1.src1_reg_file, + inst->bits3.da1.src1_vert_stride, + inst->bits3.da1.src1_width, + inst->bits3.da1.src1_horiz_stride, + inst->bits3.da1.src1_reg_nr, + inst->bits3.da1.src1_subreg_nr, + inst->bits3.da1.src1_abs, + inst->bits3.da1.src1_negate); + } + else + { + return src_ia1 (file, + inst->bits1.ia1.src1_reg_type, + inst->bits1.ia1.src1_reg_file, + inst->bits3.ia1.src1_indirect_offset, + inst->bits3.ia1.src1_subreg_nr, + inst->bits3.ia1.src1_negate, + inst->bits3.ia1.src1_abs, + inst->bits3.ia1.src1_address_mode, + inst->bits3.ia1.src1_horiz_stride, + inst->bits3.ia1.src1_width, + inst->bits3.ia1.src1_vert_stride); + } + } + else + { + if (inst->bits3.da16.src1_address_mode == BRW_ADDRESS_DIRECT) + { + return src_da16 (file, + inst->bits1.da16.src1_reg_type, + inst->bits1.da16.src1_reg_file, + inst->bits3.da16.src1_vert_stride, + inst->bits3.da16.src1_reg_nr, + inst->bits3.da16.src1_subreg_nr, + inst->bits3.da16.src1_abs, + inst->bits3.da16.src1_negate, + inst->bits3.da16.src1_swz_x, + inst->bits3.da16.src1_swz_y, + inst->bits3.da16.src1_swz_z, + inst->bits3.da16.src1_swz_w); + } + else + { + string (file, "Indirect align16 address mode not supported"); + return 1; + } + } +} + +int brw_disasm (FILE *file, struct brw_instruction *inst) +{ + int err = 0; + int space = 0; + + if (inst->header.predicate_control) { + string (file, "("); + err |= control (file, "predicate inverse", pred_inv, inst->header.predicate_inverse, NULL); + string (file, "f0"); + if (inst->bits2.da1.flag_reg_nr) + format (file, ".%d", inst->bits2.da1.flag_reg_nr); + if (inst->header.access_mode == BRW_ALIGN_1) + err |= control (file, "predicate control align1", pred_ctrl_align1, + inst->header.predicate_control, NULL); + else + err |= control (file, "predicate control align16", pred_ctrl_align16, + inst->header.predicate_control, NULL); + string (file, ") "); + } + + err |= print_opcode (file, inst->header.opcode); + err |= control (file, "saturate", saturate, inst->header.saturate, NULL); + err |= control (file, "debug control", debug_ctrl, inst->header.debug_control, NULL); + + if (inst->header.opcode != BRW_OPCODE_SEND) + err |= control (file, "conditional modifier", conditional_modifier, + inst->header.destreg__conditionalmod, NULL); + + if (inst->header.opcode != BRW_OPCODE_NOP) { + string (file, "("); + err |= control (file, "execution size", exec_size, inst->header.execution_size, NULL); + string (file, ")"); + } + + if (inst->header.opcode == BRW_OPCODE_SEND) + format (file, " %d", inst->header.destreg__conditionalmod); + + if (opcode[inst->header.opcode].ndst > 0) { + pad (file, 16); + err |= dest (file, inst); + } + if (opcode[inst->header.opcode].nsrc > 0) { + pad (file, 32); + err |= src0 (file, inst); + } + if (opcode[inst->header.opcode].nsrc > 1) { + pad (file, 48); + err |= src1 (file, inst); + } + + if (inst->header.opcode == BRW_OPCODE_SEND) { + newline (file); + pad (file, 16); + space = 0; + err |= control (file, "target function", target_function, + inst->bits3.generic.msg_target, &space); + switch (inst->bits3.generic.msg_target) { + case BRW_MESSAGE_TARGET_MATH: + err |= control (file, "math function", math_function, + inst->bits3.math.function, &space); + err |= control (file, "math saturate", math_saturate, + inst->bits3.math.saturate, &space); + err |= control (file, "math signed", math_signed, + inst->bits3.math.int_type, &space); + err |= control (file, "math scalar", math_scalar, + inst->bits3.math.data_type, &space); + err |= control (file, "math precision", math_precision, + inst->bits3.math.precision, &space); + break; + case BRW_MESSAGE_TARGET_SAMPLER: + format (file, " (%d, %d, ", + inst->bits3.sampler.binding_table_index, + inst->bits3.sampler.sampler); + err |= control (file, "sampler target format", sampler_target_format, + inst->bits3.sampler.return_format, NULL); + string (file, ")"); + break; + case BRW_MESSAGE_TARGET_DATAPORT_WRITE: + format (file, " (%d, %d, %d, %d)", + inst->bits3.dp_write.binding_table_index, + (inst->bits3.dp_write.pixel_scoreboard_clear << 3) | + inst->bits3.dp_write.msg_control, + inst->bits3.dp_write.msg_type, + inst->bits3.dp_write.send_commit_msg); + break; + case BRW_MESSAGE_TARGET_URB: + format (file, " %d", inst->bits3.urb.offset); + space = 1; + err |= control (file, "urb swizzle", urb_swizzle, + inst->bits3.urb.swizzle_control, &space); + err |= control (file, "urb allocate", urb_allocate, + inst->bits3.urb.allocate, &space); + err |= control (file, "urb used", urb_used, + inst->bits3.urb.used, &space); + err |= control (file, "urb complete", urb_complete, + inst->bits3.urb.complete, &space); + break; + case BRW_MESSAGE_TARGET_THREAD_SPAWNER: + break; + default: + format (file, "unsupported target %d", inst->bits3.generic.msg_target); + break; + } + if (space) + string (file, " "); + format (file, "mlen %d", + inst->bits3.generic.msg_length); + format (file, " rlen %d", + inst->bits3.generic.response_length); + } + pad (file, 64); + if (inst->header.opcode != BRW_OPCODE_NOP) { + string (file, "{"); + space = 1; + err |= control(file, "access mode", access_mode, inst->header.access_mode, &space); + err |= control (file, "mask control", mask_ctrl, inst->header.mask_control, &space); + err |= control (file, "dependency control", dep_ctrl, inst->header.dependency_control, &space); + err |= control (file, "compression control", compr_ctrl, inst->header.compression_control, &space); + err |= control (file, "thread control", thread_ctrl, inst->header.thread_control, &space); + if (inst->header.opcode == BRW_OPCODE_SEND) + err |= control (file, "end of thread", end_of_thread, + inst->bits3.generic.end_of_thread, &space); + if (space) + string (file, " "); + string (file, "}"); + } + string (file, ";"); + newline (file); + return err; +} diff --git a/i965/brw_draw.c b/i965/brw_draw.c index 5342622..c53bd47 100644 --- a/i965/brw_draw.c +++ b/i965/brw_draw.c @@ -141,6 +141,8 @@ static void brw_emit_prim(struct brw_context *brw, prim_packet.verts_per_instance = trim(prim->mode, prim->count); prim_packet.start_vert_location = prim->start; + if (prim->indexed) + prim_packet.start_vert_location += brw->ib.start_vertex_offset; prim_packet.instance_count = 1; prim_packet.start_instance_location = 0; prim_packet.base_vert_location = 0; @@ -185,21 +187,16 @@ static void brw_merge_inputs( struct brw_context *brw, for (i = 0; i < VERT_ATTRIB_MAX; i++) { brw->vb.inputs[i].glarray = arrays[i]; + brw->vb.inputs[i].attrib = (gl_vert_attrib) i; if (arrays[i]->StrideB != 0) - brw->vb.info.varying |= 1 << i; - brw->vb.info.sizes[i/16] |= (brw->vb.inputs[i].glarray->Size - 1) << ((i%16) * 2); } - /* Raise statechanges if input sizes and varying have changed: - */ + /* Raise statechanges if input sizes have changed. */ if (memcmp(brw->vb.info.sizes, old.sizes, sizeof(old.sizes)) != 0) brw->state.dirty.brw |= BRW_NEW_INPUT_DIMENSIONS; - - if (brw->vb.info.varying != old.varying) - brw->state.dirty.brw |= BRW_NEW_INPUT_VARYING; } /* XXX: could split the primitive list to fallback only on the @@ -416,6 +413,8 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, out: UNLOCK_HARDWARE(intel); + brw_state_cache_check_size(brw); + if (warn) fprintf(stderr, "i965: Single primitive emit potentially exceeded " "available aperture space\n"); @@ -426,54 +425,31 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, return retval; } -static GLboolean brw_need_rebase( GLcontext *ctx, - const struct gl_client_array *arrays[], - const struct _mesa_index_buffer *ib, - GLuint min_index ) -{ - if (min_index == 0) - return GL_FALSE; - - if (ib) { - if (!vbo_all_varyings_in_vbos(arrays)) - return GL_TRUE; - else - return GL_FALSE; - } - else { - /* Hmm. This isn't quite what I wanted. BRW can actually - * handle the mixed case well enough that we shouldn't need to - * rebase. However, it's probably not very common, nor hugely - * expensive to do it this way: - */ - if (!vbo_all_varyings_in_vbos(arrays)) - return GL_TRUE; - else - return GL_FALSE; - } -} - - void brw_draw_prims( GLcontext *ctx, const struct gl_client_array *arrays[], const struct _mesa_prim *prim, GLuint nr_prims, const struct _mesa_index_buffer *ib, + GLboolean index_bounds_valid, GLuint min_index, GLuint max_index ) { GLboolean retval; - /* Decide if we want to rebase. If so we end up recursing once - * only into this function. - */ - if (brw_need_rebase( ctx, arrays, ib, min_index )) { - vbo_rebase_prims( ctx, arrays, - prim, nr_prims, - ib, min_index, max_index, - brw_draw_prims ); - - return; + if (!vbo_all_varyings_in_vbos(arrays)) { + if (!index_bounds_valid) + vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index); + + /* Decide if we want to rebase. If so we end up recursing once + * only into this function. + */ + if (min_index != 0) { + vbo_rebase_prims(ctx, arrays, + prim, nr_prims, + ib, min_index, max_index, + brw_draw_prims ); + return; + } } /* Make a first attempt at drawing: diff --git a/i965/brw_draw.h b/i965/brw_draw.h index 9aebbdb..2a14db2 100644 --- a/i965/brw_draw.h +++ b/i965/brw_draw.h @@ -39,6 +39,7 @@ void brw_draw_prims( GLcontext *ctx, const struct _mesa_prim *prims, GLuint nr_prims, const struct _mesa_index_buffer *ib, + GLboolean index_bounds_valid, GLuint min_index, GLuint max_index ); diff --git a/i965/brw_draw_upload.c b/i965/brw_draw_upload.c index b91b20b..4aa17fa 100644 --- a/i965/brw_draw_upload.c +++ b/i965/brw_draw_upload.c @@ -343,16 +343,13 @@ static void brw_prepare_vertices(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; struct intel_context *intel = intel_context(ctx); - GLuint tmp = brw->vs.prog_data->inputs_read; + GLbitfield vs_inputs = brw->vs.prog_data->inputs_read; GLuint i; const unsigned char *ptr = NULL; GLuint interleave = 0; unsigned int min_index = brw->vb.min_index; unsigned int max_index = brw->vb.max_index; - struct brw_vertex_element *enabled[VERT_ATTRIB_MAX]; - GLuint nr_enabled = 0; - struct brw_vertex_element *upload[VERT_ATTRIB_MAX]; GLuint nr_uploads = 0; @@ -362,12 +359,13 @@ static void brw_prepare_vertices(struct brw_context *brw) _mesa_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index); /* Accumulate the list of enabled arrays. */ - while (tmp) { - GLuint i = _mesa_ffsll(tmp)-1; + brw->vb.nr_enabled = 0; + while (vs_inputs) { + GLuint i = _mesa_ffsll(vs_inputs) - 1; struct brw_vertex_element *input = &brw->vb.inputs[i]; - tmp &= ~(1<<i); - enabled[nr_enabled++] = input; + vs_inputs &= ~(1 << i); + brw->vb.enabled[brw->vb.nr_enabled++] = input; } /* XXX: In the rare cases where this happens we fallback all @@ -376,16 +374,15 @@ static void brw_prepare_vertices(struct brw_context *brw) * cases with > 17 vertex attributes enabled, so it probably * isn't an issue at this point. */ - if (nr_enabled >= BRW_VEP_MAX) { + if (brw->vb.nr_enabled >= BRW_VEP_MAX) { intel->Fallback = 1; return; } - for (i = 0; i < nr_enabled; i++) { - struct brw_vertex_element *input = enabled[i]; + for (i = 0; i < brw->vb.nr_enabled; i++) { + struct brw_vertex_element *input = brw->vb.enabled[i]; input->element_size = get_size(input->glarray->Type) * input->glarray->Size; - input->count = input->glarray->StrideB ? max_index + 1 - min_index : 1; if (input->glarray->BufferObj->Name != 0) { struct intel_buffer_object *intel_buffer = @@ -398,7 +395,23 @@ static void brw_prepare_vertices(struct brw_context *brw) dri_bo_reference(input->bo); input->offset = (unsigned long)input->glarray->Ptr; input->stride = input->glarray->StrideB; + input->count = input->glarray->_MaxElement; + + /* This is a common place to reach if the user mistakenly supplies + * a pointer in place of a VBO offset. If we just let it go through, + * we may end up dereferencing a pointer beyond the bounds of the + * GTT. We would hope that the VBO's max_index would save us, but + * Mesa appears to hand us min/max values not clipped to the + * array object's _MaxElement, and _MaxElement frequently appears + * to be wrong anyway. + * + * The VBO spec allows application termination in this case, and it's + * probably a service to the poor programmer to do so rather than + * trying to just not render. + */ + assert(input->offset < input->bo->size); } else { + input->count = input->glarray->StrideB ? max_index + 1 - min_index : 1; if (input->bo != NULL) { /* Already-uploaded vertex data is present from a previous * prepare_vertices, but we had to re-validate state due to @@ -410,7 +423,7 @@ static void brw_prepare_vertices(struct brw_context *brw) /* Queue the buffer object up to be uploaded in the next pass, * when we've decided if we're doing interleaved or not. */ - if (i == 0) { + if (input->attrib == VERT_ATTRIB_POS) { /* Position array not properly enabled: */ if (input->glarray->StrideB == 0) { @@ -466,8 +479,8 @@ static void brw_prepare_vertices(struct brw_context *brw) brw_prepare_query_begin(brw); - for (i = 0; i < nr_enabled; i++) { - struct brw_vertex_element *input = enabled[i]; + for (i = 0; i < brw->vb.nr_enabled; i++) { + struct brw_vertex_element *input = brw->vb.enabled[i]; brw_add_validated_bo(brw, input->bo); } @@ -477,34 +490,44 @@ static void brw_emit_vertices(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; struct intel_context *intel = intel_context(ctx); - GLuint tmp = brw->vs.prog_data->inputs_read; - struct brw_vertex_element *enabled[VERT_ATTRIB_MAX]; GLuint i; - GLuint nr_enabled = 0; - /* Accumulate the list of enabled arrays. */ - while (tmp) { - i = _mesa_ffsll(tmp)-1; - struct brw_vertex_element *input = &brw->vb.inputs[i]; + brw_emit_query_begin(brw); - tmp &= ~(1<<i); - enabled[nr_enabled++] = input; + /* If the VS doesn't read any inputs (calculating vertex position from + * a state variable for some reason, for example), emit a single pad + * VERTEX_ELEMENT struct and bail. + * + * The stale VB state stays in place, but they don't do anything unless + * a VE loads from them. + */ + if (brw->vb.nr_enabled == 0) { + BEGIN_BATCH(3, IGNORE_CLIPRECTS); + OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | 1); + OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) | + BRW_VE0_VALID | + (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) | + (0 << BRW_VE0_SRC_OFFSET_SHIFT)); + OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) | + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | + (BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT)); + ADVANCE_BATCH(); + return; } - brw_emit_query_begin(brw); - /* Now emit VB and VEP state packets. * * This still defines a hardware VB for each input, even if they * are interleaved or from the same VBO. TBD if this makes a * performance difference. */ - BEGIN_BATCH(1 + nr_enabled * 4, IGNORE_CLIPRECTS); + BEGIN_BATCH(1 + brw->vb.nr_enabled * 4, IGNORE_CLIPRECTS); OUT_BATCH((CMD_VERTEX_BUFFER << 16) | - ((1 + nr_enabled * 4) - 2)); + ((1 + brw->vb.nr_enabled * 4) - 2)); - for (i = 0; i < nr_enabled; i++) { - struct brw_vertex_element *input = enabled[i]; + for (i = 0; i < brw->vb.nr_enabled; i++) { + struct brw_vertex_element *input = brw->vb.enabled[i]; OUT_BATCH((i << BRW_VB0_INDEX_SHIFT) | BRW_VB0_ACCESS_VERTEXDATA | @@ -512,15 +535,27 @@ static void brw_emit_vertices(struct brw_context *brw) OUT_RELOC(input->bo, I915_GEM_DOMAIN_VERTEX, 0, input->offset); - OUT_BATCH(brw->vb.max_index); + if (BRW_IS_IGDNG(brw)) { + if (input->stride) { + OUT_RELOC(input->bo, + I915_GEM_DOMAIN_VERTEX, 0, + input->offset + input->stride * input->count); + } else { + assert(input->count == 1); + OUT_RELOC(input->bo, + I915_GEM_DOMAIN_VERTEX, 0, + input->offset + input->element_size); + } + } else + OUT_BATCH(input->stride ? input->count : 0); OUT_BATCH(0); /* Instance data step rate */ } ADVANCE_BATCH(); - BEGIN_BATCH(1 + nr_enabled * 2, IGNORE_CLIPRECTS); - OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + nr_enabled * 2) - 2)); - for (i = 0; i < nr_enabled; i++) { - struct brw_vertex_element *input = enabled[i]; + BEGIN_BATCH(1 + brw->vb.nr_enabled * 2, IGNORE_CLIPRECTS); + OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + brw->vb.nr_enabled * 2) - 2)); + for (i = 0; i < brw->vb.nr_enabled; i++) { + struct brw_vertex_element *input = brw->vb.enabled[i]; uint32_t format = get_surface_type(input->glarray->Type, input->glarray->Size, input->glarray->Format, @@ -542,11 +577,18 @@ static void brw_emit_vertices(struct brw_context *brw) BRW_VE0_VALID | (format << BRW_VE0_FORMAT_SHIFT) | (0 << BRW_VE0_SRC_OFFSET_SHIFT)); - OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) | - (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | - (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | - (comp3 << BRW_VE1_COMPONENT_3_SHIFT) | - ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT)); + + if (BRW_IS_IGDNG(brw)) + OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) | + (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | + (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | + (comp3 << BRW_VE1_COMPONENT_3_SHIFT)); + else + OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) | + (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | + (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | + (comp3 << BRW_VE1_COMPONENT_3_SHIFT) | + ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT)); } ADVANCE_BATCH(); } @@ -570,17 +612,20 @@ static void brw_prepare_indices(struct brw_context *brw) dri_bo *bo = NULL; struct gl_buffer_object *bufferobj; GLuint offset; + GLuint ib_type_size; if (index_buffer == NULL) return; - ib_size = get_size(index_buffer->type) * index_buffer->count; + ib_type_size = get_size(index_buffer->type); + ib_size = ib_type_size * index_buffer->count; bufferobj = index_buffer->obj;; /* Turn into a proper VBO: */ if (!bufferobj->Name) { - + brw->ib.start_vertex_offset = 0; + /* Get new bufferobj, offset: */ get_space(brw, ib_size, &bo, &offset); @@ -596,6 +641,7 @@ static void brw_prepare_indices(struct brw_context *brw) } } else { offset = (GLuint) (unsigned long) index_buffer->ptr; + brw->ib.start_vertex_offset = 0; /* If the index buffer isn't aligned to its element size, we have to * rebase it into a temporary. @@ -616,39 +662,62 @@ static void brw_prepare_indices(struct brw_context *brw) bo = intel_bufferobj_buffer(intel, intel_buffer_object(bufferobj), INTEL_READ); dri_bo_reference(bo); + + /* Use CMD_3D_PRIM's start_vertex_offset to avoid re-uploading + * the index buffer state when we're just moving the start index + * of our drawing. + */ + brw->ib.start_vertex_offset = offset / ib_type_size; + offset = 0; + ib_size = bo->size; } } - dri_bo_unreference(brw->ib.bo); - brw->ib.bo = bo; - brw->ib.offset = offset; + if (brw->ib.bo != bo || + brw->ib.offset != offset || + brw->ib.size != ib_size) + { + drm_intel_bo_unreference(brw->ib.bo); + brw->ib.bo = bo; + brw->ib.offset = offset; + brw->ib.size = ib_size; + + brw->state.dirty.brw |= BRW_NEW_INDEX_BUFFER; + } else { + drm_intel_bo_unreference(bo); + } brw_add_validated_bo(brw, brw->ib.bo); } -static void brw_emit_indices(struct brw_context *brw) +const struct brw_tracked_state brw_indices = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_INDICES, + .cache = 0, + }, + .prepare = brw_prepare_indices, +}; + +static void brw_emit_index_buffer(struct brw_context *brw) { struct intel_context *intel = &brw->intel; const struct _mesa_index_buffer *index_buffer = brw->ib.ib; - GLuint ib_size; if (index_buffer == NULL) return; - ib_size = get_size(index_buffer->type) * index_buffer->count; - /* Emit the indexbuffer packet: */ { struct brw_indexbuffer ib; memset(&ib, 0, sizeof(ib)); - + ib.header.bits.opcode = CMD_INDEX_BUFFER; ib.header.bits.length = sizeof(ib)/4 - 2; ib.header.bits.index_format = get_index_type(index_buffer->type); ib.header.bits.cut_index_enable = 0; - BEGIN_BATCH(4, IGNORE_CLIPRECTS); OUT_BATCH( ib.header.dword ); @@ -657,18 +726,17 @@ static void brw_emit_indices(struct brw_context *brw) brw->ib.offset); OUT_RELOC(brw->ib.bo, I915_GEM_DOMAIN_VERTEX, 0, - brw->ib.offset + ib_size); + brw->ib.offset + brw->ib.size); OUT_BATCH( 0 ); ADVANCE_BATCH(); } } -const struct brw_tracked_state brw_indices = { +const struct brw_tracked_state brw_index_buffer = { .dirty = { .mesa = 0, - .brw = BRW_NEW_BATCH | BRW_NEW_INDICES, + .brw = BRW_NEW_BATCH | BRW_NEW_INDEX_BUFFER, .cache = 0, }, - .prepare = brw_prepare_indices, - .emit = brw_emit_indices, + .emit = brw_emit_index_buffer, }; diff --git a/i965/brw_eu.c b/i965/brw_eu.c index c53efba..1df5613 100644 --- a/i965/brw_eu.c +++ b/i965/brw_eu.c @@ -62,7 +62,7 @@ void brw_set_predicate_control( struct brw_compile *p, GLuint pc ) void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional ) { - p->current->header.destreg__conditonalmod = conditional; + p->current->header.destreg__conditionalmod = conditional; } void brw_set_access_mode( struct brw_compile *p, GLuint access_mode ) diff --git a/i965/brw_eu.h b/i965/brw_eu.h index 003332f..30603bd 100644 --- a/i965/brw_eu.h +++ b/i965/brw_eu.h @@ -97,7 +97,7 @@ struct brw_glsl_call; #define BRW_EU_MAX_INSN_STACK 5 -#define BRW_EU_MAX_INSN 4000 +#define BRW_EU_MAX_INSN 10000 struct brw_compile { struct brw_instruction store[BRW_EU_MAX_INSN]; @@ -171,9 +171,9 @@ static INLINE struct brw_reg brw_reg( GLuint file, { struct brw_reg reg; if (type == BRW_GENERAL_REGISTER_FILE) - assert(nr < 128); + assert(nr < BRW_MAX_GRF); else if (type == BRW_MESSAGE_REGISTER_FILE) - assert(nr < 9); + assert(nr < BRW_MAX_MRF); else if (type == BRW_ARCHITECTURE_REGISTER_FILE) assert(nr <= BRW_ARF_IP); @@ -538,6 +538,7 @@ static INLINE struct brw_reg brw_mask_reg( GLuint subnr ) static INLINE struct brw_reg brw_message_reg( GLuint nr ) { + assert(nr < BRW_MAX_MRF); return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, nr, 0); @@ -815,6 +816,19 @@ void brw_urb_WRITE(struct brw_compile *p, GLuint offset, GLuint swizzle); +void brw_ff_sync(struct brw_compile *p, + struct brw_reg dest, + GLuint msg_reg_nr, + struct brw_reg src0, + GLboolean allocate, + GLboolean used, + GLuint msg_length, + GLuint response_length, + GLboolean eot, + GLboolean writes_complete, + GLuint offset, + GLuint swizzle); + void brw_fb_WRITE(struct brw_compile *p, struct brw_reg dest, GLuint msg_reg_nr, @@ -834,7 +848,9 @@ void brw_SAMPLE(struct brw_compile *p, GLuint msg_type, GLuint response_length, GLuint msg_length, - GLboolean eot); + GLboolean eot, + GLuint header_present, + GLuint simd_mode); void brw_math_16( struct brw_compile *p, struct brw_reg dest, diff --git a/i965/brw_eu_emit.c b/i965/brw_eu_emit.c index 2a147fb..241cdc3 100644 --- a/i965/brw_eu_emit.c +++ b/i965/brw_eu_emit.c @@ -241,7 +241,8 @@ void brw_set_src1( struct brw_instruction *insn, -static void brw_set_math_message( struct brw_instruction *insn, +static void brw_set_math_message( struct brw_context *brw, + struct brw_instruction *insn, GLuint msg_length, GLuint response_length, GLuint function, @@ -252,18 +253,35 @@ static void brw_set_math_message( struct brw_instruction *insn, { brw_set_src1(insn, brw_imm_d(0)); - insn->bits3.math.function = function; - insn->bits3.math.int_type = integer_type; - insn->bits3.math.precision = low_precision; - insn->bits3.math.saturate = saturate; - insn->bits3.math.data_type = dataType; - insn->bits3.math.response_length = response_length; - insn->bits3.math.msg_length = msg_length; - insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH; - insn->bits3.math.end_of_thread = 0; + if (BRW_IS_IGDNG(brw)) { + insn->bits3.math_igdng.function = function; + insn->bits3.math_igdng.int_type = integer_type; + insn->bits3.math_igdng.precision = low_precision; + insn->bits3.math_igdng.saturate = saturate; + insn->bits3.math_igdng.data_type = dataType; + insn->bits3.math_igdng.snapshot = 0; + insn->bits3.math_igdng.header_present = 0; + insn->bits3.math_igdng.response_length = response_length; + insn->bits3.math_igdng.msg_length = msg_length; + insn->bits3.math_igdng.end_of_thread = 0; + insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_MATH; + insn->bits2.send_igdng.end_of_thread = 0; + } else { + insn->bits3.math.function = function; + insn->bits3.math.int_type = integer_type; + insn->bits3.math.precision = low_precision; + insn->bits3.math.saturate = saturate; + insn->bits3.math.data_type = dataType; + insn->bits3.math.response_length = response_length; + insn->bits3.math.msg_length = msg_length; + insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH; + insn->bits3.math.end_of_thread = 0; + } } -static void brw_set_urb_message( struct brw_instruction *insn, + +static void brw_set_ff_sync_message( struct brw_context *brw, + struct brw_instruction *insn, GLboolean allocate, GLboolean used, GLuint msg_length, @@ -273,21 +291,64 @@ static void brw_set_urb_message( struct brw_instruction *insn, GLuint offset, GLuint swizzle_control ) { - brw_set_src1(insn, brw_imm_d(0)); + brw_set_src1(insn, brw_imm_d(0)); + + insn->bits3.urb_igdng.opcode = 1; + insn->bits3.urb_igdng.offset = offset; + insn->bits3.urb_igdng.swizzle_control = swizzle_control; + insn->bits3.urb_igdng.allocate = allocate; + insn->bits3.urb_igdng.used = used; + insn->bits3.urb_igdng.complete = complete; + insn->bits3.urb_igdng.header_present = 1; + insn->bits3.urb_igdng.response_length = response_length; + insn->bits3.urb_igdng.msg_length = msg_length; + insn->bits3.urb_igdng.end_of_thread = end_of_thread; + insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB; + insn->bits2.send_igdng.end_of_thread = end_of_thread; +} - insn->bits3.urb.opcode = 0; /* ? */ - insn->bits3.urb.offset = offset; - insn->bits3.urb.swizzle_control = swizzle_control; - insn->bits3.urb.allocate = allocate; - insn->bits3.urb.used = used; /* ? */ - insn->bits3.urb.complete = complete; - insn->bits3.urb.response_length = response_length; - insn->bits3.urb.msg_length = msg_length; - insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB; - insn->bits3.urb.end_of_thread = end_of_thread; +static void brw_set_urb_message( struct brw_context *brw, + struct brw_instruction *insn, + GLboolean allocate, + GLboolean used, + GLuint msg_length, + GLuint response_length, + GLboolean end_of_thread, + GLboolean complete, + GLuint offset, + GLuint swizzle_control ) +{ + brw_set_src1(insn, brw_imm_d(0)); + + if (BRW_IS_IGDNG(brw)) { + insn->bits3.urb_igdng.opcode = 0; /* ? */ + insn->bits3.urb_igdng.offset = offset; + insn->bits3.urb_igdng.swizzle_control = swizzle_control; + insn->bits3.urb_igdng.allocate = allocate; + insn->bits3.urb_igdng.used = used; /* ? */ + insn->bits3.urb_igdng.complete = complete; + insn->bits3.urb_igdng.header_present = 1; + insn->bits3.urb_igdng.response_length = response_length; + insn->bits3.urb_igdng.msg_length = msg_length; + insn->bits3.urb_igdng.end_of_thread = end_of_thread; + insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB; + insn->bits2.send_igdng.end_of_thread = end_of_thread; + } else { + insn->bits3.urb.opcode = 0; /* ? */ + insn->bits3.urb.offset = offset; + insn->bits3.urb.swizzle_control = swizzle_control; + insn->bits3.urb.allocate = allocate; + insn->bits3.urb.used = used; /* ? */ + insn->bits3.urb.complete = complete; + insn->bits3.urb.response_length = response_length; + insn->bits3.urb.msg_length = msg_length; + insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB; + insn->bits3.urb.end_of_thread = end_of_thread; + } } -static void brw_set_dp_write_message( struct brw_instruction *insn, +static void brw_set_dp_write_message( struct brw_context *brw, + struct brw_instruction *insn, GLuint binding_table_index, GLuint msg_control, GLuint msg_type, @@ -298,18 +359,33 @@ static void brw_set_dp_write_message( struct brw_instruction *insn, { brw_set_src1(insn, brw_imm_d(0)); - insn->bits3.dp_write.binding_table_index = binding_table_index; - insn->bits3.dp_write.msg_control = msg_control; - insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear; - insn->bits3.dp_write.msg_type = msg_type; - insn->bits3.dp_write.send_commit_msg = 0; - insn->bits3.dp_write.response_length = response_length; - insn->bits3.dp_write.msg_length = msg_length; - insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE; - insn->bits3.urb.end_of_thread = end_of_thread; + if (BRW_IS_IGDNG(brw)) { + insn->bits3.dp_write_igdng.binding_table_index = binding_table_index; + insn->bits3.dp_write_igdng.msg_control = msg_control; + insn->bits3.dp_write_igdng.pixel_scoreboard_clear = pixel_scoreboard_clear; + insn->bits3.dp_write_igdng.msg_type = msg_type; + insn->bits3.dp_write_igdng.send_commit_msg = 0; + insn->bits3.dp_write_igdng.header_present = 1; + insn->bits3.dp_write_igdng.response_length = response_length; + insn->bits3.dp_write_igdng.msg_length = msg_length; + insn->bits3.dp_write_igdng.end_of_thread = end_of_thread; + insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE; + insn->bits2.send_igdng.end_of_thread = end_of_thread; + } else { + insn->bits3.dp_write.binding_table_index = binding_table_index; + insn->bits3.dp_write.msg_control = msg_control; + insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear; + insn->bits3.dp_write.msg_type = msg_type; + insn->bits3.dp_write.send_commit_msg = 0; + insn->bits3.dp_write.response_length = response_length; + insn->bits3.dp_write.msg_length = msg_length; + insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE; + insn->bits3.dp_write.end_of_thread = end_of_thread; + } } -static void brw_set_dp_read_message( struct brw_instruction *insn, +static void brw_set_dp_read_message( struct brw_context *brw, + struct brw_instruction *insn, GLuint binding_table_index, GLuint msg_control, GLuint msg_type, @@ -320,15 +396,29 @@ static void brw_set_dp_read_message( struct brw_instruction *insn, { brw_set_src1(insn, brw_imm_d(0)); - insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/ - insn->bits3.dp_read.msg_control = msg_control; /*8:11*/ - insn->bits3.dp_read.msg_type = msg_type; /*12:13*/ - insn->bits3.dp_read.target_cache = target_cache; /*14:15*/ - insn->bits3.dp_read.response_length = response_length; /*16:19*/ - insn->bits3.dp_read.msg_length = msg_length; /*20:23*/ - insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/ - insn->bits3.dp_read.pad1 = 0; /*28:30*/ - insn->bits3.dp_read.end_of_thread = end_of_thread; /*31*/ + if (BRW_IS_IGDNG(brw)) { + insn->bits3.dp_read_igdng.binding_table_index = binding_table_index; + insn->bits3.dp_read_igdng.msg_control = msg_control; + insn->bits3.dp_read_igdng.msg_type = msg_type; + insn->bits3.dp_read_igdng.target_cache = target_cache; + insn->bits3.dp_read_igdng.header_present = 1; + insn->bits3.dp_read_igdng.response_length = response_length; + insn->bits3.dp_read_igdng.msg_length = msg_length; + insn->bits3.dp_read_igdng.pad1 = 0; + insn->bits3.dp_read_igdng.end_of_thread = end_of_thread; + insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ; + insn->bits2.send_igdng.end_of_thread = end_of_thread; + } else { + insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/ + insn->bits3.dp_read.msg_control = msg_control; /*8:11*/ + insn->bits3.dp_read.msg_type = msg_type; /*12:13*/ + insn->bits3.dp_read.target_cache = target_cache; /*14:15*/ + insn->bits3.dp_read.response_length = response_length; /*16:19*/ + insn->bits3.dp_read.msg_length = msg_length; /*20:23*/ + insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/ + insn->bits3.dp_read.pad1 = 0; /*28:30*/ + insn->bits3.dp_read.end_of_thread = end_of_thread; /*31*/ + } } static void brw_set_sampler_message(struct brw_context *brw, @@ -338,11 +428,25 @@ static void brw_set_sampler_message(struct brw_context *brw, GLuint msg_type, GLuint response_length, GLuint msg_length, - GLboolean eot) + GLboolean eot, + GLuint header_present, + GLuint simd_mode) { + assert(eot == 0); brw_set_src1(insn, brw_imm_d(0)); - if (BRW_IS_G4X(brw)) { + if (BRW_IS_IGDNG(brw)) { + insn->bits3.sampler_igdng.binding_table_index = binding_table_index; + insn->bits3.sampler_igdng.sampler = sampler; + insn->bits3.sampler_igdng.msg_type = msg_type; + insn->bits3.sampler_igdng.simd_mode = simd_mode; + insn->bits3.sampler_igdng.header_present = header_present; + insn->bits3.sampler_igdng.response_length = response_length; + insn->bits3.sampler_igdng.msg_length = msg_length; + insn->bits3.sampler_igdng.end_of_thread = eot; + insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_SAMPLER; + insn->bits2.send_igdng.end_of_thread = eot; + } else if (BRW_IS_G4X(brw)) { insn->bits3.sampler_g4x.binding_table_index = binding_table_index; insn->bits3.sampler_g4x.sampler = sampler; insn->bits3.sampler_g4x.msg_type = msg_type; @@ -377,8 +481,8 @@ static struct brw_instruction *next_insn( struct brw_compile *p, /* Reset this one-shot flag: */ - if (p->current->header.destreg__conditonalmod) { - p->current->header.destreg__conditonalmod = 0; + if (p->current->header.destreg__conditionalmod) { + p->current->header.destreg__conditionalmod = 0; p->current->header.predicate_control = BRW_PREDICATE_NORMAL; } @@ -484,6 +588,10 @@ struct brw_instruction *brw_JMPI(struct brw_compile *p, { struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1); + insn->header.execution_size = 1; + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.mask_control = BRW_MASK_DISABLE; + p->current->header.predicate_control = BRW_PREDICATE_NONE; return insn; @@ -540,6 +648,10 @@ struct brw_instruction *brw_ELSE(struct brw_compile *p, struct brw_instruction *if_insn) { struct brw_instruction *insn; + GLuint br = 1; + + if (BRW_IS_IGDNG(p->brw)) + br = 2; if (p->single_program_flow) { insn = next_insn(p, BRW_OPCODE_ADD); @@ -566,8 +678,8 @@ struct brw_instruction *brw_ELSE(struct brw_compile *p, } else { assert(if_insn->header.opcode == BRW_OPCODE_IF); - if_insn->bits3.if_else.jump_count = insn - if_insn; - if_insn->bits3.if_else.pop_count = 1; + if_insn->bits3.if_else.jump_count = br * (insn - if_insn); + if_insn->bits3.if_else.pop_count = 0; if_insn->bits3.if_else.pad0 = 0; } @@ -577,6 +689,11 @@ struct brw_instruction *brw_ELSE(struct brw_compile *p, void brw_ENDIF(struct brw_compile *p, struct brw_instruction *patch_insn) { + GLuint br = 1; + + if (BRW_IS_IGDNG(p->brw)) + br = 2; + if (p->single_program_flow) { /* In single program flow mode, there's no need to execute an ENDIF, * since we don't need to do any stack operations, and if we're executing @@ -608,11 +725,11 @@ void brw_ENDIF(struct brw_compile *p, /* Automagically turn it into an IFF: */ patch_insn->header.opcode = BRW_OPCODE_IFF; - patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1; + patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1); patch_insn->bits3.if_else.pop_count = 0; patch_insn->bits3.if_else.pad0 = 0; } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) { - patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1; + patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1); patch_insn->bits3.if_else.pop_count = 1; patch_insn->bits3.if_else.pad0 = 0; } else { @@ -686,6 +803,10 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p, struct brw_instruction *do_insn) { struct brw_instruction *insn; + GLuint br = 1; + + if (BRW_IS_IGDNG(p->brw)) + br = 2; if (p->single_program_flow) insn = next_insn(p, BRW_OPCODE_ADD); @@ -706,7 +827,7 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p, insn->header.execution_size = do_insn->header.execution_size; assert(do_insn->header.opcode == BRW_OPCODE_DO); - insn->bits3.if_else.jump_count = do_insn - insn + 1; + insn->bits3.if_else.jump_count = br * (do_insn - insn + 1); insn->bits3.if_else.pop_count = 0; insn->bits3.if_else.pad0 = 0; } @@ -725,11 +846,15 @@ void brw_land_fwd_jump(struct brw_compile *p, struct brw_instruction *jmp_insn) { struct brw_instruction *landing = &p->store[p->nr_insn]; + GLuint jmpi = 1; + + if (BRW_IS_IGDNG(p->brw)) + jmpi = 2; assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI); assert(jmp_insn->bits1.da1.src1_reg_file = BRW_IMMEDIATE_VALUE); - jmp_insn->bits3.ud = (landing - jmp_insn) - 1; + jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1); } @@ -746,7 +871,7 @@ void brw_CMP(struct brw_compile *p, { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP); - insn->header.destreg__conditonalmod = conditional; + insn->header.destreg__conditionalmod = conditional; brw_set_dest(insn, dest); brw_set_src0(insn, src0); brw_set_src1(insn, src1); @@ -790,11 +915,12 @@ void brw_math( struct brw_compile *p, * instructions. */ insn->header.predicate_control = 0; - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_dest(insn, dest); brw_set_src0(insn, src); - brw_set_math_message(insn, + brw_set_math_message(p->brw, + insn, msg_length, response_length, function, BRW_MATH_INTEGER_UNSIGNED, @@ -826,11 +952,12 @@ void brw_math_16( struct brw_compile *p, brw_set_compression_control(p, BRW_COMPRESSION_NONE); insn = next_insn(p, BRW_OPCODE_SEND); - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_dest(insn, dest); brw_set_src0(insn, src); - brw_set_math_message(insn, + brw_set_math_message(p->brw, + insn, msg_length, response_length, function, BRW_MATH_INTEGER_UNSIGNED, @@ -842,11 +969,12 @@ void brw_math_16( struct brw_compile *p, */ insn = next_insn(p, BRW_OPCODE_SEND); insn->header.compression_control = BRW_COMPRESSION_2NDHALF; - insn->header.destreg__conditonalmod = msg_reg_nr+1; + insn->header.destreg__conditionalmod = msg_reg_nr+1; brw_set_dest(insn, offset(dest,1)); brw_set_src0(insn, src); - brw_set_math_message(insn, + brw_set_math_message(p->brw, + insn, msg_length, response_length, function, BRW_MATH_INTEGER_UNSIGNED, @@ -888,12 +1016,13 @@ void brw_dp_WRITE_16( struct brw_compile *p, insn->header.predicate_control = 0; /* XXX */ insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_dest(insn, dest); brw_set_src0(insn, src); - brw_set_dp_write_message(insn, + brw_set_dp_write_message(p->brw, + insn, 255, /* binding table index (255=stateless) */ BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */ BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */ @@ -933,12 +1062,13 @@ void brw_dp_READ_16( struct brw_compile *p, insn->header.predicate_control = 0; /* XXX */ insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_dest(insn, dest); /* UW? */ brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); - brw_set_dp_read_message(insn, + brw_set_dp_read_message(p->brw, + insn, 255, /* binding table index (255=stateless) */ 3, /* msg_control (3 means 4 Owords) */ BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ @@ -986,7 +1116,7 @@ void brw_dp_READ_4( struct brw_compile *p, insn->header.predicate_control = BRW_PREDICATE_NONE; insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; insn->header.mask_control = BRW_MASK_DISABLE; /* cast dest to a uword[8] vector */ @@ -995,7 +1125,8 @@ void brw_dp_READ_4( struct brw_compile *p, brw_set_dest(insn, dest); brw_set_src0(insn, brw_null_reg()); - brw_set_dp_read_message(insn, + brw_set_dp_read_message(p->brw, + insn, bind_table_index, 0, /* msg_control (0 means 1 Oword) */ BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ @@ -1059,14 +1190,15 @@ void brw_dp_READ_4_vs(struct brw_compile *p, insn->header.predicate_control = BRW_PREDICATE_NONE; insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; insn->header.mask_control = BRW_MASK_DISABLE; /*insn->header.access_mode = BRW_ALIGN_16;*/ brw_set_dest(insn, dest); brw_set_src0(insn, brw_null_reg()); - brw_set_dp_read_message(insn, + brw_set_dp_read_message(p->brw, + insn, bind_table_index, oword, /* 0 = lower Oword, 1 = upper Oword */ BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ @@ -1092,11 +1224,12 @@ void brw_fb_WRITE(struct brw_compile *p, insn->header.predicate_control = 0; /* XXX */ insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_dest(insn, dest); brw_set_src0(insn, src0); - brw_set_dp_write_message(insn, + brw_set_dp_write_message(p->brw, + insn, binding_table_index, BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */ BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */ @@ -1122,7 +1255,9 @@ void brw_SAMPLE(struct brw_compile *p, GLuint msg_type, GLuint response_length, GLuint msg_length, - GLboolean eot) + GLboolean eot, + GLuint header_present, + GLuint simd_mode) { GLboolean need_stall = 0; @@ -1187,7 +1322,7 @@ void brw_SAMPLE(struct brw_compile *p, insn->header.predicate_control = 0; /* XXX */ insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_dest(insn, dest); brw_set_src0(insn, src0); @@ -1197,7 +1332,9 @@ void brw_SAMPLE(struct brw_compile *p, msg_type, response_length, msg_length, - eot); + eot, + header_present, + simd_mode); } if (need_stall) { @@ -1232,15 +1369,16 @@ void brw_urb_WRITE(struct brw_compile *p, { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); - assert(msg_length < 16); + assert(msg_length < BRW_MAX_MRF); brw_set_dest(insn, dest); brw_set_src0(insn, src0); brw_set_src1(insn, brw_imm_d(0)); - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; - brw_set_urb_message(insn, + brw_set_urb_message(p->brw, + insn, allocate, used, msg_length, @@ -1251,3 +1389,37 @@ void brw_urb_WRITE(struct brw_compile *p, swizzle); } +void brw_ff_sync(struct brw_compile *p, + struct brw_reg dest, + GLuint msg_reg_nr, + struct brw_reg src0, + GLboolean allocate, + GLboolean used, + GLuint msg_length, + GLuint response_length, + GLboolean eot, + GLboolean writes_complete, + GLuint offset, + GLuint swizzle) +{ + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + assert(msg_length < 16); + + brw_set_dest(insn, dest); + brw_set_src0(insn, src0); + brw_set_src1(insn, brw_imm_d(0)); + + insn->header.destreg__conditionalmod = msg_reg_nr; + + brw_set_ff_sync_message(p->brw, + insn, + allocate, + used, + msg_length, + response_length, + eot, + writes_complete, + offset, + swizzle); +} diff --git a/i965/brw_fallback.c b/i965/brw_fallback.c index 2993574..d27c6c2 100644 --- a/i965/brw_fallback.c +++ b/i965/brw_fallback.c @@ -37,6 +37,9 @@ #include "tnl/tnl.h" #include "brw_context.h" #include "brw_fallback.h" +#include "intel_chipset.h" +#include "intel_fbo.h" +#include "intel_regions.h" #include "glapi/glapi.h" @@ -44,6 +47,7 @@ static GLboolean do_check_fallback(struct brw_context *brw) { + struct intel_context *intel = &brw->intel; GLcontext *ctx = &brw->intel.ctx; GLuint i; @@ -81,6 +85,33 @@ static GLboolean do_check_fallback(struct brw_context *brw) return GL_TRUE; } + /* _NEW_BUFFERS */ + if (IS_965(intel->intelScreen->deviceID) && + !IS_G4X(intel->intelScreen->deviceID)) { + for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { + struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i]; + struct intel_renderbuffer *irb = intel_renderbuffer(rb); + + /* The original gen4 hardware couldn't set up WM surfaces pointing + * at an offset within a tile, which can happen when rendering to + * anything but the base level of a texture or the +X face/0 depth. + * This was fixed with the 4 Series hardware. + * + * For these original chips, you would have to make the depth and + * color destination surfaces include information on the texture + * type, LOD, face, and various limits to use them as a destination. + * I would have done this, but there's also a nasty requirement that + * the depth and the color surfaces all be of the same LOD, which + * may be a worse requirement than this alignment. (Also, we may + * want to just demote the texture to untiled, instead). + */ + if (irb->region && irb->region->tiling != I915_TILING_NONE && + (irb->region->draw_offset & 4095)) { + DBG("FALLBACK: non-tile-aligned destination for tiled FBO\n"); + return GL_TRUE; + } + } + } return GL_FALSE; } diff --git a/i965/brw_gs.c b/i965/brw_gs.c index a8b74a0..48c2b9a 100644 --- a/i965/brw_gs.c +++ b/i965/brw_gs.c @@ -54,12 +54,17 @@ static void compile_gs_prog( struct brw_context *brw, memset(&c, 0, sizeof(c)); c.key = *key; - + c.need_ff_sync = BRW_IS_IGDNG(brw); /* Need to locate the two positions present in vertex + header. * These are currently hardcoded: */ c.nr_attrs = brw_count_bits(c.key.attrs); - c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */ + + if (BRW_IS_IGDNG(brw)) + c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */ + else + c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */ + c.nr_bytes = c.nr_regs * REG_SIZE; diff --git a/i965/brw_gs.h b/i965/brw_gs.h index 18a4537..bbb991e 100644 --- a/i965/brw_gs.h +++ b/i965/brw_gs.h @@ -62,6 +62,7 @@ struct brw_gs_compile { GLuint nr_attrs; GLuint nr_regs; GLuint nr_bytes; + GLboolean need_ff_sync; }; #define ATTR_SIZE (4*4) diff --git a/i965/brw_gs_emit.c b/i965/brw_gs_emit.c index 22e0d25..a9b2aa2 100644 --- a/i965/brw_gs_emit.c +++ b/i965/brw_gs_emit.c @@ -101,6 +101,23 @@ static void brw_gs_emit_vue(struct brw_gs_compile *c, BRW_URB_SWIZZLE_NONE); } +static void brw_gs_ff_sync(struct brw_gs_compile *c, int num_prim) +{ + struct brw_compile *p = &c->func; + brw_MOV(p, get_element_ud(c->reg.R0, 1), brw_imm_ud(num_prim)); + brw_ff_sync(p, + c->reg.R0, + 0, + c->reg.R0, + 1, + 1, /* used */ + 1, /* msg length */ + 1, /* response length */ + 0, /* eot */ + 1, /* write compelete */ + 0, /* urb offset */ + BRW_URB_SWIZZLE_NONE); +} void brw_gs_quads( struct brw_gs_compile *c ) @@ -110,6 +127,8 @@ void brw_gs_quads( struct brw_gs_compile *c ) /* Use polygons for correct edgeflag behaviour. Note that vertex 3 * is the PV for quads, but vertex 0 for polygons: */ + if (c->need_ff_sync) + brw_gs_ff_sync(c, 1); brw_gs_emit_vue(c, c->reg.vertex[3], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START)); brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2)); brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_POLYGON << 2)); @@ -120,6 +139,8 @@ void brw_gs_quad_strip( struct brw_gs_compile *c ) { brw_gs_alloc_regs(c, 4); + if (c->need_ff_sync) + brw_gs_ff_sync(c, 1); brw_gs_emit_vue(c, c->reg.vertex[2], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START)); brw_gs_emit_vue(c, c->reg.vertex[3], 0, (_3DPRIM_POLYGON << 2)); brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2)); @@ -129,6 +150,9 @@ void brw_gs_quad_strip( struct brw_gs_compile *c ) void brw_gs_tris( struct brw_gs_compile *c ) { brw_gs_alloc_regs(c, 3); + + if (c->need_ff_sync) + brw_gs_ff_sync(c, 1); brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_TRILIST << 2) | R02_PRIM_START)); brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_TRILIST << 2)); brw_gs_emit_vue(c, c->reg.vertex[2], 1, ((_3DPRIM_TRILIST << 2) | R02_PRIM_END)); @@ -137,6 +161,9 @@ void brw_gs_tris( struct brw_gs_compile *c ) void brw_gs_lines( struct brw_gs_compile *c ) { brw_gs_alloc_regs(c, 2); + + if (c->need_ff_sync) + brw_gs_ff_sync(c, 1); brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_START)); brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_END)); } @@ -144,6 +171,9 @@ void brw_gs_lines( struct brw_gs_compile *c ) void brw_gs_points( struct brw_gs_compile *c ) { brw_gs_alloc_regs(c, 1); + + if (c->need_ff_sync) + brw_gs_ff_sync(c, 1); brw_gs_emit_vue(c, c->reg.vertex[0], 1, ((_3DPRIM_POINTLIST << 2) | R02_PRIM_START | R02_PRIM_END)); } diff --git a/i965/brw_gs_state.c b/i965/brw_gs_state.c index 27023cf..a761c03 100644 --- a/i965/brw_gs_state.c +++ b/i965/brw_gs_state.c @@ -95,6 +95,9 @@ gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key) gs.thread4.max_threads = 0; /* Hardware requirement */ + if (BRW_IS_IGDNG(brw)) + gs.thread4.rendering_enable = 1; + if (INTEL_DEBUG & DEBUG_STATS) gs.thread4.stats_enable = 1; diff --git a/i965/brw_misc_state.c b/i965/brw_misc_state.c index 9bc5c35..ea71857 100644 --- a/i965/brw_misc_state.c +++ b/i965/brw_misc_state.c @@ -118,7 +118,10 @@ static void upload_binding_table_pointers(struct brw_context *brw) BEGIN_BATCH(6, IGNORE_CLIPRECTS); OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2)); - OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* vs */ + if (brw->vs.bind_bo != NULL) + OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* vs */ + else + OUT_BATCH(0); OUT_BATCH(0); /* gs */ OUT_BATCH(0); /* clip */ OUT_BATCH(0); /* sf */ @@ -169,6 +172,7 @@ static void prepare_psp_urb_cbs(struct brw_context *brw) brw_add_validated_bo(brw, brw->vs.state_bo); brw_add_validated_bo(brw, brw->gs.state_bo); brw_add_validated_bo(brw, brw->clip.state_bo); + brw_add_validated_bo(brw, brw->sf.state_bo); brw_add_validated_bo(brw, brw->wm.state_bo); brw_add_validated_bo(brw, brw->cc.state_bo); } @@ -208,7 +212,7 @@ static void emit_depthbuffer(struct brw_context *brw) { struct intel_context *intel = &brw->intel; struct intel_region *region = brw->state.depth_region; - unsigned int len = BRW_IS_G4X(brw) ? 6 : 5; + unsigned int len = (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? 6 : 5; if (region == NULL) { BEGIN_BATCH(len, IGNORE_CLIPRECTS); @@ -219,7 +223,7 @@ static void emit_depthbuffer(struct brw_context *brw) OUT_BATCH(0); OUT_BATCH(0); - if (BRW_IS_G4X(brw)) + if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) OUT_BATCH(0); ADVANCE_BATCH(); @@ -241,6 +245,8 @@ static void emit_depthbuffer(struct brw_context *brw) return; } + assert(region->tiling != I915_TILING_X); + BEGIN_BATCH(len, IGNORE_CLIPRECTS); OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2)); OUT_BATCH(((region->pitch * region->cpp) - 1) | @@ -256,7 +262,7 @@ static void emit_depthbuffer(struct brw_context *brw) ((region->height - 1) << 19)); OUT_BATCH(0); - if (BRW_IS_G4X(brw)) + if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) OUT_BATCH(0); ADVANCE_BATCH(); @@ -369,7 +375,7 @@ static void upload_aa_line_parameters(struct brw_context *brw) { struct brw_aa_line_parameters balp; - if (!BRW_IS_G4X(brw)) + if (BRW_IS_965(brw)) return; /* use legacy aa line coverage computation */ @@ -506,14 +512,27 @@ static void upload_state_base_address( struct brw_context *brw ) /* Output the structure (brw_state_base_address) directly to the * batchbuffer, so we can emit relocations inline. */ - BEGIN_BATCH(6, IGNORE_CLIPRECTS); - OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2)); - OUT_BATCH(1); /* General state base address */ - OUT_BATCH(1); /* Surface state base address */ - OUT_BATCH(1); /* Indirect object base address */ - OUT_BATCH(1); /* General state upper bound */ - OUT_BATCH(1); /* Indirect object upper bound */ - ADVANCE_BATCH(); + if (BRW_IS_IGDNG(brw)) { + BEGIN_BATCH(8, IGNORE_CLIPRECTS); + OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2)); + OUT_BATCH(1); /* General state base address */ + OUT_BATCH(1); /* Surface state base address */ + OUT_BATCH(1); /* Indirect object base address */ + OUT_BATCH(1); /* Instruction base address */ + OUT_BATCH(1); /* General state upper bound */ + OUT_BATCH(1); /* Indirect object upper bound */ + OUT_BATCH(1); /* Instruction access upper bound */ + ADVANCE_BATCH(); + } else { + BEGIN_BATCH(6, IGNORE_CLIPRECTS); + OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2)); + OUT_BATCH(1); /* General state base address */ + OUT_BATCH(1); /* Surface state base address */ + OUT_BATCH(1); /* Indirect object base address */ + OUT_BATCH(1); /* General state upper bound */ + OUT_BATCH(1); /* Indirect object upper bound */ + ADVANCE_BATCH(); + } } const struct brw_tracked_state brw_state_base_address = { diff --git a/i965/brw_queryobj.c b/i965/brw_queryobj.c index cb9169e..a195bc3 100644 --- a/i965/brw_queryobj.c +++ b/i965/brw_queryobj.c @@ -146,17 +146,12 @@ static void brw_wait_query(GLcontext *ctx, struct gl_query_object *q) static void brw_check_query(GLcontext *ctx, struct gl_query_object *q) { - /* XXX: Need to expose dri_bo_is_idle from bufmgr. */ -#if 0 struct brw_query_object *query = (struct brw_query_object *)q; - if (dri_bo_is_idle(query->bo)) { + if (query->bo == NULL || !drm_intel_bo_busy(query->bo)) { brw_queryobj_get_results(query); query->Base.Ready = GL_TRUE; } -#else - brw_wait_query(ctx, q); -#endif } /** Called to set up the query BO and account for its aperture space */ diff --git a/i965/brw_sf.c b/i965/brw_sf.c index c3c8597..e1c2c77 100644 --- a/i965/brw_sf.c +++ b/i965/brw_sf.c @@ -166,6 +166,9 @@ static void upload_sf_prog(struct brw_context *brw) key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT); key.do_twoside_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide); + /* _NEW_HINT */ + key.linear_color = (ctx->Hint.PerspectiveCorrection == GL_FASTEST); + /* _NEW_POLYGON */ if (key.do_twoside_color) { /* If we're rendering to a FBO, we have to invert the polygon @@ -188,7 +191,7 @@ static void upload_sf_prog(struct brw_context *brw) const struct brw_tracked_state brw_sf_prog = { .dirty = { - .mesa = (_NEW_LIGHT|_NEW_POLYGON|_NEW_POINT), + .mesa = (_NEW_HINT | _NEW_LIGHT | _NEW_POLYGON | _NEW_POINT), .brw = (BRW_NEW_REDUCED_PRIMITIVE), .cache = CACHE_NEW_VS_PROG }, diff --git a/i965/brw_sf.h b/i965/brw_sf.h index 1c0fb70..6426b6d 100644 --- a/i965/brw_sf.h +++ b/i965/brw_sf.h @@ -51,7 +51,8 @@ struct brw_sf_prog_key { GLuint do_flat_shading:1; GLuint frontface_ccw:1; GLuint do_point_sprite:1; - GLuint pad:10; + GLuint linear_color:1; /**< linear interp vs. perspective interp */ + GLuint pad:25; GLenum SpriteOrigin; }; diff --git a/i965/brw_sf_emit.c b/i965/brw_sf_emit.c index 862835f..ca8f97f 100644 --- a/i965/brw_sf_emit.c +++ b/i965/brw_sf_emit.c @@ -151,6 +151,8 @@ static void do_flatshade_triangle( struct brw_sf_compile *c ) struct brw_compile *p = &c->func; struct brw_reg ip = brw_ip_reg(); GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS); + GLuint jmpi = 1; + if (!nr) return; @@ -159,18 +161,21 @@ static void do_flatshade_triangle( struct brw_sf_compile *c ) if (c->key.primitive == SF_UNFILLED_TRIS) return; + if (BRW_IS_IGDNG(p->brw)) + jmpi = 2; + brw_push_insn_state(p); - brw_MUL(p, c->pv, c->pv, brw_imm_ud(nr*2+1)); + brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr*2+1))); brw_JMPI(p, ip, ip, c->pv); copy_colors(c, c->vert[1], c->vert[0]); copy_colors(c, c->vert[2], c->vert[0]); - brw_JMPI(p, ip, ip, brw_imm_ud(nr*4+1)); + brw_JMPI(p, ip, ip, brw_imm_d(jmpi*(nr*4+1))); copy_colors(c, c->vert[0], c->vert[1]); copy_colors(c, c->vert[2], c->vert[1]); - brw_JMPI(p, ip, ip, brw_imm_ud(nr*2)); + brw_JMPI(p, ip, ip, brw_imm_d(jmpi*nr*2)); copy_colors(c, c->vert[0], c->vert[2]); copy_colors(c, c->vert[1], c->vert[2]); @@ -184,7 +189,8 @@ static void do_flatshade_line( struct brw_sf_compile *c ) struct brw_compile *p = &c->func; struct brw_reg ip = brw_ip_reg(); GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS); - + GLuint jmpi = 1; + if (!nr) return; @@ -193,13 +199,16 @@ static void do_flatshade_line( struct brw_sf_compile *c ) if (c->key.primitive == SF_UNFILLED_TRIS) return; + if (BRW_IS_IGDNG(p->brw)) + jmpi = 2; + brw_push_insn_state(p); - brw_MUL(p, c->pv, c->pv, brw_imm_ud(nr+1)); + brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr+1))); brw_JMPI(p, ip, ip, c->pv); copy_colors(c, c->vert[1], c->vert[0]); - brw_JMPI(p, ip, ip, brw_imm_ud(nr)); + brw_JMPI(p, ip, ip, brw_imm_ud(jmpi*nr)); copy_colors(c, c->vert[0], c->vert[1]); brw_pop_insn_state(p); @@ -218,7 +227,7 @@ static void alloc_regs( struct brw_sf_compile *c ) /* Values computed by fixed function unit: */ - c->pv = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_UD); + c->pv = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_D); c->det = brw_vec1_grf(1, 2); c->dx0 = brw_vec1_grf(1, 3); c->dx2 = brw_vec1_grf(1, 4); @@ -295,9 +304,6 @@ static void invert_det( struct brw_sf_compile *c) } -#define NON_PERPECTIVE_ATTRS (FRAG_BIT_WPOS | \ - FRAG_BIT_COL0 | \ - FRAG_BIT_COL1) static GLboolean calculate_masks( struct brw_sf_compile *c, GLuint reg, @@ -306,9 +312,16 @@ static GLboolean calculate_masks( struct brw_sf_compile *c, GLushort *pc_linear) { GLboolean is_last_attr = (reg == c->nr_setup_regs - 1); - GLuint persp_mask = c->key.attrs & ~NON_PERPECTIVE_ATTRS; + GLuint persp_mask; GLuint linear_mask; + if (c->key.do_flat_shading || c->key.linear_color) + persp_mask = c->key.attrs & ~(FRAG_BIT_WPOS | + FRAG_BIT_COL0 | + FRAG_BIT_COL1); + else + persp_mask = c->key.attrs & ~(FRAG_BIT_WPOS); + if (c->key.do_flat_shading) linear_mask = c->key.attrs & ~(FRAG_BIT_COL0|FRAG_BIT_COL1); else @@ -674,7 +687,7 @@ void brw_emit_anyprim_setup( struct brw_sf_compile *c ) (1<<_3DPRIM_POLYGON) | (1<<_3DPRIM_RECTLIST) | (1<<_3DPRIM_TRIFAN_NOSTIPPLE))); - jmp = brw_JMPI(p, ip, ip, brw_imm_w(0)); + jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)); { saveflag = p->flag_value; brw_push_insn_state(p); @@ -695,7 +708,7 @@ void brw_emit_anyprim_setup( struct brw_sf_compile *c ) (1<<_3DPRIM_LINESTRIP_CONT) | (1<<_3DPRIM_LINESTRIP_BF) | (1<<_3DPRIM_LINESTRIP_CONT_BF))); - jmp = brw_JMPI(p, ip, ip, brw_imm_w(0)); + jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)); { saveflag = p->flag_value; brw_push_insn_state(p); @@ -708,7 +721,7 @@ void brw_emit_anyprim_setup( struct brw_sf_compile *c ) brw_set_conditionalmod(p, BRW_CONDITIONAL_Z); brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE)); - jmp = brw_JMPI(p, ip, ip, brw_imm_w(0)); + jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)); { saveflag = p->flag_value; brw_push_insn_state(p); diff --git a/i965/brw_sf_state.c b/i965/brw_sf_state.c index c999187..bc0f076 100644 --- a/i965/brw_sf_state.c +++ b/i965/brw_sf_state.c @@ -113,7 +113,7 @@ struct brw_sf_unit_key { unsigned int nr_urb_entries, urb_size, sfsize; - GLenum front_face, cull_face; + GLenum front_face, cull_face, provoking_vertex; unsigned scissor:1; unsigned line_smooth:1; unsigned point_sprite:1; @@ -153,6 +153,9 @@ sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key) key->point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize); key->point_attenuated = ctx->Point._Attenuated; + /* _NEW_LIGHT */ + key->provoking_vertex = ctx->Light.ProvokingVertex; + key->render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0; } @@ -162,7 +165,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, { struct brw_sf_unit_state sf; dri_bo *bo; - + int chipset_max_threads; memset(&sf, 0, sizeof(sf)); sf.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1; @@ -171,13 +174,26 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, sf.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; sf.thread3.dispatch_grf_start_reg = 3; - sf.thread3.urb_entry_read_offset = 1; + + if (BRW_IS_IGDNG(brw)) + sf.thread3.urb_entry_read_offset = 3; + else + sf.thread3.urb_entry_read_offset = 1; + sf.thread3.urb_entry_read_length = key->urb_entry_read_length; sf.thread4.nr_urb_entries = key->nr_urb_entries; sf.thread4.urb_entry_allocation_size = key->sfsize - 1; - /* Each SF thread produces 1 PUE, and there can be up to 24 threads */ - sf.thread4.max_threads = MIN2(24, key->nr_urb_entries) - 1; + + /* Each SF thread produces 1 PUE, and there can be up to 24(Pre-IGDNG) or + * 48(IGDNG) threads + */ + if (BRW_IS_IGDNG(brw)) + chipset_max_threads = 48; + else + chipset_max_threads = 24; + + sf.thread4.max_threads = MIN2(chipset_max_threads, key->nr_urb_entries) - 1; if (INTEL_DEBUG & DEBUG_SINGLE_THREAD) sf.thread4.max_threads = 0; @@ -233,7 +249,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, else if (sf.sf6.line_width <= 0x2) sf.sf6.line_width = 0; - /* _NEW_POINT */ + /* _NEW_BUFFERS */ key->render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0; if (!key->render_to_fbo) { /* Rendering to an OpenGL window */ @@ -263,6 +279,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, } /* XXX clamp max depends on AA vs. non-AA */ + /* _NEW_POINT */ sf.sf7.sprite_point = key->point_sprite; sf.sf7.point_size = CLAMP(rint(key->point_size), 1, 255) * (1<<3); sf.sf7.use_point_size_state = !key->point_attenuated; @@ -270,9 +287,15 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, /* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons: */ - sf.sf7.trifan_pv = 2; - sf.sf7.linestrip_pv = 1; - sf.sf7.tristrip_pv = 2; + if (key->provoking_vertex == GL_LAST_VERTEX_CONVENTION) { + sf.sf7.trifan_pv = 2; + sf.sf7.linestrip_pv = 1; + sf.sf7.tristrip_pv = 2; + } else { + sf.sf7.trifan_pv = 1; + sf.sf7.linestrip_pv = 0; + sf.sf7.tristrip_pv = 0; + } sf.sf7.line_last_pixel_enable = 0; /* Set bias for OpenGL rasterization rules: @@ -286,6 +309,9 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, &sf, sizeof(sf), NULL, NULL); + /* STATE_PREFETCH command description describes this state as being + * something loaded through the GPE (L2 ISC), so it's INSTRUCTION domain. + */ /* Emit SF program relocation */ dri_bo_emit_reloc(bo, I915_GEM_DOMAIN_INSTRUCTION, 0, @@ -326,9 +352,11 @@ static void upload_sf_unit( struct brw_context *brw ) const struct brw_tracked_state brw_sf_unit = { .dirty = { .mesa = (_NEW_POLYGON | + _NEW_LIGHT | _NEW_LINE | _NEW_POINT | - _NEW_SCISSOR), + _NEW_SCISSOR | + _NEW_BUFFERS), .brw = BRW_NEW_URB_FENCE, .cache = (CACHE_NEW_SF_VP | CACHE_NEW_SF_PROG) diff --git a/i965/brw_state.h b/i965/brw_state.h index 81b0a45..5335eac 100644 --- a/i965/brw_state.h +++ b/i965/brw_state.h @@ -72,11 +72,13 @@ const struct brw_tracked_state brw_sf_vp; const struct brw_tracked_state brw_state_base_address; const struct brw_tracked_state brw_urb_fence; const struct brw_tracked_state brw_vertex_state; +const struct brw_tracked_state brw_vs_surfaces; const struct brw_tracked_state brw_vs_prog; const struct brw_tracked_state brw_vs_unit; const struct brw_tracked_state brw_wm_input_sizes; const struct brw_tracked_state brw_wm_prog; const struct brw_tracked_state brw_wm_samplers; +const struct brw_tracked_state brw_wm_constant_surface; const struct brw_tracked_state brw_wm_surfaces; const struct brw_tracked_state brw_wm_unit; @@ -84,12 +86,24 @@ const struct brw_tracked_state brw_psp_urb_cbs; const struct brw_tracked_state brw_pipe_control; -const struct brw_tracked_state brw_clear_surface_cache; -const struct brw_tracked_state brw_clear_batch_cache; - const struct brw_tracked_state brw_drawing_rect; const struct brw_tracked_state brw_indices; const struct brw_tracked_state brw_vertices; +const struct brw_tracked_state brw_index_buffer; + +/** + * Use same key for WM and VS surfaces. + */ +struct brw_surface_key { + GLenum target, depthmode; + dri_bo *bo; + GLint format, internal_format; + GLint first_level, last_level; + GLint width, height, depth; + GLint pitch, cpp; + uint32_t tiling; + GLuint offset; +}; /*********************************************************************** * brw_state.c @@ -135,8 +149,8 @@ dri_bo *brw_search_cache( struct brw_cache *cache, void *aux_return); void brw_state_cache_check_size( struct brw_context *brw ); -void brw_init_cache( struct brw_context *brw ); -void brw_destroy_cache( struct brw_context *brw ); +void brw_init_caches( struct brw_context *brw ); +void brw_destroy_caches( struct brw_context *brw ); /*********************************************************************** * brw_state_batch.c @@ -148,6 +162,11 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw, const void *data, GLuint sz ); void brw_destroy_batch_cache( struct brw_context *brw ); -void brw_clear_batch_cache_flush( struct brw_context *brw ); +void brw_clear_batch_cache( struct brw_context *brw ); + +/* brw_wm_surface_state.c */ +dri_bo * +brw_create_constant_surface( struct brw_context *brw, + struct brw_surface_key *key ); #endif diff --git a/i965/brw_state_batch.c b/i965/brw_state_batch.c index 811940e..7821898 100644 --- a/i965/brw_state_batch.c +++ b/i965/brw_state_batch.c @@ -79,7 +79,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw, return GL_TRUE; } -static void clear_batch_cache( struct brw_context *brw ) +void brw_clear_batch_cache( struct brw_context *brw ) { struct brw_cached_batch_item *item = brw->cached_batch_items; @@ -93,18 +93,7 @@ static void clear_batch_cache( struct brw_context *brw ) brw->cached_batch_items = NULL; } -void brw_clear_batch_cache_flush( struct brw_context *brw ) -{ - clear_batch_cache(brw); - - brw->state.dirty.mesa |= ~0; - brw->state.dirty.brw |= ~0; - brw->state.dirty.cache |= ~0; -} - - - void brw_destroy_batch_cache( struct brw_context *brw ) { - clear_batch_cache(brw); + brw_clear_batch_cache(brw); } diff --git a/i965/brw_state_cache.c b/i965/brw_state_cache.c index d5b5166..e40d7a0 100644 --- a/i965/brw_state_cache.c +++ b/i965/brw_state_cache.c @@ -56,9 +56,9 @@ * incorrect program is run for the other instance. */ +#include "main/imports.h" #include "brw_state.h" #include "intel_batchbuffer.h" -#include "main/imports.h" /* XXX: Fixme - have to include these to get the sizes of the prog_key * structs: @@ -69,8 +69,10 @@ #include "brw_sf.h" #include "brw_gs.h" -static GLuint hash_key( const void *key, GLuint key_size, - dri_bo **reloc_bufs, GLuint nr_reloc_bufs) + +static GLuint +hash_key(const void *key, GLuint key_size, + dri_bo **reloc_bufs, GLuint nr_reloc_bufs) { GLuint *ikey = (GLuint *)key; GLuint hash = 0, i; @@ -95,6 +97,7 @@ static GLuint hash_key( const void *key, GLuint key_size, return hash; } + /** * Marks a new buffer as being chosen for the given cache id. */ @@ -111,6 +114,7 @@ update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id, cache->brw->state.dirty.cache |= 1 << cache_id; } + static struct brw_cache_item * search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, GLuint hash, const void *key, GLuint key_size, @@ -143,7 +147,8 @@ search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, } -static void rehash( struct brw_cache *cache ) +static void +rehash(struct brw_cache *cache) { struct brw_cache_item **items; struct brw_cache_item *c, *next; @@ -164,15 +169,17 @@ static void rehash( struct brw_cache *cache ) cache->size = size; } + /** * Returns the buffer object matching cache_id and key, or NULL. */ -dri_bo *brw_search_cache( struct brw_cache *cache, - enum brw_cache_id cache_id, - const void *key, - GLuint key_size, - dri_bo **reloc_bufs, GLuint nr_reloc_bufs, - void *aux_return ) +dri_bo * +brw_search_cache(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_size, + dri_bo **reloc_bufs, GLuint nr_reloc_bufs, + void *aux_return) { struct brw_cache_item *item; GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs); @@ -192,6 +199,7 @@ dri_bo *brw_search_cache( struct brw_cache *cache, return item->bo; } + dri_bo * brw_upload_cache( struct brw_cache *cache, enum brw_cache_id cache_id, @@ -265,7 +273,9 @@ brw_upload_cache( struct brw_cache *cache, return bo; } -/* This doesn't really work with aux data. Use search/upload instead + +/** + * This doesn't really work with aux data. Use search/upload instead */ dri_bo * brw_cache_data_sz(struct brw_cache *cache, @@ -296,6 +306,7 @@ brw_cache_data_sz(struct brw_cache *cache, return bo; } + /** * Wrapper around brw_cache_data_sz using the cache_id's canonical key size. * @@ -319,21 +330,22 @@ enum pool_type { DW_GENERAL_STATE }; + static void -brw_init_cache_id( struct brw_context *brw, - const char *name, - enum brw_cache_id id, - GLuint key_size, - GLuint aux_size) +brw_init_cache_id(struct brw_cache *cache, + const char *name, + enum brw_cache_id id, + GLuint key_size, + GLuint aux_size) { - struct brw_cache *cache = &brw->cache; - cache->name[id] = strdup(name); cache->key_size[id] = key_size; cache->aux_size[id] = aux_size; } -void brw_init_cache( struct brw_context *brw ) + +static void +brw_init_non_surface_cache(struct brw_context *brw) { struct brw_cache *cache = &brw->cache; @@ -342,114 +354,136 @@ void brw_init_cache( struct brw_context *brw ) cache->size = 7; cache->n_items = 0; cache->items = (struct brw_cache_item **) - _mesa_calloc(cache->size * - sizeof(struct brw_cache_item)); + _mesa_calloc(cache->size * sizeof(struct brw_cache_item)); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "CC_VP", BRW_CC_VP, sizeof(struct brw_cc_viewport), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "CC_UNIT", BRW_CC_UNIT, sizeof(struct brw_cc_unit_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "WM_PROG", BRW_WM_PROG, sizeof(struct brw_wm_prog_key), sizeof(struct brw_wm_prog_data)); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "SAMPLER_DEFAULT_COLOR", BRW_SAMPLER_DEFAULT_COLOR, sizeof(struct brw_sampler_default_color), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "SAMPLER", BRW_SAMPLER, 0, /* variable key/data size */ 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "WM_UNIT", BRW_WM_UNIT, sizeof(struct brw_wm_unit_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "SF_PROG", BRW_SF_PROG, sizeof(struct brw_sf_prog_key), sizeof(struct brw_sf_prog_data)); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "SF_VP", BRW_SF_VP, sizeof(struct brw_sf_viewport), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "SF_UNIT", BRW_SF_UNIT, sizeof(struct brw_sf_unit_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "VS_UNIT", BRW_VS_UNIT, sizeof(struct brw_vs_unit_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "VS_PROG", BRW_VS_PROG, sizeof(struct brw_vs_prog_key), sizeof(struct brw_vs_prog_data)); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "CLIP_UNIT", BRW_CLIP_UNIT, sizeof(struct brw_clip_unit_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "CLIP_PROG", BRW_CLIP_PROG, sizeof(struct brw_clip_prog_key), sizeof(struct brw_clip_prog_data)); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "GS_UNIT", BRW_GS_UNIT, sizeof(struct brw_gs_unit_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "GS_PROG", BRW_GS_PROG, sizeof(struct brw_gs_prog_key), sizeof(struct brw_gs_prog_data)); +} + + +static void +brw_init_surface_cache(struct brw_context *brw) +{ + struct brw_cache *cache = &brw->surface_cache; + + cache->brw = brw; - brw_init_cache_id(brw, + cache->size = 7; + cache->n_items = 0; + cache->items = (struct brw_cache_item **) + _mesa_calloc(cache->size * sizeof(struct brw_cache_item)); + + brw_init_cache_id(cache, "SS_SURFACE", BRW_SS_SURFACE, sizeof(struct brw_surface_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "SS_SURF_BIND", BRW_SS_SURF_BIND, 0, 0); } + +void +brw_init_caches(struct brw_context *brw) +{ + brw_init_non_surface_cache(brw); + brw_init_surface_cache(brw); +} + + static void -brw_clear_cache( struct brw_context *brw ) +brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) { struct brw_cache_item *c, *next; GLuint i; @@ -457,8 +491,8 @@ brw_clear_cache( struct brw_context *brw ) if (INTEL_DEBUG & DEBUG_STATE) _mesa_printf("%s\n", __FUNCTION__); - for (i = 0; i < brw->cache.size; i++) { - for (c = brw->cache.items[i]; c; c = next) { + for (i = 0; i < cache->size; i++) { + for (c = cache->items[i]; c; c = next) { int j; next = c->next; @@ -468,10 +502,10 @@ brw_clear_cache( struct brw_context *brw ) free((void *)c->key); free(c); } - brw->cache.items[i] = NULL; + cache->items[i] = NULL; } - brw->cache.n_items = 0; + cache->n_items = 0; if (brw->curbe.last_buf) { _mesa_free(brw->curbe.last_buf); @@ -483,25 +517,46 @@ brw_clear_cache( struct brw_context *brw ) brw->state.dirty.cache |= ~0; } -void brw_state_cache_check_size( struct brw_context *brw ) + +void +brw_state_cache_check_size(struct brw_context *brw) { + if (INTEL_DEBUG & DEBUG_STATE) + _mesa_printf("%s (n_items=%d)\n", __FUNCTION__, brw->cache.n_items); + /* un-tuned guess. We've got around 20 state objects for a total of around * 32k, so 1000 of them is around 1.5MB. */ if (brw->cache.n_items > 1000) - brw_clear_cache(brw); + brw_clear_cache(brw, &brw->cache); + + if (brw->surface_cache.n_items > 1000) + brw_clear_cache(brw, &brw->surface_cache); } -void brw_destroy_cache( struct brw_context *brw ) + +static void +brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache) { GLuint i; - brw_clear_cache(brw); + if (INTEL_DEBUG & DEBUG_STATE) + _mesa_printf("%s\n", __FUNCTION__); + + brw_clear_cache(brw, cache); for (i = 0; i < BRW_MAX_CACHE; i++) { - dri_bo_unreference(brw->cache.last_bo[i]); - free(brw->cache.name[i]); + dri_bo_unreference(cache->last_bo[i]); + free(cache->name[i]); } - free(brw->cache.items); - brw->cache.items = NULL; - brw->cache.size = 0; + free(cache->items); + cache->items = NULL; + cache->size = 0; +} + + +void +brw_destroy_caches(struct brw_context *brw) +{ + brw_destroy_cache(brw, &brw->cache); + brw_destroy_cache(brw, &brw->surface_cache); } diff --git a/i965/brw_state_dump.c b/i965/brw_state_dump.c index a713262..e94fa7d 100644 --- a/i965/brw_state_dump.c +++ b/i965/brw_state_dump.c @@ -126,6 +126,8 @@ static void dump_wm_surface_state(struct brw_context *brw) surf->ss3.pitch + 1, surf->ss3.tiled_surface ? "" : "not "); state_out(name, surf, surfoff, 4, "mip base %d\n", surf->ss4.min_lod); + state_out(name, surf, surfoff, 5, "x,y offset: %d,%d\n", + surf->ss5.x_offset, surf->ss5.y_offset); dri_bo_unmap(surf_bo); } diff --git a/i965/brw_state_upload.c b/i965/brw_state_upload.c index 5de1450..b817b74 100644 --- a/i965/brw_state_upload.c +++ b/i965/brw_state_upload.c @@ -59,11 +59,12 @@ const struct brw_tracked_state *atoms[] = &brw_curbe_offsets, &brw_recalculate_urb_fence, - &brw_cc_vp, &brw_cc_unit, - &brw_wm_surfaces, /* must do before samplers */ + &brw_vs_surfaces, /* must do before unit */ + &brw_wm_constant_surface, /* must do before wm surfaces/bind bo */ + &brw_wm_surfaces, /* must do before samplers and unit */ &brw_wm_samplers, &brw_wm_unit, @@ -88,54 +89,27 @@ const struct brw_tracked_state *atoms[] = &brw_line_stipple, &brw_aa_line_parameters, - /* Ordering of the commands below is documented as fixed. - */ -#if 0 - &brw_pipelined_state_pointers, - &brw_urb_fence, - &brw_constant_buffer_state, -#else + &brw_psp_urb_cbs, -#endif &brw_drawing_rect, &brw_indices, + &brw_index_buffer, &brw_vertices, - NULL, /* brw_constant_buffer */ + &brw_constant_buffer }; void brw_init_state( struct brw_context *brw ) { - GLuint i; - - brw_init_cache(brw); - - brw->state.atoms = _mesa_malloc(sizeof(atoms)); - brw->state.nr_atoms = sizeof(atoms)/sizeof(*atoms); - _mesa_memcpy(brw->state.atoms, atoms, sizeof(atoms)); - - /* Patch in a pointer to the dynamic state atom: - */ - for (i = 0; i < brw->state.nr_atoms; i++) - if (brw->state.atoms[i] == NULL) - brw->state.atoms[i] = &brw->curbe.tracked_state; - - _mesa_memcpy(&brw->curbe.tracked_state, - &brw_constant_buffer, - sizeof(brw_constant_buffer)); + brw_init_caches(brw); } void brw_destroy_state( struct brw_context *brw ) { - if (brw->state.atoms) { - _mesa_free(brw->state.atoms); - brw->state.atoms = NULL; - } - - brw_destroy_cache(brw); + brw_destroy_caches(brw); brw_destroy_batch_cache(brw); } @@ -218,6 +192,7 @@ static struct dirty_bit_map mesa_bits[] = { DEFINE_BIT(_NEW_MULTISAMPLE), DEFINE_BIT(_NEW_TRACK_MATRIX), DEFINE_BIT(_NEW_PROGRAM), + DEFINE_BIT(_NEW_PROGRAM_CONSTANTS), {0, 0, 0} }; @@ -231,10 +206,10 @@ static struct dirty_bit_map brw_bits[] = { DEFINE_BIT(BRW_NEW_PRIMITIVE), DEFINE_BIT(BRW_NEW_CONTEXT), DEFINE_BIT(BRW_NEW_WM_INPUT_DIMENSIONS), - DEFINE_BIT(BRW_NEW_INPUT_VARYING), DEFINE_BIT(BRW_NEW_PSP), DEFINE_BIT(BRW_NEW_FENCE), DEFINE_BIT(BRW_NEW_INDICES), + DEFINE_BIT(BRW_NEW_INDEX_BUFFER), DEFINE_BIT(BRW_NEW_VERTICES), DEFINE_BIT(BRW_NEW_BATCH), DEFINE_BIT(BRW_NEW_DEPTH_BUFFER), @@ -312,6 +287,7 @@ void brw_validate_state( struct brw_context *brw ) if (brw->emit_state_always) { state->mesa |= ~0; state->brw |= ~0; + state->cache |= ~0; } if (brw->fragment_program != ctx->FragmentProgram._Current) { @@ -330,13 +306,13 @@ void brw_validate_state( struct brw_context *brw ) return; if (brw->state.dirty.brw & BRW_NEW_CONTEXT) - brw_clear_batch_cache_flush(brw); + brw_clear_batch_cache(brw); brw->intel.Fallback = 0; /* do prepare stage for all atoms */ for (i = 0; i < Elements(atoms); i++) { - const struct brw_tracked_state *atom = brw->state.atoms[i]; + const struct brw_tracked_state *atom = atoms[i]; if (brw->intel.Fallback) break; @@ -347,6 +323,19 @@ void brw_validate_state( struct brw_context *brw ) } } } + + /* Make sure that the textures which are referenced by the current + * brw fragment program are actually present/valid. + * If this fails, we can experience GPU lock-ups. + */ + { + const struct brw_fragment_program *fp; + fp = brw_fragment_program_const(brw->fragment_program); + if (fp) { + assert((fp->tex_units_used & ctx->Texture._EnabledUnits) + == fp->tex_units_used); + } + } } @@ -367,8 +356,8 @@ void brw_upload_state(struct brw_context *brw) _mesa_memset(&examined, 0, sizeof(examined)); prev = *state; - for (i = 0; i < brw->state.nr_atoms; i++) { - const struct brw_tracked_state *atom = brw->state.atoms[i]; + for (i = 0; i < Elements(atoms); i++) { + const struct brw_tracked_state *atom = atoms[i]; struct brw_state_flags generated; assert(atom->dirty.mesa || @@ -397,7 +386,7 @@ void brw_upload_state(struct brw_context *brw) } else { for (i = 0; i < Elements(atoms); i++) { - const struct brw_tracked_state *atom = brw->state.atoms[i]; + const struct brw_tracked_state *atom = atoms[i]; if (brw->intel.Fallback) break; diff --git a/i965/brw_structs.h b/i965/brw_structs.h index 89e2981..66d4127 100644 --- a/i965/brw_structs.h +++ b/i965/brw_structs.h @@ -33,6 +33,14 @@ #ifndef BRW_STRUCTS_H #define BRW_STRUCTS_H + +/** Number of general purpose registers (VS, WM, etc) */ +#define BRW_MAX_GRF 128 + +/** Number of message register file registers */ +#define BRW_MAX_MRF 16 + + /* Command packets: */ struct header @@ -434,8 +442,8 @@ struct brw_urb_fence { GLuint sf_fence:10; GLuint vf_fence:10; - GLuint cs_fence:10; - GLuint pad:2; + GLuint cs_fence:11; + GLuint pad:1; } bits1; }; @@ -815,7 +823,9 @@ struct brw_gs_unit_state struct { - GLuint pad0:10; + GLuint pad0:8; + GLuint rendering_enable:1; /* for IGDNG */ + GLuint pad4:1; GLuint stats_enable:1; GLuint nr_urb_entries:7; GLuint pad1:1; @@ -923,6 +933,28 @@ struct brw_wm_unit_state GLfloat global_depth_offset_constant; GLfloat global_depth_offset_scale; + + /* for IGDNG only */ + struct { + GLuint pad0:1; + GLuint grf_reg_count_1:3; + GLuint pad1:2; + GLuint kernel_start_pointer_1:26; + } wm8; + + struct { + GLuint pad0:1; + GLuint grf_reg_count_2:3; + GLuint pad1:2; + GLuint kernel_start_pointer_2:26; + } wm9; + + struct { + GLuint pad0:1; + GLuint grf_reg_count_3:3; + GLuint pad1:2; + GLuint kernel_start_pointer_3:26; + } wm10; }; struct brw_sampler_default_color { @@ -1075,7 +1107,7 @@ struct brw_surface_state GLuint y_offset:4; GLuint pad0:1; GLuint x_offset:7; - } ss5; /* NEW in Integrated Graphics Device */ + } ss5; /* New in G4X */ }; @@ -1168,7 +1200,7 @@ struct brw_instruction GLuint predicate_control:4; GLuint predicate_inverse:1; GLuint execution_size:3; - GLuint destreg__conditonalmod:4; /* destreg - send, conditionalmod - others */ + GLuint destreg__conditionalmod:4; /* destreg - send, conditionalmod - others */ GLuint pad0:2; GLuint debug_control:1; GLuint saturate:1; @@ -1196,7 +1228,9 @@ struct brw_instruction GLuint dest_reg_type:3; GLuint src0_reg_file:2; GLuint src0_reg_type:3; - GLuint pad:6; + GLuint src1_reg_file:2; /* 0x00000c00 */ + GLuint src1_reg_type:3; /* 0x00007000 */ + GLuint pad:1; GLint dest_indirect_offset:10; /* offset against the deref'd address reg */ GLuint dest_subreg_nr:3; /* subnr for the address reg a0.x */ GLuint dest_horiz_stride:2; @@ -1211,7 +1245,7 @@ struct brw_instruction GLuint src0_reg_type:3; GLuint src1_reg_file:2; GLuint src1_reg_type:3; - GLuint pad0:1; + GLuint pad:1; GLuint dest_writemask:4; GLuint dest_subreg_nr:1; GLuint dest_reg_nr:8; @@ -1298,6 +1332,14 @@ struct brw_instruction GLuint pad1:6; } ia16; + struct + { + GLuint pad:26; + GLuint end_of_thread:1; + GLuint pad1:1; + GLuint sfid:4; + } send_igdng; /* for IGDNG only */ + } bits2; union @@ -1308,7 +1350,7 @@ struct brw_instruction GLuint src1_reg_nr:8; GLuint src1_abs:1; GLuint src1_negate:1; - GLuint pad:1; + GLuint src1_address_mode:1; GLuint src1_horiz_stride:2; GLuint src1_width:3; GLuint src1_vert_stride:4; @@ -1323,7 +1365,7 @@ struct brw_instruction GLuint src1_reg_nr:8; GLuint src1_abs:1; GLuint src1_negate:1; - GLuint pad0:1; + GLuint src1_address_mode:1; GLuint src1_swz_z:2; GLuint src1_swz_w:2; GLuint pad1:1; @@ -1337,7 +1379,7 @@ struct brw_instruction GLuint src1_subreg_nr:3; GLuint src1_abs:1; GLuint src1_negate:1; - GLuint pad0:1; + GLuint src1_address_mode:1; GLuint src1_horiz_stride:2; GLuint src1_width:3; GLuint src1_vert_stride:4; @@ -1385,6 +1427,21 @@ struct brw_instruction } math; struct { + GLuint function:4; + GLuint int_type:1; + GLuint precision:1; + GLuint saturate:1; + GLuint data_type:1; + GLuint snapshot:1; + GLuint pad0:10; + GLuint header_present:1; + GLuint response_length:5; + GLuint msg_length:4; + GLuint pad1:2; + GLuint end_of_thread:1; + } math_igdng; + + struct { GLuint binding_table_index:8; GLuint sampler:4; GLuint return_format:2; @@ -1407,9 +1464,38 @@ struct brw_instruction GLuint end_of_thread:1; } sampler_g4x; + struct { + GLuint binding_table_index:8; + GLuint sampler:4; + GLuint msg_type:4; + GLuint simd_mode:2; + GLuint pad0:1; + GLuint header_present:1; + GLuint response_length:5; + GLuint msg_length:4; + GLuint pad1:2; + GLuint end_of_thread:1; + } sampler_igdng; + struct brw_urb_immediate urb; struct { + GLuint opcode:4; + GLuint offset:6; + GLuint swizzle_control:2; + GLuint pad:1; + GLuint allocate:1; + GLuint used:1; + GLuint complete:1; + GLuint pad0:3; + GLuint header_present:1; + GLuint response_length:5; + GLuint msg_length:4; + GLuint pad1:2; + GLuint end_of_thread:1; + } urb_igdng; + + struct { GLuint binding_table_index:8; GLuint msg_control:4; GLuint msg_type:2; @@ -1423,6 +1509,19 @@ struct brw_instruction struct { GLuint binding_table_index:8; + GLuint msg_control:3; + GLuint msg_type:3; + GLuint target_cache:2; + GLuint pad0:3; + GLuint header_present:1; + GLuint response_length:5; + GLuint msg_length:4; + GLuint pad1:2; + GLuint end_of_thread:1; + } dp_read_igdng; + + struct { + GLuint binding_table_index:8; GLuint msg_control:3; GLuint pixel_scoreboard_clear:1; GLuint msg_type:3; @@ -1435,6 +1534,20 @@ struct brw_instruction } dp_write; struct { + GLuint binding_table_index:8; + GLuint msg_control:3; + GLuint pixel_scoreboard_clear:1; + GLuint msg_type:3; + GLuint send_commit_msg:1; + GLuint pad0:3; + GLuint header_present:1; + GLuint response_length:5; + GLuint msg_length:4; + GLuint pad1:2; + GLuint end_of_thread:1; + } dp_write_igdng; + + struct { GLuint pad:16; GLuint response_length:4; GLuint msg_length:4; @@ -1443,8 +1556,18 @@ struct brw_instruction GLuint end_of_thread:1; } generic; + struct { + GLuint pad:19; + GLuint header_present:1; + GLuint response_length:5; + GLuint msg_length:4; + GLuint pad1:2; + GLuint end_of_thread:1; + } generic_igdng; + GLint d; GLuint ud; + float f; } bits3; }; diff --git a/i965/brw_tex_layout.c b/i965/brw_tex_layout.c index 51a617f..5986cbf 100644 --- a/i965/brw_tex_layout.c +++ b/i965/brw_tex_layout.c @@ -28,7 +28,6 @@ * Authors: * Keith Whitwell <keith@tungstengraphics.com> */ - /* Code to layout images in a mipmap tree for i965. */ @@ -37,17 +36,93 @@ #include "intel_tex_layout.h" #include "intel_context.h" #include "main/macros.h" +#include "intel_chipset.h" #define FILE_DEBUG_FLAG DEBUG_MIPTREE -GLboolean brw_miptree_layout( struct intel_context *intel, struct intel_mipmap_tree *mt ) +GLboolean brw_miptree_layout(struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t tiling) { - /* XXX: these vary depending on image format: - */ -/* GLint align_w = 4; */ + /* XXX: these vary depending on image format: */ + /* GLint align_w = 4; */ switch (mt->target) { - case GL_TEXTURE_CUBE_MAP: + case GL_TEXTURE_CUBE_MAP: + if (IS_IGDNG(intel->intelScreen->deviceID)) { + GLuint align_h = 2, align_w = 4; + GLuint level; + GLuint x = 0; + GLuint y = 0; + GLuint width = mt->width0; + GLuint height = mt->height0; + GLuint qpitch = 0; + GLuint y_pitch = 0; + + mt->pitch = mt->width0; + intel_get_texture_alignment_unit(mt->internal_format, &align_w, &align_h); + y_pitch = ALIGN(height, align_h); + + if (mt->compressed) { + mt->pitch = ALIGN(mt->width0, align_w); + } + + if (mt->first_level != mt->last_level) { + GLuint mip1_width; + + if (mt->compressed) { + mip1_width = ALIGN(minify(mt->width0), align_w) + + ALIGN(minify(minify(mt->width0)), align_w); + } else { + mip1_width = ALIGN(minify(mt->width0), align_w) + + minify(minify(mt->width0)); + } + + if (mip1_width > mt->pitch) { + mt->pitch = mip1_width; + } + } + + mt->pitch = intel_miptree_pitch_align(intel, mt, tiling, mt->pitch); + + if (mt->compressed) { + qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * mt->pitch * mt->cpp; + mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * 6; + } else { + qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * mt->pitch * mt->cpp; + mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * 6; + } + + for (level = mt->first_level; level <= mt->last_level; level++) { + GLuint img_height; + GLuint nr_images = 6; + GLuint q = 0; + + intel_miptree_set_level_info(mt, level, nr_images, x, y, width, + height, 1); + + for (q = 0; q < nr_images; q++) + intel_miptree_set_image_offset_ex(mt, level, q, x, y, q * qpitch); + + if (mt->compressed) + img_height = MAX2(1, height/4); + else + img_height = ALIGN(height, align_h); + + if (level == mt->first_level + 1) { + x += ALIGN(width, align_w); + } + else { + y += img_height; + } + + width = minify(width); + height = minify(height); + } + + break; + } + case GL_TEXTURE_3D: { GLuint width = mt->width0; GLuint height = mt->height0; @@ -59,25 +134,25 @@ GLboolean brw_miptree_layout( struct intel_context *intel, struct intel_mipmap_t GLuint align_w = 4; mt->total_height = 0; - + intel_get_texture_alignment_unit(mt->internal_format, &align_w, &align_h); + if (mt->compressed) { - align_w = intel_compressed_alignment(mt->internal_format); mt->pitch = ALIGN(width, align_w); pack_y_pitch = (height + 3) / 4; } else { - mt->pitch = intel_miptree_pitch_align (intel, mt, mt->width0); - pack_y_pitch = ALIGN(mt->height0, align_h); + mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, mt->width0); + pack_y_pitch = ALIGN(mt->height0, align_h); } - pack_x_pitch = mt->pitch; + pack_x_pitch = width; pack_x_nr = 1; - for ( level = mt->first_level ; level <= mt->last_level ; level++ ) { + for (level = mt->first_level ; level <= mt->last_level ; level++) { GLuint nr_images = mt->target == GL_TEXTURE_3D ? depth : 6; GLint x = 0; GLint y = 0; GLint q, j; - + intel_miptree_set_level_info(mt, level, nr_images, 0, mt->total_height, width, height, depth); @@ -89,7 +164,7 @@ GLboolean brw_miptree_layout( struct intel_context *intel, struct intel_mipmap_t } x = 0; - y += pack_y_pitch; + y += pack_y_pitch; } @@ -98,40 +173,50 @@ GLboolean brw_miptree_layout( struct intel_context *intel, struct intel_mipmap_t height = minify(height); depth = minify(depth); - if (mt->compressed) { - pack_y_pitch = (height + 3) / 4; - - if (pack_x_pitch > ALIGN(width, align_w)) { - pack_x_pitch = ALIGN(width, align_w); - pack_x_nr <<= 1; - } - } else { - if (pack_x_pitch > 4) { - pack_x_pitch >>= 1; - pack_x_nr <<= 1; - assert(pack_x_pitch * pack_x_nr <= mt->pitch); - } - - if (pack_y_pitch > 2) { - pack_y_pitch >>= 1; - pack_y_pitch = ALIGN(pack_y_pitch, align_h); - } - } + if (mt->compressed) { + pack_y_pitch = (height + 3) / 4; + + if (pack_x_pitch > ALIGN(width, align_w)) { + pack_x_pitch = ALIGN(width, align_w); + pack_x_nr <<= 1; + } + } else { + if (pack_x_pitch > 4) { + pack_x_pitch >>= 1; + pack_x_nr <<= 1; + assert(pack_x_pitch * pack_x_nr <= mt->pitch); + } + + if (pack_y_pitch > 2) { + pack_y_pitch >>= 1; + pack_y_pitch = ALIGN(pack_y_pitch, align_h); + } + } } + /* The 965's sampler lays cachelines out according to how accesses + * in the texture surfaces run, so they may be "vertical" through + * memory. As a result, the docs say in Surface Padding Requirements: + * Sampling Engine Surfaces that two extra rows of padding are required. + * We don't know of similar requirements for pre-965, but given that + * those docs are silent on padding requirements in general, let's play + * it safe. + */ + if (mt->target == GL_TEXTURE_CUBE_MAP) + mt->total_height += 2; break; } default: - i945_miptree_layout_2d(intel, mt); + i945_miptree_layout_2d(intel, mt, tiling); break; } - DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__, - mt->pitch, + DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__, + mt->pitch, mt->total_height, mt->cpp, mt->pitch * mt->total_height * mt->cpp ); - + return GL_TRUE; } diff --git a/i965/brw_urb.c b/i965/brw_urb.c index 7673dd3..8c6f435 100644 --- a/i965/brw_urb.c +++ b/i965/brw_urb.c @@ -143,7 +143,29 @@ static void recalculate_urb_fence( struct brw_context *brw ) brw->urb.nr_clip_entries = limits[CLP].preferred_nr_entries; brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries; brw->urb.nr_cs_entries = limits[CS].preferred_nr_entries; - + + brw->urb.constrained = 0; + + if (BRW_IS_IGDNG(brw)) { + brw->urb.nr_vs_entries = 128; + brw->urb.nr_sf_entries = 48; + if (check_urb_layout(brw)) { + goto done; + } else { + brw->urb.constrained = 1; + brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries; + brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries; + } + } else if (BRW_IS_G4X(brw)) { + brw->urb.nr_vs_entries = 64; + if (check_urb_layout(brw)) { + goto done; + } else { + brw->urb.constrained = 1; + brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries; + } + } + if (!check_urb_layout(brw)) { brw->urb.nr_vs_entries = limits[VS].min_nr_entries; brw->urb.nr_gs_entries = limits[GS].min_nr_entries; @@ -169,9 +191,8 @@ static void recalculate_urb_fence( struct brw_context *brw ) if (INTEL_DEBUG & (DEBUG_URB|DEBUG_FALLBACKS)) _mesa_printf("URB CONSTRAINED\n"); } - else - brw->urb.constrained = 0; +done: if (INTEL_DEBUG & DEBUG_URB) _mesa_printf("URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n", brw->urb.vs_start, diff --git a/i965/brw_vs.c b/i965/brw_vs.c index e3111c6..f0c79ef 100644 --- a/i965/brw_vs.c +++ b/i965/brw_vs.c @@ -90,8 +90,6 @@ static void brw_upload_vs_prog(struct brw_context *brw) struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program; - assert (vp && !vp->program.IsNVProgram); - memset(&key, 0, sizeof(key)); /* Just upload the program verbatim for now. Always send it all diff --git a/i965/brw_vs.h b/i965/brw_vs.h index 1e4f660..4a59136 100644 --- a/i965/brw_vs.h +++ b/i965/brw_vs.h @@ -58,6 +58,7 @@ struct brw_vs_compile { GLuint first_output; GLuint nr_outputs; + GLuint first_overflow_output; /**< VERT_ATTRIB_x */ GLuint first_tmp; GLuint last_tmp; diff --git a/i965/brw_vs_emit.c b/i965/brw_vs_emit.c index b69616d..108e19c 100644 --- a/i965/brw_vs_emit.c +++ b/i965/brw_vs_emit.c @@ -68,14 +68,20 @@ static void release_tmps( struct brw_vs_compile *c ) static void brw_vs_alloc_regs( struct brw_vs_compile *c ) { GLuint i, reg = 0, mrf; + int attributes_in_vue; -#if 0 - if (c->vp->program.Base.Parameters->NumParameters >= 6) - c->vp->use_const_buffer = 1; + /* Determine whether to use a real constant buffer or use a block + * of GRF registers for constants. The later is faster but only + * works if everything fits in the GRF. + * XXX this heuristic/check may need some fine tuning... + */ + if (c->vp->program.Base.Parameters->NumParameters + + c->vp->program.Base.NumTemporaries + 20 > BRW_MAX_GRF) + c->vp->use_const_buffer = GL_TRUE; else -#endif c->vp->use_const_buffer = GL_FALSE; - /*printf("use_const_buffer = %d\n", c->use_const_buffer);*/ + + /*printf("use_const_buffer = %d\n", c->vp->use_const_buffer);*/ /* r0 -- reserved as usual */ @@ -123,16 +129,27 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) reg++; } } + /* If there are no inputs, we'll still be reading one attribute's worth + * because it's required -- see urb_read_length setting. + */ + if (c->nr_inputs == 0) + reg++; - /* Allocate outputs: TODO: could organize the non-position outputs - * to go straight into message regs. + /* Allocate outputs. The non-position outputs go straight into message regs. */ c->nr_outputs = 0; c->first_output = reg; - mrf = 4; + c->first_overflow_output = 0; + + if (BRW_IS_IGDNG(c->func.brw)) + mrf = 8; + else + mrf = 4; + for (i = 0; i < VERT_RESULT_MAX; i++) { if (c->prog_data.outputs_written & (1 << i)) { c->nr_outputs++; + assert(i < Elements(c->regs[PROGRAM_OUTPUT])); if (i == VERT_RESULT_HPOS) { c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0); reg++; @@ -143,8 +160,17 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) mrf++; /* just a placeholder? XXX fix later stages & remove this */ } else { - c->regs[PROGRAM_OUTPUT][i] = brw_message_reg(mrf); - mrf++; + if (mrf < 16) { + c->regs[PROGRAM_OUTPUT][i] = brw_message_reg(mrf); + mrf++; + } + else { + /* too many vertex results to fit in MRF, use GRF for overflow */ + if (!c->first_overflow_output) + c->first_overflow_output = i; + c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0); + reg++; + } } } } @@ -200,8 +226,23 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) * vertex urb, so is half the amount: */ c->prog_data.urb_read_length = (c->nr_inputs + 1) / 2; + /* Setting this field to 0 leads to undefined behavior according to the + * the VS_STATE docs. Our VUEs will always have at least one attribute + * sitting in them, even if it's padding. + */ + if (c->prog_data.urb_read_length == 0) + c->prog_data.urb_read_length = 1; + + /* The VS VUEs are shared by VF (outputting our inputs) and VS, so size + * them to fit the biggest thing they need to. + */ + attributes_in_vue = MAX2(c->nr_outputs, c->nr_inputs); + + if (BRW_IS_IGDNG(c->func.brw)) + c->prog_data.urb_entry_size = (attributes_in_vue + 6 + 3) / 4; + else + c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 3) / 4; - c->prog_data.urb_entry_size = (c->nr_outputs + 2 + 3) / 4; c->prog_data.total_grf = reg; if (INTEL_DEBUG & DEBUG_VS) { @@ -709,10 +750,11 @@ get_constant(struct brw_vs_compile *c, struct brw_compile *p = &c->func; struct brw_reg const_reg; struct brw_reg const2_reg; + const GLboolean relAddr = src->RelAddr; assert(argIndex < 3); - if (c->current_const[argIndex].index != src->Index || src->RelAddr) { + if (c->current_const[argIndex].index != src->Index || relAddr) { struct brw_reg addrReg = c->regs[PROGRAM_ADDRESS][0]; c->current_const[argIndex].index = src->Index; @@ -725,13 +767,13 @@ get_constant(struct brw_vs_compile *c, brw_dp_READ_4_vs(p, c->current_const[argIndex].reg,/* writeback dest */ 0, /* oword */ - src->RelAddr, /* relative indexing? */ + relAddr, /* relative indexing? */ addrReg, /* address register */ 16 * src->Index, /* byte offset */ SURF_INDEX_VERT_CONST_BUFFER /* binding table index */ ); - if (src->RelAddr) { + if (relAddr) { /* second read */ const2_reg = get_tmp(c); @@ -742,7 +784,7 @@ get_constant(struct brw_vs_compile *c, brw_dp_READ_4_vs(p, const2_reg, /* writeback dest */ 1, /* oword */ - src->RelAddr, /* relative indexing? */ + relAddr, /* relative indexing? */ addrReg, /* address register */ 16 * src->Index, /* byte offset */ SURF_INDEX_VERT_CONST_BUFFER @@ -752,7 +794,7 @@ get_constant(struct brw_vs_compile *c, const_reg = c->current_const[argIndex].reg; - if (src->RelAddr) { + if (relAddr) { /* merge the two Owords into the constant register */ /* const_reg[7..4] = const2_reg[7..4] */ brw_MOV(p, @@ -869,6 +911,7 @@ get_src_reg( struct brw_vs_compile *c, case PROGRAM_STATE_VAR: case PROGRAM_CONSTANT: case PROGRAM_UNIFORM: + case PROGRAM_ENV_PARAM: if (c->vp->use_const_buffer) { return get_constant(c, inst, argIndex); } @@ -888,7 +931,6 @@ get_src_reg( struct brw_vs_compile *c, return brw_null_reg(); case PROGRAM_LOCAL_PARAM: - case PROGRAM_ENV_PARAM: case PROGRAM_WRITE_ONLY: default: assert(0); @@ -1061,6 +1103,8 @@ static void emit_vertex_write( struct brw_vs_compile *c) struct brw_reg m0 = brw_message_reg(0); struct brw_reg pos = c->regs[PROGRAM_OUTPUT][VERT_RESULT_HPOS]; struct brw_reg ndc; + int eot; + GLuint len_vertext_header = 2; if (c->key.copy_edgeflag) { brw_MOV(p, @@ -1070,14 +1114,16 @@ static void emit_vertex_write( struct brw_vs_compile *c) /* Build ndc coords */ ndc = get_tmp(c); + /* ndc = 1.0 / pos.w */ emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL); + /* ndc.xyz = pos * ndc */ brw_MUL(p, brw_writemask(ndc, WRITEMASK_XYZ), pos, ndc); /* Update the header for point size, user clipping flags, and -ve rhw * workaround. */ if ((c->prog_data.outputs_written & (1<<VERT_RESULT_PSIZ)) || - c->key.nr_userclip || !BRW_IS_G4X(p->brw)) + c->key.nr_userclip || BRW_IS_965(p->brw)) { struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); GLuint i; @@ -1108,7 +1154,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) * Later, clipping will detect ucp[6] and ensure the primitive is * clipped against all fixed planes. */ - if (!BRW_IS_G4X(p->brw)) { + if (BRW_IS_965(p->brw)) { brw_CMP(p, vec8(brw_null_reg()), BRW_CONDITIONAL_L, @@ -1135,7 +1181,23 @@ static void emit_vertex_write( struct brw_vs_compile *c) */ brw_set_access_mode(p, BRW_ALIGN_1); brw_MOV(p, offset(m0, 2), ndc); - brw_MOV(p, offset(m0, 3), pos); + + if (BRW_IS_IGDNG(p->brw)) { + /* There are 20 DWs (D0-D19) in VUE vertex header on IGDNG */ + brw_MOV(p, offset(m0, 3), pos); /* a portion of vertex header */ + /* m4, m5 contain the distances from vertex to the user clip planeXXX. + * Seems it is useless for us. + * m6 is used for aligning, so that the remainder of vertex element is + * reg-aligned. + */ + brw_MOV(p, offset(m0, 7), pos); /* the remainder of vertex element */ + len_vertext_header = 6; + } else { + brw_MOV(p, offset(m0, 3), pos); + len_vertext_header = 2; + } + + eot = (c->first_overflow_output == 0); brw_urb_WRITE(p, brw_null_reg(), /* dest */ @@ -1143,12 +1205,43 @@ static void emit_vertex_write( struct brw_vs_compile *c) c->r0, /* src */ 0, /* allocate */ 1, /* used */ - c->nr_outputs + 3, /* msg len */ + MIN2(c->nr_outputs + 1 + len_vertext_header, (BRW_MAX_MRF-1)), /* msg len */ 0, /* response len */ - 1, /* eot */ + eot, /* eot */ 1, /* writes complete */ 0, /* urb destination offset */ BRW_URB_SWIZZLE_INTERLEAVE); + + if (c->first_overflow_output > 0) { + /* Not all of the vertex outputs/results fit into the MRF. + * Move the overflowed attributes from the GRF to the MRF and + * issue another brw_urb_WRITE(). + */ + /* XXX I'm not 100% sure about which MRF regs to use here. Starting + * at mrf[4] atm... + */ + GLuint i, mrf = 0; + for (i = c->first_overflow_output; i < VERT_RESULT_MAX; i++) { + if (c->prog_data.outputs_written & (1 << i)) { + /* move from GRF to MRF */ + brw_MOV(p, brw_message_reg(4+mrf), c->regs[PROGRAM_OUTPUT][i]); + mrf++; + } + } + + brw_urb_WRITE(p, + brw_null_reg(), /* dest */ + 4, /* starting mrf reg nr */ + c->r0, /* src */ + 0, /* allocate */ + 1, /* used */ + mrf+1, /* msg len */ + 0, /* response len */ + 1, /* eot */ + 1, /* writes complete */ + BRW_MAX_MRF-1, /* urb destination offset */ + BRW_URB_SWIZZLE_INTERLEAVE); + } } @@ -1169,28 +1262,49 @@ post_vs_emit( struct brw_vs_compile *c, /* patch up the END code to jump past subroutines, etc */ offset = last_inst - end_inst; - brw_set_src1(end_inst, brw_imm_d(offset * 16)); + if (offset > 1) { + brw_set_src1(end_inst, brw_imm_d(offset * 16)); + } else { + end_inst->header.opcode = BRW_OPCODE_NOP; + } } +static uint32_t +get_predicate(uint32_t swizzle) +{ + switch (swizzle) { + case SWIZZLE_XXXX: + return BRW_PREDICATE_ALIGN16_REPLICATE_X; + case SWIZZLE_YYYY: + return BRW_PREDICATE_ALIGN16_REPLICATE_Y; + case SWIZZLE_ZZZZ: + return BRW_PREDICATE_ALIGN16_REPLICATE_Z; + case SWIZZLE_WWWW: + return BRW_PREDICATE_ALIGN16_REPLICATE_W; + default: + _mesa_problem(NULL, "Unexpected predicate: 0x%08x\n", swizzle); + return BRW_PREDICATE_NORMAL; + } +} /* Emit the vertex program instructions here. */ void brw_vs_emit(struct brw_vs_compile *c ) { -#define MAX_IFSN 32 +#define MAX_IF_DEPTH 32 +#define MAX_LOOP_DEPTH 32 struct brw_compile *p = &c->func; - GLuint nr_insns = c->vp->program.Base.NumInstructions; - GLuint insn, if_insn = 0; + const GLuint nr_insns = c->vp->program.Base.NumInstructions; + GLuint insn, if_depth = 0, loop_depth = 0; GLuint end_offset = 0; struct brw_instruction *end_inst, *last_inst; - struct brw_instruction *if_inst[MAX_IFSN]; - struct brw_indirect stack_index = brw_indirect(0, 0); - + struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH]; + const struct brw_indirect stack_index = brw_indirect(0, 0); GLuint index; GLuint file; if (INTEL_DEBUG & DEBUG_VS) { - _mesa_printf("vs-emit:\n"); + _mesa_printf("vs-mesa:\n"); _mesa_print_program(&c->vp->program.Base); _mesa_printf("\n"); } @@ -1219,7 +1333,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) for (insn = 0; insn < nr_insns; insn++) { - struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn]; + const struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn]; struct brw_reg args[3], dst; GLuint i; @@ -1232,7 +1346,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) */ if (inst->Opcode != OPCODE_SWZ) for (i = 0; i < 3; i++) { - struct prog_src_register *src = &inst->SrcReg[i]; + const struct prog_src_register *src = &inst->SrcReg[i]; index = src->Index; file = src->File; if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src) @@ -1376,16 +1490,54 @@ void brw_vs_emit(struct brw_vs_compile *c ) emit_xpd(p, dst, args[0], args[1]); break; case OPCODE_IF: - assert(if_insn < MAX_IFSN); - if_inst[if_insn++] = brw_IF(p, BRW_EXECUTE_8); + assert(if_depth < MAX_IF_DEPTH); + if_inst[if_depth] = brw_IF(p, BRW_EXECUTE_8); + if_inst[if_depth]->header.predicate_control = + get_predicate(inst->DstReg.CondSwizzle); + if_depth++; break; case OPCODE_ELSE: - if_inst[if_insn-1] = brw_ELSE(p, if_inst[if_insn-1]); + if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]); break; case OPCODE_ENDIF: - assert(if_insn > 0); - brw_ENDIF(p, if_inst[--if_insn]); + assert(if_depth > 0); + brw_ENDIF(p, if_inst[--if_depth]); break; +#if 0 + case OPCODE_BGNLOOP: + loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8); + break; + case OPCODE_BRK: + brw_BREAK(p); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + break; + case OPCODE_CONT: + brw_CONT(p); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + break; + case OPCODE_ENDLOOP: + { + struct brw_instruction *inst0, *inst1; + loop_depth--; + inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]); + /* patch all the BREAK/CONT instructions from last BEGINLOOP */ + while (inst0 > loop_inst[loop_depth]) { + inst0--; + if (inst0->header.opcode == BRW_OPCODE_BREAK) { + inst0->bits3.if_else.jump_count = inst1 - inst0 + 1; + inst0->bits3.if_else.pop_count = 0; + } + else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) { + inst0->bits3.if_else.jump_count = inst1 - inst0; + inst0->bits3.if_else.pop_count = 0; + } + } + } + break; +#else + (void) loop_inst; + (void) loop_depth; +#endif case OPCODE_BRA: brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); @@ -1430,6 +1582,19 @@ void brw_vs_emit(struct brw_vs_compile *c ) "unknown"); } + /* Set the predication update on the last instruction of the native + * instruction sequence. + * + * This would be problematic if it was set on a math instruction, + * but that shouldn't be the case with the current GLSL compiler. + */ + if (inst->CondUpdate) { + struct brw_instruction *hw_insn = &p->store[p->nr_insn - 1]; + + assert(hw_insn->header.destreg__conditionalmod == 0); + hw_insn->header.destreg__conditionalmod = BRW_CONDITIONAL_NZ; + } + if ((inst->DstReg.File == PROGRAM_OUTPUT) && (inst->DstReg.Index != VERT_RESULT_HPOS) && c->output_regs[inst->DstReg.Index].used_in_src) { @@ -1467,4 +1632,13 @@ void brw_vs_emit(struct brw_vs_compile *c ) emit_vertex_write(c); post_vs_emit(c, end_inst, last_inst); + + if (INTEL_DEBUG & DEBUG_VS) { + int i; + + _mesa_printf("vs-native:\n"); + for (i = 0; i < p->nr_insn; i++) + brw_disasm(stderr, &p->store[i]); + _mesa_printf("\n"); + } } diff --git a/i965/brw_vs_state.c b/i965/brw_vs_state.c index 3d29538..d790ab6 100644 --- a/i965/brw_vs_state.c +++ b/i965/brw_vs_state.c @@ -97,7 +97,11 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) * brw_urb_WRITE() results. */ vs.thread1.single_program_flow = 0; - vs.thread1.binding_table_entry_count = key->nr_surfaces; + + if (BRW_IS_IGDNG(brw)) + vs.thread1.binding_table_entry_count = 0; /* hardware requirement */ + else + vs.thread1.binding_table_entry_count = key->nr_surfaces; vs.thread3.urb_entry_read_length = key->urb_entry_read_length; vs.thread3.const_urb_entry_read_length = key->curb_entry_read_length; @@ -105,10 +109,16 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) vs.thread3.urb_entry_read_offset = 0; vs.thread3.const_urb_entry_read_offset = key->curbe_offset * 2; - vs.thread4.nr_urb_entries = key->nr_urb_entries; + if (BRW_IS_IGDNG(brw)) + vs.thread4.nr_urb_entries = key->nr_urb_entries >> 2; + else + vs.thread4.nr_urb_entries = key->nr_urb_entries; + vs.thread4.urb_entry_allocation_size = key->urb_size - 1; - if (BRW_IS_G4X(brw)) + if (BRW_IS_IGDNG(brw)) + chipset_max_threads = 72; + else if (BRW_IS_G4X(brw)) chipset_max_threads = 32; else chipset_max_threads = 16; @@ -120,6 +130,8 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) /* No samplers for ARB_vp programs: */ + /* It has to be set to 0 for IGDNG + */ vs.vs5.sampler_count = 0; if (INTEL_DEBUG & DEBUG_STATS) diff --git a/i965/brw_vs_surface_state.c b/i965/brw_vs_surface_state.c new file mode 100644 index 0000000..89f4752 --- /dev/null +++ b/i965/brw_vs_surface_state.c @@ -0,0 +1,226 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "main/mtypes.h" +#include "main/texformat.h" +#include "main/texstore.h" +#include "shader/prog_parameter.h" + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" + +/* Creates a new VS constant buffer reflecting the current VS program's + * constants, if needed by the VS program. + * + * Otherwise, constants go through the CURBEs using the brw_constant_buffer + * state atom. + */ +static drm_intel_bo * +brw_vs_update_constant_buffer(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + struct brw_vertex_program *vp = + (struct brw_vertex_program *) brw->vertex_program; + const struct gl_program_parameter_list *params = vp->program.Base.Parameters; + const int size = params->NumParameters * 4 * sizeof(GLfloat); + drm_intel_bo *const_buffer; + + /* BRW_NEW_VERTEX_PROGRAM */ + if (!vp->use_const_buffer) + return NULL; + + const_buffer = drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer", + size, 64); + + /* _NEW_PROGRAM_CONSTANTS */ + dri_bo_subdata(const_buffer, 0, size, params->ParameterValues); + + return const_buffer; +} + +/** + * Update the surface state for a VS constant buffer. + * + * Sets brw->vs.surf_bo[surf] and brw->vp->const_buffer. + */ +static void +brw_update_vs_constant_surface( GLcontext *ctx, + GLuint surf) +{ + struct brw_context *brw = brw_context(ctx); + struct brw_surface_key key; + struct brw_vertex_program *vp = + (struct brw_vertex_program *) brw->vertex_program; + const struct gl_program_parameter_list *params = vp->program.Base.Parameters; + + assert(surf == 0); + + /* If we're in this state update atom, we need to update VS constants, so + * free the old buffer and create a new one for the new contents. + */ + dri_bo_unreference(vp->const_buffer); + vp->const_buffer = brw_vs_update_constant_buffer(brw); + + /* If there's no constant buffer, then no surface BO is needed to point at + * it. + */ + if (vp->const_buffer == 0) { + drm_intel_bo_unreference(brw->vs.surf_bo[surf]); + brw->vs.surf_bo[surf] = NULL; + return; + } + + memset(&key, 0, sizeof(key)); + + key.format = MESA_FORMAT_RGBA_FLOAT32; + key.internal_format = GL_RGBA; + key.bo = vp->const_buffer; + key.depthmode = GL_NONE; + key.pitch = params->NumParameters; + key.width = params->NumParameters; + key.height = 1; + key.depth = 1; + key.cpp = 16; + + /* + printf("%s:\n", __FUNCTION__); + printf(" width %d height %d depth %d cpp %d pitch %d\n", + key.width, key.height, key.depth, key.cpp, key.pitch); + */ + + drm_intel_bo_unreference(brw->vs.surf_bo[surf]); + brw->vs.surf_bo[surf] = brw_search_cache(&brw->surface_cache, + BRW_SS_SURFACE, + &key, sizeof(key), + &key.bo, key.bo ? 1 : 0, + NULL); + if (brw->vs.surf_bo[surf] == NULL) { + brw->vs.surf_bo[surf] = brw_create_constant_surface(brw, &key); + } +} + + +/** + * Constructs the binding table for the VS surface state. + */ +static dri_bo * +brw_vs_get_binding_table(struct brw_context *brw) +{ + dri_bo *bind_bo; + + bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND, + NULL, 0, + brw->vs.surf_bo, BRW_VS_MAX_SURF, + NULL); + + if (bind_bo == NULL) { + GLuint data_size = BRW_VS_MAX_SURF * sizeof(GLuint); + uint32_t *data = malloc(data_size); + int i; + + for (i = 0; i < BRW_VS_MAX_SURF; i++) + if (brw->vs.surf_bo[i]) + data[i] = brw->vs.surf_bo[i]->offset; + else + data[i] = 0; + + bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND, + NULL, 0, + brw->vs.surf_bo, BRW_VS_MAX_SURF, + data, data_size, + NULL, NULL); + + /* Emit binding table relocations to surface state */ + for (i = 0; i < BRW_VS_MAX_SURF; i++) { + if (brw->vs.surf_bo[i] != NULL) { + /* The presumed offsets were set in the data values for + * brw_upload_cache. + */ + drm_intel_bo_emit_reloc(bind_bo, i * 4, + brw->vs.surf_bo[i], 0, + I915_GEM_DOMAIN_INSTRUCTION, 0); + } + } + + free(data); + } + + return bind_bo; +} + +/** + * Vertex shader surfaces (constant buffer). + * + * This consumes the state updates for the constant buffer needing + * to be updated, and produces BRW_NEW_NR_VS_SURFACES for the VS unit and + * CACHE_NEW_SURF_BIND for the binding table upload. + */ +static void prepare_vs_surfaces(struct brw_context *brw ) +{ + GLcontext *ctx = &brw->intel.ctx; + int i; + int nr_surfaces = 0; + + brw_update_vs_constant_surface(ctx, SURF_INDEX_VERT_CONST_BUFFER); + + for (i = 0; i < BRW_VS_MAX_SURF; i++) { + if (brw->vs.surf_bo[i] != NULL) { + nr_surfaces = i + 1; + } + } + + if (brw->vs.nr_surfaces != nr_surfaces) { + brw->state.dirty.brw |= BRW_NEW_NR_VS_SURFACES; + brw->vs.nr_surfaces = nr_surfaces; + } + + /* Note that we don't end up updating the bind_bo if we don't have a + * surface to be pointing at. This should be relatively harmless, as it + * just slightly increases our working set size. + */ + if (brw->vs.nr_surfaces != 0) { + dri_bo_unreference(brw->vs.bind_bo); + brw->vs.bind_bo = brw_vs_get_binding_table(brw); + } +} + +const struct brw_tracked_state brw_vs_surfaces = { + .dirty = { + .mesa = (_NEW_PROGRAM_CONSTANTS), + .brw = (BRW_NEW_VERTEX_PROGRAM), + .cache = 0 + }, + .prepare = prepare_vs_surfaces, +}; + + + diff --git a/i965/brw_vtbl.c b/i965/brw_vtbl.c index ba03afd..ac11790 100644 --- a/i965/brw_vtbl.c +++ b/i965/brw_vtbl.c @@ -177,14 +177,6 @@ static void brw_note_fence( struct intel_context *intel, GLuint fence ) brw_context(&intel->ctx)->state.dirty.brw |= BRW_NEW_FENCE; } - -static void brw_note_unlock( struct intel_context *intel ) -{ - struct brw_context *brw = brw_context(&intel->ctx); - brw_state_cache_check_size(brw); -} - - /* called from intelWaitForIdle() and intelFlush() * * For now, just flush everything. Could be smarter later. @@ -194,7 +186,7 @@ static GLuint brw_flush_cmd( void ) struct brw_mi_flush flush; flush.opcode = CMD_MI_FLUSH; flush.pad = 0; - flush.flags = BRW_FLUSH_READ_CACHE | BRW_FLUSH_STATE_CACHE; + flush.flags = BRW_FLUSH_STATE_CACHE; return *(GLuint *)&flush; } @@ -215,7 +207,6 @@ void brwInitVtbl( struct brw_context *brw ) brw->intel.vtbl.invalidate_state = brw_invalidate_state; brw->intel.vtbl.note_fence = brw_note_fence; - brw->intel.vtbl.note_unlock = brw_note_unlock; brw->intel.vtbl.new_batch = brw_new_batch; brw->intel.vtbl.finish_batch = brw_finish_batch; brw->intel.vtbl.destroy = brw_destroy_context; diff --git a/i965/brw_wm.c b/i965/brw_wm.c index 8a3b7df..2292de9 100644 --- a/i965/brw_wm.c +++ b/i965/brw_wm.c @@ -41,13 +41,13 @@ GLuint brw_wm_nr_args( GLuint opcode ) { switch (opcode) { case WM_FRONTFACING: - return 0; case WM_PIXELXY: + return 0; case WM_CINTERP: case WM_WPOSXY: + case WM_DELTAXY: return 1; case WM_LINTERP: - case WM_DELTAXY: case WM_PIXELW: return 2; case WM_FB_WRITE: @@ -171,9 +171,11 @@ static void do_wm_prog( struct brw_context *brw, * differently from "simple" shaders. */ if (fp->isGLSL) { + c->dispatch_width = 8; brw_wm_glsl_emit(brw, c); } else { + c->dispatch_width = 16; brw_wm_non_glsl_emit(brw, c); } @@ -202,6 +204,7 @@ static void brw_wm_populate_key( struct brw_context *brw, /* BRW_NEW_FRAGMENT_PROGRAM */ const struct brw_fragment_program *fp = (struct brw_fragment_program *)brw->fragment_program; + GLboolean uses_depth = (fp->program.Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0; GLuint lookup = 0; GLuint line_aa; GLuint i; @@ -263,6 +266,7 @@ static void brw_wm_populate_key( struct brw_context *brw, brw_wm_lookup_iz(line_aa, lookup, + uses_depth, key); @@ -272,6 +276,9 @@ static void brw_wm_populate_key( struct brw_context *brw, /* _NEW_LIGHT */ key->flat_shade = (ctx->Light.ShadeModel == GL_FLAT); + /* _NEW_HINT */ + key->linear_color = (ctx->Hint.PerspectiveCorrection == GL_FASTEST); + /* _NEW_TEXTURE */ for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { const struct gl_texture_unit *unit = &ctx->Texture.Unit[i]; @@ -351,6 +358,7 @@ const struct brw_tracked_state brw_wm_prog = { .dirty = { .mesa = (_NEW_COLOR | _NEW_DEPTH | + _NEW_HINT | _NEW_STENCIL | _NEW_POLYGON | _NEW_LINE | diff --git a/i965/brw_wm.h b/i965/brw_wm.h index 295fed8..ae98b54 100644 --- a/i965/brw_wm.h +++ b/i965/brw_wm.h @@ -63,6 +63,7 @@ struct brw_wm_prog_key { GLuint computes_depth:1; /* could be derived from program string */ GLuint source_depth_to_render_target:1; GLuint flat_shade:1; + GLuint linear_color:1; /**< linear interpolation vs perspective interp */ GLuint runtime_check_aads_emit:1; GLbitfield proj_attrib_mask; /**< one bit per fragment program attribute */ @@ -241,19 +242,25 @@ struct brw_wm_compile { GLuint max_wm_grf; GLuint last_scratch; + GLuint cur_inst; /**< index of current instruction */ + + GLboolean out_of_regs; /**< ran out of GRF registers? */ + /** Mapping from Mesa registers to hardware registers */ struct { GLboolean inited; struct brw_reg reg; } wm_regs[PROGRAM_PAYLOAD+1][256][4]; + GLboolean used_grf[BRW_WM_MAX_GRF]; + GLuint first_free_grf; struct brw_reg stack; struct brw_reg emit_mask_reg; - GLuint reg_index; /**< Index of next free GRF register */ GLuint tmp_regs[BRW_WM_MAX_GRF]; GLuint tmp_index; GLuint tmp_max; GLuint subroutines[BRW_WM_MAX_SUBROUTINE]; + GLuint dispatch_width; /** we may need up to 3 constants per instruction (if use_const_buffer) */ struct { @@ -286,6 +293,7 @@ void brw_wm_print_program( struct brw_wm_compile *c, void brw_wm_lookup_iz( GLuint line_aa, GLuint lookup, + GLboolean ps_uses_depth, struct brw_wm_prog_key *key ); GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp); diff --git a/i965/brw_wm_emit.c b/i965/brw_wm_emit.c index a870e75..268f796 100644 --- a/i965/brw_wm_emit.c +++ b/i965/brw_wm_emit.c @@ -65,8 +65,7 @@ static INLINE struct brw_reg sechalf( struct brw_reg reg ) static void emit_pixel_xy(struct brw_compile *p, const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0) + GLuint mask) { struct brw_reg r1 = brw_vec1_grf(1, 0); struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW); @@ -98,8 +97,7 @@ static void emit_pixel_xy(struct brw_compile *p, static void emit_delta_xy(struct brw_compile *p, const struct brw_reg *dst, GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1) + const struct brw_reg *arg0) { struct brw_reg r1 = brw_vec1_grf(1, 0); @@ -353,6 +351,19 @@ static void emit_mad( struct brw_compile *p, } } +static void emit_trunc( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0) +{ + GLuint i; + + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + brw_RNDZ(p, dst[i], arg0[i]); + } + } +} static void emit_lrp( struct brw_compile *p, const struct brw_reg *dst, @@ -532,16 +543,18 @@ static void emit_dp3( struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *arg1 ) { + int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + if (!(mask & WRITEMASK_XYZW)) return; /* Do not emit dead code */ - assert((mask & WRITEMASK_XYZW) == WRITEMASK_X); + assert(is_power_of_two(mask & WRITEMASK_XYZW)); brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]); brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]); brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); - brw_MAC(p, dst[0], arg0[2], arg1[2]); + brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]); brw_set_saturate(p, 0); } @@ -552,17 +565,19 @@ static void emit_dp4( struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *arg1 ) { + int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + if (!(mask & WRITEMASK_XYZW)) return; /* Do not emit dead code */ - assert((mask & WRITEMASK_XYZW) == WRITEMASK_X); + assert(is_power_of_two(mask & WRITEMASK_XYZW)); brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]); brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]); brw_MAC(p, brw_null_reg(), arg0[2], arg1[2]); brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); - brw_MAC(p, dst[0], arg0[3], arg1[3]); + brw_MAC(p, dst[dst_chan], arg0[3], arg1[3]); brw_set_saturate(p, 0); } @@ -573,17 +588,19 @@ static void emit_dph( struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *arg1 ) { + const int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + if (!(mask & WRITEMASK_XYZW)) return; /* Do not emit dead code */ - assert((mask & WRITEMASK_XYZW) == WRITEMASK_X); + assert(is_power_of_two(mask & WRITEMASK_XYZW)); brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]); brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]); - brw_MAC(p, dst[0], arg0[2], arg1[2]); + brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]); brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); - brw_ADD(p, dst[0], dst[0], arg1[3]); + brw_ADD(p, dst[dst_chan], dst[dst_chan], arg1[3]); brw_set_saturate(p, 0); } @@ -619,18 +636,19 @@ static void emit_math1( struct brw_compile *p, GLuint mask, const struct brw_reg *arg0 ) { + int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + if (!(mask & WRITEMASK_XYZW)) return; /* Do not emit dead code */ - //assert((mask & WRITEMASK_XYZW) == WRITEMASK_X || - // function == BRW_MATH_FUNCTION_SINCOS); - + assert(is_power_of_two(mask & WRITEMASK_XYZW)); + brw_MOV(p, brw_message_reg(2), arg0[0]); /* Send two messages to perform all 16 operations: */ brw_math_16(p, - dst[0], + dst[dst_chan], function, (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, 2, @@ -646,10 +664,12 @@ static void emit_math2( struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *arg1) { + int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + if (!(mask & WRITEMASK_XYZW)) return; /* Do not emit dead code */ - assert((mask & WRITEMASK_XYZW) == WRITEMASK_X); + assert(is_power_of_two(mask & WRITEMASK_XYZW)); brw_push_insn_state(p); @@ -668,7 +688,7 @@ static void emit_math2( struct brw_compile *p, */ brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_math(p, - dst[0], + dst[dst_chan], function, (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, 2, @@ -678,7 +698,7 @@ static void emit_math2( struct brw_compile *p, brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); brw_math(p, - offset(dst[0],1), + offset(dst[dst_chan],1), function, (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, 4, @@ -701,6 +721,7 @@ static void emit_tex( struct brw_wm_compile *c, GLuint msgLength, responseLength; GLuint i, nr; GLuint emit; + GLuint msg_type; /* How many input regs are there? */ @@ -714,10 +735,14 @@ static void emit_tex( struct brw_wm_compile *c, emit = WRITEMASK_XY; nr = 2; break; - default: + case TEXTURE_3D_INDEX: + case TEXTURE_CUBE_INDEX: emit = WRITEMASK_XYZ; nr = 3; break; + default: + /* unexpected target */ + abort(); } if (inst->tex_shadow) { @@ -738,6 +763,18 @@ static void emit_tex( struct brw_wm_compile *c, responseLength = 8; /* always */ + if (BRW_IS_IGDNG(p->brw)) { + if (inst->tex_shadow) + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG; + else + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG; + } else { + if (inst->tex_shadow) + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE; + else + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE; + } + brw_SAMPLE(p, retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW), 1, @@ -745,12 +782,12 @@ static void emit_tex( struct brw_wm_compile *c, SURF_INDEX_TEXTURE(inst->tex_unit), inst->tex_unit, /* sampler */ inst->writemask, - (inst->tex_shadow ? - BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE : - BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE), + msg_type, responseLength, msgLength, - 0); + 0, + 1, + BRW_SAMPLER_SIMD_MODE_SIMD16); } @@ -762,7 +799,7 @@ static void emit_txb( struct brw_wm_compile *c, { struct brw_compile *p = &c->func; GLuint msgLength; - + GLuint msg_type; /* Shadow ignored for txb. */ switch (inst->tex_idx) { @@ -777,16 +814,25 @@ static void emit_txb( struct brw_wm_compile *c, brw_MOV(p, brw_message_reg(4), arg[1]); brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); break; - default: + case TEXTURE_3D_INDEX: + case TEXTURE_CUBE_INDEX: brw_MOV(p, brw_message_reg(2), arg[0]); brw_MOV(p, brw_message_reg(4), arg[1]); brw_MOV(p, brw_message_reg(6), arg[2]); break; + default: + /* unexpected target */ + abort(); } brw_MOV(p, brw_message_reg(8), arg[3]); msgLength = 9; + if (BRW_IS_IGDNG(p->brw)) + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG; + else + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; + brw_SAMPLE(p, retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW), 1, @@ -794,10 +840,12 @@ static void emit_txb( struct brw_wm_compile *c, SURF_INDEX_TEXTURE(inst->tex_unit), inst->tex_unit, /* sampler */ inst->writemask, - BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS, + msg_type, 8, /* responseLength */ msgLength, - 0); + 0, + 1, + BRW_SAMPLER_SIMD_MODE_SIMD16); } @@ -1009,7 +1057,7 @@ static void emit_fb_write( struct brw_wm_compile *c, get_element_ud(brw_vec8_grf(1,0), 6), brw_imm_ud(1<<26)); - jmp = brw_JMPI(p, ip, ip, brw_imm_w(0)); + jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)); { emit_aa(c, arg1, 2); fire_fb_write(c, 0, nr, target, eot); @@ -1161,11 +1209,11 @@ void brw_wm_emit( struct brw_wm_compile *c ) /* Generated instructions for calculating triangle interpolants: */ case WM_PIXELXY: - emit_pixel_xy(p, dst, dst_flags, args[0]); + emit_pixel_xy(p, dst, dst_flags); break; case WM_DELTAXY: - emit_delta_xy(p, dst, dst_flags, args[0], args[1]); + emit_delta_xy(p, dst, dst_flags, args[0]); break; case WM_WPOSXY: @@ -1222,6 +1270,10 @@ void brw_wm_emit( struct brw_wm_compile *c ) emit_dph(p, dst, dst_flags, args[0], args[1]); break; + case OPCODE_TRUNC: + emit_trunc(p, dst, dst_flags, args[0]); + break; + case OPCODE_LRP: emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]); break; @@ -1348,4 +1400,13 @@ void brw_wm_emit( struct brw_wm_compile *c ) inst->dst[i]->hw_reg, inst->dst[i]->spill_slot); } + + if (INTEL_DEBUG & DEBUG_WM) { + int i; + + _mesa_printf("wm-native:\n"); + for (i = 0; i < p->nr_insn; i++) + brw_disasm(stderr, &p->store[i]); + _mesa_printf("\n"); + } } diff --git a/i965/brw_wm_fp.c b/i965/brw_wm_fp.c index 49aad28..123fe84 100644 --- a/i965/brw_wm_fp.c +++ b/i965/brw_wm_fp.c @@ -42,6 +42,12 @@ #include "shader/prog_statevars.h" +/** An invalid texture target */ +#define TEX_TARGET_NONE NUM_TEXTURE_TARGETS + +/** An invalid texture unit */ +#define TEX_UNIT_NONE BRW_MAX_TEX_UNIT + #define FIRST_INTERNAL_TEMP MAX_NV_FRAGMENT_PROGRAM_TEMPS #define X 0 @@ -199,6 +205,15 @@ static struct prog_instruction * emit_tex_op(struct brw_wm_compile *c, { struct prog_instruction *inst = get_fp_inst(c); + assert(tex_src_unit < BRW_MAX_TEX_UNIT || + tex_src_unit == TEX_UNIT_NONE); + assert(tex_src_target < NUM_TEXTURE_TARGETS || + tex_src_target == TEX_TARGET_NONE); + + /* update mask of which texture units are referenced by this program */ + if (tex_src_unit != TEX_UNIT_NONE) + c->fp->tex_units_used |= (1 << tex_src_unit); + memset(inst, 0, sizeof(*inst)); inst->Opcode = op; @@ -223,12 +238,45 @@ static struct prog_instruction * emit_op(struct brw_wm_compile *c, struct prog_src_register src2 ) { return emit_tex_op(c, op, dest, saturate, - 0, 0, 0, /* tex unit, target, shadow */ + TEX_UNIT_NONE, TEX_TARGET_NONE, 0, /* unit, tgt, shadow */ src0, src1, src2); } - +/* Many Mesa opcodes produce the same value across all the result channels. + * We'd rather not have to support that splatting in the opcode implementations, + * and brw_wm_pass*.c wants to optimize them out by shuffling references around + * anyway. We can easily get both by emitting the opcode to one channel, and + * then MOVing it to the others, which brw_wm_pass*.c already understands. + */ +static struct prog_instruction *emit_scalar_insn(struct brw_wm_compile *c, + const struct prog_instruction *inst0) +{ + struct prog_instruction *inst; + unsigned int dst_chan; + unsigned int other_channel_mask; + + if (inst0->DstReg.WriteMask == 0) + return NULL; + + dst_chan = _mesa_ffs(inst0->DstReg.WriteMask) - 1; + inst = get_fp_inst(c); + *inst = *inst0; + inst->DstReg.WriteMask = 1 << dst_chan; + + other_channel_mask = inst0->DstReg.WriteMask & ~(1 << dst_chan); + if (other_channel_mask != 0) { + inst = emit_op(c, + OPCODE_MOV, + dst_mask(inst0->DstReg, other_channel_mask), + 0, + src_swizzle1(src_reg_from_dst(inst0->DstReg), dst_chan), + src_undef(), + src_undef()); + } + return inst; +} + /*********************************************************************** * Special instructions for interpolation and other tasks @@ -354,24 +402,28 @@ static void emit_interp( struct brw_wm_compile *c, src_undef()); } else { - emit_op(c, - WM_LINTERP, - dst, - 0, - interp, - deltas, - src_undef()); + if (c->key.linear_color) { + emit_op(c, + WM_LINTERP, + dst, + 0, + interp, + deltas, + src_undef()); + } + else { + /* perspective-corrected color interpolation */ + emit_op(c, + WM_PINTERP, + dst, + 0, + interp, + deltas, + get_pixel_w(c)); + } } break; case FRAG_ATTRIB_FOGC: - /* The FOGC input is really special. When a program uses glFogFragCoord, - * the results returned are supposed to be (f,0,0,1). But for Mesa GLSL, - * the glFrontFacing and glPointCoord values are also stashed in FOGC. - * So, write the interpolated fog value to X, then either 0, 1, or the - * stashed values to Y, Z, W. Note that this means that - * glFogFragCoord.yzw can be wrong in those cases! - */ - /* Interpolate the fog coordinate */ emit_op(c, WM_PINTERP, @@ -381,26 +433,40 @@ static void emit_interp( struct brw_wm_compile *c, deltas, get_pixel_w(c)); - /* Move the front facing value into FOGC.y if it's needed. */ - if (c->fp->program.UsesFrontFacing) { - emit_op(c, - WM_FRONTFACING, - dst_mask(dst, WRITEMASK_Y), - 0, - src_undef(), - src_undef(), - src_undef()); - } else { - emit_op(c, - OPCODE_MOV, - dst_mask(dst, WRITEMASK_Y), - 0, - src_swizzle1(interp, SWIZZLE_ZERO), - src_undef(), - src_undef()); - } + emit_op(c, + OPCODE_MOV, + dst_mask(dst, WRITEMASK_YZW), + 0, + src_swizzle(interp, + SWIZZLE_ZERO, + SWIZZLE_ZERO, + SWIZZLE_ZERO, + SWIZZLE_ONE), + src_undef(), + src_undef()); + break; + + case FRAG_ATTRIB_FACE: + /* XXX review/test this case */ + emit_op(c, + WM_FRONTFACING, + dst_mask(dst, WRITEMASK_X), + 0, + src_undef(), + src_undef(), + src_undef()); + break; + + case FRAG_ATTRIB_PNTC: + /* XXX review/test this case */ + emit_op(c, + WM_PINTERP, + dst_mask(dst, WRITEMASK_XY), + 0, + interp, + deltas, + get_pixel_w(c)); - /* Should do the PointCoord thing here. */ emit_op(c, OPCODE_MOV, dst_mask(dst, WRITEMASK_ZW), @@ -413,6 +479,7 @@ static void emit_interp( struct brw_wm_compile *c, src_undef(), src_undef()); break; + default: emit_op(c, WM_PINTERP, @@ -631,6 +698,8 @@ static void precalc_tex( struct brw_wm_compile *c, struct prog_dst_register tmpcoord; const GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]; + assert(unit < BRW_MAX_TEX_UNIT); + if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) { struct prog_instruction *out; struct prog_dst_register tmp0 = get_temp(c); @@ -671,7 +740,7 @@ static void precalc_tex( struct brw_wm_compile *c, /* tmp0 = 1 / tmp1 */ emit_op(c, OPCODE_RCP, - tmp0, + dst_mask(tmp0, WRITEMASK_X), 0, tmp1src, src_undef(), @@ -682,7 +751,7 @@ static void precalc_tex( struct brw_wm_compile *c, tmpcoord, 0, src0, - tmp0src, + src_swizzle1(tmp0src, SWIZZLE_X), src_undef()); release_temp(c, tmp0); @@ -705,7 +774,11 @@ static void precalc_tex( struct brw_wm_compile *c, tmpcoord, 0, inst->SrcReg[0], - scale, + src_swizzle(scale, + SWIZZLE_X, + SWIZZLE_Y, + SWIZZLE_ONE, + SWIZZLE_ONE), src_undef()); coord = src_reg_from_dst(tmpcoord); @@ -1029,6 +1102,7 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) c->delta_xy = src_undef(); c->pixel_w = src_undef(); c->nr_fp_insns = 0; + c->fp->tex_units_used = 0x0; /* Emit preamble instructions. This is where special instructions such as * WM_CINTERP, WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to @@ -1096,6 +1170,7 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) case OPCODE_TXB: out = emit_insn(c, inst); out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit]; + assert(out->TexSrcUnit < BRW_MAX_TEX_UNIT); break; case OPCODE_XPD: @@ -1122,9 +1197,11 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) break; case OPCODE_PRINT: break; - default: - emit_insn(c, inst); + if (brw_wm_is_scalar_result(inst->Opcode)) + emit_scalar_insn(c, inst); + else + emit_insn(c, inst); break; } } diff --git a/i965/brw_wm_glsl.c b/i965/brw_wm_glsl.c index a907e1b..7c210ab 100644 --- a/i965/brw_wm_glsl.c +++ b/i965/brw_wm_glsl.c @@ -1,5 +1,7 @@ #include "main/macros.h" #include "shader/prog_parameter.h" +#include "shader/prog_print.h" +#include "shader/prog_optimize.h" #include "brw_context.h" #include "brw_eu.h" #include "brw_wm.h" @@ -8,6 +10,9 @@ enum _subroutine { SUB_NOISE1, SUB_NOISE2, SUB_NOISE3, SUB_NOISE4 }; +static struct brw_reg get_dst_reg(struct brw_wm_compile *c, + const struct prog_instruction *inst, + GLuint component); /** * Determine if the given fragment program uses GLSL features such @@ -20,8 +25,8 @@ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp) for (i = 0; i < fp->Base.NumInstructions; i++) { const struct prog_instruction *inst = &fp->Base.Instructions[i]; switch (inst->Opcode) { + case OPCODE_ARL: case OPCODE_IF: - case OPCODE_TRUNC: case OPCODE_ENDIF: case OPCODE_CAL: case OPCODE_BRK: @@ -42,6 +47,83 @@ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp) } + +static void +reclaim_temps(struct brw_wm_compile *c); + + +/** Mark GRF register as used. */ +static void +prealloc_grf(struct brw_wm_compile *c, int r) +{ + c->used_grf[r] = GL_TRUE; +} + + +/** Mark given GRF register as not in use. */ +static void +release_grf(struct brw_wm_compile *c, int r) +{ + /*assert(c->used_grf[r]);*/ + c->used_grf[r] = GL_FALSE; + c->first_free_grf = MIN2(c->first_free_grf, r); +} + + +/** Return index of a free GRF, mark it as used. */ +static int +alloc_grf(struct brw_wm_compile *c) +{ + GLuint r; + for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) { + if (!c->used_grf[r]) { + c->used_grf[r] = GL_TRUE; + c->first_free_grf = r + 1; /* a guess */ + return r; + } + } + + /* no free temps, try to reclaim some */ + reclaim_temps(c); + c->first_free_grf = 0; + + /* try alloc again */ + for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) { + if (!c->used_grf[r]) { + c->used_grf[r] = GL_TRUE; + c->first_free_grf = r + 1; /* a guess */ + return r; + } + } + + for (r = 0; r < BRW_WM_MAX_GRF; r++) { + assert(c->used_grf[r]); + } + + /* really, no free GRF regs found */ + if (!c->out_of_regs) { + /* print warning once per compilation */ + _mesa_warning(NULL, "i965: ran out of registers for fragment program"); + c->out_of_regs = GL_TRUE; + } + + return -1; +} + + +/** Return number of GRF registers used */ +static int +num_grf_used(const struct brw_wm_compile *c) +{ + int r; + for (r = BRW_WM_MAX_GRF - 1; r >= 0; r--) + if (c->used_grf[r]) + return r + 1; + return 0; +} + + + /** * Record the mapping of a Mesa register to a hardware register. */ @@ -52,27 +134,26 @@ static void set_reg(struct brw_wm_compile *c, int file, int index, c->wm_regs[file][index][component].inited = GL_TRUE; } -/** - * Examine instruction's write mask to find index of first component - * enabled for writing. - */ -static int get_scalar_dst_index(const struct prog_instruction *inst) -{ - int i; - for (i = 0; i < 4; i++) - if (inst->DstReg.WriteMask & (1<<i)) - break; - return i; -} - static struct brw_reg alloc_tmp(struct brw_wm_compile *c) { struct brw_reg reg; - if(c->tmp_index == c->tmp_max) - c->tmp_regs[ c->tmp_max++ ] = c->reg_index++; - + + /* if we need to allocate another temp, grow the tmp_regs[] array */ + if (c->tmp_index == c->tmp_max) { + int r = alloc_grf(c); + if (r < 0) { + /*printf("Out of temps in %s\n", __FUNCTION__);*/ + r = 50; /* XXX random register! */ + } + c->tmp_regs[ c->tmp_max++ ] = r; + } + + /* form the GRF register */ reg = brw_vec8_grf(c->tmp_regs[ c->tmp_index++ ], 0); + /*printf("alloc_temp %d\n", reg.nr);*/ + assert(reg.nr < BRW_WM_MAX_GRF); return reg; + } /** @@ -130,35 +211,29 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component, return brw_null_reg(); } + assert(index < 256); + assert(component < 4); + /* see if we've already allocated a HW register for this Mesa register */ if (c->wm_regs[file][index][component].inited) { - /* yes, re-use */ - reg = c->wm_regs[file][index][component].reg; + /* yes, re-use */ + reg = c->wm_regs[file][index][component].reg; } else { /* no, allocate new register */ - reg = brw_vec8_grf(c->reg_index, 0); - } + int grf = alloc_grf(c); + /*printf("alloc grf %d for reg %d:%d.%d\n", grf, file, index, component);*/ + if (grf < 0) { + /* totally out of temps */ + grf = 51; /* XXX random register! */ + } - /* if this is a new register allocation, record it in the table */ - if (!c->wm_regs[file][index][component].inited) { - set_reg(c, file, index, component, reg); - c->reg_index++; - } + reg = brw_vec8_grf(grf, 0); + /*printf("Alloc new grf %d for %d.%d\n", reg.nr, index, component);*/ - if (c->reg_index >= BRW_WM_MAX_GRF - 12) { - /* ran out of temporary registers! */ -#if 1 - /* This is a big hack for now. - * Return bad register index, just don't hang the GPU. - */ - _mesa_fprintf(stderr, "out of regs %d\n", c->reg_index); - c->reg_index = BRW_WM_MAX_GRF - 13; -#else - return brw_null_reg(); -#endif + set_reg(c, file, index, component, reg); } - + if (neg & (1 << component)) { reg = negate(reg); } @@ -168,6 +243,46 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component, } + +/** + * This is called if we run out of GRF registers. Examine the live intervals + * of temp regs in the program and free those which won't be used again. + */ +static void +reclaim_temps(struct brw_wm_compile *c) +{ + GLint intBegin[MAX_PROGRAM_TEMPS]; + GLint intEnd[MAX_PROGRAM_TEMPS]; + int index; + + /*printf("Reclaim temps:\n");*/ + + _mesa_find_temp_intervals(c->prog_instructions, c->nr_fp_insns, + intBegin, intEnd); + + for (index = 0; index < MAX_PROGRAM_TEMPS; index++) { + if (intEnd[index] != -1 && intEnd[index] < c->cur_inst) { + /* program temp[i] can be freed */ + int component; + /*printf(" temp[%d] is dead\n", index);*/ + for (component = 0; component < 4; component++) { + if (c->wm_regs[PROGRAM_TEMPORARY][index][component].inited) { + int r = c->wm_regs[PROGRAM_TEMPORARY][index][component].reg.nr; + release_grf(c, r); + /* + printf(" Reclaim temp %d, reg %d at inst %d\n", + index, r, c->cur_inst); + */ + c->wm_regs[PROGRAM_TEMPORARY][index][component].inited = GL_FALSE; + } + } + } + } +} + + + + /** * Preallocate registers. This sets up the Mesa to hardware register * mapping for certain registers, such as constants (uniforms/state vars) @@ -179,6 +294,10 @@ static void prealloc_reg(struct brw_wm_compile *c) struct brw_reg reg; int urb_read_length = 0; GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted | c->fp_deriv_emitted; + GLuint reg_index = 0; + + memset(c->used_grf, GL_FALSE, sizeof(c->used_grf)); + c->first_free_grf = 0; for (i = 0; i < 4; i++) { if (i < c->key.nr_depth_regs) @@ -187,14 +306,20 @@ static void prealloc_reg(struct brw_wm_compile *c) reg = brw_vec8_grf(0, 0); set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg); } - c->reg_index += 2 * c->key.nr_depth_regs; + reg_index += 2 * c->key.nr_depth_regs; /* constants */ { - const int nr_params = c->fp->program.Base.Parameters->NumParameters; + const GLuint nr_params = c->fp->program.Base.Parameters->NumParameters; + const GLuint nr_temps = c->fp->program.Base.NumTemporaries; /* use a real constant buffer, or just use a section of the GRF? */ - c->fp->use_const_buffer = GL_FALSE; /* (nr_params > 8);*/ + /* XXX this heuristic may need adjustment... */ + if ((nr_params + nr_temps) * 4 + reg_index > 80) + c->fp->use_const_buffer = GL_TRUE; + else + c->fp->use_const_buffer = GL_FALSE; + /*printf("WM use_const_buffer = %d\n", c->fp->use_const_buffer);*/ if (c->fp->use_const_buffer) { /* We'll use a real constant buffer and fetch constants from @@ -216,7 +341,7 @@ static void prealloc_reg(struct brw_wm_compile *c) for (i = 0; i < nr_params; i++) { /* loop over XYZW channels */ for (j = 0; j < 4; j++, index++) { - reg = brw_vec1_grf(c->reg_index + index / 8, index % 8); + reg = brw_vec1_grf(reg_index + index / 8, index % 8); /* Save pointer to parameter/constant value. * Constants will be copied in prepare_constant_buffer() */ @@ -226,7 +351,7 @@ static void prealloc_reg(struct brw_wm_compile *c) } /* number of constant regs used (each reg is float[8]) */ c->nr_creg = 2 * ((4 * nr_params + 15) / 16); - c->reg_index += c->nr_creg; + reg_index += c->nr_creg; } } @@ -242,23 +367,52 @@ static void prealloc_reg(struct brw_wm_compile *c) fp_input = -1; if (fp_input >= 0 && inputs & (1 << fp_input)) { - urb_read_length = c->reg_index; - reg = brw_vec8_grf(c->reg_index, 0); + urb_read_length = reg_index; + reg = brw_vec8_grf(reg_index, 0); for (j = 0; j < 4; j++) set_reg(c, PROGRAM_PAYLOAD, fp_input, j, reg); } if (c->key.vp_outputs_written & (1 << i)) { - c->reg_index += 2; + reg_index += 2; } } c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2; c->prog_data.urb_read_length = urb_read_length; c->prog_data.curb_read_length = c->nr_creg; - c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); - c->reg_index++; - c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); - c->reg_index += 2; + c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0); + reg_index++; + c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0); + reg_index += 2; + + /* mark GRF regs [0..reg_index-1] as in-use */ + for (i = 0; i < reg_index; i++) + prealloc_grf(c, i); + + /* Don't use GRF 126, 127. Using them seems to lead to GPU lock-ups */ + prealloc_grf(c, 126); + prealloc_grf(c, 127); + + for (i = 0; i < c->nr_fp_insns; i++) { + const struct prog_instruction *inst = &c->prog_instructions[i]; + struct brw_reg dst[4]; + + switch (inst->Opcode) { + case OPCODE_TEX: + case OPCODE_TXB: + /* Allocate the channels of texture results contiguously, + * since they are written out that way by the sampler unit. + */ + for (j = 0; j < 4; j++) { + dst[j] = get_dst_reg(c, inst, j); + if (j != 0) + assert(dst[j].nr == dst[j - 1].nr + 1); + } + break; + default: + break; + } + } /* An instruction may reference up to three constants. * They'll be found in these registers. @@ -267,12 +421,12 @@ static void prealloc_reg(struct brw_wm_compile *c) if (c->fp->use_const_buffer) { for (i = 0; i < 3; i++) { c->current_const[i].index = -1; - c->current_const[i].reg = alloc_tmp(c); + c->current_const[i].reg = brw_vec8_grf(alloc_grf(c), 0); } } #if 0 printf("USE CONST BUFFER? %d\n", c->fp->use_const_buffer); - printf("AFTER PRE_ALLOC, reg_index = %d\n", c->reg_index); + printf("AFTER PRE_ALLOC, reg_index = %d\n", reg_index); #endif } @@ -376,6 +530,14 @@ static struct brw_reg get_src_reg(struct brw_wm_compile *c, const GLuint nr = 1; const GLuint component = GET_SWZ(src->Swizzle, channel); + /* Extended swizzle terms */ + if (component == SWIZZLE_ZERO) { + return brw_imm_f(0.0F); + } + else if (component == SWIZZLE_ONE) { + return brw_imm_f(1.0F); + } + if (c->fp->use_const_buffer && (src->File == PROGRAM_STATE_VAR || src->File == PROGRAM_CONSTANT || @@ -489,23 +651,6 @@ static void invoke_subroutine( struct brw_wm_compile *c, } } -static void emit_abs( struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - int i; - struct brw_compile *p = &c->func; - brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); - for (i = 0; i < 4; i++) { - if (inst->DstReg.WriteMask & (1<<i)) { - struct brw_reg src, dst; - dst = get_dst_reg(c, inst, i); - src = get_src_reg(c, inst, 0, i); - brw_MOV(p, dst, brw_abs(src)); - } - } - brw_set_saturate(p, 0); -} - static void emit_trunc( struct brw_wm_compile *c, const struct prog_instruction *inst) { @@ -673,27 +818,26 @@ static void emit_fb_write(struct brw_wm_compile *c, } if (c->key.dest_depth_reg) { - GLuint comp = c->key.dest_depth_reg / 2; - GLuint off = c->key.dest_depth_reg % 2; + const GLuint comp = c->key.dest_depth_reg / 2; + const GLuint off = c->key.dest_depth_reg % 2; - assert(comp == 1); - assert(off == 0); -#if 0 - /* XXX do we need this code? comp always 1, off always 0, it seems */ if (off != 0) { + /* XXX this code needs review/testing */ + struct brw_reg arg1_0 = get_src_reg(c, inst, 1, comp); + struct brw_reg arg1_1 = get_src_reg(c, inst, 1, comp+1); + brw_push_insn_state(p); brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1)); + brw_MOV(p, brw_message_reg(nr), offset(arg1_0, 1)); /* 2nd half? */ - brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]); + brw_MOV(p, brw_message_reg(nr+1), arg1_1); brw_pop_insn_state(p); } else -#endif { - struct brw_reg src = get_src_reg(c, inst, 1, 1); - brw_MOV(p, brw_message_reg(nr), src); + struct brw_reg src = get_src_reg(c, inst, 1, 1); + brw_MOV(p, brw_message_reg(nr), src); } nr += 2; } @@ -882,12 +1026,20 @@ static void emit_dp3(struct brw_wm_compile *c, struct brw_reg src0[3], src1[3], dst; int i; struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + + if (!(mask & WRITEMASK_XYZW)) + return; + + assert(is_power_of_two(mask & WRITEMASK_XYZW)); + for (i = 0; i < 3; i++) { src0[i] = get_src_reg(c, inst, 0, i); src1[i] = get_src_reg_imm(c, inst, 1, i); } - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); + dst = get_dst_reg(c, inst, dst_chan); brw_MUL(p, brw_null_reg(), src0[0], src1[0]); brw_MAC(p, brw_null_reg(), src0[1], src1[1]); brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); @@ -901,11 +1053,19 @@ static void emit_dp4(struct brw_wm_compile *c, struct brw_reg src0[4], src1[4], dst; int i; struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + + if (!(mask & WRITEMASK_XYZW)) + return; + + assert(is_power_of_two(mask & WRITEMASK_XYZW)); + for (i = 0; i < 4; i++) { src0[i] = get_src_reg(c, inst, 0, i); src1[i] = get_src_reg_imm(c, inst, 1, i); } - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); + dst = get_dst_reg(c, inst, dst_chan); brw_MUL(p, brw_null_reg(), src0[0], src1[0]); brw_MAC(p, brw_null_reg(), src0[1], src1[1]); brw_MAC(p, brw_null_reg(), src0[2], src1[2]); @@ -920,11 +1080,19 @@ static void emit_dph(struct brw_wm_compile *c, struct brw_reg src0[4], src1[4], dst; int i; struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + + if (!(mask & WRITEMASK_XYZW)) + return; + + assert(is_power_of_two(mask & WRITEMASK_XYZW)); + for (i = 0; i < 4; i++) { src0[i] = get_src_reg(c, inst, 0, i); src1[i] = get_src_reg_imm(c, inst, 1, i); } - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); + dst = get_dst_reg(c, inst, dst_chan); brw_MUL(p, brw_null_reg(), src0[0], src1[0]); brw_MAC(p, brw_null_reg(), src0[1], src1[1]); brw_MAC(p, dst, src0[2], src1[2]); @@ -942,37 +1110,28 @@ static void emit_math1(struct brw_wm_compile *c, const struct prog_instruction *inst, GLuint func) { struct brw_compile *p = &c->func; - struct brw_reg src0, dst, tmp; - const int mark = mark_tmps( c ); - int i; + struct brw_reg src0, dst; + GLuint mask = inst->DstReg.WriteMask; + int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + + if (!(mask & WRITEMASK_XYZW)) + return; - tmp = alloc_tmp(c); + assert(is_power_of_two(mask & WRITEMASK_XYZW)); /* Get first component of source register */ + dst = get_dst_reg(c, inst, dst_chan); src0 = get_src_reg(c, inst, 0, 0); - /* tmp = func(src0) */ brw_MOV(p, brw_message_reg(2), src0); brw_math(p, - tmp, + dst, func, (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, 2, brw_null_reg(), BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL); - - /*tmp.dw1.bits.swizzle = SWIZZLE_XXXX;*/ - - /* replicate tmp value across enabled dest channels */ - for (i = 0; i < 4; i++) { - if (inst->DstReg.WriteMask & (1 << i)) { - dst = get_dst_reg(c, inst, i); - brw_MOV(p, dst, tmp); - } - } - - release_tmps(c, mark); } static void emit_rcp(struct brw_wm_compile *c, @@ -1043,24 +1202,6 @@ static void emit_arl(struct brw_wm_compile *c, brw_set_saturate(p, 0); } -static void emit_sub(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - struct brw_reg src0, src1, dst; - GLuint mask = inst->DstReg.WriteMask; - int i; - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - for (i = 0 ; i < 4; i++) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i); - src0 = get_src_reg(c, inst, 0, i); - src1 = get_src_reg_imm(c, inst, 1, i); - brw_ADD(p, dst, src0, negate(src1)); - } - } - brw_set_saturate(p, 0); -} static void emit_mul(struct brw_wm_compile *c, const struct prog_instruction *inst) @@ -1172,7 +1313,15 @@ static void emit_pow(struct brw_wm_compile *c, { struct brw_compile *p = &c->func; struct brw_reg dst, src0, src1; - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); + GLuint mask = inst->DstReg.WriteMask; + int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + + if (!(mask & WRITEMASK_XYZW)) + return; + + assert(is_power_of_two(mask & WRITEMASK_XYZW)); + + dst = get_dst_reg(c, inst, dst_chan); src0 = get_src_reg_imm(c, inst, 0, 0); src1 = get_src_reg_imm(c, inst, 1, 0); @@ -2474,8 +2623,12 @@ static void emit_txb(struct brw_wm_compile *c, { struct brw_compile *p = &c->func; struct brw_reg dst[4], src[4], payload_reg; - GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]; + /* Note: TexSrcUnit was already looked up through SamplerTextures[] */ + const GLuint unit = inst->TexSrcUnit; GLuint i; + GLuint msg_type; + + assert(unit < BRW_MAX_TEX_UNIT); payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0); @@ -2496,14 +2649,26 @@ static void emit_txb(struct brw_wm_compile *c, brw_MOV(p, brw_message_reg(3), src[1]); brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); break; - default: + case TEXTURE_3D_INDEX: + case TEXTURE_CUBE_INDEX: brw_MOV(p, brw_message_reg(2), src[0]); brw_MOV(p, brw_message_reg(3), src[1]); brw_MOV(p, brw_message_reg(4), src[2]); break; + default: + /* invalid target */ + abort(); } brw_MOV(p, brw_message_reg(5), src[3]); /* bias */ brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); /* ref (unused?) */ + + if (BRW_IS_IGDNG(p->brw)) { + msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG; + } else { + /* Does it work well on SIMD8? */ + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; + } + brw_SAMPLE(p, retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */ 1, /* msg_reg_nr */ @@ -2511,10 +2676,12 @@ static void emit_txb(struct brw_wm_compile *c, SURF_INDEX_TEXTURE(unit), unit, /* sampler */ inst->DstReg.WriteMask, /* writemask */ - BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS, /* msg_type */ + msg_type, /* msg_type */ 4, /* response_length */ 4, /* msg_length */ - 0); /* eot */ + 0, /* eot */ + 1, + BRW_SAMPLER_SIMD_MODE_SIMD8); } @@ -2523,11 +2690,15 @@ static void emit_tex(struct brw_wm_compile *c, { struct brw_compile *p = &c->func; struct brw_reg dst[4], src[4], payload_reg; - GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]; + /* Note: TexSrcUnit was already looked up through SamplerTextures[] */ + const GLuint unit = inst->TexSrcUnit; GLuint msg_len; GLuint i, nr; GLuint emit; GLboolean shadow = (c->key.shadowtex_mask & (1<<unit)) ? 1 : 0; + GLuint msg_type; + + assert(unit < BRW_MAX_TEX_UNIT); payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0); @@ -2546,10 +2717,14 @@ static void emit_tex(struct brw_wm_compile *c, emit = WRITEMASK_XY; nr = 2; break; - default: + case TEXTURE_3D_INDEX: + case TEXTURE_CUBE_INDEX: emit = WRITEMASK_XYZ; nr = 3; break; + default: + /* invalid target */ + abort(); } msg_len = 1; @@ -2568,6 +2743,16 @@ static void emit_tex(struct brw_wm_compile *c, brw_MOV(p, brw_message_reg(6), src[2]); /* ref value / R coord */ } + if (BRW_IS_IGDNG(p->brw)) { + if (shadow) + msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG; + else + msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_IGDNG; + } else { + /* Does it work for shadow on SIMD8 ? */ + msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE; + } + brw_SAMPLE(p, retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */ 1, /* msg_reg_nr */ @@ -2575,10 +2760,12 @@ static void emit_tex(struct brw_wm_compile *c, SURF_INDEX_TEXTURE(unit), unit, /* sampler */ inst->DstReg.WriteMask, /* writemask */ - BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE, /* msg_type */ + msg_type, /* msg_type */ 4, /* response_length */ shadow ? 6 : 4, /* msg_length */ - 0); /* eot */ + 0, /* eot */ + 1, + BRW_SAMPLER_SIMD_MODE_SIMD8); if (shadow) brw_MOV(p, dst[3], brw_imm_f(1.0)); @@ -2595,15 +2782,15 @@ static void post_wm_emit( struct brw_wm_compile *c ) static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) { -#define MAX_IFSN 32 +#define MAX_IF_DEPTH 32 #define MAX_LOOP_DEPTH 32 - struct brw_instruction *if_inst[MAX_IFSN], *loop_inst[MAX_LOOP_DEPTH]; - struct brw_instruction *inst0, *inst1; - int i, if_insn = 0, loop_insn = 0; + struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH]; + GLuint i, if_depth = 0, loop_depth = 0; struct brw_compile *p = &c->func; struct brw_indirect stack_index = brw_indirect(0, 0); - c->reg_index = 0; + c->out_of_regs = GL_FALSE; + prealloc_reg(c); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack)); @@ -2611,6 +2798,8 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) for (i = 0; i < c->nr_fp_insns; i++) { const struct prog_instruction *inst = &c->prog_instructions[i]; + c->cur_inst = i; + #if 0 _mesa_printf("Inst %d: ", i); _mesa_print_instruction(inst); @@ -2653,18 +2842,12 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) case WM_FRONTFACING: emit_frontfacing(c, inst); break; - case OPCODE_ABS: - emit_abs(c, inst); - break; case OPCODE_ADD: emit_add(c, inst); break; case OPCODE_ARL: emit_arl(c, inst); break; - case OPCODE_SUB: - emit_sub(c, inst); - break; case OPCODE_FRC: emit_frc(c, inst); break; @@ -2770,15 +2953,15 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) emit_kil(c); break; case OPCODE_IF: - assert(if_insn < MAX_IFSN); - if_inst[if_insn++] = brw_IF(p, BRW_EXECUTE_8); + assert(if_depth < MAX_IF_DEPTH); + if_inst[if_depth++] = brw_IF(p, BRW_EXECUTE_8); break; case OPCODE_ELSE: - if_inst[if_insn-1] = brw_ELSE(p, if_inst[if_insn-1]); + if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]); break; case OPCODE_ENDIF: - assert(if_insn > 0); - brw_ENDIF(p, if_inst[--if_insn]); + assert(if_depth > 0); + brw_ENDIF(p, if_inst[--if_depth]); break; case OPCODE_BGNSUB: brw_save_label(p, inst->Comment, p->nr_insn); @@ -2812,7 +2995,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) break; case OPCODE_BGNLOOP: /* XXX may need to invalidate the current_constant regs */ - loop_inst[loop_insn++] = brw_DO(p, BRW_EXECUTE_8); + loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8); break; case OPCODE_BRK: brw_BREAK(p); @@ -2823,25 +3006,34 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; case OPCODE_ENDLOOP: - loop_insn--; - inst0 = inst1 = brw_WHILE(p, loop_inst[loop_insn]); - /* patch all the BREAK instructions from - last BEGINLOOP */ - while (inst0 > loop_inst[loop_insn]) { - inst0--; - if (inst0->header.opcode == BRW_OPCODE_BREAK) { - inst0->bits3.if_else.jump_count = inst1 - inst0 + 1; + { + struct brw_instruction *inst0, *inst1; + GLuint br = 1; + + if (BRW_IS_IGDNG(brw)) + br = 2; + + loop_depth--; + inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]); + /* patch all the BREAK/CONT instructions from last BGNLOOP */ + while (inst0 > loop_inst[loop_depth]) { + inst0--; + if (inst0->header.opcode == BRW_OPCODE_BREAK) { + inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); inst0->bits3.if_else.pop_count = 0; - } else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) { - inst0->bits3.if_else.jump_count = inst1 - inst0; + } + else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) { + inst0->bits3.if_else.jump_count = br * (inst1 - inst0); inst0->bits3.if_else.pop_count = 0; - } - } - break; + } + } + } + break; default: _mesa_printf("unsupported IR in fragment shader %d\n", inst->Opcode); } + if (inst->CondUpdate) brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); else @@ -2849,13 +3041,14 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) } post_wm_emit(c); - if (c->reg_index >= BRW_WM_MAX_GRF) { - _mesa_problem(NULL, "Ran out of registers in brw_wm_emit_glsl()"); - /* XXX we need to do some proper error recovery here */ + if (INTEL_DEBUG & DEBUG_WM) { + _mesa_printf("wm-native:\n"); + for (i = 0; i < p->nr_insn; i++) + brw_disasm(stderr, &p->store[i]); + _mesa_printf("\n"); } } - /** * Do GPU code generation for shaders that use GLSL features such as * flow control. Other shaders will be compiled with the @@ -2876,6 +3069,6 @@ void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) brw_wm_print_program(c, "brw_wm_glsl_emit done"); } - c->prog_data.total_grf = c->reg_index; + c->prog_data.total_grf = num_grf_used(c); c->prog_data.total_scratch = 0; } diff --git a/i965/brw_wm_iz.c b/i965/brw_wm_iz.c index bd60ac9..5e399ac 100644 --- a/i965/brw_wm_iz.c +++ b/i965/brw_wm_iz.c @@ -116,8 +116,13 @@ const struct { { C, 0, 1, 1, 1 } }; +/** + * \param line_aa AA_NEVER, AA_ALWAYS or AA_SOMETIMES + * \param lookup bitmask of IZ_* flags + */ void brw_wm_lookup_iz( GLuint line_aa, GLuint lookup, + GLboolean ps_uses_depth, struct brw_wm_prog_key *key ) { GLuint reg = 2; @@ -127,7 +132,7 @@ void brw_wm_lookup_iz( GLuint line_aa, if (lookup & IZ_PS_COMPUTES_DEPTH_BIT) key->computes_depth = 1; - if (wm_iz_table[lookup].sd_present) { + if (wm_iz_table[lookup].sd_present || ps_uses_depth) { key->source_depth_reg = reg; reg += 2; } diff --git a/i965/brw_wm_pass0.c b/i965/brw_wm_pass0.c index 9214276..6279258 100644 --- a/i965/brw_wm_pass0.c +++ b/i965/brw_wm_pass0.c @@ -257,34 +257,6 @@ static void pass0_set_dst( struct brw_wm_compile *c, } -static void pass0_set_dst_scalar( struct brw_wm_compile *c, - struct brw_wm_instruction *out, - const struct prog_instruction *inst, - GLuint writemask ) -{ - if (writemask) { - const struct prog_dst_register *dst = &inst->DstReg; - GLuint i; - - /* Compute only the first (X) value: - */ - out->writemask = WRITEMASK_X; - out->dst[0] = get_value(c); - - /* Update our tracking register file for all the components in - * writemask: - */ - for (i = 0; i < 4; i++) { - if (writemask & (1<<i)) { - pass0_set_fpreg_value(c, dst->File, dst->Index, i, out->dst[0]); - } - } - } - else - out->writemask = 0; -} - - static const struct brw_wm_ref *get_fp_src_reg_ref( struct brw_wm_compile *c, struct prog_src_register src, GLuint i ) @@ -363,10 +335,7 @@ translate_insn(struct brw_wm_compile *c, /* Dst: */ - if (brw_wm_is_scalar_result(out->opcode)) - pass0_set_dst_scalar(c, out, inst, writemask); - else - pass0_set_dst(c, out, inst, writemask); + pass0_set_dst(c, out, inst, writemask); } diff --git a/i965/brw_wm_pass1.c b/i965/brw_wm_pass1.c index ab9aa2f..3436a24 100644 --- a/i965/brw_wm_pass1.c +++ b/i965/brw_wm_pass1.c @@ -159,6 +159,7 @@ void brw_wm_pass1( struct brw_wm_compile *c ) case OPCODE_FRC: case OPCODE_MOV: case OPCODE_SWZ: + case OPCODE_TRUNC: read0 = writemask; break; diff --git a/i965/brw_wm_sampler_state.c b/i965/brw_wm_sampler_state.c index 3fc18ff..dff4665 100644 --- a/i965/brw_wm_sampler_state.c +++ b/i965/brw_wm_sampler_state.c @@ -103,6 +103,10 @@ struct wm_sampler_key { GLenum minfilter, magfilter; GLenum comparemode, comparefunc; dri_bo *sdc_bo; + + /** If target is cubemap, take context setting. + */ + GLboolean seamless_cube_map; } sampler[BRW_MAX_TEX_UNIT]; }; @@ -169,30 +173,33 @@ static void brw_update_sampler_state(struct wm_sampler_entry *key, } } - if (key->tex_target == GL_TEXTURE_CUBE_MAP && - (key->minfilter != GL_NEAREST || key->magfilter != GL_NEAREST)) { - /* If we're using anything but nearest sampling for a cube map, we - * need to set this wrap mode to avoid GPU lock-ups. - */ - sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CUBE; - sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CUBE; - sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CUBE; - } - else if (key->tex_target == GL_TEXTURE_1D) { + sampler->ss1.r_wrap_mode = translate_wrap_mode(key->wrap_r); + sampler->ss1.s_wrap_mode = translate_wrap_mode(key->wrap_s); + sampler->ss1.t_wrap_mode = translate_wrap_mode(key->wrap_t); + + /* Cube-maps on 965 and later must use the same wrap mode for all 3 + * coordinate dimensions. Futher, only CUBE and CLAMP are valid. + */ + if (key->tex_target == GL_TEXTURE_CUBE_MAP) { + if (key->seamless_cube_map && + (key->minfilter != GL_NEAREST || key->magfilter != GL_NEAREST)) { + sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CUBE; + sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CUBE; + sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CUBE; + } else { + sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP; + sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP; + sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP; + } + } else if (key->tex_target == GL_TEXTURE_1D) { /* There's a bug in 1D texture sampling - it actually pays * attention to the wrap_t value, though it should not. * Override the wrap_t value here to GL_REPEAT to keep * any nonexistent border pixels from floating in. */ - sampler->ss1.r_wrap_mode = translate_wrap_mode(key->wrap_r); - sampler->ss1.s_wrap_mode = translate_wrap_mode(key->wrap_s); sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP; } - else { - sampler->ss1.r_wrap_mode = translate_wrap_mode(key->wrap_r); - sampler->ss1.s_wrap_mode = translate_wrap_mode(key->wrap_s); - sampler->ss1.t_wrap_mode = translate_wrap_mode(key->wrap_t); - } + /* Set shadow function: */ @@ -249,6 +256,9 @@ brw_wm_sampler_populate_key(struct brw_context *brw, entry->tex_target = texObj->Target; + entry->seamless_cube_map = (texObj->Target == GL_TEXTURE_CUBE_MAP) + ? ctx->Texture.CubeMapSeamless : GL_FALSE; + entry->wrap_r = texObj->WrapR; entry->wrap_s = texObj->WrapS; entry->wrap_t = texObj->WrapT; diff --git a/i965/brw_wm_state.c b/i965/brw_wm_state.c index 67b4117..39f8c6d 100644 --- a/i965/brw_wm_state.c +++ b/i965/brw_wm_state.c @@ -71,7 +71,9 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) key->max_threads = 1; else { /* WM maximum threads is number of EUs times number of threads per EU. */ - if (BRW_IS_G4X(brw)) + if (BRW_IS_IGDNG(brw)) + key->max_threads = 12 * 6; + else if (BRW_IS_G4X(brw)) key->max_threads = 10 * 5; else key->max_threads = 8 * 4; @@ -141,7 +143,11 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, wm.thread0.kernel_start_pointer = brw->wm.prog_bo->offset >> 6; /* reloc */ wm.thread1.depth_coef_urb_read_offset = 1; wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; - wm.thread1.binding_table_entry_count = key->nr_surfaces; + + if (BRW_IS_IGDNG(brw)) + wm.thread1.binding_table_entry_count = 0; /* hardware requirement */ + else + wm.thread1.binding_table_entry_count = key->nr_surfaces; if (key->total_scratch != 0) { wm.thread2.scratch_space_base_pointer = @@ -158,7 +164,11 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, wm.thread3.const_urb_entry_read_length = key->curb_entry_read_length; wm.thread3.const_urb_entry_read_offset = key->curbe_offset * 2; - wm.wm4.sampler_count = (key->sampler_count + 1) / 4; + if (BRW_IS_IGDNG(brw)) + wm.wm4.sampler_count = 0; /* hardware requirement */ + else + wm.wm4.sampler_count = (key->sampler_count + 1) / 4; + if (brw->wm.sampler_bo != NULL) { /* reloc */ wm.wm4.sampler_state_pointer = brw->wm.sampler_bo->offset >> 5; diff --git a/i965/brw_wm_surface_state.c b/i965/brw_wm_surface_state.c index 805df8a..3dcc592 100644 --- a/i965/brw_wm_surface_state.c +++ b/i965/brw_wm_surface_state.c @@ -176,22 +176,6 @@ static GLuint translate_tex_format( GLuint mesa_format, GLenum internal_format, } } - -/** - * Use same key for WM and VS surfaces. - */ -struct brw_surface_key { - GLenum target, depthmode; - dri_bo *bo; - GLint format, internal_format; - GLint first_level, last_level; - GLint width, height, depth; - GLint pitch, cpp; - uint32_t tiling; - GLuint offset; -}; - - static void brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling) { @@ -268,7 +252,7 @@ brw_create_texture_surface( struct brw_context *brw, surf.ss0.cube_neg_z = 1; } - bo = brw_upload_cache(&brw->cache, BRW_SS_SURFACE, + bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE, key, sizeof(*key), &key->bo, key->bo ? 1 : 0, &surf, sizeof(surf), @@ -321,10 +305,11 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) key.tiling = intelObj->mt->region->tiling; dri_bo_unreference(brw->wm.surf_bo[surf]); - brw->wm.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, - &key, sizeof(key), - &key.bo, key.bo ? 1 : 0, - NULL); + brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache, + BRW_SS_SURFACE, + &key, sizeof(key), + &key.bo, key.bo ? 1 : 0, + NULL); if (brw->wm.surf_bo[surf] == NULL) { brw->wm.surf_bo[surf] = brw_create_texture_surface(brw, &key); } @@ -336,7 +321,7 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) * Create the constant buffer surface. Vertex/fragment shader constants will be * read from this buffer with Data Port Read instructions/messages. */ -static dri_bo * +dri_bo * brw_create_constant_surface( struct brw_context *brw, struct brw_surface_key *key ) { @@ -362,7 +347,7 @@ brw_create_constant_surface( struct brw_context *brw, surf.ss3.pitch = (key->pitch * key->cpp) - 1; /* ignored?? */ brw_set_surface_tiling(&surf, key->tiling); /* tiling now allowed */ - bo = brw_upload_cache(&brw->cache, BRW_SS_SURFACE, + bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE, key, sizeof(*key), &key->bo, key->bo ? 1 : 0, &surf, sizeof(surf), @@ -380,39 +365,70 @@ brw_create_constant_surface( struct brw_context *brw, return bo; } +/* Creates a new WM constant buffer reflecting the current fragment program's + * constants, if needed by the fragment program. + * + * Otherwise, constants go through the CURBEs using the brw_constant_buffer + * state atom. + */ +static drm_intel_bo * +brw_wm_update_constant_buffer(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + struct brw_fragment_program *fp = + (struct brw_fragment_program *) brw->fragment_program; + const struct gl_program_parameter_list *params = fp->program.Base.Parameters; + const int size = params->NumParameters * 4 * sizeof(GLfloat); + drm_intel_bo *const_buffer; + + /* BRW_NEW_FRAGMENT_PROGRAM */ + if (!fp->use_const_buffer) + return NULL; + + const_buffer = drm_intel_bo_alloc(intel->bufmgr, "fp_const_buffer", + size, 64); + + /* _NEW_PROGRAM_CONSTANTS */ + dri_bo_subdata(const_buffer, 0, size, params->ParameterValues); + + return const_buffer; +} /** * Update the surface state for a WM constant buffer. * The constant buffer will be (re)allocated here if needed. */ -static dri_bo * +static void brw_update_wm_constant_surface( GLcontext *ctx, - GLuint surf, - dri_bo *const_buffer, - const struct gl_program_parameter_list *params) + GLuint surf) { struct brw_context *brw = brw_context(ctx); struct brw_surface_key key; - struct intel_context *intel = &brw->intel; - const int size = params->NumParameters * 4 * sizeof(GLfloat); + struct brw_fragment_program *fp = + (struct brw_fragment_program *) brw->fragment_program; + const struct gl_program_parameter_list *params = + fp->program.Base.Parameters; - /* free old const buffer if too small */ - if (const_buffer && const_buffer->size < size) { - dri_bo_unreference(const_buffer); - const_buffer = NULL; - } + /* If we're in this state update atom, we need to update WM constants, so + * free the old buffer and create a new one for the new contents. + */ + dri_bo_unreference(fp->const_buffer); + fp->const_buffer = brw_wm_update_constant_buffer(brw); - /* alloc new buffer if needed */ - if (!const_buffer) { - const_buffer = - drm_intel_bo_alloc(intel->bufmgr, "fp_const_buffer", size, 64); + /* If there's no constant buffer, then no surface BO is needed to point at + * it. + */ + if (fp->const_buffer == 0) { + drm_intel_bo_unreference(brw->wm.surf_bo[surf]); + brw->wm.surf_bo[surf] = NULL; + return; } memset(&key, 0, sizeof(key)); key.format = MESA_FORMAT_RGBA_FLOAT32; key.internal_format = GL_RGBA; - key.bo = const_buffer; + key.bo = fp->const_buffer; key.depthmode = GL_NONE; key.pitch = params->NumParameters; key.width = params->NumParameters; @@ -427,77 +443,59 @@ brw_update_wm_constant_surface( GLcontext *ctx, */ dri_bo_unreference(brw->wm.surf_bo[surf]); - brw->wm.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, + brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache, + BRW_SS_SURFACE, &key, sizeof(key), &key.bo, key.bo ? 1 : 0, NULL); if (brw->wm.surf_bo[surf] == NULL) { brw->wm.surf_bo[surf] = brw_create_constant_surface(brw, &key); } - - return const_buffer; + brw->state.dirty.brw |= BRW_NEW_WM_SURFACES; } - /** - * Update the surface state for a VS constant buffer. - * The constant buffer will be (re)allocated here if needed. + * Updates surface / buffer for fragment shader constant buffer, if + * one is required. + * + * This consumes the state updates for the constant buffer, and produces + * BRW_NEW_WM_SURFACES to get picked up by brw_prepare_wm_surfaces for + * inclusion in the binding table. */ -static dri_bo * -brw_update_vs_constant_surface( GLcontext *ctx, - GLuint surf, - dri_bo *const_buffer, - const struct gl_program_parameter_list *params) +static void prepare_wm_constant_surface(struct brw_context *brw ) { - struct brw_context *brw = brw_context(ctx); - struct brw_surface_key key; - struct intel_context *intel = &brw->intel; - const int size = params->NumParameters * 4 * sizeof(GLfloat); - - assert(surf == 0); - - /* free old const buffer if too small */ - if (const_buffer && const_buffer->size < size) { - dri_bo_unreference(const_buffer); - const_buffer = NULL; - } - - /* alloc new buffer if needed */ - if (!const_buffer) { - const_buffer = - drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer", size, 64); - } - - memset(&key, 0, sizeof(key)); - - key.format = MESA_FORMAT_RGBA_FLOAT32; - key.internal_format = GL_RGBA; - key.bo = const_buffer; - key.depthmode = GL_NONE; - key.pitch = params->NumParameters; - key.width = params->NumParameters; - key.height = 1; - key.depth = 1; - key.cpp = 16; + GLcontext *ctx = &brw->intel.ctx; + struct brw_fragment_program *fp = + (struct brw_fragment_program *) brw->fragment_program; + GLuint surf = SURF_INDEX_FRAG_CONST_BUFFER; - /* - printf("%s:\n", __FUNCTION__); - printf(" width %d height %d depth %d cpp %d pitch %d\n", - key.width, key.height, key.depth, key.cpp, key.pitch); - */ + drm_intel_bo_unreference(fp->const_buffer); + fp->const_buffer = brw_wm_update_constant_buffer(brw); - dri_bo_unreference(brw->vs.surf_bo[surf]); - brw->vs.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, - &key, sizeof(key), - &key.bo, key.bo ? 1 : 0, - NULL); - if (brw->vs.surf_bo[surf] == NULL) { - brw->vs.surf_bo[surf] = brw_create_constant_surface(brw, &key); + /* If there's no constant buffer, then no surface BO is needed to point at + * it. + */ + if (fp->const_buffer == 0) { + if (brw->wm.surf_bo[surf] != NULL) { + drm_intel_bo_unreference(brw->wm.surf_bo[surf]); + brw->wm.surf_bo[surf] = NULL; + brw->state.dirty.brw |= BRW_NEW_WM_SURFACES; + } + return; } - return const_buffer; + brw_update_wm_constant_surface(ctx, surf); } +const struct brw_tracked_state brw_wm_constant_surface = { + .dirty = { + .mesa = (_NEW_PROGRAM_CONSTANTS), + .brw = (BRW_NEW_FRAGMENT_PROGRAM), + .cache = 0 + }, + .prepare = prepare_wm_constant_surface, +}; + /** * Sets up a surface state structure to point at the given region. @@ -507,7 +505,7 @@ brw_update_vs_constant_surface( GLcontext *ctx, static void brw_update_renderbuffer_surface(struct brw_context *brw, struct gl_renderbuffer *rb, - unsigned int unit, GLboolean cached) + unsigned int unit) { GLcontext *ctx = &brw->intel.ctx; dri_bo *region_bo = NULL; @@ -567,12 +565,11 @@ brw_update_renderbuffer_surface(struct brw_context *brw, ctx->Color.BlendEnabled); dri_bo_unreference(brw->wm.surf_bo[unit]); - brw->wm.surf_bo[unit] = NULL; - if (cached) - brw->wm.surf_bo[unit] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, - &key, sizeof(key), - ®ion_bo, 1, - NULL); + brw->wm.surf_bo[unit] = brw_search_cache(&brw->surface_cache, + BRW_SS_SURFACE, + &key, sizeof(key), + ®ion_bo, 1, + NULL); if (brw->wm.surf_bo[unit] == NULL) { struct brw_surface_state surf; @@ -581,7 +578,27 @@ brw_update_renderbuffer_surface(struct brw_context *brw, surf.ss0.surface_format = key.surface_format; surf.ss0.surface_type = key.surface_type; - surf.ss1.base_addr = key.draw_offset; + if (key.tiling == I915_TILING_NONE) { + surf.ss1.base_addr = key.draw_offset; + } else { + uint32_t tile_offset = key.draw_offset % 4096; + + surf.ss1.base_addr = key.draw_offset - tile_offset; + + assert(BRW_IS_G4X(brw) || tile_offset == 0); + if (BRW_IS_G4X(brw)) { + if (key.tiling == I915_TILING_X) { + /* Note that the low bits of these fields are missing, so + * there's the possibility of getting in trouble. + */ + surf.ss5.x_offset = (tile_offset % 512) / key.cpp / 4; + surf.ss5.y_offset = tile_offset / 512 / 2; + } else { + surf.ss5.x_offset = (tile_offset % 128) / key.cpp / 4; + surf.ss5.y_offset = tile_offset / 128 / 2; + } + } + } if (region_bo != NULL) surf.ss1.base_addr += region_bo->offset; /* reloc */ @@ -598,7 +615,8 @@ brw_update_renderbuffer_surface(struct brw_context *brw, surf.ss0.writedisable_alpha = !key.color_mask[3]; /* Key size will never match key size for textures, so we're safe. */ - brw->wm.surf_bo[unit] = brw_upload_cache(&brw->cache, BRW_SS_SURFACE, + brw->wm.surf_bo[unit] = brw_upload_cache(&brw->surface_cache, + BRW_SS_SURFACE, &key, sizeof(key), ®ion_bo, 1, &surf, sizeof(surf), @@ -611,7 +629,7 @@ brw_update_renderbuffer_surface(struct brw_context *brw, drm_intel_bo_emit_reloc(brw->wm.surf_bo[unit], offsetof(struct brw_surface_state, ss1), region_bo, - key.draw_offset, + surf.ss1.base_addr - region_bo->offset, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); } @@ -630,7 +648,7 @@ brw_wm_get_binding_table(struct brw_context *brw) assert(brw->wm.nr_surfaces <= BRW_WM_MAX_SURF); - bind_bo = brw_search_cache(&brw->cache, BRW_SS_SURF_BIND, + bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND, NULL, 0, brw->wm.surf_bo, brw->wm.nr_surfaces, NULL); @@ -646,7 +664,7 @@ brw_wm_get_binding_table(struct brw_context *brw) else data[i] = 0; - bind_bo = brw_upload_cache( &brw->cache, BRW_SS_SURF_BIND, + bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND, NULL, 0, brw->wm.surf_bo, brw->wm.nr_surfaces, data, data_size, @@ -682,27 +700,17 @@ static void prepare_wm_surfaces(struct brw_context *brw ) for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { brw_update_renderbuffer_surface(brw, ctx->DrawBuffer->_ColorDrawBuffers[i], - i, - GL_FALSE); + i); } } else { - brw_update_renderbuffer_surface(brw, NULL, 0, GL_TRUE); + brw_update_renderbuffer_surface(brw, NULL, 0); } old_nr_surfaces = brw->wm.nr_surfaces; brw->wm.nr_surfaces = MAX_DRAW_BUFFERS; - /* Update surface / buffer for fragment shader constant buffer */ - { - const GLuint surf = SURF_INDEX_FRAG_CONST_BUFFER; - struct brw_fragment_program *fp = - (struct brw_fragment_program *) brw->fragment_program; - fp->const_buffer = - brw_update_wm_constant_surface(ctx, surf, fp->const_buffer, - fp->program.Base.Parameters); - - brw->wm.nr_surfaces = surf + 1; - } + if (brw->wm.surf_bo[SURF_INDEX_FRAG_CONST_BUFFER] != NULL) + brw->wm.nr_surfaces = SURF_INDEX_FRAG_CONST_BUFFER + 1; /* Update surfaces for textures */ for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { @@ -735,100 +743,16 @@ static void prepare_wm_surfaces(struct brw_context *brw ) brw->state.dirty.brw |= BRW_NEW_NR_WM_SURFACES; } - -/** - * Constructs the binding table for the VS surface state. - */ -static dri_bo * -brw_vs_get_binding_table(struct brw_context *brw) -{ - dri_bo *bind_bo; - - assert(brw->vs.nr_surfaces <= BRW_VS_MAX_SURF); - - bind_bo = brw_search_cache(&brw->cache, BRW_SS_SURF_BIND, - NULL, 0, - brw->vs.surf_bo, brw->vs.nr_surfaces, - NULL); - - if (bind_bo == NULL) { - GLuint data_size = brw->vs.nr_surfaces * sizeof(GLuint); - uint32_t *data = malloc(data_size); - int i; - - for (i = 0; i < brw->vs.nr_surfaces; i++) - if (brw->vs.surf_bo[i]) - data[i] = brw->vs.surf_bo[i]->offset; - else - data[i] = 0; - - bind_bo = brw_upload_cache( &brw->cache, BRW_SS_SURF_BIND, - NULL, 0, - brw->vs.surf_bo, brw->vs.nr_surfaces, - data, data_size, - NULL, NULL); - - /* Emit binding table relocations to surface state */ - for (i = 0; i < BRW_VS_MAX_SURF; i++) { - if (brw->vs.surf_bo[i] != NULL) { - dri_bo_emit_reloc(bind_bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, - 0, - i * sizeof(GLuint), - brw->vs.surf_bo[i]); - } - } - - free(data); - } - - return bind_bo; -} - - -/** - * Vertex shader surfaces. Just constant buffer for now. Could add vertex - * shader textures in the future. - */ -static void prepare_vs_surfaces(struct brw_context *brw ) -{ - GLcontext *ctx = &brw->intel.ctx; - - /* Update surface / buffer for vertex shader constant buffer */ - { - const GLuint surf = SURF_INDEX_VERT_CONST_BUFFER; - struct brw_vertex_program *vp = - (struct brw_vertex_program *) brw->vertex_program; - vp->const_buffer = - brw_update_vs_constant_surface(ctx, surf, vp->const_buffer, - vp->program.Base.Parameters); - - brw->vs.nr_surfaces = 1; - } - - dri_bo_unreference(brw->vs.bind_bo); - brw->vs.bind_bo = brw_vs_get_binding_table(brw); - - if (1) - brw->state.dirty.brw |= BRW_NEW_NR_VS_SURFACES; -} - - -static void -prepare_surfaces(struct brw_context *brw) -{ - prepare_wm_surfaces(brw); - prepare_vs_surfaces(brw); -} - - const struct brw_tracked_state brw_wm_surfaces = { .dirty = { - .mesa = _NEW_COLOR | _NEW_TEXTURE | _NEW_BUFFERS | _NEW_PROGRAM, - .brw = BRW_NEW_CONTEXT, + .mesa = (_NEW_COLOR | + _NEW_TEXTURE | + _NEW_BUFFERS), + .brw = (BRW_NEW_CONTEXT | + BRW_NEW_WM_SURFACES), .cache = 0 }, - .prepare = prepare_surfaces, + .prepare = prepare_wm_surfaces, }; diff --git a/shared/intel_batchbuffer.c b/shared/intel_batchbuffer.c index 29dc05c..6aa36d1 100644 --- a/shared/intel_batchbuffer.c +++ b/shared/intel_batchbuffer.c @@ -195,7 +195,16 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file, { struct intel_context *intel = batch->intel; GLuint used = batch->ptr - batch->map; - GLboolean was_locked = intel->locked; + + if (intel->first_post_swapbuffers_batch == NULL) { + intel->first_post_swapbuffers_batch = intel->batch->buf; + drm_intel_bo_reference(intel->first_post_swapbuffers_batch); + } + + if (intel->first_post_swapbuffers_batch == NULL) { + intel->first_post_swapbuffers_batch = intel->batch->buf; + drm_intel_bo_reference(intel->first_post_swapbuffers_batch); + } if (used == 0) { batch->cliprect_mode = IGNORE_CLIPRECTS; @@ -243,13 +252,9 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file, /* TODO: Just pass the relocation list and dma buffer up to the * kernel. */ - if (!was_locked) - LOCK_HARDWARE(intel); - + LOCK_HARDWARE(intel); do_flush_locked(batch, used, GL_FALSE); - - if (!was_locked) - UNLOCK_HARDWARE(intel); + UNLOCK_HARDWARE(intel); if (INTEL_DEBUG & DEBUG_SYNC) { fprintf(stderr, "waiting for idle\n"); diff --git a/shared/intel_blit.c b/shared/intel_blit.c index 4919828..0c5be4c 100644 --- a/shared/intel_blit.c +++ b/shared/intel_blit.c @@ -108,6 +108,8 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv, CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; } + assert(src->tiling != I915_TILING_Y); + assert(dst->tiling != I915_TILING_Y); #ifndef I915 if (src->tiling != I915_TILING_NONE) { CMD |= XY_SRC_TILED; @@ -175,66 +177,6 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv, UNLOCK_HARDWARE(intel); } - - - -void -intelEmitFillBlit(struct intel_context *intel, - GLuint cpp, - GLshort dst_pitch, - dri_bo *dst_buffer, - GLuint dst_offset, - uint32_t dst_tiling, - GLshort x, GLshort y, - GLshort w, GLshort h, - GLuint color) -{ - GLuint BR13, CMD; - BATCH_LOCALS; - - dst_pitch *= cpp; - - switch (cpp) { - case 1: - BR13 = (0xF0 << 16); - CMD = XY_COLOR_BLT_CMD; - break; - case 2: - BR13 = (0xF0 << 16) | BR13_565; - CMD = XY_COLOR_BLT_CMD; - break; - case 4: - BR13 = (0xF0 << 16) | BR13_8888; - CMD = XY_COLOR_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; - break; - default: - return; - } -#ifndef I915 - if (dst_tiling != I915_TILING_NONE) { - CMD |= XY_DST_TILED; - dst_pitch /= 4; - } -#endif - - DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", - __FUNCTION__, dst_buffer, dst_pitch, dst_offset, x, y, w, h); - - assert(w > 0); - assert(h > 0); - - BEGIN_BATCH(6, NO_LOOP_CLIPRECTS); - OUT_BATCH(CMD); - OUT_BATCH(BR13 | dst_pitch); - OUT_BATCH((y << 16) | x); - OUT_BATCH(((y + h) << 16) | (x + w)); - OUT_RELOC(dst_buffer, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - dst_offset); - OUT_BATCH(color); - ADVANCE_BATCH(); -} - static GLuint translate_raster_op(GLenum logicop) { switch(logicop) { @@ -261,7 +203,7 @@ static GLuint translate_raster_op(GLenum logicop) /* Copy BitBlt */ -void +GLboolean intelEmitCopyBlit(struct intel_context *intel, GLuint cpp, GLshort src_pitch, @@ -283,6 +225,19 @@ intelEmitCopyBlit(struct intel_context *intel, dri_bo *aper_array[3]; BATCH_LOCALS; + if (dst_tiling != I915_TILING_NONE) { + if (dst_offset & 4095) + return GL_FALSE; + if (dst_tiling == I915_TILING_Y) + return GL_FALSE; + } + if (src_tiling != I915_TILING_NONE) { + if (src_offset & 4095) + return GL_FALSE; + if (src_tiling == I915_TILING_Y) + return GL_FALSE; + } + /* do space/cliprects check before going any further */ do { aper_array[0] = intel->batch->buf; @@ -297,12 +252,7 @@ intelEmitCopyBlit(struct intel_context *intel, } while (pass < 2); if (pass >= 2) { - GLboolean locked = GL_FALSE; - if (!intel->locked) { - LOCK_HARDWARE(intel); - locked = GL_TRUE; - } - + LOCK_HARDWARE(intel); dri_bo_map(dst_buffer, GL_TRUE); dri_bo_map(src_buffer, GL_FALSE); _mesa_copy_rect((GLubyte *)dst_buffer->virtual + dst_offset, @@ -316,11 +266,9 @@ intelEmitCopyBlit(struct intel_context *intel, dri_bo_unmap(src_buffer); dri_bo_unmap(dst_buffer); - - if (locked) - UNLOCK_HARDWARE(intel); + UNLOCK_HARDWARE(intel); - return; + return GL_TRUE; } intel_batchbuffer_require_space(intel->batch, 8 * 4, NO_LOOP_CLIPRECTS); @@ -347,7 +295,7 @@ intelEmitCopyBlit(struct intel_context *intel, CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; break; default: - return; + return GL_FALSE; } #ifndef I915 @@ -362,7 +310,7 @@ intelEmitCopyBlit(struct intel_context *intel, #endif if (dst_y2 <= dst_y || dst_x2 <= dst_x) { - return; + return GL_TRUE; } assert(dst_x < dst_x2); @@ -384,6 +332,8 @@ intelEmitCopyBlit(struct intel_context *intel, ADVANCE_BATCH(); intel_batchbuffer_emit_mi_flush(intel->batch); + + return GL_TRUE; } @@ -527,6 +477,8 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) BR13 |= BR13_565; } + assert(irb->region->tiling != I915_TILING_Y); + #ifndef I915 if (irb->region->tiling != I915_TILING_NONE) { CMD |= XY_DST_TILED; @@ -596,7 +548,7 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) UNLOCK_HARDWARE(intel); } -void +GLboolean intelEmitImmediateColorExpandBlit(struct intel_context *intel, GLuint cpp, GLubyte *src_bits, GLuint src_size, @@ -612,11 +564,19 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, int dwords = ALIGN(src_size, 8) / 4; uint32_t opcode, br13, blit_cmd; + if (dst_tiling != I915_TILING_NONE) { + if (dst_offset & 4095) + return GL_FALSE; + if (dst_tiling == I915_TILING_Y) + return GL_FALSE; + } + assert( logic_op - GL_CLEAR >= 0 ); assert( logic_op - GL_CLEAR < 0x10 ); + assert(dst_pitch > 0); if (w < 0 || h < 0) - return; + return GL_TRUE; dst_pitch *= cpp; @@ -673,4 +633,46 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, REFERENCES_CLIPRECTS ); intel_batchbuffer_emit_mi_flush(intel->batch); + + return GL_TRUE; +} + +/* We don't have a memmove-type blit like some other hardware, so we'll do a + * rectangular blit covering a large space, then emit 1-scanline blit at the + * end to cover the last if we need. + */ +void +intel_emit_linear_blit(struct intel_context *intel, + drm_intel_bo *dst_bo, + unsigned int dst_offset, + drm_intel_bo *src_bo, + unsigned int src_offset, + unsigned int size) +{ + GLuint pitch, height; + + /* The pitch is a signed value. */ + pitch = MIN2(size, (1 << 15) - 1); + height = size / pitch; + intelEmitCopyBlit(intel, 1, + pitch, src_bo, src_offset, I915_TILING_NONE, + pitch, dst_bo, dst_offset, I915_TILING_NONE, + 0, 0, /* src x/y */ + 0, 0, /* dst x/y */ + pitch, height, /* w, h */ + GL_COPY); + + src_offset += pitch * height; + dst_offset += pitch * height; + size -= pitch * height; + assert (size < (1 << 15)); + if (size != 0) { + intelEmitCopyBlit(intel, 1, + size, src_bo, src_offset, I915_TILING_NONE, + size, dst_bo, dst_offset, I915_TILING_NONE, + 0, 0, /* src x/y */ + 0, 0, /* dst x/y */ + size, 1, /* w, h */ + GL_COPY); + } } diff --git a/shared/intel_blit.h b/shared/intel_blit.h index 52065b1..240cb7c 100644 --- a/shared/intel_blit.h +++ b/shared/intel_blit.h @@ -35,7 +35,8 @@ extern void intelCopyBuffer(const __DRIdrawablePrivate * dpriv, extern void intelClearWithBlit(GLcontext * ctx, GLbitfield mask); -extern void intelEmitCopyBlit(struct intel_context *intel, +GLboolean +intelEmitCopyBlit(struct intel_context *intel, GLuint cpp, GLshort src_pitch, dri_bo *src_buffer, @@ -50,16 +51,7 @@ extern void intelEmitCopyBlit(struct intel_context *intel, GLshort w, GLshort h, GLenum logicop ); -extern void intelEmitFillBlit(struct intel_context *intel, - GLuint cpp, - GLshort dst_pitch, - dri_bo *dst_buffer, - GLuint dst_offset, - uint32_t dst_tiling, - GLshort x, GLshort y, - GLshort w, GLshort h, GLuint color); - -void +GLboolean intelEmitImmediateColorExpandBlit(struct intel_context *intel, GLuint cpp, GLubyte *src_bits, GLuint src_size, @@ -71,5 +63,11 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, GLshort x, GLshort y, GLshort w, GLshort h, GLenum logic_op); +void intel_emit_linear_blit(struct intel_context *intel, + drm_intel_bo *dst_bo, + unsigned int dst_offset, + drm_intel_bo *src_bo, + unsigned int src_offset, + unsigned int size); #endif diff --git a/shared/intel_buffer_objects.c b/shared/intel_buffer_objects.c index 2e6b778..a022593 100644 --- a/shared/intel_buffer_objects.c +++ b/shared/intel_buffer_objects.c @@ -28,13 +28,18 @@ #include "main/imports.h" #include "main/mtypes.h" +#include "main/macros.h" #include "main/bufferobj.h" #include "intel_context.h" +#include "intel_blit.h" #include "intel_buffer_objects.h" #include "intel_batchbuffer.h" #include "intel_regions.h" +static GLboolean +intel_bufferobj_unmap(GLcontext * ctx, + GLenum target, struct gl_buffer_object *obj); /** Allocates a new dri_bo to store the data for the buffer object. */ static void @@ -100,7 +105,13 @@ intel_bufferobj_free(GLcontext * ctx, struct gl_buffer_object *obj) struct intel_buffer_object *intel_obj = intel_buffer_object(obj); assert(intel_obj); - assert(!obj->Pointer); /* Mesa should have unmapped it */ + + /* Buffer objects are automatically unmapped when deleting according + * to the spec, but Mesa doesn't do UnmapBuffer for us at context destroy + * (though it does if you call glDeleteBuffers) + */ + if (obj->Pointer) + intel_bufferobj_unmap(ctx, 0, obj); _mesa_free(intel_obj->sys_buffer); if (intel_obj->region) { @@ -119,9 +130,10 @@ intel_bufferobj_free(GLcontext * ctx, struct gl_buffer_object *obj) * Allocate space for and store data in a buffer object. Any data that was * previously stored in the buffer object is lost. If data is NULL, * memory will be allocated, but no copy will occur. - * Called via glBufferDataARB(). + * Called via ctx->Driver.BufferData(). + * \return GL_TRUE for success, GL_FALSE if out of memory */ -static void +static GLboolean intel_bufferobj_data(GLcontext * ctx, GLenum target, GLsizeiptrARB size, @@ -156,15 +168,19 @@ intel_bufferobj_data(GLcontext * ctx, if (intel_obj->sys_buffer != NULL) { if (data != NULL) memcpy(intel_obj->sys_buffer, data, size); - return; + return GL_TRUE; } } #endif intel_bufferobj_alloc_buffer(intel, intel_obj); + if (!intel_obj->buffer) + return GL_FALSE; if (data != NULL) dri_bo_subdata(intel_obj->buffer, 0, size, data); } + + return GL_TRUE; } @@ -191,8 +207,12 @@ intel_bufferobj_subdata(GLcontext * ctx, if (intel_obj->sys_buffer) memcpy((char *)intel_obj->sys_buffer + offset, data, size); - else + else { + /* Flush any existing batchbuffer that might reference this data. */ + intelFlush(ctx); + dri_bo_subdata(intel_obj->buffer, offset, size, data); + } } @@ -209,7 +229,10 @@ intel_bufferobj_get_subdata(GLcontext * ctx, struct intel_buffer_object *intel_obj = intel_buffer_object(obj); assert(intel_obj); - dri_bo_get_subdata(intel_obj->buffer, offset, size, data); + if (intel_obj->sys_buffer) + memcpy(data, (char *)intel_obj->sys_buffer + offset, size); + else + dri_bo_get_subdata(intel_obj->buffer, offset, size, data); } @@ -234,6 +257,11 @@ intel_bufferobj_map(GLcontext * ctx, return obj->Pointer; } + /* Flush any existing batchbuffer that might have written to this + * buffer. + */ + intelFlush(ctx); + if (intel_obj->region) intel_bufferobj_cow(intel, intel_obj); @@ -251,32 +279,205 @@ intel_bufferobj_map(GLcontext * ctx, } obj->Pointer = intel_obj->buffer->virtual; + obj->Length = obj->Size; + obj->Offset = 0; + return obj->Pointer; } +/** + * Called via glMapBufferRange(). + * + * The goal of this extension is to allow apps to accumulate their rendering + * at the same time as they accumulate their buffer object. Without it, + * you'd end up blocking on execution of rendering every time you mapped + * the buffer to put new data in. + * + * We support it in 3 ways: If unsynchronized, then don't bother + * flushing the batchbuffer before mapping the buffer, which can save blocking + * in many cases. If we would still block, and they allow the whole buffer + * to be invalidated, then just allocate a new buffer to replace the old one. + * If not, and we'd block, and they allow the subrange of the buffer to be + * invalidated, then we can make a new little BO, let them write into that, + * and blit it into the real BO at unmap time. + */ +static void * +intel_bufferobj_map_range(GLcontext * ctx, + GLenum target, GLintptr offset, GLsizeiptr length, + GLbitfield access, struct gl_buffer_object *obj) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_buffer_object *intel_obj = intel_buffer_object(obj); + + assert(intel_obj); + + /* _mesa_MapBufferRange (GL entrypoint) sets these, but the vbo module also + * internally uses our functions directly. + */ + obj->Offset = offset; + obj->Length = length; + obj->AccessFlags = access; + + if (intel_obj->sys_buffer) { + obj->Pointer = intel_obj->sys_buffer + offset; + return obj->Pointer; + } + + if (intel_obj->region) + intel_bufferobj_cow(intel, intel_obj); + + /* If the mapping is synchronized with other GL operations, flush + * the batchbuffer so that GEM knows about the buffer access for later + * syncing. + */ + if (!(access & GL_MAP_UNSYNCHRONIZED_BIT)) + intelFlush(ctx); + + if (intel_obj->buffer == NULL) { + obj->Pointer = NULL; + return NULL; + } + + /* If the user doesn't care about existing buffer contents and mapping + * would cause us to block, then throw out the old buffer. + */ + if (!(access & GL_MAP_UNSYNCHRONIZED_BIT) && + (access & GL_MAP_INVALIDATE_BUFFER_BIT) && + drm_intel_bo_busy(intel_obj->buffer)) { + drm_intel_bo_unreference(intel_obj->buffer); + intel_obj->buffer = dri_bo_alloc(intel->bufmgr, "bufferobj", + intel_obj->Base.Size, 64); + } + + /* If the user is mapping a range of an active buffer object but + * doesn't require the current contents of that range, make a new + * BO, and we'll copy what they put in there out at unmap or + * FlushRange time. + */ + if ((access & GL_MAP_INVALIDATE_RANGE_BIT) && + drm_intel_bo_busy(intel_obj->buffer)) { + if (access & GL_MAP_FLUSH_EXPLICIT_BIT) { + intel_obj->range_map_buffer = _mesa_malloc(length); + obj->Pointer = intel_obj->range_map_buffer; + } else { + intel_obj->range_map_bo = drm_intel_bo_alloc(intel->bufmgr, + "range map", + length, 64); + if (!(access & GL_MAP_READ_BIT) && + intel->intelScreen->kernel_exec_fencing) { + drm_intel_gem_bo_map_gtt(intel_obj->range_map_bo); + intel_obj->mapped_gtt = GL_TRUE; + } else { + drm_intel_bo_map(intel_obj->range_map_bo, + (access & GL_MAP_WRITE_BIT) != 0); + intel_obj->mapped_gtt = GL_FALSE; + } + obj->Pointer = intel_obj->range_map_bo->virtual; + } + return obj->Pointer; + } + + if (!(access & GL_MAP_READ_BIT) && + intel->intelScreen->kernel_exec_fencing) { + drm_intel_gem_bo_map_gtt(intel_obj->buffer); + intel_obj->mapped_gtt = GL_TRUE; + } else { + drm_intel_bo_map(intel_obj->buffer, (access & GL_MAP_WRITE_BIT) != 0); + intel_obj->mapped_gtt = GL_FALSE; + } + + obj->Pointer = intel_obj->buffer->virtual + offset; + return obj->Pointer; +} + +/* Ideally we'd use a BO to avoid taking up cache space for the temporary + * data, but FlushMappedBufferRange may be followed by further writes to + * the pointer, so we would have to re-map after emitting our blit, which + * would defeat the point. + */ +static void +intel_bufferobj_flush_mapped_range(GLcontext *ctx, GLenum target, + GLintptr offset, GLsizeiptr length, + struct gl_buffer_object *obj) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_buffer_object *intel_obj = intel_buffer_object(obj); + drm_intel_bo *temp_bo; + + /* Unless we're in the range map using a temporary system buffer, + * there's no work to do. + */ + if (intel_obj->range_map_buffer == NULL) + return; + + temp_bo = drm_intel_bo_alloc(intel->bufmgr, "range map flush", length, 64); + + drm_intel_bo_subdata(temp_bo, 0, length, intel_obj->range_map_buffer); + + intel_emit_linear_blit(intel, + intel_obj->buffer, obj->Offset + offset, + temp_bo, 0, + length); + + drm_intel_bo_unreference(temp_bo); +} + /** - * Called via glMapBufferARB(). + * Called via glUnmapBuffer(). */ static GLboolean intel_bufferobj_unmap(GLcontext * ctx, GLenum target, struct gl_buffer_object *obj) { + struct intel_context *intel = intel_context(ctx); struct intel_buffer_object *intel_obj = intel_buffer_object(obj); assert(intel_obj); + assert(obj->Pointer); if (intel_obj->sys_buffer != NULL) { - assert(obj->Pointer); - obj->Pointer = NULL; + /* always keep the mapping around. */ + } else if (intel_obj->range_map_buffer != NULL) { + /* Since we've emitted some blits to buffers that will (likely) be used + * in rendering operations in other cache domains in this batch, emit a + * flush. Once again, we wish for a domain tracker in libdrm to cover + * usage inside of a batchbuffer. + */ + intel_batchbuffer_emit_mi_flush(intel->batch); + free(intel_obj->range_map_buffer); + intel_obj->range_map_buffer = NULL; + } else if (intel_obj->range_map_bo != NULL) { + if (intel_obj->mapped_gtt) { + drm_intel_gem_bo_unmap_gtt(intel_obj->range_map_bo); + } else { + drm_intel_bo_unmap(intel_obj->range_map_bo); + } + + intel_emit_linear_blit(intel, + intel_obj->buffer, obj->Offset, + intel_obj->range_map_bo, 0, + obj->Length); + + /* Since we've emitted some blits to buffers that will (likely) be used + * in rendering operations in other cache domains in this batch, emit a + * flush. Once again, we wish for a domain tracker in libdrm to cover + * usage inside of a batchbuffer. + */ + intel_batchbuffer_emit_mi_flush(intel->batch); + + drm_intel_bo_unreference(intel_obj->range_map_bo); + intel_obj->range_map_bo = NULL; } else if (intel_obj->buffer != NULL) { - assert(obj->Pointer); if (intel_obj->mapped_gtt) { drm_intel_gem_bo_unmap_gtt(intel_obj->buffer); } else { drm_intel_bo_unmap(intel_obj->buffer); } - obj->Pointer = NULL; } + obj->Pointer = NULL; + obj->Offset = 0; + obj->Length = 0; + return GL_TRUE; } @@ -294,30 +495,94 @@ intel_bufferobj_buffer(struct intel_context *intel, } if (intel_obj->buffer == NULL) { + void *sys_buffer = intel_obj->sys_buffer; + + /* only one of buffer and sys_buffer could be non-NULL */ intel_bufferobj_alloc_buffer(intel, intel_obj); + intel_obj->sys_buffer = NULL; + intel_bufferobj_subdata(&intel->ctx, GL_ARRAY_BUFFER_ARB, 0, intel_obj->Base.Size, - intel_obj->sys_buffer, + sys_buffer, &intel_obj->Base); - _mesa_free(intel_obj->sys_buffer); + _mesa_free(sys_buffer); intel_obj->sys_buffer = NULL; } return intel_obj->buffer; } +static void +intel_bufferobj_copy_subdata(GLcontext *ctx, + struct gl_buffer_object *src, + struct gl_buffer_object *dst, + GLintptr read_offset, GLintptr write_offset, + GLsizeiptr size) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_buffer_object *intel_src = intel_buffer_object(src); + struct intel_buffer_object *intel_dst = intel_buffer_object(dst); + drm_intel_bo *src_bo, *dst_bo; + + if (size == 0) + return; + + /* If we're in system memory, just map and memcpy. */ + if (intel_src->sys_buffer || intel_dst->sys_buffer) { + /* The same buffer may be used, but note that regions copied may + * not overlap. + */ + if (src == dst) { + char *ptr = intel_bufferobj_map(ctx, GL_COPY_WRITE_BUFFER, + GL_READ_WRITE, dst); + memcpy(ptr + write_offset, ptr + read_offset, size); + intel_bufferobj_unmap(ctx, GL_COPY_WRITE_BUFFER, dst); + } else { + const char *src_ptr; + char *dst_ptr; + + src_ptr = intel_bufferobj_map(ctx, GL_COPY_READ_BUFFER, + GL_READ_ONLY, src); + dst_ptr = intel_bufferobj_map(ctx, GL_COPY_WRITE_BUFFER, + GL_WRITE_ONLY, dst); + + memcpy(dst_ptr + write_offset, src_ptr + read_offset, size); + + intel_bufferobj_unmap(ctx, GL_COPY_READ_BUFFER, src); + intel_bufferobj_unmap(ctx, GL_COPY_WRITE_BUFFER, dst); + } + } + + /* Otherwise, we have real BOs, so blit them. */ + + dst_bo = intel_bufferobj_buffer(intel, intel_dst, INTEL_WRITE_PART); + src_bo = intel_bufferobj_buffer(intel, intel_src, INTEL_READ); + + intel_emit_linear_blit(intel, + dst_bo, write_offset, + src_bo, read_offset, size); + + /* Since we've emitted some blits to buffers that will (likely) be used + * in rendering operations in other cache domains in this batch, emit a + * flush. Once again, we wish for a domain tracker in libdrm to cover + * usage inside of a batchbuffer. + */ + intel_batchbuffer_emit_mi_flush(intel->batch); +} + void -intel_bufferobj_init(struct intel_context *intel) +intelInitBufferObjectFuncs(struct dd_function_table *functions) { - GLcontext *ctx = &intel->ctx; - - ctx->Driver.NewBufferObject = intel_bufferobj_alloc; - ctx->Driver.DeleteBuffer = intel_bufferobj_free; - ctx->Driver.BufferData = intel_bufferobj_data; - ctx->Driver.BufferSubData = intel_bufferobj_subdata; - ctx->Driver.GetBufferSubData = intel_bufferobj_get_subdata; - ctx->Driver.MapBuffer = intel_bufferobj_map; - ctx->Driver.UnmapBuffer = intel_bufferobj_unmap; + functions->NewBufferObject = intel_bufferobj_alloc; + functions->DeleteBuffer = intel_bufferobj_free; + functions->BufferData = intel_bufferobj_data; + functions->BufferSubData = intel_bufferobj_subdata; + functions->GetBufferSubData = intel_bufferobj_get_subdata; + functions->MapBuffer = intel_bufferobj_map; + functions->MapBufferRange = intel_bufferobj_map_range; + functions->FlushMappedBufferRange = intel_bufferobj_flush_mapped_range; + functions->UnmapBuffer = intel_bufferobj_unmap; + functions->CopyBufferSubData = intel_bufferobj_copy_subdata; } diff --git a/shared/intel_buffer_objects.h b/shared/intel_buffer_objects.h index 0431015..bf3e08a 100644 --- a/shared/intel_buffer_objects.h +++ b/shared/intel_buffer_objects.h @@ -48,6 +48,12 @@ struct intel_buffer_object struct intel_region *region; /* Is there a zero-copy texture associated with this (pixel) buffer object? */ + + drm_intel_bo *range_map_bo; + void *range_map_buffer; + unsigned int range_map_offset; + GLsizei range_map_size; + GLboolean mapped_gtt; }; @@ -60,7 +66,7 @@ dri_bo *intel_bufferobj_buffer(struct intel_context *intel, /* Hook the bufferobject implementation into mesa: */ -void intel_bufferobj_init(struct intel_context *intel); +void intelInitBufferObjectFuncs(struct dd_function_table *functions); @@ -72,10 +78,7 @@ void intel_bufferobj_init(struct intel_context *intel); static INLINE struct intel_buffer_object * intel_buffer_object(struct gl_buffer_object *obj) { - if (obj->Name) - return (struct intel_buffer_object *) obj; - else - return NULL; + return (struct intel_buffer_object *) obj; } /* Helpers for zerocopy image uploads. See also intel_regions.h: diff --git a/shared/intel_buffers.c b/shared/intel_buffers.c index d2fad9e..e7357e7 100644 --- a/shared/intel_buffers.c +++ b/shared/intel_buffers.c @@ -157,7 +157,7 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb) /* Do this here, not core Mesa, since this function is called from * many places within the driver. */ - if (ctx->NewState & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) { + if (ctx->NewState & _NEW_BUFFERS) { /* this updates the DrawBuffer->_NumColorDrawBuffers fields, etc */ _mesa_update_framebuffer(ctx); /* this updates the DrawBuffer's Width/Height if it's a FBO */ @@ -345,6 +345,23 @@ intelDrawBuffer(GLcontext * ctx, GLenum mode) static void intelReadBuffer(GLcontext * ctx, GLenum mode) { + if ((ctx->DrawBuffer != NULL) && (ctx->DrawBuffer->Name == 0)) { + struct intel_context *const intel = intel_context(ctx); + const GLboolean was_front_buffer_reading = + intel->is_front_buffer_reading; + + intel->is_front_buffer_reading = (mode == GL_FRONT_LEFT) + || (mode == GL_FRONT); + + /* If we weren't front-buffer reading before but we are now, make sure + * that the front-buffer has actually been allocated. + */ + if (!was_front_buffer_reading && intel->is_front_buffer_reading) { + intel_update_renderbuffers(intel->driContext, + intel->driContext->driDrawablePriv); + } + } + if (ctx->ReadBuffer == ctx->DrawBuffer) { /* This will update FBO completeness status. * A framebuffer will be incomplete if the GL_READ_BUFFER setting diff --git a/shared/intel_chipset.h b/shared/intel_chipset.h index 4593d90..3dc8653 100644 --- a/shared/intel_chipset.h +++ b/shared/intel_chipset.h @@ -66,6 +66,10 @@ #define PCI_CHIP_Q45_G 0x2E12 #define PCI_CHIP_G45_G 0x2E22 #define PCI_CHIP_G41_G 0x2E32 +#define PCI_CHIP_B43_G 0x2E42 + +#define PCI_CHIP_ILD_G 0x0042 +#define PCI_CHIP_ILM_G 0x0046 #define IS_MOBILE(devid) (devid == PCI_CHIP_I855_GM || \ devid == PCI_CHIP_I915_GM || \ @@ -73,15 +77,22 @@ devid == PCI_CHIP_I945_GME || \ devid == PCI_CHIP_I965_GM || \ devid == PCI_CHIP_I965_GME || \ - devid == PCI_CHIP_GM45_GM || IS_IGD(devid)) + devid == PCI_CHIP_GM45_GM || \ + IS_IGD(devid) || \ + devid == PCI_CHIP_ILM_G) #define IS_G45(devid) (devid == PCI_CHIP_IGD_E_G || \ devid == PCI_CHIP_Q45_G || \ devid == PCI_CHIP_G45_G || \ - devid == PCI_CHIP_G41_G) + devid == PCI_CHIP_G41_G || \ + devid == PCI_CHIP_B43_G) #define IS_GM45(devid) (devid == PCI_CHIP_GM45_GM) #define IS_G4X(devid) (IS_G45(devid) || IS_GM45(devid)) +#define IS_ILD(devid) (devid == PCI_CHIP_ILD_G) +#define IS_ILM(devid) (devid == PCI_CHIP_ILM_G) +#define IS_IGDNG(devid) (IS_ILD(devid) || IS_ILM(devid)) + #define IS_915(devid) (devid == PCI_CHIP_I915_G || \ devid == PCI_CHIP_E7221_G || \ devid == PCI_CHIP_I915_GM) @@ -99,7 +110,8 @@ devid == PCI_CHIP_I965_GM || \ devid == PCI_CHIP_I965_GME || \ devid == PCI_CHIP_I946_GZ || \ - IS_G4X(devid)) + IS_G4X(devid) || \ + IS_IGDNG(devid)) #define IS_9XX(devid) (IS_915(devid) || \ IS_945(devid) || \ diff --git a/shared/intel_clear.c b/shared/intel_clear.c index 19f4763..1b0e221 100644 --- a/shared/intel_clear.c +++ b/shared/intel_clear.c @@ -27,25 +27,9 @@ **************************************************************************/ #include "main/glheader.h" -#include "main/enums.h" -#include "main/image.h" #include "main/mtypes.h" -#include "main/arrayobj.h" -#include "main/attrib.h" -#include "main/blend.h" -#include "main/bufferobj.h" -#include "main/buffers.h" -#include "main/depth.h" -#include "main/enable.h" -#include "main/macros.h" -#include "main/matrix.h" -#include "main/polygon.h" -#include "main/texstate.h" -#include "main/shaders.h" -#include "main/stencil.h" -#include "main/varray.h" -#include "glapi/dispatch.h" #include "swrast/swrast.h" +#include "drivers/common/meta.h" #include "intel_context.h" #include "intel_blit.h" @@ -53,240 +37,11 @@ #include "intel_clear.h" #include "intel_fbo.h" #include "intel_pixel.h" +#include "intel_regions.h" +#include "intel_batchbuffer.h" #define FILE_DEBUG_FLAG DEBUG_BLIT -#define TRI_CLEAR_COLOR_BITS (BUFFER_BIT_BACK_LEFT | \ - BUFFER_BIT_FRONT_LEFT | \ - BUFFER_BIT_COLOR0 | \ - BUFFER_BIT_COLOR1 | \ - BUFFER_BIT_COLOR2 | \ - BUFFER_BIT_COLOR3 | \ - BUFFER_BIT_COLOR4 | \ - BUFFER_BIT_COLOR5 | \ - BUFFER_BIT_COLOR6 | \ - BUFFER_BIT_COLOR7) - - -/** - * Per-context one-time init of things for intl_clear_tris(). - * Basically set up a private array object for vertex/color arrays. - */ -static void -init_clear(GLcontext *ctx) -{ - struct intel_context *intel = intel_context(ctx); - struct gl_array_object *arraySave = NULL; - const GLuint arrayBuffer = ctx->Array.ArrayBufferObj->Name; - const GLuint elementBuffer = ctx->Array.ElementArrayBufferObj->Name; - - /* create new array object */ - intel->clear.arrayObj = _mesa_new_array_object(ctx, ~0); - - /* save current array object, bind new one */ - _mesa_reference_array_object(ctx, &arraySave, ctx->Array.ArrayObj); - _mesa_reference_array_object(ctx, &ctx->Array.ArrayObj, intel->clear.arrayObj); - - /* one-time setup of vertex arrays (pos, color) */ - _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, 0); - _mesa_BindBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB, 0); - _mesa_ColorPointer(4, GL_FLOAT, 4 * sizeof(GLfloat), intel->clear.color); - _mesa_VertexPointer(3, GL_FLOAT, 3 * sizeof(GLfloat), intel->clear.vertices); - _mesa_Enable(GL_COLOR_ARRAY); - _mesa_Enable(GL_VERTEX_ARRAY); - - /* restore original array object */ - _mesa_reference_array_object(ctx, &ctx->Array.ArrayObj, arraySave); - _mesa_reference_array_object(ctx, &arraySave, NULL); - - /* restore original buffer objects */ - _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, arrayBuffer); - _mesa_BindBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB, elementBuffer); -} - - - -/** - * Perform glClear where mask contains only color, depth, and/or stencil. - * - * The implementation is based on calling into Mesa to set GL state and - * performing normal triangle rendering. The intent of this path is to - * have as generic a path as possible, so that any driver could make use of - * it. - */ -void -intel_clear_tris(GLcontext *ctx, GLbitfield mask) -{ - struct intel_context *intel = intel_context(ctx); - GLfloat dst_z; - struct gl_framebuffer *fb = ctx->DrawBuffer; - int i; - GLboolean saved_fp_enable = GL_FALSE, saved_vp_enable = GL_FALSE; - GLuint saved_shader_program = 0; - unsigned int saved_active_texture; - struct gl_array_object *arraySave = NULL; - - if (!intel->clear.arrayObj) - init_clear(ctx); - - assert((mask & ~(TRI_CLEAR_COLOR_BITS | BUFFER_BIT_DEPTH | - BUFFER_BIT_STENCIL)) == 0); - - _mesa_PushAttrib(GL_COLOR_BUFFER_BIT | - GL_CURRENT_BIT | - GL_DEPTH_BUFFER_BIT | - GL_ENABLE_BIT | - GL_POLYGON_BIT | - GL_STENCIL_BUFFER_BIT | - GL_TRANSFORM_BIT | - GL_CURRENT_BIT); - saved_active_texture = ctx->Texture.CurrentUnit; - - /* Disable existing GL state we don't want to apply to a clear. */ - _mesa_Disable(GL_ALPHA_TEST); - _mesa_Disable(GL_BLEND); - _mesa_Disable(GL_CULL_FACE); - _mesa_Disable(GL_FOG); - _mesa_Disable(GL_POLYGON_SMOOTH); - _mesa_Disable(GL_POLYGON_STIPPLE); - _mesa_Disable(GL_POLYGON_OFFSET_FILL); - _mesa_Disable(GL_LIGHTING); - _mesa_Disable(GL_CLIP_PLANE0); - _mesa_Disable(GL_CLIP_PLANE1); - _mesa_Disable(GL_CLIP_PLANE2); - _mesa_Disable(GL_CLIP_PLANE3); - _mesa_Disable(GL_CLIP_PLANE4); - _mesa_Disable(GL_CLIP_PLANE5); - _mesa_PolygonMode(GL_FRONT_AND_BACK, GL_FILL); - if (ctx->Extensions.ARB_fragment_program && ctx->FragmentProgram.Enabled) { - saved_fp_enable = GL_TRUE; - _mesa_Disable(GL_FRAGMENT_PROGRAM_ARB); - } - if (ctx->Extensions.ARB_vertex_program && ctx->VertexProgram.Enabled) { - saved_vp_enable = GL_TRUE; - _mesa_Disable(GL_VERTEX_PROGRAM_ARB); - } - if (ctx->Extensions.ARB_shader_objects && ctx->Shader.CurrentProgram) { - saved_shader_program = ctx->Shader.CurrentProgram->Name; - _mesa_UseProgramObjectARB(0); - } - - if (ctx->Texture._EnabledUnits != 0) { - int i; - - for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { - _mesa_ActiveTextureARB(GL_TEXTURE0 + i); - _mesa_Disable(GL_TEXTURE_1D); - _mesa_Disable(GL_TEXTURE_2D); - _mesa_Disable(GL_TEXTURE_3D); - if (ctx->Extensions.ARB_texture_cube_map) - _mesa_Disable(GL_TEXTURE_CUBE_MAP_ARB); - if (ctx->Extensions.NV_texture_rectangle) - _mesa_Disable(GL_TEXTURE_RECTANGLE_NV); - if (ctx->Extensions.MESA_texture_array) { - _mesa_Disable(GL_TEXTURE_1D_ARRAY_EXT); - _mesa_Disable(GL_TEXTURE_2D_ARRAY_EXT); - } - } - } - - /* save current array object, bind our private one */ - _mesa_reference_array_object(ctx, &arraySave, ctx->Array.ArrayObj); - _mesa_reference_array_object(ctx, &ctx->Array.ArrayObj, intel->clear.arrayObj); - - intel_meta_set_passthrough_transform(intel); - - for (i = 0; i < 4; i++) { - COPY_4FV(intel->clear.color[i], ctx->Color.ClearColor); - } - - /* convert clear Z from [0,1] to NDC coord in [-1,1] */ - dst_z = -1.0 + 2.0 * ctx->Depth.Clear; - - /* Prepare the vertices, which are the same regardless of which buffer we're - * drawing to. - */ - intel->clear.vertices[0][0] = fb->_Xmin; - intel->clear.vertices[0][1] = fb->_Ymin; - intel->clear.vertices[0][2] = dst_z; - intel->clear.vertices[1][0] = fb->_Xmax; - intel->clear.vertices[1][1] = fb->_Ymin; - intel->clear.vertices[1][2] = dst_z; - intel->clear.vertices[2][0] = fb->_Xmax; - intel->clear.vertices[2][1] = fb->_Ymax; - intel->clear.vertices[2][2] = dst_z; - intel->clear.vertices[3][0] = fb->_Xmin; - intel->clear.vertices[3][1] = fb->_Ymax; - intel->clear.vertices[3][2] = dst_z; - - while (mask != 0) { - GLuint this_mask = 0; - GLuint color_bit; - - color_bit = _mesa_ffs(mask & TRI_CLEAR_COLOR_BITS); - if (color_bit != 0) - this_mask |= (1 << (color_bit - 1)); - - /* Clear depth/stencil in the same pass as color. */ - this_mask |= (mask & (BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL)); - - /* Select the current color buffer and use the color write mask if - * we have one, otherwise don't write any color channels. - */ - if (this_mask & BUFFER_BIT_FRONT_LEFT) - _mesa_DrawBuffer(GL_FRONT_LEFT); - else if (this_mask & BUFFER_BIT_BACK_LEFT) - _mesa_DrawBuffer(GL_BACK_LEFT); - else if (color_bit != 0) - _mesa_DrawBuffer(GL_COLOR_ATTACHMENT0 + - (color_bit - BUFFER_COLOR0 - 1)); - else - _mesa_ColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); - - /* Control writing of the depth clear value to depth. */ - if (this_mask & BUFFER_BIT_DEPTH) { - _mesa_DepthFunc(GL_ALWAYS); - _mesa_Enable(GL_DEPTH_TEST); - } else { - _mesa_Disable(GL_DEPTH_TEST); - _mesa_DepthMask(GL_FALSE); - } - - /* Control writing of the stencil clear value to stencil. */ - if (this_mask & BUFFER_BIT_STENCIL) { - _mesa_Enable(GL_STENCIL_TEST); - _mesa_StencilOpSeparate(GL_FRONT_AND_BACK, - GL_REPLACE, GL_REPLACE, GL_REPLACE); - _mesa_StencilFuncSeparate(GL_FRONT_AND_BACK, GL_ALWAYS, - ctx->Stencil.Clear, - ctx->Stencil.WriteMask[0]); - } else { - _mesa_Disable(GL_STENCIL_TEST); - } - - _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4); - - mask &= ~this_mask; - } - - intel_meta_restore_transform(intel); - - _mesa_ActiveTextureARB(GL_TEXTURE0 + saved_active_texture); - if (saved_fp_enable) - _mesa_Enable(GL_FRAGMENT_PROGRAM_ARB); - if (saved_vp_enable) - _mesa_Enable(GL_VERTEX_PROGRAM_ARB); - - if (saved_shader_program) - _mesa_UseProgramObjectARB(saved_shader_program); - - _mesa_PopAttrib(); - - /* restore current array object */ - _mesa_reference_array_object(ctx, &ctx->Array.ArrayObj, arraySave); - _mesa_reference_array_object(ctx, &arraySave, NULL); -} - static const char *buffer_names[] = { [BUFFER_FRONT_LEFT] = "front", [BUFFER_BACK_LEFT] = "back", @@ -340,7 +95,7 @@ intelClear(GLcontext *ctx, GLbitfield mask) = intel_get_rb_region(fb, BUFFER_STENCIL); if (stencilRegion) { /* have hw stencil */ - if (IS_965(intel->intelScreen->deviceID) || + if (stencilRegion->tiling == I915_TILING_Y || (ctx->Stencil.WriteMask[0] & 0xff) != 0xff) { /* We have to use the 3D engine if we're clearing a partial mask * of the stencil buffer, or if we're on a 965 which has a tiled @@ -357,9 +112,10 @@ intelClear(GLcontext *ctx, GLbitfield mask) /* HW depth */ if (mask & BUFFER_BIT_DEPTH) { + const struct intel_region *irb = intel_get_rb_region(fb, BUFFER_DEPTH); + /* clear depth with whatever method is used for stencil (see above) */ - if (IS_965(intel->intelScreen->deviceID) || - tri_mask & BUFFER_BIT_STENCIL) + if (irb->tiling == I915_TILING_Y || tri_mask & BUFFER_BIT_STENCIL) tri_mask |= BUFFER_BIT_DEPTH; else blit_mask |= BUFFER_BIT_DEPTH; @@ -369,7 +125,7 @@ intelClear(GLcontext *ctx, GLbitfield mask) * buffer with it. */ if (mask & (BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL)) { - int color_bit = _mesa_ffs(mask & TRI_CLEAR_COLOR_BITS); + int color_bit = _mesa_ffs(mask & BUFFER_BITS_COLOR); if (color_bit != 0) { tri_mask |= blit_mask & (1 << (color_bit - 1)); blit_mask &= ~(1 << (color_bit - 1)); @@ -379,14 +135,18 @@ intelClear(GLcontext *ctx, GLbitfield mask) /* SW fallback clearing */ swrast_mask = mask & ~tri_mask & ~blit_mask; - for (i = 0; i < BUFFER_COUNT; i++) { - GLuint bufBit = 1 << i; - if ((blit_mask | tri_mask) & bufBit) { + { + /* look for non-Intel renderbuffers (clear them with swrast) */ + GLbitfield blit_or_tri = blit_mask | tri_mask; + while (blit_or_tri) { + GLuint i = _mesa_ffs(blit_or_tri) - 1; + GLbitfield bufBit = 1 << i; if (!fb->Attachment[i].Renderbuffer->ClassID) { blit_mask &= ~bufBit; tri_mask &= ~bufBit; swrast_mask |= bufBit; } + blit_or_tri ^= bufBit; } } @@ -411,7 +171,8 @@ intelClear(GLcontext *ctx, GLbitfield mask) } DBG("\n"); } - intel_clear_tris(ctx, tri_mask); + + _mesa_meta_clear(&intel->ctx, tri_mask); } if (swrast_mask) { diff --git a/shared/intel_context.c b/shared/intel_context.c index cfd983d..fce42e9 100644 --- a/shared/intel_context.c +++ b/shared/intel_context.c @@ -38,6 +38,7 @@ #include "swrast_setup/swrast_setup.h" #include "tnl/tnl.h" #include "drivers/common/driverfuncs.h" +#include "drivers/common/meta.h" #include "i830_dri.h" @@ -161,6 +162,15 @@ intelGetString(GLcontext * ctx, GLenum name) case PCI_CHIP_G41_G: chipset = "Intel(R) G41"; break; + case PCI_CHIP_B43_G: + chipset = "Intel(R) B43"; + break; + case PCI_CHIP_ILD_G: + chipset = "Intel(R) IGDNG_D"; + break; + case PCI_CHIP_ILM_G: + chipset = "Intel(R) IGDNG_M"; + break; default: chipset = "Unknown Intel Chipset"; break; @@ -220,7 +230,9 @@ intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) struct intel_renderbuffer *stencil_rb; i = 0; - if ((intel->is_front_buffer_rendering || !intel_fb->color_rb[1]) + if ((intel->is_front_buffer_rendering || + intel->is_front_buffer_reading || + !intel_fb->color_rb[1]) && intel_fb->color_rb[0]) { attachments[i++] = __DRI_BUFFER_FRONT_LEFT; attachments[i++] = intel_bits_per_pixel(intel_fb->color_rb[0]); @@ -396,7 +408,7 @@ intel_viewport(GLcontext *ctx, GLint x, GLint y, GLsizei w, GLsizei h) if (!driContext->driScreenPriv->dri2.enabled) return; - if (!intel->internal_viewport_call && ctx->DrawBuffer->Name == 0) { + if (!intel->meta.internal_viewport_call && ctx->DrawBuffer->Name == 0) { /* If we're rendering to the fake front buffer, make sure all the pending * drawing has landed on the real front buffer. Otherwise when we * eventually get to DRI2GetBuffersWithFormat the stale real front @@ -495,7 +507,8 @@ intel_flush(GLcontext *ctx, GLboolean needs_mi_flush) if (screen->dri2.loader && (screen->dri2.loader->base.version >= 2) - && (screen->dri2.loader->flushFrontBuffer != NULL)) { + && (screen->dri2.loader->flushFrontBuffer != NULL) && + intel->driDrawable && intel->driDrawable->loaderPrivate) { (*screen->dri2.loader->flushFrontBuffer)(intel->driDrawable, intel->driDrawable->loaderPrivate); @@ -505,7 +518,7 @@ intel_flush(GLcontext *ctx, GLboolean needs_mi_flush) * each of N places that do rendering. This has worse performances, * but it is much easier to get correct. */ - if (intel->is_front_buffer_rendering) { + if (!intel->is_front_buffer_rendering) { intel->front_buffer_dirty = GL_FALSE; } } @@ -521,7 +534,27 @@ intelFlush(GLcontext * ctx) static void intel_glFlush(GLcontext *ctx) { + struct intel_context *intel = intel_context(ctx); + intel_flush(ctx, GL_TRUE); + + /* We're using glFlush as an indicator that a frame is done, which is + * what DRI2 does before calling SwapBuffers (and means we should catch + * people doing front-buffer rendering, as well).. + * + * Wait for the swapbuffers before the one we just emitted, so we don't + * get too many swaps outstanding for apps that are GPU-heavy but not + * CPU-heavy. + * + * Unfortunately, we don't have a handle to the batch containing the swap, + * and getting our hands on that doesn't seem worth it, so we just us the + * first batch we emitted after the last swap. + */ + if (intel->first_post_swapbuffers_batch != NULL) { + drm_intel_bo_wait_rendering(intel->first_post_swapbuffers_batch); + drm_intel_bo_unreference(intel->first_post_swapbuffers_batch); + intel->first_post_swapbuffers_batch = NULL; + } } void @@ -561,10 +594,15 @@ intelInitDriverFunctions(struct dd_function_table *functions) functions->CopyConvolutionFilter2D = _swrast_CopyConvolutionFilter2D; intelInitTextureFuncs(functions); + intelInitTextureImageFuncs(functions); + intelInitTextureSubImageFuncs(functions); + intelInitTextureCopyImageFuncs(functions); intelInitStateFuncs(functions); intelInitClearFuncs(functions); intelInitBufferFuncs(functions); intelInitPixelFuncs(functions); + intelInitBufferObjectFuncs(functions); + intel_init_syncobj_functions(functions); } @@ -607,6 +645,10 @@ intelInitContext(struct intel_context *intel, intel->maxBatchSize = BATCH_SZ; intel->bufmgr = intelScreen->bufmgr; + + if (0) /* for debug */ + drm_intel_bufmgr_set_debug(intel->bufmgr, 1); + intel->ttm = intelScreen->ttm; if (intel->ttm) { int bo_reuse_mode; @@ -660,7 +702,15 @@ intelInitContext(struct intel_context *intel, */ _mesa_init_point(ctx); + meta_init_metaops(ctx, &intel->meta); ctx->Const.MaxColorAttachments = 4; /* XXX FBO: review this */ + if (IS_965(intelScreen->deviceID)) { + if (MAX_WIDTH > 8192) + ctx->Const.MaxRenderbufferSize = 8192; + } else { + if (MAX_WIDTH > 2048) + ctx->Const.MaxRenderbufferSize = 2048; + } /* Initialize the software rasterizer and helper modules. */ _swrast_CreateContext(ctx); @@ -672,6 +722,8 @@ intelInitContext(struct intel_context *intel, _swrast_allow_pixel_fog(ctx, GL_FALSE); _swrast_allow_vertex_fog(ctx, GL_TRUE); + _mesa_meta_init(ctx); + intel->hw_stencil = mesaVis->stencilBits && mesaVis->depthBits == 24; intel->hw_stipple = 1; @@ -718,7 +770,6 @@ intelInitContext(struct intel_context *intel, intel->batch = intel_batchbuffer_alloc(intel); - intel_bufferobj_init(intel); intel_fbo_init(intel); if (intel->ctx.Mesa_DXTn) { @@ -728,6 +779,15 @@ intelInitContext(struct intel_context *intel, else if (driQueryOptionb(&intel->optionCache, "force_s3tc_enable")) { _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc"); } + intel->use_texture_tiling = driQueryOptionb(&intel->optionCache, + "texture_tiling"); + if (intel->use_texture_tiling && + !intel->intelScreen->kernel_exec_fencing) { + fprintf(stderr, "No kernel support for execution fencing, " + "disabling texture tiling\n"); + intel->use_texture_tiling = GL_FALSE; + } + intel->use_early_z = driQueryOptionb(&intel->optionCache, "early_z"); intel->prim.primitive = ~0; @@ -767,8 +827,9 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv) INTEL_FIREVERTICES(intel); - if (intel->clear.arrayObj) - _mesa_delete_array_object(&intel->ctx, intel->clear.arrayObj); + _mesa_meta_free(&intel->ctx); + + meta_destroy_metaops(&intel->meta); intel->vtbl.destroy(intel); @@ -787,15 +848,68 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv) intel->prim.vb = NULL; dri_bo_unreference(intel->prim.vb_bo); intel->prim.vb_bo = NULL; + dri_bo_unreference(intel->first_post_swapbuffers_batch); + intel->first_post_swapbuffers_batch = NULL; if (release_texture_heaps) { - /* This share group is about to go away, free our private - * texture object data. + /* Nothing is currently done here to free texture heaps; + * but we're not using the texture heap utilities, so I + * rather think we shouldn't. I've taken a look, and can't + * find any private texture data hanging around anywhere, but + * I'm not yet certain there isn't any at all... */ - if (INTEL_DEBUG & DEBUG_TEXTURE) + /* if (INTEL_DEBUG & DEBUG_TEXTURE) fprintf(stderr, "do something to free texture heaps\n"); + */ } + /* XXX In intelMakeCurrent() below, the context's static regions are + * referenced inside the frame buffer; it's listed as a hack, + * with a comment of "XXX FBO temporary fix-ups!", but + * as long as it's there, we should release the regions here. + * The do/while loop around the block is used to allow the + * "continue" statements inside the block to exit the block, + * to avoid many layers of "if" constructs. + */ + do { + __DRIdrawablePrivate * driDrawPriv = intel->driDrawable; + struct intel_framebuffer *intel_fb; + struct intel_renderbuffer *irbDepth, *irbStencil; + if (!driDrawPriv) { + /* We're already detached from the drawable; exit this block. */ + continue; + } + intel_fb = (struct intel_framebuffer *) driDrawPriv->driverPrivate; + if (!intel_fb) { + /* The frame buffer is already gone; exit this block. */ + continue; + } + irbDepth = intel_get_renderbuffer(&intel_fb->Base, BUFFER_DEPTH); + irbStencil = intel_get_renderbuffer(&intel_fb->Base, BUFFER_STENCIL); + + /* If the regions of the frame buffer still match the regions + * of the context, release them. If they've changed somehow, + * leave them alone. + */ + if (intel_fb->color_rb[0] && intel_fb->color_rb[0]->region == intel->front_region) { + intel_renderbuffer_set_region(intel_fb->color_rb[0], NULL); + } + if (intel_fb->color_rb[1] && intel_fb->color_rb[1]->region == intel->back_region) { + intel_renderbuffer_set_region(intel_fb->color_rb[1], NULL); + } + + if (irbDepth && irbDepth->region == intel->depth_region) { + intel_renderbuffer_set_region(irbDepth, NULL); + } + /* Usually, the stencil buffer is the same as the depth buffer; + * but they're handled separately in MakeCurrent, so we'll + * handle them separately here. + */ + if (irbStencil && irbStencil->region == intel->depth_region) { + intel_renderbuffer_set_region(irbStencil, NULL); + } + } while (0); + intel_region_release(&intel->front_region); intel_region_release(&intel->back_region); intel_region_release(&intel->depth_region); @@ -804,12 +918,23 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv) /* free the Mesa context */ _mesa_free_context_data(&intel->ctx); + + FREE(intel); + driContextPriv->driverPrivate = NULL; } } GLboolean intelUnbindContext(__DRIcontextPrivate * driContextPriv) { + struct intel_context *intel = + (struct intel_context *) driContextPriv->driverPrivate; + + /* Deassociate the context with the drawables. + */ + intel->driDrawable = NULL; + intel->driReadDrawable = NULL; + return GL_TRUE; } @@ -832,7 +957,10 @@ intelMakeCurrent(__DRIcontextPrivate * driContextPriv, if (driDrawPriv != driReadPriv) intel_update_renderbuffers(driContextPriv, driReadPriv); } else { - /* XXX FBO temporary fix-ups! */ + /* XXX FBO temporary fix-ups! These are released in + * intelDextroyContext(), above. Changes here should be + * reflected there. + */ /* if the renderbuffers don't have regions, init them from the context */ struct intel_renderbuffer *irbDepth = intel_get_renderbuffer(&intel_fb->Base, BUFFER_DEPTH); @@ -917,7 +1045,6 @@ intelContendedLock(struct intel_context *intel, GLuint flags) int me = intel->hHWContext; drmGetLock(intel->driFd, intel->hHWContext, flags); - intel->locked = 1; if (INTEL_DEBUG & DEBUG_LOCK) _mesa_printf("%s - got contended lock\n", __progname); @@ -974,9 +1101,12 @@ void LOCK_HARDWARE( struct intel_context *intel ) struct intel_framebuffer *intel_fb = NULL; struct intel_renderbuffer *intel_rb = NULL; - _glthread_LOCK_MUTEX(lockMutex); - assert(!intel->locked); - intel->locked = 1; + intel->locked++; + if (intel->locked >= 2) + return; + + if (!sPriv->dri2.enabled) + _glthread_LOCK_MUTEX(lockMutex); if (intel->driDrawable) { intel_fb = intel->driDrawable->driverPrivate; @@ -1023,13 +1153,16 @@ void UNLOCK_HARDWARE( struct intel_context *intel ) { __DRIscreen *sPriv = intel->driScreen; - intel->vtbl.note_unlock( intel ); - intel->locked = 0; + intel->locked--; + if (intel->locked > 0) + return; - if (!sPriv->dri2.enabled) - DRM_UNLOCK(intel->driFd, intel->driHwLock, intel->hHWContext); + assert(intel->locked == 0); - _glthread_UNLOCK_MUTEX(lockMutex); + if (!sPriv->dri2.enabled) { + DRM_UNLOCK(intel->driFd, intel->driHwLock, intel->hHWContext); + _glthread_UNLOCK_MUTEX(lockMutex); + } if (INTEL_DEBUG & DEBUG_LOCK) _mesa_printf("%s - unlocked\n", __progname); diff --git a/shared/intel_context.h b/shared/intel_context.h index b5cf7e6..03e7cf3 100644 --- a/shared/intel_context.h +++ b/shared/intel_context.h @@ -33,6 +33,7 @@ #include "main/mtypes.h" #include "main/mm.h" #include "texmem.h" +#include "dri_metaops.h" #include "drm.h" #include "intel_bufmgr.h" @@ -79,9 +80,19 @@ extern void intelFallback(struct intel_context *intel, GLuint bit, #define INTEL_MAX_FIXUP 64 +struct intel_sync_object { + struct gl_sync_object Base; + + /** Batch associated with this sync object */ + drm_intel_bo *bo; +}; + +/** + * intel_context is derived from Mesa's context class: GLcontext. + */ struct intel_context { - GLcontext ctx; /* the parent class */ + GLcontext ctx; /**< base class, must be first field */ struct { @@ -91,7 +102,6 @@ struct intel_context void (*new_batch) (struct intel_context * intel); void (*emit_invarient_state) (struct intel_context * intel); void (*note_fence) (struct intel_context *intel, GLuint fence); - void (*note_unlock) (struct intel_context *intel); void (*update_texture_state) (struct intel_context * intel); void (*render_start) (struct intel_context * intel); @@ -158,19 +168,7 @@ struct intel_context void (*debug_batch)(struct intel_context *intel); } vtbl; - struct { - struct gl_fragment_program *bitmap_fp; - struct gl_vertex_program *passthrough_vp; - - struct gl_fragment_program *saved_fp; - GLboolean saved_fp_enable; - struct gl_vertex_program *saved_vp; - GLboolean saved_vp_enable; - - GLint saved_vp_x, saved_vp_y; - GLsizei saved_vp_width, saved_vp_height; - GLenum saved_matrix_mode; - } meta; + struct dri_metaops meta; GLint refcount; GLuint Fallback; @@ -182,7 +180,6 @@ struct intel_context struct intel_region *front_region; struct intel_region *back_region; struct intel_region *depth_region; - GLboolean internal_viewport_call; /** * This value indicates that the kernel memory manager is being used @@ -191,6 +188,7 @@ struct intel_context GLboolean ttm; struct intel_batchbuffer *batch; + drm_intel_bo *first_post_swapbuffers_batch; GLboolean no_batch_wrap; unsigned batch_id; @@ -215,13 +213,6 @@ struct intel_context GLuint ClearColor565; GLuint ClearColor8888; - /* info for intel_clear_tris() */ - struct - { - struct gl_array_object *arrayObj; - GLfloat vertices[4][3]; - GLfloat color[4][4]; - } clear; /* Offsets of fields within the current vertex: */ @@ -294,6 +285,17 @@ struct intel_context * easily. */ GLboolean is_front_buffer_rendering; + /** + * Track whether front-buffer is the current read target. + * + * This is closely associated with is_front_buffer_rendering, but may + * be set separately. The DRI2 fake front buffer must be referenced + * either way. + */ + GLboolean is_front_buffer_reading; + + GLboolean use_texture_tiling; + GLboolean use_early_z; drm_clip_rect_t fboRect; /**< cliprect for FBO rendering */ @@ -316,7 +318,7 @@ struct intel_context __DRIdrawablePrivate *driReadDrawable; __DRIscreenPrivate *driScreen; intelScreenPrivate *intelScreen; - volatile struct drm_i915_sarea *sarea; + volatile drm_i915_sarea_t *sarea; GLuint lastStamp; @@ -474,6 +476,8 @@ extern void intelFlush(GLcontext * ctx); extern void intelInitDriverFunctions(struct dd_function_table *functions); +void intel_init_syncobj_functions(struct dd_function_table *functions); + /* ================================================================ * intel_state.c: @@ -549,6 +553,9 @@ void intel_viewport(GLcontext * ctx, GLint x, GLint y, void intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable); +void i915_set_buf_info_for_region(uint32_t *state, struct intel_region *region, + uint32_t buffer_id); + /*====================================================================== * Inline conversion functions. * These are better-typed than the macros used previously: diff --git a/shared/intel_extensions.c b/shared/intel_extensions.c index 9ec1b4e..2e61c55 100644 --- a/shared/intel_extensions.c +++ b/shared/intel_extensions.c @@ -30,10 +30,14 @@ #include "intel_extensions.h" +#define need_GL_ARB_copy_buffer #define need_GL_ARB_framebuffer_object +#define need_GL_ARB_map_buffer_range #define need_GL_ARB_occlusion_query #define need_GL_ARB_point_parameters #define need_GL_ARB_shader_objects +#define need_GL_ARB_sync +#define need_GL_ARB_vertex_array_object #define need_GL_ARB_vertex_program #define need_GL_ARB_vertex_shader #define need_GL_ARB_window_pos @@ -45,9 +49,12 @@ #define need_GL_EXT_fog_coord #define need_GL_EXT_framebuffer_object #define need_GL_EXT_framebuffer_blit +#define need_GL_EXT_gpu_program_parameters #define need_GL_EXT_point_parameters +#define need_GL_EXT_provoking_vertex #define need_GL_EXT_secondary_color #define need_GL_EXT_stencil_two_side +#define need_GL_APPLE_vertex_array_object #define need_GL_ATI_separate_stencil #define need_GL_ATI_envmap_bumpmap #define need_GL_NV_point_sprite @@ -65,8 +72,13 @@ * i965_dri. */ static const struct dri_extension card_extensions[] = { + { "GL_ARB_copy_buffer", GL_ARB_copy_buffer_functions }, + { "GL_ARB_half_float_pixel", NULL }, + { "GL_ARB_map_buffer_range", GL_ARB_map_buffer_range_functions }, { "GL_ARB_multitexture", NULL }, { "GL_ARB_point_parameters", GL_ARB_point_parameters_functions }, + { "GL_ARB_point_sprite", NULL }, + { "GL_ARB_sync", GL_ARB_sync_functions }, { "GL_ARB_texture_border_clamp", NULL }, { "GL_ARB_texture_cube_map", NULL }, { "GL_ARB_texture_env_add", NULL }, @@ -75,6 +87,7 @@ static const struct dri_extension card_extensions[] = { { "GL_ARB_texture_env_dot3", NULL }, { "GL_ARB_texture_mirrored_repeat", NULL }, { "GL_ARB_texture_rectangle", NULL }, + { "GL_ARB_vertex_array_object", GL_ARB_vertex_array_object_functions}, { "GL_ARB_vertex_program", GL_ARB_vertex_program_functions }, { "GL_ARB_window_pos", GL_ARB_window_pos_functions }, { "GL_EXT_blend_color", GL_EXT_blend_color_functions }, @@ -85,7 +98,9 @@ static const struct dri_extension card_extensions[] = { { "GL_EXT_blend_subtract", NULL }, { "GL_EXT_cull_vertex", GL_EXT_cull_vertex_functions }, { "GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, + { "GL_EXT_gpu_program_parameters", GL_EXT_gpu_program_parameters_functions }, { "GL_EXT_packed_depth_stencil", NULL }, + { "GL_EXT_provoking_vertex", GL_EXT_provoking_vertex_functions }, { "GL_EXT_secondary_color", GL_EXT_secondary_color_functions }, { "GL_EXT_stencil_wrap", NULL }, { "GL_EXT_texture_edge_clamp", NULL }, @@ -95,6 +110,7 @@ static const struct dri_extension card_extensions[] = { { "GL_EXT_texture_lod_bias", NULL }, { "GL_3DFX_texture_compression_FXT1", NULL }, { "GL_APPLE_client_storage", NULL }, + { "GL_APPLE_vertex_array_object", GL_APPLE_vertex_array_object_functions}, { "GL_MESA_pack_invert", NULL }, { "GL_MESA_ycbcr_texture", NULL }, { "GL_NV_blend_square", NULL }, @@ -112,8 +128,10 @@ static const struct dri_extension i915_extensions[] = { { "GL_ARB_fragment_program", NULL }, { "GL_ARB_shadow", NULL }, { "GL_ARB_texture_non_power_of_two", NULL }, + { "GL_ATI_separate_stencil", GL_ATI_separate_stencil_functions }, { "GL_ATI_texture_env_combine3", NULL }, { "GL_EXT_shadow_funcs", NULL }, + { "GL_EXT_stencil_two_side", GL_EXT_stencil_two_side_functions }, { "GL_NV_texture_env_combine4", NULL }, { NULL, NULL } }; @@ -128,6 +146,7 @@ static const struct dri_extension brw_extensions[] = { { "GL_ARB_framebuffer_object", GL_ARB_framebuffer_object_functions}, { "GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions }, { "GL_ARB_point_sprite", NULL }, + { "GL_ARB_seamless_cube_map", NULL }, { "GL_ARB_shader_objects", GL_ARB_shader_objects_functions }, { "GL_ARB_shading_language_100", GL_VERSION_2_0_functions }, { "GL_ARB_shading_language_120", GL_VERSION_2_1_functions }, diff --git a/shared/intel_fbo.c b/shared/intel_fbo.c index 30f58b1..804c034 100644 --- a/shared/intel_fbo.c +++ b/shared/intel_fbo.c @@ -35,6 +35,7 @@ #include "main/context.h" #include "main/texformat.h" #include "main/texrender.h" +#include "drivers/common/meta.h" #include "intel_context.h" #include "intel_buffers.h" @@ -217,7 +218,8 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, DBG("Allocating %d x %d Intel RBO (pitch %d)\n", width, height, pitch); - irb->region = intel_region_alloc(intel, cpp, width, height, pitch, + irb->region = intel_region_alloc(intel, I915_TILING_NONE, + cpp, width, height, pitch, GL_TRUE); if (!irb->region) return GL_FALSE; /* out of memory? */ @@ -575,9 +577,10 @@ intel_render_texture(GLcontext * ctx, ASSERT(newImage); - if (newImage->Border != 0) { - /* Fallback on drawing to a texture with a border, which won't have a - * miptree. + intel_image = intel_texture_image(newImage); + if (!intel_image->mt) { + /* Fallback on drawing to a texture that doesn't have a miptree + * (has a border, width/height 0, etc.) */ _mesa_reference_renderbuffer(&att->Renderbuffer, NULL); _mesa_render_texture(ctx, fb, att); @@ -608,7 +611,6 @@ intel_render_texture(GLcontext * ctx, irb->Base.RefCount); /* point the renderbufer's region to the texture image region */ - intel_image = intel_texture_image(newImage); if (irb->region != intel_image->mt->region) { if (irb->region) intel_region_release(&irb->region); @@ -680,6 +682,11 @@ intel_validate_framebuffer(GLcontext *ctx, struct gl_framebuffer *fb) if (rb == NULL) continue; + if (irb == NULL) { + fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT; + continue; + } + switch (irb->texformat->MesaFormat) { case MESA_FORMAT_ARGB8888: case MESA_FORMAT_RGB565: @@ -694,74 +701,6 @@ intel_validate_framebuffer(GLcontext *ctx, struct gl_framebuffer *fb) /** - * Called from glBlitFramebuffer(). - * For now, we're doing an approximation with glCopyPixels(). - * XXX we need to bypass all the per-fragment operations, except scissor. - */ -static void -intel_blit_framebuffer(GLcontext *ctx, - GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, - GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, - GLbitfield mask, GLenum filter) -{ - const GLfloat xZoomSave = ctx->Pixel.ZoomX; - const GLfloat yZoomSave = ctx->Pixel.ZoomY; - GLsizei width, height; - GLfloat xFlip = 1.0F, yFlip = 1.0F; - - if (srcX1 < srcX0) { - GLint tmp = srcX1; - srcX1 = srcX0; - srcX0 = tmp; - xFlip = -1.0F; - } - - if (srcY1 < srcY0) { - GLint tmp = srcY1; - srcY1 = srcY0; - srcY0 = tmp; - yFlip = -1.0F; - } - - width = srcX1 - srcX0; - height = srcY1 - srcY0; - - ctx->Pixel.ZoomX = xFlip * (dstX1 - dstX0) / (srcX1 - srcY0); - ctx->Pixel.ZoomY = yFlip * (dstY1 - dstY0) / (srcY1 - srcY0); - - if (ctx->Pixel.ZoomX < 0.0F) { - dstX0 = MAX2(dstX0, dstX1); - } - else { - dstX0 = MIN2(dstX0, dstX1); - } - - if (ctx->Pixel.ZoomY < 0.0F) { - dstY0 = MAX2(dstY0, dstY1); - } - else { - dstY0 = MIN2(dstY0, dstY1); - } - - if (mask & GL_COLOR_BUFFER_BIT) { - ctx->Driver.CopyPixels(ctx, srcX0, srcY0, width, height, - dstX0, dstY0, GL_COLOR); - } - if (mask & GL_DEPTH_BUFFER_BIT) { - ctx->Driver.CopyPixels(ctx, srcX0, srcY0, width, height, - dstX0, dstY0, GL_DEPTH); - } - if (mask & GL_STENCIL_BUFFER_BIT) { - ctx->Driver.CopyPixels(ctx, srcX0, srcY0, width, height, - dstX0, dstY0, GL_STENCIL); - } - - ctx->Pixel.ZoomX = xZoomSave; - ctx->Pixel.ZoomY = yZoomSave; -} - - -/** * Do one-time context initializations related to GL_EXT_framebuffer_object. * Hook in device driver functions. */ @@ -776,5 +715,5 @@ intel_fbo_init(struct intel_context *intel) intel->ctx.Driver.FinishRenderTexture = intel_finish_render_texture; intel->ctx.Driver.ResizeBuffers = intel_resize_buffers; intel->ctx.Driver.ValidateFramebuffer = intel_validate_framebuffer; - intel->ctx.Driver.BlitFramebuffer = intel_blit_framebuffer; + intel->ctx.Driver.BlitFramebuffer = _mesa_meta_blit_framebuffer; } diff --git a/shared/intel_generatemipmap.c b/shared/intel_generatemipmap.c new file mode 100644 index 0000000..12059e1 --- /dev/null +++ b/shared/intel_generatemipmap.c @@ -0,0 +1,304 @@ +/* + * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +#include "main/glheader.h" +#include "main/enums.h" +#include "main/image.h" +#include "main/mtypes.h" +#include "main/macros.h" +#include "main/bufferobj.h" +#include "main/teximage.h" +#include "main/texenv.h" +#include "main/texobj.h" +#include "main/texstate.h" +#include "main/texparam.h" +#include "main/varray.h" +#include "main/attrib.h" +#include "main/enable.h" +#include "main/buffers.h" +#include "main/fbobject.h" +#include "main/framebuffer.h" +#include "main/renderbuffer.h" +#include "main/depth.h" +#include "main/hash.h" +#include "main/mipmap.h" +#include "main/blend.h" +#include "glapi/dispatch.h" +#include "swrast/swrast.h" + +#include "intel_screen.h" +#include "intel_context.h" +#include "intel_batchbuffer.h" +#include "intel_pixel.h" +#include "intel_tex.h" +#include "intel_mipmap_tree.h" + +static const char *intel_fp_tex2d = + "!!ARBfp1.0\n" + "TEX result.color, fragment.texcoord[0], texture[0], 2D;\n" + "END\n"; + +static GLboolean +intel_generate_mipmap_level(GLcontext *ctx, GLuint tex_name, + int level, int width, int height) +{ + struct intel_context *intel = intel_context(ctx); + GLfloat vertices[4][2]; + GLint status; + + /* Set to source from the previous level */ + _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, level - 1); + _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, level - 1); + + /* Set to draw into the current level */ + _mesa_FramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, + GL_COLOR_ATTACHMENT0_EXT, + GL_TEXTURE_2D, + tex_name, + level); + /* Choose to render to the color attachment. */ + _mesa_DrawBuffer(GL_COLOR_ATTACHMENT0_EXT); + + status = _mesa_CheckFramebufferStatusEXT (GL_FRAMEBUFFER_EXT); + if (status != GL_FRAMEBUFFER_COMPLETE_EXT) + return GL_FALSE; + + meta_set_passthrough_transform(&intel->meta); + + /* XXX: Doing it right would involve setting up the transformation to do + * 0-1 mapping or something, and not changing the vertex data. + */ + vertices[0][0] = 0; + vertices[0][1] = 0; + vertices[1][0] = width; + vertices[1][1] = 0; + vertices[2][0] = width; + vertices[2][1] = height; + vertices[3][0] = 0; + vertices[3][1] = height; + + _mesa_VertexPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), &vertices); + _mesa_Enable(GL_VERTEX_ARRAY); + meta_set_default_texrect(&intel->meta); + + _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4); + + meta_restore_texcoords(&intel->meta); + meta_restore_transform(&intel->meta); + + return GL_TRUE; +} + +static GLboolean +intel_generate_mipmap_2d(GLcontext *ctx, + GLenum target, + struct gl_texture_object *texObj) +{ + struct intel_context *intel = intel_context(ctx); + GLint old_active_texture; + int level, max_levels, start_level, end_level; + GLuint fb_name; + GLboolean success = GL_FALSE; + struct gl_framebuffer *saved_fbo = NULL; + struct gl_buffer_object *saved_array_buffer = NULL; + struct gl_buffer_object *saved_element_buffer = NULL; + + _mesa_PushAttrib(GL_ENABLE_BIT | GL_TEXTURE_BIT | + GL_CURRENT_BIT | GL_COLOR_BUFFER_BIT | + GL_DEPTH_BUFFER_BIT); + _mesa_PushClientAttrib(GL_CLIENT_VERTEX_ARRAY_BIT); + old_active_texture = ctx->Texture.CurrentUnit; + _mesa_reference_framebuffer(&saved_fbo, ctx->DrawBuffer); + + /* use default array/index buffers */ + _mesa_reference_buffer_object(ctx, &saved_array_buffer, + ctx->Array.ArrayBufferObj); + _mesa_reference_buffer_object(ctx, &ctx->Array.ArrayBufferObj, + ctx->Shared->NullBufferObj); + _mesa_reference_buffer_object(ctx, &saved_element_buffer, + ctx->Array.ElementArrayBufferObj); + _mesa_reference_buffer_object(ctx, &ctx->Array.ElementArrayBufferObj, + ctx->Shared->NullBufferObj); + + _mesa_Disable(GL_POLYGON_STIPPLE); + _mesa_Disable(GL_DEPTH_TEST); + _mesa_Disable(GL_STENCIL_TEST); + _mesa_ColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + _mesa_DepthMask(GL_FALSE); + + /* Bind the given texture to GL_TEXTURE_2D with linear filtering for our + * minification. + */ + _mesa_ActiveTextureARB(GL_TEXTURE0_ARB); + _mesa_Enable(GL_TEXTURE_2D); + _mesa_BindTexture(GL_TEXTURE_2D, texObj->Name); + _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, + GL_LINEAR_MIPMAP_NEAREST); + _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + + /* Bind the new renderbuffer to the color attachment point. */ + _mesa_GenFramebuffersEXT(1, &fb_name); + _mesa_BindFramebufferEXT(GL_FRAMEBUFFER_EXT, fb_name); + + meta_set_fragment_program(&intel->meta, &intel->meta.tex2d_fp, + intel_fp_tex2d); + meta_set_passthrough_vertex_program(&intel->meta); + + max_levels = _mesa_max_texture_levels(ctx, texObj->Target); + start_level = texObj->BaseLevel; + end_level = texObj->MaxLevel; + + /* Loop generating level+1 from level. */ + for (level = start_level; level < end_level && level < max_levels - 1; level++) { + const struct gl_texture_image *srcImage; + int width, height; + + srcImage = _mesa_select_tex_image(ctx, texObj, target, level); + if (srcImage->Border != 0) + goto fail; + + width = srcImage->Width / 2; + if (width < 1) + width = 1; + height = srcImage->Height / 2; + if (height < 1) + height = 1; + + if (width == srcImage->Width && + height == srcImage->Height) { + /* Neither _mesa_max_texture_levels nor texObj->MaxLevel are the + * maximum texture level for the object, so break out when we've gone + * over the edge. + */ + break; + } + + /* Make sure that there's space allocated for the target level. + * We could skip this if there's already space allocated and save some + * time. + */ + _mesa_TexImage2D(GL_TEXTURE_2D, level + 1, srcImage->InternalFormat, + width, height, 0, + GL_RGBA, GL_UNSIGNED_INT, NULL); + + if (!intel_generate_mipmap_level(ctx, texObj->Name, level + 1, + width, height)) + goto fail; + } + + success = GL_TRUE; + +fail: + meta_restore_fragment_program(&intel->meta); + meta_restore_vertex_program(&intel->meta); + + /* restore array/index buffers */ + _mesa_reference_buffer_object(ctx, &ctx->Array.ArrayBufferObj, + saved_array_buffer); + _mesa_reference_buffer_object(ctx, &saved_array_buffer, NULL); + _mesa_reference_buffer_object(ctx, &ctx->Array.ElementArrayBufferObj, + saved_element_buffer); + _mesa_reference_buffer_object(ctx, &saved_element_buffer, NULL); + + + _mesa_DeleteFramebuffersEXT(1, &fb_name); + _mesa_ActiveTextureARB(GL_TEXTURE0_ARB + old_active_texture); + if (saved_fbo) + _mesa_BindFramebufferEXT(GL_FRAMEBUFFER_EXT, saved_fbo->Name); + _mesa_reference_framebuffer(&saved_fbo, NULL); + _mesa_PopClientAttrib(); + _mesa_PopAttrib(); + + return success; +} + + +/** + * Generate new mipmap data from BASE+1 to BASE+p (the minimally-sized mipmap + * level). + * + * The texture object's miptree must be mapped. + * + * It would be really nice if this was just called by Mesa whenever mipmaps + * needed to be regenerated, rather than us having to remember to do so in + * each texture image modification path. + * + * This function should also include an accelerated path. + */ +void +intel_generate_mipmap(GLcontext *ctx, GLenum target, + struct gl_texture_object *texObj) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_texture_object *intelObj = intel_texture_object(texObj); + GLuint nr_faces = (intelObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1; + int face, i; + + /* HW path */ + if (target == GL_TEXTURE_2D && + ctx->Extensions.EXT_framebuffer_object && + ctx->Extensions.ARB_fragment_program && + ctx->Extensions.ARB_vertex_program) { + GLboolean success; + + /* We'll be accessing this texture using GL entrypoints, which should + * be resilient against other access to this texture. + */ + _mesa_unlock_texture(ctx, texObj); + success = intel_generate_mipmap_2d(ctx, target, texObj); + _mesa_lock_texture(ctx, texObj); + + if (success) + return; + } + + /* SW path */ + intel_tex_map_level_images(intel, intelObj, texObj->BaseLevel); + _mesa_generate_mipmap(ctx, target, texObj); + intel_tex_unmap_level_images(intel, intelObj, texObj->BaseLevel); + + /* Update the level information in our private data in the new images, since + * it didn't get set as part of a normal TexImage path. + */ + for (face = 0; face < nr_faces; face++) { + for (i = texObj->BaseLevel + 1; i < texObj->MaxLevel; i++) { + struct intel_texture_image *intelImage; + + intelImage = intel_texture_image(texObj->Image[face][i]); + if (intelImage == NULL) + break; + + intelImage->level = i; + intelImage->face = face; + /* Unreference the miptree to signal that the new Data is a bare + * pointer from mesa. + */ + intel_miptree_release(intel, &intelImage->mt); + } + } +} diff --git a/shared/intel_mipmap_tree.c b/shared/intel_mipmap_tree.c index 6e1e034..c985da5 100644 --- a/shared/intel_mipmap_tree.c +++ b/shared/intel_mipmap_tree.c @@ -57,14 +57,16 @@ intel_miptree_create_internal(struct intel_context *intel, GLuint last_level, GLuint width0, GLuint height0, - GLuint depth0, GLuint cpp, GLuint compress_byte) + GLuint depth0, GLuint cpp, GLuint compress_byte, + uint32_t tiling) { GLboolean ok; struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1); - DBG("%s target %s format %s level %d..%d\n", __FUNCTION__, + DBG("%s target %s format %s level %d..%d <-- %p\n", __FUNCTION__, _mesa_lookup_enum_by_nr(target), - _mesa_lookup_enum_by_nr(internal_format), first_level, last_level); + _mesa_lookup_enum_by_nr(internal_format), + first_level, last_level, mt); mt->target = target_to_target(target); mt->internal_format = internal_format; @@ -80,15 +82,16 @@ intel_miptree_create_internal(struct intel_context *intel, #ifdef I915 if (IS_945(intel->intelScreen->deviceID)) - ok = i945_miptree_layout(intel, mt); + ok = i945_miptree_layout(intel, mt, tiling); else - ok = i915_miptree_layout(intel, mt); + ok = i915_miptree_layout(intel, mt, tiling); #else - ok = brw_miptree_layout(intel, mt); + ok = brw_miptree_layout(intel, mt, tiling); #endif if (!ok) { free(mt); + DBG("%s not okay - returning NULL\n", __FUNCTION__); return NULL; } @@ -98,6 +101,7 @@ intel_miptree_create_internal(struct intel_context *intel, struct intel_mipmap_tree * intel_miptree_create(struct intel_context *intel, GLenum target, + GLenum base_format, GLenum internal_format, GLuint first_level, GLuint last_level, @@ -107,10 +111,23 @@ intel_miptree_create(struct intel_context *intel, GLboolean expect_accelerated_upload) { struct intel_mipmap_tree *mt; + uint32_t tiling; + + if (intel->use_texture_tiling && compress_byte == 0 && + intel->intelScreen->kernel_exec_fencing) { + if (IS_965(intel->intelScreen->deviceID) && + (base_format == GL_DEPTH_COMPONENT || + base_format == GL_DEPTH_STENCIL_EXT)) + tiling = I915_TILING_Y; + else + tiling = I915_TILING_X; + } else + tiling = I915_TILING_NONE; mt = intel_miptree_create_internal(intel, target, internal_format, first_level, last_level, width0, - height0, depth0, cpp, compress_byte); + height0, depth0, cpp, compress_byte, + tiling); /* * pitch == 0 || height == 0 indicates the null texture */ @@ -118,6 +135,7 @@ intel_miptree_create(struct intel_context *intel, return NULL; mt->region = intel_region_alloc(intel, + tiling, mt->cpp, mt->pitch, mt->total_height, @@ -147,7 +165,8 @@ intel_miptree_create_for_region(struct intel_context *intel, mt = intel_miptree_create_internal(intel, target, internal_format, first_level, last_level, region->width, region->height, 1, - region->cpp, compress_byte); + region->cpp, compress_byte, + I915_TILING_NONE); if (!mt) return mt; #if 0 @@ -185,6 +204,7 @@ intel_miptree_create_for_region(struct intel_context *intel, int intel_miptree_pitch_align (struct intel_context *intel, struct intel_mipmap_tree *mt, + uint32_t tiling, int pitch) { #ifdef I915 @@ -205,6 +225,11 @@ int intel_miptree_pitch_align (struct intel_context *intel, pitch_align = 4; } + if (tiling == I915_TILING_X) + pitch_align = 512; + else if (tiling == I915_TILING_Y) + pitch_align = 128; + pitch = ALIGN(pitch * mt->cpp, pitch_align); #ifdef I915 @@ -328,23 +353,31 @@ intel_miptree_set_level_info(struct intel_mipmap_tree *mt, } - void -intel_miptree_set_image_offset(struct intel_mipmap_tree *mt, - GLuint level, GLuint img, - GLuint x, GLuint y) +intel_miptree_set_image_offset_ex(struct intel_mipmap_tree *mt, + GLuint level, GLuint img, + GLuint x, GLuint y, + GLuint offset) { if (img == 0 && level == 0) assert(x == 0 && y == 0); assert(img < mt->level[level].nr_images); - mt->level[level].image_offset[img] = (x + y * mt->pitch) * mt->cpp; + mt->level[level].image_offset[img] = (x + y * mt->pitch) * mt->cpp + offset; DBG("%s level %d img %d pos %d,%d image_offset %x\n", __FUNCTION__, level, img, x, y, mt->level[level].image_offset[img]); } +void +intel_miptree_set_image_offset(struct intel_mipmap_tree *mt, + GLuint level, GLuint img, + GLuint x, GLuint y) +{ + intel_miptree_set_image_offset_ex(mt, level, img, x, y, 0); +} + /* Although we use the image_offset[] array to store relative offsets * to cube faces, Mesa doesn't know anything about this and expects @@ -454,11 +487,11 @@ intel_miptree_image_data(struct intel_context *intel, 0, 0, /* source x, y */ dst->level[level].width, height); /* width, height */ - src += src_image_pitch * dst->cpp; + src = (char *)src + src_image_pitch * dst->cpp; } } -extern GLuint intel_compressed_alignment(GLenum); +extern void intel_get_texture_alignment_unit(GLenum, GLuint *, GLuint *); /* Copy mipmap image between trees */ void @@ -475,20 +508,37 @@ intel_miptree_image_copy(struct intel_context *intel, const GLuint *dst_depth_offset = intel_miptree_depth_offsets(dst, level); const GLuint *src_depth_offset = intel_miptree_depth_offsets(src, level); GLuint i; + GLboolean success; if (dst->compressed) { - GLuint alignment = intel_compressed_alignment(dst->internal_format); + GLuint align_w, align_h; + + intel_get_texture_alignment_unit(dst->internal_format, &align_w, &align_h); height = (height + 3) / 4; - width = ((width + alignment - 1) & ~(alignment - 1)); + width = ALIGN(width, align_w); } for (i = 0; i < depth; i++) { - intel_region_copy(intel, - dst->region, dst_offset + dst_depth_offset[i], - 0, - 0, - src->region, src_offset + src_depth_offset[i], - 0, 0, width, height); + success = intel_region_copy(intel, + dst->region, dst_offset + dst_depth_offset[i], + 0, 0, + src->region, src_offset + src_depth_offset[i], + 0, 0, width, height, GL_COPY); + if (!success) { + GLubyte *src_ptr, *dst_ptr; + + src_ptr = intel_region_map(intel, src->region); + dst_ptr = intel_region_map(intel, dst->region); + + _mesa_copy_rect(dst_ptr + dst_offset + dst_depth_offset[i], + dst->cpp, + dst->pitch, + 0, 0, width, height, + src_ptr + src_offset + src_depth_offset[i], + src->pitch, + 0, 0); + intel_region_unmap(intel, src->region); + intel_region_unmap(intel, dst->region); + } } - } diff --git a/shared/intel_mipmap_tree.h b/shared/intel_mipmap_tree.h index 4060b9d..c890b2a 100644 --- a/shared/intel_mipmap_tree.h +++ b/shared/intel_mipmap_tree.h @@ -126,6 +126,7 @@ struct intel_mipmap_tree struct intel_mipmap_tree *intel_miptree_create(struct intel_context *intel, GLenum target, + GLenum base_format, GLenum internal_format, GLuint first_level, GLuint last_level, @@ -148,6 +149,7 @@ intel_miptree_create_for_region(struct intel_context *intel, int intel_miptree_pitch_align (struct intel_context *intel, struct intel_mipmap_tree *mt, + uint32_t tiling, int pitch); void intel_miptree_reference(struct intel_mipmap_tree **dst, @@ -194,6 +196,11 @@ void intel_miptree_set_level_info(struct intel_mipmap_tree *mt, GLuint x, GLuint y, GLuint w, GLuint h, GLuint d); +void intel_miptree_set_image_offset_ex(struct intel_mipmap_tree *mt, + GLuint level, + GLuint img, GLuint x, GLuint y, + GLuint offset); + void intel_miptree_set_image_offset(struct intel_mipmap_tree *mt, GLuint level, GLuint img, GLuint x, GLuint y); @@ -218,10 +225,13 @@ void intel_miptree_image_copy(struct intel_context *intel, /* i915_mipmap_tree.c: */ GLboolean i915_miptree_layout(struct intel_context *intel, - struct intel_mipmap_tree *mt); + struct intel_mipmap_tree *mt, + uint32_t tiling); GLboolean i945_miptree_layout(struct intel_context *intel, - struct intel_mipmap_tree *mt); + struct intel_mipmap_tree *mt, + uint32_t tiling); GLboolean brw_miptree_layout(struct intel_context *intel, - struct intel_mipmap_tree *mt); + struct intel_mipmap_tree *mt, + uint32_t tiling); #endif diff --git a/shared/intel_pixel.c b/shared/intel_pixel.c index fc0ac0b..a300141 100644 --- a/shared/intel_pixel.c +++ b/shared/intel_pixel.c @@ -27,9 +27,12 @@ #include "main/enums.h" #include "main/state.h" +#include "main/bufferobj.h" #include "main/context.h" #include "main/enable.h" #include "main/matrix.h" +#include "main/texstate.h" +#include "main/varray.h" #include "main/viewport.h" #include "swrast/swrast.h" #include "shader/arbprogram.h" @@ -174,167 +177,6 @@ intel_check_blit_format(struct intel_region * region, } void -intel_meta_set_passthrough_transform(struct intel_context *intel) -{ - GLcontext *ctx = &intel->ctx; - - intel->meta.saved_vp_x = ctx->Viewport.X; - intel->meta.saved_vp_y = ctx->Viewport.Y; - intel->meta.saved_vp_width = ctx->Viewport.Width; - intel->meta.saved_vp_height = ctx->Viewport.Height; - intel->meta.saved_matrix_mode = ctx->Transform.MatrixMode; - - intel->internal_viewport_call = GL_TRUE; - _mesa_Viewport(0, 0, ctx->DrawBuffer->Width, ctx->DrawBuffer->Height); - intel->internal_viewport_call = GL_FALSE; - - _mesa_MatrixMode(GL_PROJECTION); - _mesa_PushMatrix(); - _mesa_LoadIdentity(); - _mesa_Ortho(0, ctx->DrawBuffer->Width, 0, ctx->DrawBuffer->Height, 1, -1); - - _mesa_MatrixMode(GL_MODELVIEW); - _mesa_PushMatrix(); - _mesa_LoadIdentity(); -} - -void -intel_meta_restore_transform(struct intel_context *intel) -{ - _mesa_MatrixMode(GL_PROJECTION); - _mesa_PopMatrix(); - _mesa_MatrixMode(GL_MODELVIEW); - _mesa_PopMatrix(); - - _mesa_MatrixMode(intel->meta.saved_matrix_mode); - - intel->internal_viewport_call = GL_TRUE; - _mesa_Viewport(intel->meta.saved_vp_x, intel->meta.saved_vp_y, - intel->meta.saved_vp_width, intel->meta.saved_vp_height); - intel->internal_viewport_call = GL_FALSE; -} - -/** - * Set up a vertex program to pass through the position and first texcoord - * for pixel path. - */ -void -intel_meta_set_passthrough_vertex_program(struct intel_context *intel) -{ - GLcontext *ctx = &intel->ctx; - static const char *vp = - "!!ARBvp1.0\n" - "TEMP vertexClip;\n" - "DP4 vertexClip.x, state.matrix.mvp.row[0], vertex.position;\n" - "DP4 vertexClip.y, state.matrix.mvp.row[1], vertex.position;\n" - "DP4 vertexClip.z, state.matrix.mvp.row[2], vertex.position;\n" - "DP4 vertexClip.w, state.matrix.mvp.row[3], vertex.position;\n" - "MOV result.position, vertexClip;\n" - "MOV result.texcoord[0], vertex.texcoord[0];\n" - "MOV result.color, vertex.color;\n" - "END\n"; - - assert(intel->meta.saved_vp == NULL); - - _mesa_reference_vertprog(ctx, &intel->meta.saved_vp, - ctx->VertexProgram.Current); - if (intel->meta.passthrough_vp == NULL) { - GLuint prog_name; - _mesa_GenPrograms(1, &prog_name); - _mesa_BindProgram(GL_VERTEX_PROGRAM_ARB, prog_name); - _mesa_ProgramStringARB(GL_VERTEX_PROGRAM_ARB, - GL_PROGRAM_FORMAT_ASCII_ARB, - strlen(vp), (const GLubyte *)vp); - _mesa_reference_vertprog(ctx, &intel->meta.passthrough_vp, - ctx->VertexProgram.Current); - _mesa_DeletePrograms(1, &prog_name); - } - - FLUSH_VERTICES(ctx, _NEW_PROGRAM); - _mesa_reference_vertprog(ctx, &ctx->VertexProgram.Current, - intel->meta.passthrough_vp); - ctx->Driver.BindProgram(ctx, GL_VERTEX_PROGRAM_ARB, - &intel->meta.passthrough_vp->Base); - - intel->meta.saved_vp_enable = ctx->VertexProgram.Enabled; - _mesa_Enable(GL_VERTEX_PROGRAM_ARB); -} - -/** - * Restores the previous vertex program after - * intel_meta_set_passthrough_vertex_program() - */ -void -intel_meta_restore_vertex_program(struct intel_context *intel) -{ - GLcontext *ctx = &intel->ctx; - - FLUSH_VERTICES(ctx, _NEW_PROGRAM); - _mesa_reference_vertprog(ctx, &ctx->VertexProgram.Current, - intel->meta.saved_vp); - _mesa_reference_vertprog(ctx, &intel->meta.saved_vp, NULL); - ctx->Driver.BindProgram(ctx, GL_VERTEX_PROGRAM_ARB, - &ctx->VertexProgram.Current->Base); - - if (!intel->meta.saved_vp_enable) - _mesa_Disable(GL_VERTEX_PROGRAM_ARB); -} - -/** - * Binds the given program string to GL_FRAGMENT_PROGRAM_ARB, caching the - * program object. - */ -void -intel_meta_set_fragment_program(struct intel_context *intel, - struct gl_fragment_program **prog, - const char *prog_string) -{ - GLcontext *ctx = &intel->ctx; - assert(intel->meta.saved_fp == NULL); - - _mesa_reference_fragprog(ctx, &intel->meta.saved_fp, - ctx->FragmentProgram.Current); - if (*prog == NULL) { - GLuint prog_name; - _mesa_GenPrograms(1, &prog_name); - _mesa_BindProgram(GL_FRAGMENT_PROGRAM_ARB, prog_name); - _mesa_ProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, - GL_PROGRAM_FORMAT_ASCII_ARB, - strlen(prog_string), (const GLubyte *)prog_string); - _mesa_reference_fragprog(ctx, prog, ctx->FragmentProgram.Current); - /* Note that DeletePrograms unbinds the program on us */ - _mesa_DeletePrograms(1, &prog_name); - } - - FLUSH_VERTICES(ctx, _NEW_PROGRAM); - _mesa_reference_fragprog(ctx, &ctx->FragmentProgram.Current, *prog); - ctx->Driver.BindProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, &((*prog)->Base)); - - intel->meta.saved_fp_enable = ctx->FragmentProgram.Enabled; - _mesa_Enable(GL_FRAGMENT_PROGRAM_ARB); -} - -/** - * Restores the previous fragment program after - * intel_meta_set_fragment_program() - */ -void -intel_meta_restore_fragment_program(struct intel_context *intel) -{ - GLcontext *ctx = &intel->ctx; - - FLUSH_VERTICES(ctx, _NEW_PROGRAM); - _mesa_reference_fragprog(ctx, &ctx->FragmentProgram.Current, - intel->meta.saved_fp); - _mesa_reference_fragprog(ctx, &intel->meta.saved_fp, NULL); - ctx->Driver.BindProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, - &ctx->FragmentProgram.Current->Base); - - if (!intel->meta.saved_fp_enable) - _mesa_Disable(GL_FRAGMENT_PROGRAM_ARB); -} - -void intelInitPixelFuncs(struct dd_function_table *functions) { functions->Accum = _swrast_Accum; @@ -342,18 +184,7 @@ intelInitPixelFuncs(struct dd_function_table *functions) functions->Bitmap = intelBitmap; functions->CopyPixels = intelCopyPixels; functions->DrawPixels = intelDrawPixels; -#ifdef I915 - functions->ReadPixels = intelReadPixels; -#endif } -} - -void -intel_free_pixel_state(struct intel_context *intel) -{ - GLcontext *ctx = &intel->ctx; - - _mesa_reference_vertprog(ctx, &intel->meta.passthrough_vp, NULL); - _mesa_reference_fragprog(ctx, &intel->meta.bitmap_fp, NULL); + functions->ReadPixels = intelReadPixels; } diff --git a/shared/intel_pixel.h b/shared/intel_pixel.h index cb41fa1..96a6dd1 100644 --- a/shared/intel_pixel.h +++ b/shared/intel_pixel.h @@ -31,16 +31,6 @@ #include "main/mtypes.h" void intelInitPixelFuncs(struct dd_function_table *functions); -void intel_meta_set_passthrough_transform(struct intel_context *intel); -void intel_meta_restore_transform(struct intel_context *intel); -void intel_meta_set_passthrough_vertex_program(struct intel_context *intel); -void intel_meta_restore_vertex_program(struct intel_context *intel); -void intel_meta_set_fragment_program(struct intel_context *intel, - struct gl_fragment_program **prog, - const char *prog_string); -void intel_meta_restore_fragment_program(struct intel_context *intel); -void intel_free_pixel_state(struct intel_context *intel); - GLboolean intel_check_blit_fragment_ops(GLcontext * ctx, GLboolean src_alpha_is_one); @@ -76,6 +66,4 @@ void intelBitmap(GLcontext * ctx, const struct gl_pixelstore_attrib *unpack, const GLubyte * pixels); -void intel_clear_tris(GLcontext *ctx, GLbitfield mask); - #endif diff --git a/shared/intel_pixel_bitmap.c b/shared/intel_pixel_bitmap.c index a2ccae1..b543a0b 100644 --- a/shared/intel_pixel_bitmap.c +++ b/shared/intel_pixel_bitmap.c @@ -42,6 +42,7 @@ #include "main/varray.h" #include "main/attrib.h" #include "main/enable.h" +#include "main/viewport.h" #include "shader/arbprogram.h" #include "glapi/dispatch.h" #include "swrast/swrast.h" @@ -92,19 +93,12 @@ static const GLubyte *map_pbo( GLcontext *ctx, return ADD_POINTERS(buf, bitmap); } -static GLboolean test_bit( const GLubyte *src, - GLuint bit ) +static GLboolean test_bit( const GLubyte *src, GLuint bit ) { return (src[bit/8] & (1<<(bit % 8))) ? 1 : 0; } -static GLboolean test_msb_bit(const GLubyte *src, GLuint bit) -{ - return (src[bit/8] & (1<<(7 - (bit % 8)))) ? 1 : 0; -} - -static void set_bit( GLubyte *dest, - GLuint bit ) +static void set_bit( GLubyte *dest, GLuint bit ) { dest[bit/8] |= 1 << (bit % 8); } @@ -194,7 +188,7 @@ do_blit_bitmap( GLcontext *ctx, struct gl_framebuffer *fb = ctx->DrawBuffer; GLfloat tmpColor[4]; GLubyte ubcolor[4]; - GLuint color8888, color565; + GLuint color; unsigned int num_cliprects; drm_clip_rect_t *cliprects; int x_off, y_off; @@ -232,8 +226,11 @@ do_blit_bitmap( GLcontext *ctx, UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[2], tmpColor[2]); UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[3], tmpColor[3]); - color8888 = INTEL_PACKCOLOR8888(ubcolor[0], ubcolor[1], ubcolor[2], ubcolor[3]); - color565 = INTEL_PACKCOLOR565(ubcolor[0], ubcolor[1], ubcolor[2]); + if (dst->cpp == 2) + color = INTEL_PACKCOLOR565(ubcolor[0], ubcolor[1], ubcolor[2]); + else + color = INTEL_PACKCOLOR8888(ubcolor[0], ubcolor[1], + ubcolor[2], ubcolor[3]); if (!intel_check_blit_fragment_ops(ctx, tmpColor[3] == 1.0F)) return GL_FALSE; @@ -307,21 +304,21 @@ do_blit_bitmap( GLcontext *ctx, fb->Name == 0 ? GL_TRUE : GL_FALSE) == 0) continue; - /* - */ - intelEmitImmediateColorExpandBlit( intel, - dst->cpp, - (GLubyte *)stipple, - sz, - (dst->cpp == 2) ? color565 : color8888, - dst->pitch, - dst->buffer, - 0, - dst->tiling, - box_x + px, - box_y + py, - w, h, - logic_op); + if (!intelEmitImmediateColorExpandBlit(intel, + dst->cpp, + (GLubyte *)stipple, + sz, + color, + dst->pitch, + dst->buffer, + 0, + dst->tiling, + box_x + px, + box_y + py, + w, h, + logic_op)) { + return GL_FALSE; + } } } } @@ -360,11 +357,8 @@ intel_texture_bitmap(GLcontext * ctx, "END\n"; GLuint texname; GLfloat vertices[4][4]; - GLfloat texcoords[4][2]; GLint old_active_texture; - GLubyte *unpacked_bitmap; GLubyte *a8_bitmap; - int x, y; GLfloat dst_z; /* We need a fragment program for the KIL effect */ @@ -409,6 +403,12 @@ intel_texture_bitmap(GLcontext * ctx, return GL_FALSE; } + if (ctx->Fog.Enabled) { + if (INTEL_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, "glBitmap() fallback: fog\n"); + return GL_FALSE; + } + /* Check that we can load in a texture this big. */ if (width > (1 << (ctx->Const.MaxTextureLevels - 1)) || height > (1 << (ctx->Const.MaxTextureLevels - 1))) { @@ -418,22 +418,16 @@ intel_texture_bitmap(GLcontext * ctx, return GL_FALSE; } - /* Convert the A1 bitmap to an A8 format suitable for glTexImage */ if (unpack->BufferObj->Name) { bitmap = map_pbo(ctx, width, height, unpack, bitmap); if (bitmap == NULL) return GL_TRUE; /* even though this is an error, we're done */ } - unpacked_bitmap = _mesa_unpack_bitmap(width, height, bitmap, - unpack); + + /* Convert the A1 bitmap to an A8 format suitable for glTexImage */ a8_bitmap = _mesa_calloc(width * height); - for (y = 0; y < height; y++) { - for (x = 0; x < width; x++) { - if (test_msb_bit(unpacked_bitmap, ALIGN(width, 8) * y + x)) - a8_bitmap[y * width + x] = 0xff; - } - } - _mesa_free(unpacked_bitmap); + _mesa_expand_bitmap(width, height, unpack, bitmap, a8_bitmap, width, 0xff); + if (unpack->BufferObj->Name) { /* done with PBO so unmap it now */ ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT, @@ -467,15 +461,18 @@ intel_texture_bitmap(GLcontext * ctx, GL_ALPHA, GL_UNSIGNED_BYTE, a8_bitmap); _mesa_free(a8_bitmap); - intel_meta_set_fragment_program(intel, &intel->meta.bitmap_fp, fp); + meta_set_fragment_program(&intel->meta, &intel->meta.bitmap_fp, fp); _mesa_ProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, 0, ctx->Current.RasterColor); - intel_meta_set_passthrough_vertex_program(intel); - intel_meta_set_passthrough_transform(intel); + meta_set_passthrough_vertex_program(&intel->meta); + meta_set_passthrough_transform(&intel->meta); /* convert rasterpos Z from [0,1] to NDC coord in [-1,1] */ dst_z = -1.0 + 2.0 * ctx->Current.RasterPos[2]; + /* RasterPos[2] already takes into account the DepthRange mapping. */ + _mesa_DepthRange(0.0, 1.0); + vertices[0][0] = dst_x; vertices[0][1] = dst_y; vertices[0][2] = dst_z; @@ -493,25 +490,15 @@ intel_texture_bitmap(GLcontext * ctx, vertices[3][2] = dst_z; vertices[3][3] = 1.0; - texcoords[0][0] = 0.0; - texcoords[0][1] = 0.0; - texcoords[1][0] = 1.0; - texcoords[1][1] = 0.0; - texcoords[2][0] = 1.0; - texcoords[2][1] = 1.0; - texcoords[3][0] = 0.0; - texcoords[3][1] = 1.0; - _mesa_VertexPointer(4, GL_FLOAT, 4 * sizeof(GLfloat), &vertices); - _mesa_ClientActiveTextureARB(GL_TEXTURE0); - _mesa_TexCoordPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), &texcoords); _mesa_Enable(GL_VERTEX_ARRAY); - _mesa_Enable(GL_TEXTURE_COORD_ARRAY); + meta_set_default_texrect(&intel->meta); _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4); - intel_meta_restore_transform(intel); - intel_meta_restore_fragment_program(intel); - intel_meta_restore_vertex_program(intel); + meta_restore_texcoords(&intel->meta); + meta_restore_transform(&intel->meta); + meta_restore_fragment_program(&intel->meta); + meta_restore_vertex_program(&intel->meta); _mesa_PopClientAttrib(); _mesa_Disable(GL_TEXTURE_2D); /* asserted that it was disabled at entry */ diff --git a/shared/intel_pixel_copy.c b/shared/intel_pixel_copy.c index d50dd68..07ca8f7 100644 --- a/shared/intel_pixel_copy.c +++ b/shared/intel_pixel_copy.c @@ -26,18 +26,13 @@ **************************************************************************/ #include "main/glheader.h" -#include "main/enums.h" #include "main/image.h" #include "main/state.h" #include "main/mtypes.h" -#include "main/macros.h" -#include "swrast/swrast.h" +#include "drivers/common/meta.h" -#include "intel_screen.h" #include "intel_context.h" -#include "intel_batchbuffer.h" #include "intel_buffers.h" -#include "intel_blit.h" #include "intel_regions.h" #include "intel_pixel.h" @@ -97,162 +92,6 @@ intel_check_copypixel_blit_fragment_ops(GLcontext * ctx) ctx->Color.BlendEnabled); } -#ifdef I915 -/* Doesn't work for overlapping regions. Could do a double copy or - * just fallback. - */ -static GLboolean -do_texture_copypixels(GLcontext * ctx, - GLint srcx, GLint srcy, - GLsizei width, GLsizei height, - GLint dstx, GLint dsty, GLenum type) -{ - struct intel_context *intel = intel_context(ctx); - struct intel_region *dst = intel_drawbuf_region(intel); - struct intel_region *src = copypix_src_region(intel, type); - GLenum src_format; - GLenum src_type; - - DBG("%s %d,%d %dx%d --> %d,%d\n", __FUNCTION__, - srcx, srcy, width, height, dstx, dsty); - - if (!src || !dst || type != GL_COLOR) - return GL_FALSE; - - if (ctx->_ImageTransferState) { - if (INTEL_DEBUG & DEBUG_PIXEL) - fprintf(stderr, "%s: check_color failed\n", __FUNCTION__); - return GL_FALSE; - } - - /* Can't handle overlapping regions. Don't have sufficient control - * over rasterization to pull it off in-place. Punt on these for - * now. - * - * XXX: do a copy to a temporary. - */ - if (src->buffer == dst->buffer) { - drm_clip_rect_t srcbox; - drm_clip_rect_t dstbox; - drm_clip_rect_t tmp; - - srcbox.x1 = srcx; - srcbox.y1 = srcy; - srcbox.x2 = srcx + width; - srcbox.y2 = srcy + height; - - if (ctx->Pixel.ZoomX > 0) { - dstbox.x1 = dstx; - dstbox.x2 = dstx + width * ctx->Pixel.ZoomX; - } else { - dstbox.x1 = dstx + width * ctx->Pixel.ZoomX; - dstbox.x2 = dstx; - } - if (ctx->Pixel.ZoomY > 0) { - dstbox.y1 = dsty; - dstbox.y2 = dsty + height * ctx->Pixel.ZoomY; - } else { - dstbox.y1 = dsty + height * ctx->Pixel.ZoomY; - dstbox.y2 = dsty; - } - - DBG("src %d,%d %d,%d\n", srcbox.x1, srcbox.y1, srcbox.x2, srcbox.y2); - DBG("dst %d,%d %d,%d (%dx%d) (%f,%f)\n", dstbox.x1, dstbox.y1, dstbox.x2, dstbox.y2, - width, height, ctx->Pixel.ZoomX, ctx->Pixel.ZoomY); - - if (intel_intersect_cliprects(&tmp, &srcbox, &dstbox)) { - DBG("%s: regions overlap\n", __FUNCTION__); - return GL_FALSE; - } - } - - intelFlush(&intel->ctx); - - intel->vtbl.install_meta_state(intel); - - /* Is this true? Also will need to turn depth testing on according - * to state: - */ - intel->vtbl.meta_no_stencil_write(intel); - intel->vtbl.meta_no_depth_write(intel); - - /* Set the 3d engine to draw into the destination region: - */ - intel->vtbl.meta_draw_region(intel, dst, intel->depth_region); - - intel->vtbl.meta_import_pixel_state(intel); - - if (src->cpp == 2) { - src_format = GL_RGB; - src_type = GL_UNSIGNED_SHORT_5_6_5; - } - else { - src_format = GL_BGRA; - src_type = GL_UNSIGNED_BYTE; - } - - /* Set the frontbuffer up as a large rectangular texture. - */ - if (!intel->vtbl.meta_tex_rect_source(intel, src->buffer, 0, - src->pitch, - src->height, src_format, src_type)) { - intel->vtbl.leave_meta_state(intel); - return GL_FALSE; - } - - - intel->vtbl.meta_texture_blend_replace(intel); - - LOCK_HARDWARE(intel); - - if (intel->driDrawable->numClipRects) { - __DRIdrawablePrivate *dPriv = intel->driDrawable; - - - srcy = dPriv->h - srcy - height; /* convert from gl to hardware coords */ - - srcx += dPriv->x; - srcy += dPriv->y; - - /* Clip against the source region. This is the only source - * clipping we do. XXX: Just set the texcord wrap mode to clamp - * or similar. - * - */ - if (0) { - GLint orig_x = srcx; - GLint orig_y = srcy; - - if (!_mesa_clip_to_region(0, 0, src->pitch, src->height, - &srcx, &srcy, &width, &height)) - goto out; - - dstx += srcx - orig_x; - dsty += (srcy - orig_y) * ctx->Pixel.ZoomY; - } - - /* Just use the regular cliprect mechanism... Does this need to - * even hold the lock??? - */ - intel->vtbl.meta_draw_quad(intel, - dstx, - dstx + width * ctx->Pixel.ZoomX, - dPriv->h - (dsty + height * ctx->Pixel.ZoomY), - dPriv->h - (dsty), 0, /* XXX: what z value? */ - 0x00ff00ff, - srcx, srcx + width, srcy, srcy + height); - - out: - intel->vtbl.leave_meta_state(intel); - intel_batchbuffer_emit_mi_flush(intel->batch); - } - UNLOCK_HARDWARE(intel); - - DBG("%s: success\n", __FUNCTION__); - return GL_TRUE; -} -#endif /* I915 */ - /** * CopyPixels with the blitter. Don't support zooming, pixel transfer, etc. @@ -272,6 +111,12 @@ do_blit_copypixels(GLcontext * ctx, drm_clip_rect_t *cliprects; int x_off, y_off; + if (type == GL_DEPTH || type == GL_STENCIL) { + if (INTEL_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, "glCopyPixels() fallback: GL_DEPTH || GL_STENCIL\n"); + return GL_FALSE; + } + /* Update draw buffer bounds */ _mesa_update_state(ctx); @@ -362,14 +207,16 @@ do_blit_copypixels(GLcontext * ctx, &clip_x, &clip_y, &clip_w, &clip_h)) continue; - intelEmitCopyBlit(intel, dst->cpp, - src->pitch, src->buffer, 0, src->tiling, - dst->pitch, dst->buffer, 0, dst->tiling, - clip_x + delta_x, clip_y + delta_y, /* srcx, srcy */ - clip_x, clip_y, /* dstx, dsty */ - clip_w, clip_h, - ctx->Color.ColorLogicOpEnabled ? - ctx->Color.LogicOp : GL_COPY); + if (!intel_region_copy(intel, + dst, 0, clip_x, clip_y, + src, 0, clip_x + delta_x, clip_y + delta_y, + clip_w, clip_h, + ctx->Color.ColorLogicOpEnabled ? + ctx->Color.LogicOp : GL_COPY)) { + DBG("%s: blit failure\n", __FUNCTION__); + UNLOCK_HARDWARE(intel); + return GL_FALSE; + } } } out: @@ -392,12 +239,6 @@ intelCopyPixels(GLcontext * ctx, if (do_blit_copypixels(ctx, srcx, srcy, width, height, destx, desty, type)) return; -#ifdef I915 - if (do_texture_copypixels(ctx, srcx, srcy, width, height, destx, desty, type)) - return; -#endif - - DBG("fallback to _swrast_CopyPixels\n"); - - _swrast_CopyPixels(ctx, srcx, srcy, width, height, destx, desty, type); + /* this will use swrast if needed */ + _mesa_meta_copy_pixels(ctx, srcx, srcy, width, height, destx, desty, type); } diff --git a/shared/intel_pixel_draw.c b/shared/intel_pixel_draw.c index d80069d..7fbb89f 100644 --- a/shared/intel_pixel_draw.c +++ b/shared/intel_pixel_draw.c @@ -29,8 +29,6 @@ #include "main/enums.h" #include "main/image.h" #include "main/mtypes.h" -#include "main/macros.h" -#include "main/bufferobj.h" #include "main/teximage.h" #include "main/texenv.h" #include "main/texobj.h" @@ -41,169 +39,22 @@ #include "main/enable.h" #include "main/buffers.h" #include "main/fbobject.h" -#include "main/renderbuffer.h" #include "main/depth.h" #include "main/hash.h" #include "main/blend.h" -#include "glapi/dispatch.h" #include "swrast/swrast.h" +#include "drivers/common/meta.h" -#include "intel_screen.h" #include "intel_context.h" #include "intel_batchbuffer.h" #include "intel_blit.h" #include "intel_buffers.h" #include "intel_regions.h" #include "intel_pixel.h" -#include "intel_buffer_objects.h" #include "intel_fbo.h" -static GLboolean -intel_texture_drawpixels(GLcontext * ctx, - GLint x, GLint y, - GLsizei width, GLsizei height, - GLenum format, - GLenum type, - const struct gl_pixelstore_attrib *unpack, - const GLvoid *pixels) -{ - struct intel_context *intel = intel_context(ctx); - GLuint texname; - GLfloat vertices[4][4]; - GLfloat texcoords[4][2]; - GLfloat z; - GLint old_active_texture; - GLenum internalFormat; - - /* We're going to mess with texturing with no regard to existing texture - * state, so if there is some set up we have to bail. - */ - if (ctx->Texture._EnabledUnits != 0) { - if (INTEL_DEBUG & DEBUG_FALLBACKS) - fprintf(stderr, "glDrawPixels() fallback: texturing enabled\n"); - return GL_FALSE; - } - - /* Can't do textured DrawPixels with a fragment program, unless we were - * to generate a new program that sampled our texture and put the results - * in the fragment color before the user's program started. - */ - if (ctx->FragmentProgram.Enabled) { - if (INTEL_DEBUG & DEBUG_FALLBACKS) - fprintf(stderr, "glDrawPixels() fallback: fragment program enabled\n"); - return GL_FALSE; - } - - /* We don't have a way to generate fragments with stencil values which - * will set the resulting stencil value. - */ - if (format == GL_STENCIL_INDEX || format == GL_DEPTH_STENCIL) - return GL_FALSE; - - /* Check that we can load in a texture this big. */ - if (width > (1 << (ctx->Const.MaxTextureLevels - 1)) || - height > (1 << (ctx->Const.MaxTextureLevels - 1))) { - if (INTEL_DEBUG & DEBUG_FALLBACKS) - fprintf(stderr, "glDrawPixels() fallback: bitmap too large (%dx%d)\n", - width, height); - return GL_FALSE; - } - - /* To do DEPTH_COMPONENT, we would need to change our setup to not draw to - * the color buffer, and sample the texture values into the fragment depth - * in a program. - */ - if (format == GL_DEPTH_COMPONENT) { - if (INTEL_DEBUG & DEBUG_FALLBACKS) - fprintf(stderr, - "glDrawPixels() fallback: format == GL_DEPTH_COMPONENT\n"); - return GL_FALSE; - } - - if (!ctx->Extensions.ARB_texture_non_power_of_two && - (!is_power_of_two(width) || !is_power_of_two(height))) { - if (INTEL_DEBUG & DEBUG_FALLBACKS) - fprintf(stderr, - "glDrawPixels() fallback: NPOT texture\n"); - return GL_FALSE; - } - - _mesa_PushAttrib(GL_ENABLE_BIT | GL_TEXTURE_BIT | - GL_CURRENT_BIT); - _mesa_PushClientAttrib(GL_CLIENT_VERTEX_ARRAY_BIT); - - /* XXX: pixel store stuff */ - _mesa_Disable(GL_POLYGON_STIPPLE); - - old_active_texture = ctx->Texture.CurrentUnit; - _mesa_ActiveTextureARB(GL_TEXTURE0_ARB); - _mesa_Enable(GL_TEXTURE_2D); - _mesa_GenTextures(1, &texname); - _mesa_BindTexture(GL_TEXTURE_2D, texname); - _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - _mesa_TexEnvf(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE); - if (type == GL_ALPHA) - internalFormat = GL_ALPHA; - else - internalFormat = GL_RGBA; - _mesa_TexImage2D(GL_TEXTURE_2D, 0, internalFormat, width, height, 0, format, - type, pixels); - - intel_meta_set_passthrough_transform(intel); - - /* convert rasterpos Z from [0,1] to NDC coord in [-1,1] */ - z = -1.0 + 2.0 * ctx->Current.RasterPos[2]; - - /* Create the vertex buffer based on the current raster pos. The x and y - * we're handed are ctx->Current.RasterPos[0,1] rounded to integers. - * We also apply the depth. However, the W component is already multiplied - * into ctx->Current.RasterPos[0,1,2] and we can ignore it at this point. - */ - vertices[0][0] = x; - vertices[0][1] = y; - vertices[0][2] = z; - vertices[0][3] = 1.0; - vertices[1][0] = x + width * ctx->Pixel.ZoomX; - vertices[1][1] = y; - vertices[1][2] = z; - vertices[1][3] = 1.0; - vertices[2][0] = x + width * ctx->Pixel.ZoomX; - vertices[2][1] = y + height * ctx->Pixel.ZoomY; - vertices[2][2] = z; - vertices[2][3] = 1.0; - vertices[3][0] = x; - vertices[3][1] = y + height * ctx->Pixel.ZoomY; - vertices[3][2] = z; - vertices[3][3] = 1.0; - - texcoords[0][0] = 0.0; - texcoords[0][1] = 0.0; - texcoords[1][0] = 1.0; - texcoords[1][1] = 0.0; - texcoords[2][0] = 1.0; - texcoords[2][1] = 1.0; - texcoords[3][0] = 0.0; - texcoords[3][1] = 1.0; - - _mesa_VertexPointer(4, GL_FLOAT, 4 * sizeof(GLfloat), &vertices); - _mesa_ClientActiveTextureARB(GL_TEXTURE0); - _mesa_TexCoordPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), &texcoords); - _mesa_Enable(GL_VERTEX_ARRAY); - _mesa_Enable(GL_TEXTURE_COORD_ARRAY); - _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4); - - intel_meta_restore_transform(intel); - - _mesa_ActiveTextureARB(GL_TEXTURE0_ARB + old_active_texture); - _mesa_PopClientAttrib(); - _mesa_PopAttrib(); - - _mesa_DeleteTextures(1, &texname); - - return GL_TRUE; -} +/** XXX compare perf of this vs. _mesa_meta_draw_pixels(STENCIL) */ static GLboolean intel_stencil_drawpixels(GLcontext * ctx, GLint x, GLint y, @@ -216,14 +67,14 @@ intel_stencil_drawpixels(GLcontext * ctx, struct intel_context *intel = intel_context(ctx); GLuint texname, rb_name, fb_name, old_fb_name; GLfloat vertices[4][2]; - GLfloat texcoords[4][2]; struct intel_renderbuffer *irb; struct intel_renderbuffer *depth_irb; struct gl_renderbuffer *rb; struct gl_pixelstore_attrib old_unpack; GLstencil *stencil_pixels; - int row; + int row, y1, y2; GLint old_active_texture; + GLboolean rendering_to_fbo = ctx->DrawBuffer->Name != 0; if (format != GL_STENCIL_INDEX) return GL_FALSE; @@ -358,34 +209,35 @@ intel_stencil_drawpixels(GLcontext * ctx, ctx->Unpack = old_unpack; _mesa_free(stencil_pixels); - intel_meta_set_passthrough_transform(intel); + meta_set_passthrough_transform(&intel->meta); + /* Since we're rendering to the framebuffer as if it was an FBO, + * if it's the window system we have to flip the coordinates. + */ + if (rendering_to_fbo) { + y1 = y; + y2 = y + height * ctx->Pixel.ZoomY; + } else { + y1 = irb->Base.Height - (y + height * ctx->Pixel.ZoomY); + y2 = irb->Base.Height - y; + } vertices[0][0] = x; - vertices[0][1] = y; + vertices[0][1] = y1; vertices[1][0] = x + width * ctx->Pixel.ZoomX; - vertices[1][1] = y; + vertices[1][1] = y1; vertices[2][0] = x + width * ctx->Pixel.ZoomX; - vertices[2][1] = y + height * ctx->Pixel.ZoomY; + vertices[2][1] = y2; vertices[3][0] = x; - vertices[3][1] = y + height * ctx->Pixel.ZoomY; - - texcoords[0][0] = 0.0; - texcoords[0][1] = 0.0; - texcoords[1][0] = 1.0; - texcoords[1][1] = 0.0; - texcoords[2][0] = 1.0; - texcoords[2][1] = 1.0; - texcoords[3][0] = 0.0; - texcoords[3][1] = 1.0; + vertices[3][1] = y2; _mesa_VertexPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), &vertices); - _mesa_ClientActiveTextureARB(GL_TEXTURE0); - _mesa_TexCoordPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), &texcoords); _mesa_Enable(GL_VERTEX_ARRAY); - _mesa_Enable(GL_TEXTURE_COORD_ARRAY); + meta_set_default_texrect(&intel->meta); + _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4); - intel_meta_restore_transform(intel); + meta_restore_texcoords(&intel->meta); + meta_restore_transform(&intel->meta); _mesa_ActiveTextureARB(GL_TEXTURE0_ARB + old_active_texture); _mesa_BindFramebufferEXT(GL_FRAMEBUFFER_EXT, old_fb_name); @@ -409,17 +261,25 @@ intelDrawPixels(GLcontext * ctx, const struct gl_pixelstore_attrib *unpack, const GLvoid * pixels) { - if (intel_texture_drawpixels(ctx, x, y, width, height, format, type, - unpack, pixels)) - return; - +#if 0 + /* XXX this function doesn't seem to work reliably even when all + * the pre-requisite conditions are met. + * Note that this function is never hit with conform. + * Fall back to swrast because even the _mesa_meta_draw_pixels() approach + * isn't working because of an apparent stencil bug. + */ if (intel_stencil_drawpixels(ctx, x, y, width, height, format, type, unpack, pixels)) return; +#else + (void) intel_stencil_drawpixels; /* silence warning */ + if (format == GL_STENCIL_INDEX) { + _swrast_DrawPixels(ctx, x, y, width, height, format, type, + unpack, pixels); + return; + } +#endif - if (INTEL_DEBUG & DEBUG_PIXEL) - _mesa_printf("%s: fallback to swrast\n", __FUNCTION__); - - _swrast_DrawPixels(ctx, x, y, width, height, format, type, - unpack, pixels); + _mesa_meta_draw_pixels(ctx, x, y, width, height, format, type, + unpack, pixels); } diff --git a/i915/intel_pixel_read.c b/shared/intel_pixel_read.c index 56087aa..8713463 100644 --- a/i915/intel_pixel_read.c +++ b/shared/intel_pixel_read.c @@ -1,8 +1,8 @@ /************************************************************************** - * + * * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. - * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including @@ -10,11 +10,11 @@ * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: - * + * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. @@ -22,7 +22,7 @@ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * + * **************************************************************************/ #include "main/glheader.h" @@ -31,6 +31,7 @@ #include "main/macros.h" #include "main/image.h" #include "main/bufferobj.h" +#include "main/state.h" #include "swrast/swrast.h" #include "intel_screen.h" @@ -45,10 +46,10 @@ /* For many applications, the new ability to pull the source buffers * back out of the GTT and then do the packing/conversion operations * in software will be as much of an improvement as trying to get the - * blitter and/or texture engine to do the work. + * blitter and/or texture engine to do the work. * * This step is gated on private backbuffers. - * + * * Obviously the frontbuffer can't be pulled back, so that is either * an argument for blit/texture readpixels, or for blitting to a * temporary and then pulling that back. @@ -179,7 +180,7 @@ do_blit_readpixels(GLcontext * ctx, if (!src) return GL_FALSE; - if (dst) { + if (pack->BufferObj->Name) { /* XXX This validation should be done by core mesa: */ if (!_mesa_validate_pbo_access(2, pack, width, height, 1, @@ -260,16 +261,19 @@ do_blit_readpixels(GLcontext * ctx, if (!intel_intersect_cliprects(&rect, &src_rect, &box[i])) continue; - intelEmitCopyBlit(intel, - src->cpp, - src->pitch, src->buffer, 0, src->tiling, - rowLength, dst_buffer, dst_offset, GL_FALSE, - rect.x1, - rect.y1, - rect.x1 - src_rect.x1, - rect.y2 - src_rect.y2, - rect.x2 - rect.x1, rect.y2 - rect.y1, - GL_COPY); + if (!intelEmitCopyBlit(intel, + src->cpp, + src->pitch, src->buffer, 0, src->tiling, + rowLength, dst_buffer, dst_offset, GL_FALSE, + rect.x1, + rect.y1, + rect.x1 - src_rect.x1, + rect.y2 - src_rect.y2, + rect.x2 - rect.x1, rect.y2 - rect.y1, + GL_COPY)) { + UNLOCK_HARDWARE(intel); + return GL_FALSE; + } } } UNLOCK_HARDWARE(intel); @@ -291,6 +295,7 @@ intelReadPixels(GLcontext * ctx, intelFlush(ctx); +#ifdef I915 if (do_blit_readpixels (ctx, x, y, width, height, format, type, pack, pixels)) return; @@ -298,9 +303,22 @@ intelReadPixels(GLcontext * ctx, if (do_texture_readpixels (ctx, x, y, width, height, format, type, pack, pixels)) return; +#else + (void)do_blit_readpixels; + (void)do_texture_readpixels; +#endif if (INTEL_DEBUG & DEBUG_PIXEL) _mesa_printf("%s: fallback to swrast\n", __FUNCTION__); + /* Update Mesa state before calling down into _swrast_ReadPixels, as + * the spans code requires the computed buffer states to be up to date, + * but _swrast_ReadPixels only updates Mesa state after setting up + * the spans code. + */ + + if (ctx->NewState) + _mesa_update_state(ctx); + _swrast_ReadPixels(ctx, x, y, width, height, format, type, pack, pixels); } diff --git a/shared/intel_reg.h b/shared/intel_reg.h index 57ac8f0..d19f1ba 100644 --- a/shared/intel_reg.h +++ b/shared/intel_reg.h @@ -189,6 +189,19 @@ #define S7_DEPTH_OFFSET_CONST_MASK ~0 +/* p143 */ +#define _3DSTATE_BUF_INFO_CMD (CMD_3D | (0x1d<<24) | (0x8e<<16) | 1) +/* Dword 1 */ +#define BUF_3D_ID_COLOR_BACK (0x3<<24) +#define BUF_3D_ID_DEPTH (0x7<<24) +#define BUF_3D_USE_FENCE (1<<23) +#define BUF_3D_TILED_SURFACE (1<<22) +#define BUF_3D_TILE_WALK_X 0 +#define BUF_3D_TILE_WALK_Y (1<<21) +#define BUF_3D_PITCH(x) (((x)/4)<<2) +/* Dword 2 */ +#define BUF_3D_ADDR(x) ((x) & ~0x3) + /* Primitive dispatch on 830-945 */ #define _3DPRIMITIVE (CMD_3D | (0x1f << 24)) #define PRIM_INDIRECT (1<<23) diff --git a/shared/intel_regions.c b/shared/intel_regions.c index 0aa5b8c..a86c66a 100644 --- a/shared/intel_regions.c +++ b/shared/intel_regions.c @@ -52,17 +52,77 @@ #define FILE_DEBUG_FLAG DEBUG_REGION +/* This should be set to the maximum backtrace size desired. + * Set it to 0 to disable backtrace debugging. + */ +#define DEBUG_BACKTRACE_SIZE 0 + +#if DEBUG_BACKTRACE_SIZE == 0 +/* Use the standard debug output */ +#define _DBG(...) DBG(__VA_ARGS__) +#else +/* Use backtracing debug output */ +#define _DBG(...) {debug_backtrace(); DBG(__VA_ARGS__);} + +/* Backtracing debug support */ +#include <execinfo.h> + +static void +debug_backtrace(void) +{ + void *trace[DEBUG_BACKTRACE_SIZE]; + char **strings = NULL; + int traceSize; + register int i; + + traceSize = backtrace(trace, DEBUG_BACKTRACE_SIZE); + strings = backtrace_symbols(trace, traceSize); + if (strings == NULL) { + DBG("no backtrace:"); + return; + } + + /* Spit out all the strings with a colon separator. Ignore + * the first, since we don't really care about the call + * to debug_backtrace() itself. Skip until the final "/" in + * the trace to avoid really long lines. + */ + for (i = 1; i < traceSize; i++) { + char *p = strings[i], *slash = strings[i]; + while (*p) { + if (*p++ == '/') { + slash = p; + } + } + + DBG("%s:", slash); + } + + /* Free up the memory, and we're done */ + free(strings); +} + +#endif + + + /* XXX: Thread safety? */ GLubyte * intel_region_map(struct intel_context *intel, struct intel_region *region) { - DBG("%s\n", __FUNCTION__); + intelFlush(&intel->ctx); + + _DBG("%s %p\n", __FUNCTION__, region); if (!region->map_refcount++) { if (region->pbo) intel_region_cow(intel, region); - dri_bo_map(region->buffer, GL_TRUE); + if (region->tiling != I915_TILING_NONE && + intel->intelScreen->kernel_exec_fencing) + drm_intel_gem_bo_map_gtt(region->buffer); + else + dri_bo_map(region->buffer, GL_TRUE); region->map = region->buffer->virtual; } @@ -72,9 +132,13 @@ intel_region_map(struct intel_context *intel, struct intel_region *region) void intel_region_unmap(struct intel_context *intel, struct intel_region *region) { - DBG("%s\n", __FUNCTION__); + _DBG("%s %p\n", __FUNCTION__, region); if (!--region->map_refcount) { - dri_bo_unmap(region->buffer); + if (region->tiling != I915_TILING_NONE && + intel->intelScreen->kernel_exec_fencing) + drm_intel_gem_bo_unmap_gtt(region->buffer); + else + dri_bo_unmap(region->buffer); region->map = NULL; } } @@ -87,10 +151,10 @@ intel_region_alloc_internal(struct intel_context *intel, { struct intel_region *region; - DBG("%s\n", __FUNCTION__); - - if (buffer == NULL) + if (buffer == NULL) { + _DBG("%s <-- NULL\n", __FUNCTION__); return NULL; + } region = calloc(sizeof(*region), 1); region->cpp = cpp; @@ -104,15 +168,40 @@ intel_region_alloc_internal(struct intel_context *intel, region->tiling = I915_TILING_NONE; region->bit_6_swizzle = I915_BIT_6_SWIZZLE_NONE; + _DBG("%s <-- %p\n", __FUNCTION__, region); return region; } struct intel_region * intel_region_alloc(struct intel_context *intel, + uint32_t tiling, GLuint cpp, GLuint width, GLuint height, GLuint pitch, GLboolean expect_accelerated_upload) { dri_bo *buffer; + struct intel_region *region; + + /* If we're tiled, our allocations are in 8 or 32-row blocks, so + * failure to align our height means that we won't allocate enough pages. + * + * If we're untiled, we still have to align to 2 rows high because the + * data port accesses 2x2 blocks even if the bottom row isn't to be + * rendered, so failure to align means we could walk off the end of the + * GTT and fault. + */ + if (tiling == I915_TILING_X) + height = ALIGN(height, 8); + else if (tiling == I915_TILING_Y) + height = ALIGN(height, 32); + else + height = ALIGN(height, 2); + + /* If we're untiled, we have to align to 2 rows high because the + * data port accesses 2x2 blocks even if the bottom row isn't to be + * rendered, so failure to align means we could walk off the end of the + * GTT and fault. + */ + height = ALIGN(height, 2); if (expect_accelerated_upload) { buffer = drm_intel_bo_alloc_for_render(intel->bufmgr, "region", @@ -122,7 +211,16 @@ intel_region_alloc(struct intel_context *intel, pitch * cpp * height, 64); } - return intel_region_alloc_internal(intel, cpp, width, height, pitch, buffer); + region = intel_region_alloc_internal(intel, cpp, width, height, + pitch, buffer); + + if (tiling != I915_TILING_NONE) { + assert(((pitch * cpp) & 127) == 0); + drm_intel_bo_set_tiling(buffer, &tiling, pitch * cpp); + drm_intel_bo_get_tiling(buffer, ®ion->tiling, ®ion->bit_6_swizzle); + } + + return region; } struct intel_region * @@ -158,7 +256,7 @@ void intel_region_reference(struct intel_region **dst, struct intel_region *src) { if (src) - DBG("%s %p %d\n", __FUNCTION__, src, src->refcount); + _DBG("%s %p %d\n", __FUNCTION__, src, src->refcount); assert(*dst == NULL); if (src) { @@ -172,10 +270,12 @@ intel_region_release(struct intel_region **region_handle) { struct intel_region *region = *region_handle; - if (region == NULL) + if (region == NULL) { + _DBG("%s NULL\n", __FUNCTION__); return; + } - DBG("%s %p %d\n", __FUNCTION__, region, region->refcount - 1); + _DBG("%s %p %d\n", __FUNCTION__, region, region->refcount - 1); ASSERT(region->refcount > 0); region->refcount--; @@ -249,9 +349,7 @@ intel_region_data(struct intel_context *intel, const void *src, GLuint src_pitch, GLuint srcx, GLuint srcy, GLuint width, GLuint height) { - GLboolean locked = GL_FALSE; - - DBG("%s\n", __FUNCTION__); + _DBG("%s\n", __FUNCTION__); if (intel == NULL) return; @@ -264,39 +362,33 @@ intel_region_data(struct intel_context *intel, intel_region_cow(intel, dst); } - if (!intel->locked) { - LOCK_HARDWARE(intel); - locked = GL_TRUE; - } - + LOCK_HARDWARE(intel); _mesa_copy_rect(intel_region_map(intel, dst) + dst_offset, dst->cpp, dst->pitch, dstx, dsty, width, height, src, src_pitch, srcx, srcy); intel_region_unmap(intel, dst); - - if (locked) - UNLOCK_HARDWARE(intel); - + UNLOCK_HARDWARE(intel); } /* Copy rectangular sub-regions. Need better logic about when to * push buffers into AGP - will currently do so whenever possible. */ -void +GLboolean intel_region_copy(struct intel_context *intel, struct intel_region *dst, GLuint dst_offset, GLuint dstx, GLuint dsty, struct intel_region *src, GLuint src_offset, - GLuint srcx, GLuint srcy, GLuint width, GLuint height) + GLuint srcx, GLuint srcy, GLuint width, GLuint height, + GLenum logicop) { - DBG("%s\n", __FUNCTION__); + _DBG("%s\n", __FUNCTION__); if (intel == NULL) - return; + return GL_FALSE; if (dst->pbo) { if (dstx == 0 && @@ -308,41 +400,12 @@ intel_region_copy(struct intel_context *intel, assert(src->cpp == dst->cpp); - intelEmitCopyBlit(intel, - dst->cpp, - src->pitch, src->buffer, src_offset, src->tiling, - dst->pitch, dst->buffer, dst_offset, dst->tiling, - srcx, srcy, dstx, dsty, width, height, - GL_COPY); -} - -/* Fill a rectangular sub-region. Need better logic about when to - * push buffers into AGP - will currently do so whenever possible. - */ -void -intel_region_fill(struct intel_context *intel, - struct intel_region *dst, - GLuint dst_offset, - GLuint dstx, GLuint dsty, - GLuint width, GLuint height, GLuint color) -{ - DBG("%s\n", __FUNCTION__); - - if (intel == NULL) - return; - - if (dst->pbo) { - if (dstx == 0 && - dsty == 0 && width == dst->pitch && height == dst->height) - intel_region_release_pbo(intel, dst); - else - intel_region_cow(intel, dst); - } - - intelEmitFillBlit(intel, - dst->cpp, - dst->pitch, dst->buffer, dst_offset, dst->tiling, - dstx, dsty, width, height, color); + return intelEmitCopyBlit(intel, + dst->cpp, + src->pitch, src->buffer, src_offset, src->tiling, + dst->pitch, dst->buffer, dst_offset, dst->tiling, + srcx, srcy, dstx, dsty, width, height, + logicop); } /* Attach to a pbo, discarding our data. Effectively zero-copy upload @@ -353,9 +416,13 @@ intel_region_attach_pbo(struct intel_context *intel, struct intel_region *region, struct intel_buffer_object *pbo) { + dri_bo *buffer; + if (region->pbo == pbo) return; + _DBG("%s %p %p\n", __FUNCTION__, region, pbo); + /* If there is already a pbo attached, break the cow tie now. * Don't call intel_region_release_pbo() as that would * unnecessarily allocate a new buffer we would have to immediately @@ -371,10 +438,13 @@ intel_region_attach_pbo(struct intel_context *intel, region->buffer = NULL; } + /* make sure pbo has a buffer of its own */ + buffer = intel_bufferobj_buffer(intel, pbo, INTEL_WRITE_FULL); + region->pbo = pbo; region->pbo->region = region; - dri_bo_reference(pbo->buffer); - region->buffer = pbo->buffer; + dri_bo_reference(buffer); + region->buffer = buffer; } @@ -385,6 +455,7 @@ void intel_region_release_pbo(struct intel_context *intel, struct intel_region *region) { + _DBG("%s %p\n", __FUNCTION__, region); assert(region->buffer == region->pbo->buffer); region->pbo->region = NULL; region->pbo = NULL; @@ -403,34 +474,27 @@ void intel_region_cow(struct intel_context *intel, struct intel_region *region) { struct intel_buffer_object *pbo = region->pbo; - GLboolean was_locked = intel->locked; - - if (intel == NULL) - return; + GLboolean ok; intel_region_release_pbo(intel, region); assert(region->cpp * region->pitch * region->height == pbo->Base.Size); - DBG("%s (%d bytes)\n", __FUNCTION__, pbo->Base.Size); + _DBG("%s %p (%d bytes)\n", __FUNCTION__, region, pbo->Base.Size); /* Now blit from the texture buffer to the new buffer: */ - was_locked = intel->locked; - if (!was_locked) - LOCK_HARDWARE(intel); - - intelEmitCopyBlit(intel, - region->cpp, - region->pitch, region->buffer, 0, region->tiling, - region->pitch, pbo->buffer, 0, region->tiling, - 0, 0, 0, 0, - region->pitch, region->height, - GL_COPY); - - if (!was_locked) - UNLOCK_HARDWARE(intel); + LOCK_HARDWARE(intel); + ok = intelEmitCopyBlit(intel, + region->cpp, + region->pitch, pbo->buffer, 0, region->tiling, + region->pitch, region->buffer, 0, region->tiling, + 0, 0, 0, 0, + region->pitch, region->height, + GL_COPY); + assert(ok); + UNLOCK_HARDWARE(intel); } dri_bo * @@ -459,6 +523,10 @@ intel_recreate_static(struct intel_context *intel, if (region == NULL) { region = calloc(sizeof(*region), 1); region->refcount = 1; + _DBG("%s creating new region %p\n", __FUNCTION__, region); + } + else { + _DBG("%s %p\n", __FUNCTION__, region); } if (intel->ctx.Visual.rgbBits == 24) diff --git a/shared/intel_regions.h b/shared/intel_regions.h index 45e2bf4..0d379bd 100644 --- a/shared/intel_regions.h +++ b/shared/intel_regions.h @@ -73,7 +73,8 @@ struct intel_region * copied by calling intel_reference_region(). */ struct intel_region *intel_region_alloc(struct intel_context *intel, - GLuint cpp, GLuint width, + uint32_t tiling, + GLuint cpp, GLuint width, GLuint height, GLuint pitch, GLboolean expect_accelerated_upload); @@ -109,21 +110,15 @@ void intel_region_data(struct intel_context *intel, /* Copy rectangular sub-regions */ -void intel_region_copy(struct intel_context *intel, - struct intel_region *dest, - GLuint dest_offset, - GLuint destx, GLuint desty, - struct intel_region *src, - GLuint src_offset, - GLuint srcx, GLuint srcy, GLuint width, GLuint height); - -/* Fill a rectangular sub-region - */ -void intel_region_fill(struct intel_context *intel, - struct intel_region *dest, - GLuint dest_offset, - GLuint destx, GLuint desty, - GLuint width, GLuint height, GLuint color); +GLboolean +intel_region_copy(struct intel_context *intel, + struct intel_region *dest, + GLuint dest_offset, + GLuint destx, GLuint desty, + struct intel_region *src, + GLuint src_offset, + GLuint srcx, GLuint srcy, GLuint width, GLuint height, + GLenum logicop); /* Helpers for zerocopy uploads, particularly texture image uploads: */ diff --git a/shared/intel_screen.c b/shared/intel_screen.c index 0f278b3..1b8c56e 100644 --- a/shared/intel_screen.c +++ b/shared/intel_screen.c @@ -49,6 +49,10 @@ #include "i915_drm.h" #include "i830_dri.h" +#define DRI_CONF_TEXTURE_TILING(def) \ + DRI_CONF_OPT_BEGIN(texture_tiling, bool, def) \ + DRI_CONF_DESC(en, "Enable texture tiling") \ + DRI_CONF_OPT_END \ PUBLIC const char __driConfigOptions[] = DRI_CONF_BEGIN @@ -64,6 +68,17 @@ PUBLIC const char __driConfigOptions[] = DRI_CONF_ENUM(1, "Enable reuse of all sizes of buffer objects") DRI_CONF_DESC_END DRI_CONF_OPT_END + +#ifdef I915 + DRI_CONF_TEXTURE_TILING(false) +#else + DRI_CONF_TEXTURE_TILING(true) +#endif + + DRI_CONF_OPT_BEGIN(early_z, bool, false) + DRI_CONF_DESC(en, "Enable early Z in classic mode (unstable, 945-only).") + DRI_CONF_OPT_END + DRI_CONF_SECTION_END DRI_CONF_SECTION_QUALITY DRI_CONF_FORCE_S3TC_ENABLE(false) @@ -76,7 +91,7 @@ PUBLIC const char __driConfigOptions[] = DRI_CONF_SECTION_END DRI_CONF_END; -const GLuint __driNConfigOptions = 8; +const GLuint __driNConfigOptions = 10; #ifdef USE_NEW_INTERFACE static PFNGLXCREATECONTEXTMODES create_context_modes = NULL; @@ -305,7 +320,7 @@ intelDestroyScreen(__DRIscreenPrivate * sPriv) dri_bufmgr_destroy(intelScreen->bufmgr); intelUnmapScreenRegions(intelScreen); - driDestroyOptionCache(&intelScreen->optionCache); + driDestroyOptionInfo(&intelScreen->optionCache); FREE(intelScreen); sPriv->private = NULL; @@ -587,7 +602,7 @@ intel_init_bufmgr(intelScreenPrivate *intelScreen) GLboolean gem_supported; struct drm_i915_getparam gp; __DRIscreenPrivate *spriv = intelScreen->driScrnPriv; - int num_fences; + int num_fences = 0; intelScreen->no_hw = getenv("INTEL_NO_HW") != NULL; @@ -617,10 +632,10 @@ intel_init_bufmgr(intelScreenPrivate *intelScreen) /* Otherwise, use the classic buffer manager. */ if (intelScreen->bufmgr == NULL) { if (gem_disable) { - fprintf(stderr, "GEM disabled. Using classic.\n"); + _mesa_warning(NULL, "GEM disabled. Using classic."); } else { - fprintf(stderr, "Failed to initialize GEM. " - "Falling back to classic.\n"); + _mesa_warning(NULL, + "Failed to initialize GEM. Falling back to classic."); } if (intelScreen->tex.size == 0) { diff --git a/shared/intel_span.c b/shared/intel_span.c index 34b78eb..8df4990 100644 --- a/shared/intel_span.c +++ b/shared/intel_span.c @@ -501,7 +501,7 @@ intel_map_unmap_buffers(struct intel_context *intel, GLboolean map) /** - * Prepare for softare rendering. Map current read/draw framebuffers' + * Prepare for software rendering. Map current read/draw framebuffers' * renderbuffes and all currently bound texture objects. * * Old note: Moved locking out to get reasonable span performance. @@ -526,7 +526,7 @@ intelSpanRenderStart(GLcontext * ctx) } /** - * Called when done softare rendering. Unmap the buffers we mapped in + * Called when done software rendering. Unmap the buffers we mapped in * the above function. */ void diff --git a/shared/intel_syncobj.c b/shared/intel_syncobj.c new file mode 100644 index 0000000..1286fe9 --- /dev/null +++ b/shared/intel_syncobj.c @@ -0,0 +1,132 @@ +/* + * Copyright © 2008 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +/** @file intel_syncobj.c + * + * Support for ARB_sync + * + * ARB_sync is implemented by flushing the current batchbuffer and keeping a + * reference on it. We can then check for completion or wait for compeltion + * using the normal buffer object mechanisms. This does mean that if an + * application is using many sync objects, it will emit small batchbuffers + * which may end up being a significant overhead. In other tests of removing + * gratuitous batchbuffer syncs in Mesa, it hasn't appeared to be a significant + * performance bottleneck, though. + */ + +#include "main/simple_list.h" +#include "main/imports.h" + +#include "intel_context.h" +#include "intel_batchbuffer.h" +#include "intel_reg.h" + +static struct gl_sync_object * +intel_new_sync_object(GLcontext *ctx, GLuint id) +{ + struct intel_sync_object *sync; + + sync = _mesa_calloc(sizeof(struct intel_sync_object)); + + return &sync->Base; +} + +static void +intel_delete_sync_object(GLcontext *ctx, struct gl_sync_object *s) +{ + struct intel_sync_object *sync = (struct intel_sync_object *)s; + + drm_intel_bo_unreference(sync->bo); + _mesa_free(sync); +} + +static void +intel_fence_sync(GLcontext *ctx, struct gl_sync_object *s, + GLenum condition, GLbitfield flags) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_sync_object *sync = (struct intel_sync_object *)s; + + assert(condition == GL_SYNC_GPU_COMMANDS_COMPLETE); + intel_batchbuffer_emit_mi_flush(intel->batch); + + sync->bo = intel->batch->buf; + drm_intel_bo_reference(sync->bo); + + intelFlush(ctx); +} + +/* We ignore the user-supplied timeout. This is weaselly -- we're allowed to + * round to an implementation-dependent accuracy, and right now our + * implementation "rounds" to the wait-forever value. + * + * The fix would be a new kernel function to do the GTT transition with a + * timeout. + */ +static void intel_client_wait_sync(GLcontext *ctx, struct gl_sync_object *s, + GLbitfield flags, GLuint64 timeout) +{ + struct intel_sync_object *sync = (struct intel_sync_object *)s; + + if (sync->bo) { + drm_intel_bo_wait_rendering(sync->bo); + s->StatusFlag = 1; + drm_intel_bo_unreference(sync->bo); + sync->bo = NULL; + } +} + +/* We have nothing to do for WaitSync. Our GL command stream is sequential, + * so given that the sync object has already flushed the batchbuffer, + * any batchbuffers coming after this waitsync will naturally not occur until + * the previous one is done. + */ +static void intel_server_wait_sync(GLcontext *ctx, struct gl_sync_object *s, + GLbitfield flags, GLuint64 timeout) +{ +} + +static void intel_check_sync(GLcontext *ctx, struct gl_sync_object *s) +{ + struct intel_sync_object *sync = (struct intel_sync_object *)s; + + if (sync->bo && drm_intel_bo_busy(sync->bo)) { + drm_intel_bo_unreference(sync->bo); + sync->bo = NULL; + s->StatusFlag = 1; + } +} + +void intel_init_syncobj_functions(struct dd_function_table *functions) +{ + functions->NewSyncObject = intel_new_sync_object; + functions->DeleteSyncObject = intel_delete_sync_object; + functions->FenceSync = intel_fence_sync; + functions->CheckSync = intel_check_sync; + functions->ClientWaitSync = intel_client_wait_sync; + functions->ServerWaitSync = intel_server_wait_sync; +} diff --git a/shared/intel_tex.c b/shared/intel_tex.c index ae0994b..df63f29 100644 --- a/shared/intel_tex.c +++ b/shared/intel_tex.c @@ -158,81 +158,11 @@ timed_memcpy(void *dest, const void *src, size_t n) } #endif /* DO_DEBUG */ -/** - * Generate new mipmap data from BASE+1 to BASE+p (the minimally-sized mipmap - * level). - * - * The texture object's miptree must be mapped. - * - * It would be really nice if this was just called by Mesa whenever mipmaps - * needed to be regenerated, rather than us having to remember to do so in - * each texture image modification path. - * - * This function should also include an accelerated path. - */ -void -intel_generate_mipmap(GLcontext *ctx, GLenum target, - struct gl_texture_object *texObj) -{ - struct intel_context *intel = intel_context(ctx); - struct intel_texture_object *intelObj = intel_texture_object(texObj); - GLuint nr_faces = (intelObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1; - int face, i; - - _mesa_generate_mipmap(ctx, target, texObj); - - /* Update the level information in our private data in the new images, since - * it didn't get set as part of a normal TexImage path. - */ - for (face = 0; face < nr_faces; face++) { - for (i = texObj->BaseLevel + 1; i < texObj->MaxLevel; i++) { - struct intel_texture_image *intelImage; - - intelImage = intel_texture_image(texObj->Image[face][i]); - if (intelImage == NULL) - break; - - intelImage->level = i; - intelImage->face = face; - /* Unreference the miptree to signal that the new Data is a bare - * pointer from mesa. - */ - intel_miptree_release(intel, &intelImage->mt); - } - } -} - -static void intelGenerateMipmap(GLcontext *ctx, GLenum target, struct gl_texture_object *texObj) -{ - struct intel_context *intel = intel_context(ctx); - struct intel_texture_object *intelObj = intel_texture_object(texObj); - - intel_tex_map_level_images(intel, intelObj, texObj->BaseLevel); - intel_generate_mipmap(ctx, target, texObj); - intel_tex_unmap_level_images(intel, intelObj, texObj->BaseLevel); -} - void intelInitTextureFuncs(struct dd_function_table *functions) { functions->ChooseTextureFormat = intelChooseTextureFormat; - functions->TexImage1D = intelTexImage1D; - functions->TexImage2D = intelTexImage2D; - functions->TexImage3D = intelTexImage3D; - functions->TexSubImage1D = intelTexSubImage1D; - functions->TexSubImage2D = intelTexSubImage2D; - functions->TexSubImage3D = intelTexSubImage3D; - functions->CopyTexImage1D = intelCopyTexImage1D; - functions->CopyTexImage2D = intelCopyTexImage2D; - functions->CopyTexSubImage1D = intelCopyTexSubImage1D; - functions->CopyTexSubImage2D = intelCopyTexSubImage2D; - functions->GetTexImage = intelGetTexImage; - functions->GenerateMipmap = intelGenerateMipmap; - - /* compressed texture functions */ - functions->CompressedTexImage2D = intelCompressedTexImage2D; - functions->CompressedTexSubImage2D = intelCompressedTexSubImage2D; - functions->GetCompressedTexImage = intelGetCompressedTexImage; + functions->GenerateMipmap = intel_generate_mipmap; functions->NewTextureObject = intelNewTextureObject; functions->NewTextureImage = intelNewTextureImage; diff --git a/shared/intel_tex.h b/shared/intel_tex.h index f5372d8..471aa2a 100644 --- a/shared/intel_tex.h +++ b/shared/intel_tex.h @@ -35,116 +35,17 @@ void intelInitTextureFuncs(struct dd_function_table *functions); +void intelInitTextureImageFuncs(struct dd_function_table *functions); + +void intelInitTextureSubImageFuncs(struct dd_function_table *functions); + +void intelInitTextureCopyImageFuncs(struct dd_function_table *functions); + const struct gl_texture_format *intelChooseTextureFormat(GLcontext * ctx, GLint internalFormat, GLenum format, GLenum type); - -void intelTexImage3D(GLcontext * ctx, - GLenum target, GLint level, - GLint internalFormat, - GLint width, GLint height, GLint depth, - GLint border, - GLenum format, GLenum type, const void *pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage); - -void intelTexSubImage3D(GLcontext * ctx, - GLenum target, - GLint level, - GLint xoffset, GLint yoffset, GLint zoffset, - GLsizei width, GLsizei height, GLsizei depth, - GLenum format, GLenum type, - const GLvoid * pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage); - -void intelTexImage2D(GLcontext * ctx, - GLenum target, GLint level, - GLint internalFormat, - GLint width, GLint height, GLint border, - GLenum format, GLenum type, const void *pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage); - -void intelTexSubImage2D(GLcontext * ctx, - GLenum target, - GLint level, - GLint xoffset, GLint yoffset, - GLsizei width, GLsizei height, - GLenum format, GLenum type, - const GLvoid * pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage); - -void intelTexImage1D(GLcontext * ctx, - GLenum target, GLint level, - GLint internalFormat, - GLint width, GLint border, - GLenum format, GLenum type, const void *pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage); - -void intelTexSubImage1D(GLcontext * ctx, - GLenum target, - GLint level, - GLint xoffset, - GLsizei width, - GLenum format, GLenum type, - const GLvoid * pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage); - -void intelCopyTexImage1D(GLcontext * ctx, GLenum target, GLint level, - GLenum internalFormat, - GLint x, GLint y, GLsizei width, GLint border); - -void intelCopyTexImage2D(GLcontext * ctx, GLenum target, GLint level, - GLenum internalFormat, - GLint x, GLint y, GLsizei width, GLsizei height, - GLint border); - -void intelCopyTexSubImage1D(GLcontext * ctx, GLenum target, GLint level, - GLint xoffset, GLint x, GLint y, GLsizei width); - -void intelCopyTexSubImage2D(GLcontext * ctx, GLenum target, GLint level, - GLint xoffset, GLint yoffset, - GLint x, GLint y, GLsizei width, GLsizei height); - -void intelGetTexImage(GLcontext * ctx, GLenum target, GLint level, - GLenum format, GLenum type, GLvoid * pixels, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage); - -void intelCompressedTexImage2D( GLcontext *ctx, GLenum target, GLint level, - GLint internalFormat, - GLint width, GLint height, GLint border, - GLsizei imageSize, const GLvoid *data, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage ); - -void intelCompressedTexSubImage2D(GLcontext * ctx, - GLenum target, - GLint level, - GLint xoffset, GLint yoffset, - GLsizei width, GLsizei height, - GLenum format, GLsizei imageSize, - const GLvoid * pixels, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage); - -void intelGetCompressedTexImage(GLcontext *ctx, GLenum target, GLint level, - GLvoid *pixels, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage); - void intelSetTexOffset(__DRIcontext *pDRICtx, GLint texname, unsigned long long offset, GLint depth, GLuint pitch); void intelSetTexBuffer(__DRIcontext *pDRICtx, diff --git a/shared/intel_tex_copy.c b/shared/intel_tex_copy.c index 90bbb8c..74f7f58 100644 --- a/shared/intel_tex_copy.c +++ b/shared/intel_tex_copy.c @@ -73,11 +73,9 @@ get_teximage_source(struct intel_context *intel, GLenum internalFormat) return NULL; case GL_RGBA: case GL_RGBA8: - return intel_readbuf_region(intel); case GL_RGB: - if (intel->ctx.Visual.rgbBits == 16) - return intel_readbuf_region(intel); - return NULL; + case GL_RGB8: + return intel_readbuf_region(intel); default: return NULL; } @@ -99,14 +97,24 @@ do_copy_texsubimage(struct intel_context *intel, if (!intelImage->mt || !src) { if (INTEL_DEBUG & DEBUG_FALLBACKS) - fprintf(stderr, "%s fail %p %p\n", - __FUNCTION__, intelImage->mt, src); + fprintf(stderr, "%s fail %p %p (0x%08x)\n", + __FUNCTION__, intelImage->mt, src, internalFormat); + return GL_FALSE; + } + + if (intelImage->mt->cpp != src->cpp) { + if (INTEL_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, "%s fail %d vs %d cpp\n", + __FUNCTION__, intelImage->mt->cpp, src->cpp); return GL_FALSE; } intelFlush(ctx); LOCK_HARDWARE(intel); { + drm_intel_bo *dst_bo = intel_region_buffer(intel, + intelImage->mt->region, + INTEL_WRITE_PART); GLuint image_offset = intel_miptree_image_offset(intelImage->mt, intelImage->face, intelImage->level); @@ -118,8 +126,12 @@ do_copy_texsubimage(struct intel_context *intel, dstx += x - orig_x; dsty += y - orig_y; - /* image_offset may be non-page-aligned, but that's illegal for tiling. */ - assert(intelImage->mt->region->tiling == I915_TILING_NONE); + /* Can't blit to tiled buffers with non-tile-aligned offset. */ + if (intelImage->mt->region->tiling != I915_TILING_NONE && + (image_offset & 4095) != 0) { + UNLOCK_HARDWARE(intel); + return GL_FALSE; + } if (ctx->ReadBuffer->Name == 0) { /* reading from a window, adjust x, y */ @@ -140,35 +152,35 @@ do_copy_texsubimage(struct intel_context *intel, src_pitch = src->pitch; } - intelEmitCopyBlit(intel, - intelImage->mt->cpp, - src_pitch, - src->buffer, - 0, - src->tiling, - intelImage->mt->pitch, - intelImage->mt->region->buffer, - image_offset, - intelImage->mt->region->tiling, - x, y, dstx, dsty, width, height, - GL_COPY); + if (!intelEmitCopyBlit(intel, + intelImage->mt->cpp, + src_pitch, + src->buffer, + 0, + src->tiling, + intelImage->mt->pitch, + dst_bo, + image_offset, + intelImage->mt->region->tiling, + x, y, dstx, dsty, width, height, + GL_COPY)) { + UNLOCK_HARDWARE(intel); + return GL_FALSE; + } } UNLOCK_HARDWARE(intel); /* GL_SGIS_generate_mipmap */ if (intelImage->level == texObj->BaseLevel && texObj->GenerateMipmap) { - ctx->Driver.GenerateMipmap(ctx, target, texObj); + intel_generate_mipmap(ctx, target, texObj); } return GL_TRUE; } - - - -void +static void intelCopyTexImage1D(GLcontext * ctx, GLenum target, GLint level, GLenum internalFormat, GLint x, GLint y, GLsizei width, GLint border) @@ -214,7 +226,8 @@ intelCopyTexImage1D(GLcontext * ctx, GLenum target, GLint level, width, border); } -void + +static void intelCopyTexImage2D(GLcontext * ctx, GLenum target, GLint level, GLenum internalFormat, GLint x, GLint y, GLsizei width, GLsizei height, @@ -262,7 +275,7 @@ intelCopyTexImage2D(GLcontext * ctx, GLenum target, GLint level, } -void +static void intelCopyTexSubImage1D(GLcontext * ctx, GLenum target, GLint level, GLint xoffset, GLint x, GLint y, GLsizei width) { @@ -287,8 +300,7 @@ intelCopyTexSubImage1D(GLcontext * ctx, GLenum target, GLint level, } - -void +static void intelCopyTexSubImage2D(GLcontext * ctx, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y, GLsizei width, GLsizei height) @@ -301,7 +313,6 @@ intelCopyTexSubImage2D(GLcontext * ctx, GLenum target, GLint level, _mesa_select_tex_image(ctx, texObj, target, level); GLenum internalFormat = texImage->InternalFormat; - /* Need to check texture is compatible with source format. */ @@ -316,3 +327,13 @@ intelCopyTexSubImage2D(GLcontext * ctx, GLenum target, GLint level, xoffset, yoffset, x, y, width, height); } } + + +void +intelInitTextureCopyImageFuncs(struct dd_function_table *functions) +{ + functions->CopyTexImage1D = intelCopyTexImage1D; + functions->CopyTexImage2D = intelCopyTexImage2D; + functions->CopyTexSubImage1D = intelCopyTexSubImage1D; + functions->CopyTexSubImage2D = intelCopyTexSubImage2D; +} diff --git a/shared/intel_tex_image.c b/shared/intel_tex_image.c index 5e61e9e..c5f5220 100644 --- a/shared/intel_tex_image.c +++ b/shared/intel_tex_image.c @@ -131,6 +131,7 @@ guess_and_alloc_mipmap_tree(struct intel_context *intel, comp_byte = intel_compressed_num_bytes(intelImage->base.TexFormat->MesaFormat); intelObj->mt = intel_miptree_create(intel, intelObj->base.Target, + intelImage->base._BaseFormat, intelImage->base.InternalFormat, firstLevel, lastLevel, @@ -205,7 +206,7 @@ try_pbo_upload(struct intel_context *intel, GLuint src_offset, src_stride; GLuint dst_offset, dst_stride; - if (!pbo || + if (unpack->BufferObj->Name == 0 || intel->ctx._ImageTransferState || unpack->SkipPixels || unpack->SkipRows) { DBG("%s: failure 1\n", __FUNCTION__); @@ -235,12 +236,15 @@ try_pbo_upload(struct intel_context *intel, INTEL_WRITE_FULL); - intelEmitCopyBlit(intel, - intelImage->mt->cpp, - src_stride, src_buffer, src_offset, GL_FALSE, - dst_stride, dst_buffer, dst_offset, GL_FALSE, - 0, 0, 0, 0, width, height, - GL_COPY); + if (!intelEmitCopyBlit(intel, + intelImage->mt->cpp, + src_stride, src_buffer, src_offset, GL_FALSE, + dst_stride, dst_buffer, dst_offset, GL_FALSE, + 0, 0, 0, 0, width, height, + GL_COPY)) { + UNLOCK_HARDWARE(intel); + return GL_FALSE; + } } UNLOCK_HARDWARE(intel); @@ -248,7 +252,6 @@ try_pbo_upload(struct intel_context *intel, } - static GLboolean try_pbo_zcopy(struct intel_context *intel, struct intel_texture_image *intelImage, @@ -261,7 +264,7 @@ try_pbo_zcopy(struct intel_context *intel, GLuint src_offset, src_stride; GLuint dst_offset, dst_stride; - if (!pbo || + if (unpack->BufferObj->Name == 0 || intel->ctx._ImageTransferState || unpack->SkipPixels || unpack->SkipRows) { DBG("%s: failure 1\n", __FUNCTION__); @@ -293,10 +296,6 @@ try_pbo_zcopy(struct intel_context *intel, } - - - - static void intelTexImage(GLcontext * ctx, GLint dims, @@ -307,7 +306,8 @@ intelTexImage(GLcontext * ctx, GLenum format, GLenum type, const void *pixels, const struct gl_pixelstore_attrib *unpack, struct gl_texture_object *texObj, - struct gl_texture_image *texImage, GLsizei imageSize, int compressed) + struct gl_texture_image *texImage, GLsizei imageSize, + GLboolean compressed) { struct intel_context *intel = intel_context(ctx); struct intel_texture_object *intelObj = intel_texture_object(texObj); @@ -316,7 +316,6 @@ intelTexImage(GLcontext * ctx, GLint postConvHeight = height; GLint texelBytes, sizeInBytes; GLuint dstRowStride = 0, srcRowStride = texImage->RowStride; - GLboolean needs_map; DBG("%s target %s level %d %dx%dx%d border %d\n", __FUNCTION__, _mesa_lookup_enum_by_nr(target), level, width, height, depth, border); @@ -414,7 +413,9 @@ intelTexImage(GLcontext * ctx, * a miptree, so create one just for our level and store it in the image. * It'll get moved into the object miptree at validate time. */ - intelImage->mt = intel_miptree_create(intel, target, internalFormat, + intelImage->mt = intel_miptree_create(intel, target, + intelImage->base.TexFormat->BaseFormat, + internalFormat, level, level, width, height, depth, intelImage->base.TexFormat->TexelBytes, @@ -426,7 +427,7 @@ intelTexImage(GLcontext * ctx, */ if (dims <= 2 && intelImage->mt && - intel_buffer_object(unpack->BufferObj) && + unpack->BufferObj->Name != 0 && check_pbo_format(internalFormat, format, type, intelImage->base.TexFormat)) { @@ -464,8 +465,6 @@ intelTexImage(GLcontext * ctx, DBG("pbo upload failed\n"); } - - /* intelCopyTexImage calls this function with pixels == NULL, with * the expectation that the mipmap tree will be set up but nothing * more will be done. This is where those calls return: @@ -482,15 +481,8 @@ intelTexImage(GLcontext * ctx, LOCK_HARDWARE(intel); - /* Two cases where we need a mapping of the miptree: when the user supplied - * data is mapped as well (non-PBO, memcpy upload) or when we're going to do - * (software) mipmap generation. - */ - needs_map = (pixels != NULL) || (level == texObj->BaseLevel && - texObj->GenerateMipmap); - if (intelImage->mt) { - if (needs_map) + if (pixels != NULL) texImage->Data = intel_miptree_image_map(intel, intelImage->mt, intelImage->face, @@ -547,25 +539,26 @@ intelTexImage(GLcontext * ctx, format, type, pixels, unpack)) { _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage"); } - - /* GL_SGIS_generate_mipmap */ - if (level == texObj->BaseLevel && texObj->GenerateMipmap) { - intel_generate_mipmap(ctx, target, texObj); - } } _mesa_unmap_teximage_pbo(ctx, unpack); if (intelImage->mt) { - if (needs_map) + if (pixels != NULL) intel_miptree_image_unmap(intel, intelImage->mt); texImage->Data = NULL; } UNLOCK_HARDWARE(intel); + + /* GL_SGIS_generate_mipmap */ + if (level == texObj->BaseLevel && texObj->GenerateMipmap) { + intel_generate_mipmap(ctx, target, texObj); + } } -void + +static void intelTexImage3D(GLcontext * ctx, GLenum target, GLint level, GLint internalFormat, @@ -578,11 +571,11 @@ intelTexImage3D(GLcontext * ctx, { intelTexImage(ctx, 3, target, level, internalFormat, width, height, depth, border, - format, type, pixels, unpack, texObj, texImage, 0, 0); + format, type, pixels, unpack, texObj, texImage, 0, GL_FALSE); } -void +static void intelTexImage2D(GLcontext * ctx, GLenum target, GLint level, GLint internalFormat, @@ -594,10 +587,11 @@ intelTexImage2D(GLcontext * ctx, { intelTexImage(ctx, 2, target, level, internalFormat, width, height, 1, border, - format, type, pixels, unpack, texObj, texImage, 0, 0); + format, type, pixels, unpack, texObj, texImage, 0, GL_FALSE); } -void + +static void intelTexImage1D(GLcontext * ctx, GLenum target, GLint level, GLint internalFormat, @@ -609,21 +603,24 @@ intelTexImage1D(GLcontext * ctx, { intelTexImage(ctx, 1, target, level, internalFormat, width, 1, 1, border, - format, type, pixels, unpack, texObj, texImage, 0, 0); + format, type, pixels, unpack, texObj, texImage, 0, GL_FALSE); } -void intelCompressedTexImage2D( GLcontext *ctx, GLenum target, GLint level, - GLint internalFormat, - GLint width, GLint height, GLint border, - GLsizei imageSize, const GLvoid *data, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage ) + +static void +intelCompressedTexImage2D( GLcontext *ctx, GLenum target, GLint level, + GLint internalFormat, + GLint width, GLint height, GLint border, + GLsizei imageSize, const GLvoid *data, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage ) { intelTexImage(ctx, 2, target, level, internalFormat, width, height, 1, border, - 0, 0, data, &ctx->Unpack, texObj, texImage, imageSize, 1); + 0, 0, data, &ctx->Unpack, texObj, texImage, imageSize, GL_TRUE); } + /** * Need to map texture image into memory before copying image data, * then unmap it. @@ -632,7 +629,7 @@ static void intel_get_tex_image(GLcontext * ctx, GLenum target, GLint level, GLenum format, GLenum type, GLvoid * pixels, struct gl_texture_object *texObj, - struct gl_texture_image *texImage, int compressed) + struct gl_texture_image *texImage, GLboolean compressed) { struct intel_context *intel = intel_context(ctx); struct intel_texture_image *intelImage = intel_texture_image(texImage); @@ -686,28 +683,29 @@ intel_get_tex_image(GLcontext * ctx, GLenum target, GLint level, } } -void + +static void intelGetTexImage(GLcontext * ctx, GLenum target, GLint level, GLenum format, GLenum type, GLvoid * pixels, struct gl_texture_object *texObj, struct gl_texture_image *texImage) { intel_get_tex_image(ctx, target, level, format, type, pixels, - texObj, texImage, 0); - - + texObj, texImage, GL_FALSE); } -void + +static void intelGetCompressedTexImage(GLcontext *ctx, GLenum target, GLint level, GLvoid *pixels, struct gl_texture_object *texObj, struct gl_texture_image *texImage) { intel_get_tex_image(ctx, target, level, 0, 0, pixels, - texObj, texImage, 1); + texObj, texImage, GL_TRUE); } + void intelSetTexOffset(__DRIcontext *pDRICtx, GLint texname, unsigned long long offset, GLint depth, GLuint pitch) @@ -816,3 +814,16 @@ intelSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv) */ intelSetTexBuffer2(pDRICtx, target, GLX_TEXTURE_FORMAT_RGBA_EXT, dPriv); } + + +void +intelInitTextureImageFuncs(struct dd_function_table *functions) +{ + functions->TexImage1D = intelTexImage1D; + functions->TexImage2D = intelTexImage2D; + functions->TexImage3D = intelTexImage3D; + functions->GetTexImage = intelGetTexImage; + + functions->CompressedTexImage2D = intelCompressedTexImage2D; + functions->GetCompressedTexImage = intelGetCompressedTexImage; +} diff --git a/shared/intel_tex_layout.c b/shared/intel_tex_layout.c index e6f9a41..7d69ea4 100644 --- a/shared/intel_tex_layout.c +++ b/shared/intel_tex_layout.c @@ -35,26 +35,39 @@ #include "intel_context.h" #include "main/macros.h" -GLuint intel_compressed_alignment(GLenum internalFormat) +void intel_get_texture_alignment_unit(GLenum internalFormat, GLuint *w, GLuint *h) { - GLuint alignment = 4; - switch (internalFormat) { case GL_COMPRESSED_RGB_FXT1_3DFX: case GL_COMPRESSED_RGBA_FXT1_3DFX: - alignment = 8; + *w = 8; + *h = 4; + break; + + case GL_RGB_S3TC: + case GL_RGB4_S3TC: + case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: + case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: + case GL_RGBA_S3TC: + case GL_RGBA4_S3TC: + case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: + case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: + *w = 4; + *h = 4; break; default: + *w = 4; + *h = 2; break; } - - return alignment; } -void i945_miptree_layout_2d( struct intel_context *intel, struct intel_mipmap_tree *mt ) +void i945_miptree_layout_2d( struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t tiling ) { - GLint align_h = 2, align_w = 4; + GLuint align_h = 2, align_w = 4; GLuint level; GLuint x = 0; GLuint y = 0; @@ -62,9 +75,9 @@ void i945_miptree_layout_2d( struct intel_context *intel, struct intel_mipmap_tr GLuint height = mt->height0; mt->pitch = mt->width0; + intel_get_texture_alignment_unit(mt->internal_format, &align_w, &align_h); if (mt->compressed) { - align_w = intel_compressed_alignment(mt->internal_format); mt->pitch = ALIGN(mt->width0, align_w); } @@ -92,7 +105,7 @@ void i945_miptree_layout_2d( struct intel_context *intel, struct intel_mipmap_tr /* Pitch must be a whole number of dwords, even though we * express it in texels. */ - mt->pitch = intel_miptree_pitch_align (intel, mt, mt->pitch); + mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, mt->pitch); mt->total_height = 0; for ( level = mt->first_level ; level <= mt->last_level ; level++ ) { diff --git a/shared/intel_tex_layout.h b/shared/intel_tex_layout.h index dbc90e6..c9de9b5 100644 --- a/shared/intel_tex_layout.h +++ b/shared/intel_tex_layout.h @@ -38,5 +38,7 @@ static GLuint minify( GLuint d ) return MAX2(1, d>>1); } -extern void i945_miptree_layout_2d( struct intel_context *intel, struct intel_mipmap_tree *mt ); -extern GLuint intel_compressed_alignment(GLenum); +extern void i945_miptree_layout_2d(struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t tiling); +extern void intel_get_texture_alignment_unit(GLenum, GLuint *, GLuint *); diff --git a/shared/intel_tex_subimage.c b/shared/intel_tex_subimage.c index f86de56..8903707 100644 --- a/shared/intel_tex_subimage.c +++ b/shared/intel_tex_subimage.c @@ -44,10 +44,12 @@ intelTexSubimage(GLcontext * ctx, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLint width, GLint height, GLint depth, + GLsizei imageSize, GLenum format, GLenum type, const void *pixels, const struct gl_pixelstore_attrib *packing, struct gl_texture_object *texObj, - struct gl_texture_image *texImage) + struct gl_texture_image *texImage, + GLboolean compressed) { struct intel_context *intel = intel_context(ctx); struct intel_texture_image *intelImage = intel_texture_image(texImage); @@ -59,9 +61,14 @@ intelTexSubimage(GLcontext * ctx, intelFlush(ctx); - pixels = - _mesa_validate_pbo_teximage(ctx, dims, width, height, depth, format, - type, pixels, packing, "glTexSubImage2D"); + if (compressed) + pixels = _mesa_validate_pbo_compressed_teximage(ctx, imageSize, + pixels, packing, + "glCompressedTexImage"); + else + pixels = _mesa_validate_pbo_teximage(ctx, dims, width, height, depth, + format, type, pixels, packing, + "glTexSubImage"); if (!pixels) return; @@ -90,20 +97,28 @@ intelTexSubimage(GLcontext * ctx, assert(dstRowStride); - if (!texImage->TexFormat->StoreImage(ctx, dims, texImage->_BaseFormat, - texImage->TexFormat, - texImage->Data, - xoffset, yoffset, zoffset, - dstRowStride, - texImage->ImageOffsets, - width, height, depth, - format, type, pixels, packing)) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "intelTexSubImage"); + if (compressed) { + if (intelImage->mt) { + struct intel_region *dst = intelImage->mt->region; + + _mesa_copy_rect(texImage->Data, dst->cpp, dst->pitch, + xoffset, yoffset / 4, + (width + 3) & ~3, (height + 3) / 4, + pixels, (width + 3) & ~3, 0, 0); + } else + memcpy(texImage->Data, pixels, imageSize); } - - /* GL_SGIS_generate_mipmap */ - if (level == texObj->BaseLevel && texObj->GenerateMipmap) { - intel_generate_mipmap(ctx, target, texObj); + else { + if (!texImage->TexFormat->StoreImage(ctx, dims, texImage->_BaseFormat, + texImage->TexFormat, + texImage->Data, + xoffset, yoffset, zoffset, + dstRowStride, + texImage->ImageOffsets, + width, height, depth, + format, type, pixels, packing)) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "intelTexSubImage"); + } } _mesa_unmap_teximage_pbo(ctx, packing); @@ -114,13 +129,15 @@ intelTexSubimage(GLcontext * ctx, } UNLOCK_HARDWARE(intel); -} - - + /* GL_SGIS_generate_mipmap */ + if (level == texObj->BaseLevel && texObj->GenerateMipmap) { + intel_generate_mipmap(ctx, target, texObj); + } +} -void +static void intelTexSubImage3D(GLcontext * ctx, GLenum target, GLint level, @@ -132,18 +149,15 @@ intelTexSubImage3D(GLcontext * ctx, struct gl_texture_object *texObj, struct gl_texture_image *texImage) { - intelTexSubimage(ctx, 3, target, level, xoffset, yoffset, zoffset, - width, height, depth, - format, type, pixels, packing, texObj, texImage); - + width, height, depth, 0, + format, type, pixels, packing, texObj, texImage, GL_FALSE); } - -void +static void intelTexSubImage2D(GLcontext * ctx, GLenum target, GLint level, @@ -155,17 +169,15 @@ intelTexSubImage2D(GLcontext * ctx, struct gl_texture_object *texObj, struct gl_texture_image *texImage) { - intelTexSubimage(ctx, 2, target, level, xoffset, yoffset, 0, - width, height, 1, - format, type, pixels, packing, texObj, texImage); - + width, height, 1, 0, + format, type, pixels, packing, texObj, texImage, GL_FALSE); } -void +static void intelTexSubImage1D(GLcontext * ctx, GLenum target, GLint level, @@ -180,12 +192,11 @@ intelTexSubImage1D(GLcontext * ctx, intelTexSubimage(ctx, 1, target, level, xoffset, 0, 0, - width, 1, 1, - format, type, pixels, packing, texObj, texImage); - + width, 1, 1, 0, + format, type, pixels, packing, texObj, texImage, GL_FALSE); } -void +static void intelCompressedTexSubImage2D(GLcontext * ctx, GLenum target, GLint level, @@ -196,6 +207,20 @@ intelCompressedTexSubImage2D(GLcontext * ctx, struct gl_texture_object *texObj, struct gl_texture_image *texImage) { - fprintf(stderr, "stubbed CompressedTexSubImage2D: %dx%d@%dx%d\n", - width, height, xoffset, yoffset); + intelTexSubimage(ctx, 2, + target, level, + xoffset, yoffset, 0, + width, height, 1, imageSize, + format, 0, pixels, &ctx->Unpack, texObj, texImage, GL_TRUE); +} + + + +void +intelInitTextureSubImageFuncs(struct dd_function_table *functions) +{ + functions->TexSubImage1D = intelTexSubImage1D; + functions->TexSubImage2D = intelTexSubImage2D; + functions->TexSubImage3D = intelTexSubImage3D; + functions->CompressedTexSubImage2D = intelCompressedTexSubImage2D; } diff --git a/shared/intel_tex_validate.c b/shared/intel_tex_validate.c index 05a375e..a284d54 100644 --- a/shared/intel_tex_validate.c +++ b/shared/intel_tex_validate.c @@ -199,6 +199,7 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit) if (!intelObj->mt) { intelObj->mt = intel_miptree_create(intel, intelObj->base.Target, + firstImage->base._BaseFormat, firstImage->base.InternalFormat, intelObj->firstLevel, intelObj->lastLevel, @@ -241,7 +242,7 @@ intel_tex_map_level_images(struct intel_context *intel, struct intel_texture_image *intelImage = intel_texture_image(intelObj->base.Image[face][level]); - if (intelImage->mt) { + if (intelImage && intelImage->mt) { intelImage->base.Data = intel_miptree_image_map(intel, intelImage->mt, @@ -268,7 +269,7 @@ intel_tex_unmap_level_images(struct intel_context *intel, struct intel_texture_image *intelImage = intel_texture_image(intelObj->base.Image[face][level]); - if (intelImage->mt) { + if (intelImage && intelImage->mt) { intel_miptree_image_unmap(intel, intelImage->mt); intelImage->base.Data = NULL; } |