diff options
83 files changed, 3984 insertions, 2383 deletions
diff --git a/configure.ac b/configure.ac index bd9ecb7..b73fe88 100644 --- a/configure.ac +++ b/configure.ac @@ -1,7 +1,7 @@ # Process this file with autoconf to produce a configure script AC_PREREQ(2.57) -AC_INIT([mesa-dri-i9xx], 7.4.0, [], mesa-dri-i9xx) +AC_INIT([mesa-dri-i9xx], 7.5.0, [], mesa-dri-i9xx) AM_INIT_AUTOMAKE([dist-bzip2]) @@ -16,8 +16,9 @@ AC_PROG_CC AC_HEADER_STDC PKG_CHECK_MODULES([DRM], [libdrm >= 2.4.3]) -PKG_CHECK_MODULES([DRI], [libmesadri >= 7.4.0 libmesadri < 7.5.0 - libmesadricommon >= 7.4.0 libmesadricommon < 7.5.0]) +# changes in struct gl_fragment_program. +PKG_CHECK_MODULES([DRI], [libmesadri >= 7.5.0 libmesadri < 7.6.0 + libmesadricommon >= 7.5.0 libmesadricommon < 7.6.0]) AC_OUTPUT([ Makefile diff --git a/i915/Makefile.am b/i915/Makefile.am index b28f2ba..2dc608c 100644 --- a/i915/Makefile.am +++ b/i915/Makefile.am @@ -12,7 +12,6 @@ i915_dri_la_SOURCES = \ i830_metaops.c \ i830_state.c \ i830_texblend.c \ - i830_tex.c \ i830_texstate.c \ i830_vtbl.c \ intel_render.c \ @@ -20,8 +19,8 @@ i915_dri_la_SOURCES = \ ../shared/intel_buffer_objects.c \ ../shared/intel_batchbuffer.c \ ../shared/intel_clear.c \ + ../shared/intel_extensions.c \ ../shared/intel_mipmap_tree.c \ - i915_tex_layout.c \ ../shared/intel_tex_layout.c \ ../shared/intel_tex_image.c \ ../shared/intel_tex_subimage.c \ @@ -37,7 +36,7 @@ i915_dri_la_SOURCES = \ ../shared/intel_buffers.c \ ../shared/intel_blit.c \ ../shared/intel_swapbuffers.c \ - i915_tex.c \ + i915_tex_layout.c \ i915_texstate.c \ i915_context.c \ i915_debug.c \ @@ -51,7 +50,6 @@ i915_dri_la_SOURCES = \ ../shared/intel_decode.c \ ../shared/intel_screen.c \ ../shared/intel_span.c \ - intel_state.c \ + ../shared/intel_state.c \ intel_tris.c \ - ../shared/intel_fbo.c \ - ../shared/intel_depthstencil.c + ../shared/intel_fbo.c diff --git a/i915/i830_context.c b/i915/i830_context.c index 09b1ec9..840946f 100644 --- a/i915/i830_context.c +++ b/i915/i830_context.c @@ -47,7 +47,6 @@ i830InitDriverFunctions(struct dd_function_table *functions) { intelInitDriverFunctions(functions); i830InitStateFuncs(functions); - i830InitTextureFuncs(functions); } extern const struct tnl_pipeline_stage *intel_pipeline[]; @@ -73,6 +72,8 @@ i830CreateContext(const __GLcontextModes * mesaVis, return GL_FALSE; } + _math_matrix_ctr(&intel->ViewportMatrix); + /* Initialize swrast, tnl driver tables: */ intelInitSpanFuncs(ctx); intelInitTriFuncs(ctx); @@ -97,6 +98,10 @@ i830CreateContext(const __GLcontextModes * mesaVis, ctx->Const.MaxTextureRectSize = (1 << 11); ctx->Const.MaxTextureUnits = I830_TEX_UNITS; + ctx->Const.MaxTextureMaxAnisotropy = 2.0; + + ctx->Const.MaxDrawBuffers = 1; + _tnl_init_vertices(ctx, ctx->Const.MaxArrayLockSize + 12, 18 * sizeof(GLfloat)); diff --git a/i915/i830_state.c b/i915/i830_state.c index d9cad0c..8ef6c91 100644 --- a/i915/i830_state.c +++ b/i915/i830_state.c @@ -39,6 +39,7 @@ #include "intel_screen.h" #include "intel_batchbuffer.h" #include "intel_fbo.h" +#include "intel_buffers.h" #include "i830_context.h" #include "i830_reg.h" @@ -446,6 +447,24 @@ i830DepthMask(GLcontext * ctx, GLboolean flag) i830->state.Ctx[I830_CTXREG_ENABLES_2] |= DISABLE_DEPTH_WRITE; } +/** Called from ctx->Driver.Viewport() */ +static void +i830Viewport(GLcontext * ctx, + GLint x, GLint y, GLsizei width, GLsizei height) +{ + intelCalcViewport(ctx); + + intel_viewport(ctx, x, y, width, height); +} + + +/** Called from ctx->Driver.DepthRange() */ +static void +i830DepthRange(GLcontext * ctx, GLclampd nearval, GLclampd farval) +{ + intelCalcViewport(ctx); +} + /* ============================================================= * Polygon stipple * @@ -1064,6 +1083,8 @@ i830InitStateFuncs(struct dd_function_table *functions) functions->StencilFuncSeparate = i830StencilFuncSeparate; functions->StencilMaskSeparate = i830StencilMaskSeparate; functions->StencilOpSeparate = i830StencilOpSeparate; + functions->DepthRange = i830DepthRange; + functions->Viewport = i830Viewport; } void diff --git a/i915/i830_tex.c b/i915/i830_tex.c deleted file mode 100644 index 34ac42a..0000000 --- a/i915/i830_tex.c +++ /dev/null @@ -1,100 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "main/glheader.h" -#include "main/mtypes.h" -#include "main/imports.h" -#include "main/simple_list.h" -#include "main/enums.h" -#include "main/image.h" -#include "main/mm.h" -#include "main/texstore.h" -#include "main/texformat.h" -#include "swrast/swrast.h" - -#include "texmem.h" - -#include "i830_context.h" -#include "i830_reg.h" - - - -static void -i830TexEnv(GLcontext * ctx, GLenum target, - GLenum pname, const GLfloat * param) -{ - - switch (pname) { - case GL_TEXTURE_ENV_COLOR: - case GL_TEXTURE_ENV_MODE: - case GL_COMBINE_RGB: - case GL_COMBINE_ALPHA: - case GL_SOURCE0_RGB: - case GL_SOURCE1_RGB: - case GL_SOURCE2_RGB: - case GL_SOURCE0_ALPHA: - case GL_SOURCE1_ALPHA: - case GL_SOURCE2_ALPHA: - case GL_OPERAND0_RGB: - case GL_OPERAND1_RGB: - case GL_OPERAND2_RGB: - case GL_OPERAND0_ALPHA: - case GL_OPERAND1_ALPHA: - case GL_OPERAND2_ALPHA: - case GL_RGB_SCALE: - case GL_ALPHA_SCALE: - break; - - case GL_TEXTURE_LOD_BIAS:{ - struct i830_context *i830 = i830_context(ctx); - GLuint unit = ctx->Texture.CurrentUnit; - int b = (int) ((*param) * 16.0); - if (b > 63) - b = 63; - if (b < -64) - b = -64; - I830_STATECHANGE(i830, I830_UPLOAD_TEX(unit)); - i830->lodbias_tm0s3[unit] = - ((b << TM0S3_LOD_BIAS_SHIFT) & TM0S3_LOD_BIAS_MASK); - break; - } - - default: - break; - } -} - - - - -void -i830InitTextureFuncs(struct dd_function_table *functions) -{ -/* - functions->TexEnv = i830TexEnv; -*/ -} diff --git a/i915/i830_texstate.c b/i915/i830_texstate.c index c718bb0..753c25b 100644 --- a/i915/i830_texstate.c +++ b/i915/i830_texstate.c @@ -38,7 +38,7 @@ static GLuint -translate_texture_format(GLuint mesa_format) +translate_texture_format(GLuint mesa_format, GLuint internal_format) { switch (mesa_format) { case MESA_FORMAT_L8: @@ -56,7 +56,10 @@ translate_texture_format(GLuint mesa_format) case MESA_FORMAT_ARGB4444: return MAPSURF_16BIT | MT_16BIT_ARGB4444; case MESA_FORMAT_ARGB8888: - return MAPSURF_32BIT | MT_32BIT_ARGB8888; + if (internal_format == GL_RGB) + return MAPSURF_32BIT | MT_32BIT_XRGB8888; + else + return MAPSURF_32BIT | MT_32BIT_ARGB8888; case MESA_FORMAT_YCBCR_REV: return (MAPSURF_422 | MT_422_YCRCB_NORMAL); case MESA_FORMAT_YCBCR: @@ -119,6 +122,7 @@ i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) struct gl_texture_image *firstImage; GLuint *state = i830->state.Tex[unit], format, pitch; GLint lodbias; + GLubyte border[4]; memset(state, 0, sizeof(state)); @@ -162,7 +166,8 @@ i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) 0, intelObj-> firstLevel); - format = translate_texture_format(firstImage->TexFormat->MesaFormat); + format = translate_texture_format(firstImage->TexFormat->MesaFormat, + firstImage->InternalFormat); pitch = intelObj->mt->pitch * intelObj->mt->cpp; } @@ -290,11 +295,16 @@ i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) (ws))); } + /* convert border color from float to ubyte */ + CLAMPED_FLOAT_TO_UBYTE(border[0], tObj->BorderColor[0]); + CLAMPED_FLOAT_TO_UBYTE(border[1], tObj->BorderColor[1]); + CLAMPED_FLOAT_TO_UBYTE(border[2], tObj->BorderColor[2]); + CLAMPED_FLOAT_TO_UBYTE(border[3], tObj->BorderColor[3]); - state[I830_TEXREG_TM0S4] = INTEL_PACKCOLOR8888(tObj->_BorderChan[0], - tObj->_BorderChan[1], - tObj->_BorderChan[2], - tObj->_BorderChan[3]); + state[I830_TEXREG_TM0S4] = INTEL_PACKCOLOR8888(border[0], + border[1], + border[2], + border[3]); I830_ACTIVESTATE(i830, I830_UPLOAD_TEX(unit), GL_TRUE); diff --git a/i915/i830_vtbl.c b/i915/i830_vtbl.c index 8fc8aa5..3bf02de 100644 --- a/i915/i830_vtbl.c +++ b/i915/i830_vtbl.c @@ -26,12 +26,14 @@ **************************************************************************/ #include "glapi/glapi.h" +#include "main/texformat.h" #include "i830_context.h" #include "i830_reg.h" #include "intel_batchbuffer.h" #include "intel_regions.h" #include "intel_tris.h" +#include "intel_fbo.h" #include "tnl/t_context.h" #include "tnl/t_vertex.h" @@ -422,10 +424,10 @@ i830_emit_state(struct intel_context *intel) struct i830_hw_state *state = i830->current; int i, count; GLuint dirty; - GET_CURRENT_CONTEXT(ctx); - BATCH_LOCALS; dri_bo *aper_array[3 + I830_TEX_UNITS]; int aper_count; + GET_CURRENT_CONTEXT(ctx); + BATCH_LOCALS; /* We don't hold the lock at this point, so want to make sure that * there won't be a buffer wrap between the state emits and the primitive @@ -614,6 +616,8 @@ i830_state_draw_region(struct intel_context *intel, { struct i830_context *i830 = i830_context(&intel->ctx); GLcontext *ctx = &intel->ctx; + struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[0]; + struct intel_renderbuffer *irb = intel_renderbuffer(rb); GLuint value; ASSERT(state == &i830->state || state == &i830->meta); @@ -651,13 +655,27 @@ i830_state_draw_region(struct intel_context *intel, */ value = (DSTORG_HORT_BIAS(0x8) | /* .5 */ DSTORG_VERT_BIAS(0x8) | DEPTH_IS_Z); /* .5 */ - - if (color_region && color_region->cpp == 4) { - value |= DV_PF_8888; - } - else { - value |= DV_PF_565; + + if (irb != NULL) { + switch (irb->texformat->MesaFormat) { + case MESA_FORMAT_ARGB8888: + value |= DV_PF_8888; + break; + case MESA_FORMAT_RGB565: + value |= DV_PF_565; + break; + case MESA_FORMAT_ARGB1555: + value |= DV_PF_1555; + break; + case MESA_FORMAT_ARGB4444: + value |= DV_PF_4444; + break; + default: + _mesa_problem(ctx, "Bad renderbuffer format: %d\n", + irb->texformat->MesaFormat); + } } + if (depth_region && depth_region->cpp == 4) { value |= DEPTH_FRMT_24_FIXED_8_OTHER; } diff --git a/i915/i915_context.c b/i915/i915_context.c index 3d6af38..1f9f363 100644 --- a/i915/i915_context.c +++ b/i915/i915_context.c @@ -50,16 +50,6 @@ * Mesa's Driver Functions ***************************************/ -static const struct dri_extension i915_extensions[] = { - {"GL_ARB_depth_texture", NULL}, - {"GL_ARB_fragment_program", NULL}, - {"GL_ARB_shadow", NULL}, - {"GL_ARB_texture_non_power_of_two", NULL}, - {"GL_ATI_texture_env_combine3", NULL}, - {"GL_EXT_shadow_funcs", NULL}, - {NULL, NULL} -}; - /* Override intel default. */ static void @@ -93,7 +83,6 @@ i915InitDriverFunctions(struct dd_function_table *functions) { intelInitDriverFunctions(functions); i915InitStateFunctions(functions); - i915InitTextureFuncs(functions); i915InitFragProgFuncs(functions); functions->UpdateState = i915InvalidateState; } @@ -129,6 +118,8 @@ i915CreateContext(const __GLcontextModes * mesaVis, return GL_FALSE; } + _math_matrix_ctr(&intel->ViewportMatrix); + /* Initialize swrast, tnl driver tables: */ intelInitSpanFuncs(ctx); intelInitTriFuncs(ctx); @@ -154,6 +145,8 @@ i915CreateContext(const __GLcontextModes * mesaVis, ctx->Const.MaxTextureRectSize = (1 << 11); ctx->Const.MaxTextureUnits = I915_TEX_UNITS; + ctx->Const.MaxTextureMaxAnisotropy = 4.0; + /* GL_ARB_fragment_program limits - don't think Mesa actually * validates programs against these, and in any case one ARB * instruction can translate to more than one HW instruction, so @@ -172,8 +165,7 @@ i915CreateContext(const __GLcontextModes * mesaVis, ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE; - driInitExtensions(ctx, i915_extensions, GL_FALSE); - + ctx->Const.MaxDrawBuffers = 1; _tnl_init_vertices(ctx, ctx->Const.MaxArrayLockSize + 12, 36 * sizeof(GLfloat)); diff --git a/i915/i915_fragprog.c b/i915/i915_fragprog.c index f091d60..2db10c6 100644 --- a/i915/i915_fragprog.c +++ b/i915/i915_fragprog.c @@ -162,12 +162,12 @@ src_vector(struct i915_fragment_program *p, GET_SWZ(source->Swizzle, 1), GET_SWZ(source->Swizzle, 2), GET_SWZ(source->Swizzle, 3)); - if (source->NegateBase) + if (source->Negate) src = negate(src, - GET_BIT(source->NegateBase, 0), - GET_BIT(source->NegateBase, 1), - GET_BIT(source->NegateBase, 2), - GET_BIT(source->NegateBase, 3)); + GET_BIT(source->Negate, 0), + GET_BIT(source->Negate, 1), + GET_BIT(source->Negate, 2), + GET_BIT(source->Negate, 3)); return src; } @@ -180,9 +180,9 @@ get_result_vector(struct i915_fragment_program *p, switch (inst->DstReg.File) { case PROGRAM_OUTPUT: switch (inst->DstReg.Index) { - case FRAG_RESULT_COLR: + case FRAG_RESULT_COLOR: return UREG(REG_TYPE_OC, 0); - case FRAG_RESULT_DEPR: + case FRAG_RESULT_DEPTH: p->depth_written = 1; return UREG(REG_TYPE_OD, 0); default: @@ -323,7 +323,8 @@ upload_program(struct i915_fragment_program *p) p->ctx->FragmentProgram._Current; const struct prog_instruction *inst = program->Base.Instructions; -/* _mesa_debug_fp_inst(program->Base.NumInstructions, inst); */ + if (INTEL_DEBUG & DEBUG_WM) + _mesa_print_program(&program->Base); /* Is this a parse-failed program? Ensure a valid program is * loaded, as the flagging of an error isn't sufficient to stop @@ -1049,9 +1050,6 @@ i915ProgramStringNotify(GLcontext * ctx, _mesa_append_fog_code(ctx, &p->FragProg); p->FragProg.FogOption = GL_NONE; } - - if (INTEL_DEBUG & DEBUG_STATE) - _mesa_print_program(prog); } _tnl_program_string(ctx, target, prog); diff --git a/i915/i915_state.c b/i915/i915_state.c index 8347deb..814fb59 100644 --- a/i915/i915_state.c +++ b/i915/i915_state.c @@ -321,18 +321,9 @@ intelCalcViewport(GLcontext * ctx) if (ctx->DrawBuffer->Name) { /* User created FBO */ - struct intel_renderbuffer *irb - = intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[0]); - if (irb && !irb->RenderToTexture) { - /* y=0=top */ - yScale = -1.0; - yBias = irb->Base.Height; - } - else { - /* y=0=bottom */ - yScale = 1.0; - yBias = 0.0; - } + /* y=0=bottom */ + yScale = 1.0; + yBias = 0.0; } else { /* window buffer, y=0=top */ @@ -353,7 +344,7 @@ intelCalcViewport(GLcontext * ctx) /** Called from ctx->Driver.Viewport() */ static void -intelViewport(GLcontext * ctx, +i915Viewport(GLcontext * ctx, GLint x, GLint y, GLsizei width, GLsizei height) { intelCalcViewport(ctx); @@ -364,7 +355,7 @@ intelViewport(GLcontext * ctx, /** Called from ctx->Driver.DepthRange() */ static void -intelDepthRange(GLcontext * ctx, GLclampd nearval, GLclampd farval) +i915DepthRange(GLcontext * ctx, GLclampd nearval, GLclampd farval) { intelCalcViewport(ctx); } @@ -1033,8 +1024,8 @@ i915InitStateFunctions(struct dd_function_table *functions) functions->StencilFuncSeparate = i915StencilFuncSeparate; functions->StencilMaskSeparate = i915StencilMaskSeparate; functions->StencilOpSeparate = i915StencilOpSeparate; - functions->DepthRange = intelDepthRange; - functions->Viewport = intelViewport; + functions->DepthRange = i915DepthRange; + functions->Viewport = i915Viewport; } diff --git a/i915/i915_tex_layout.c b/i915/i915_tex_layout.c index d44a2f4..7cc1c09 100644 --- a/i915/i915_tex_layout.c +++ b/i915/i915_tex_layout.c @@ -454,7 +454,10 @@ i945_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree * mt) { switch (mt->target) { case GL_TEXTURE_CUBE_MAP: - i945_miptree_layout_cube(intel, mt); + if (mt->compressed) + i945_miptree_layout_cube(intel, mt); + else + i915_miptree_layout_cube(intel, mt); break; case GL_TEXTURE_3D: i945_miptree_layout_3d(intel, mt); diff --git a/i915/i915_texstate.c b/i915/i915_texstate.c index adbb52a..a37dd7f 100644 --- a/i915/i915_texstate.c +++ b/i915/i915_texstate.c @@ -37,7 +37,8 @@ static GLuint -translate_texture_format(GLuint mesa_format, GLenum DepthMode) +translate_texture_format(GLuint mesa_format, GLuint internal_format, + GLenum DepthMode) { switch (mesa_format) { case MESA_FORMAT_L8: @@ -55,7 +56,10 @@ translate_texture_format(GLuint mesa_format, GLenum DepthMode) case MESA_FORMAT_ARGB4444: return MAPSURF_16BIT | MT_16BIT_ARGB4444; case MESA_FORMAT_ARGB8888: - return MAPSURF_32BIT | MT_32BIT_ARGB8888; + if (internal_format == GL_RGB) + return MAPSURF_32BIT | MT_32BIT_XRGB8888; + else + return MAPSURF_32BIT | MT_32BIT_ARGB8888; case MESA_FORMAT_YCBCR_REV: return (MAPSURF_422 | MT_422_YCRCB_NORMAL); case MESA_FORMAT_YCBCR: @@ -128,7 +132,8 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) struct intel_texture_object *intelObj = intel_texture_object(tObj); struct gl_texture_image *firstImage; GLuint *state = i915->state.Tex[unit], format, pitch; - GLint lodbias; + GLint lodbias, aniso = 0; + GLubyte border[4]; memset(state, 0, sizeof(state)); @@ -173,7 +178,8 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) firstLevel); format = translate_texture_format(firstImage->TexFormat->MesaFormat, - tObj->DepthMode); + firstImage->InternalFormat, + tObj->DepthMode); pitch = intelObj->mt->pitch * intelObj->mt->cpp; } @@ -224,6 +230,10 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) if (tObj->MaxAnisotropy > 1.0) { minFilt = FILTER_ANISOTROPIC; magFilt = FILTER_ANISOTROPIC; + if (tObj->MaxAnisotropy > 2.0) + aniso = SS2_MAX_ANISO_4; + else + aniso = SS2_MAX_ANISO_2; } else { switch (tObj->MagFilter) { @@ -269,7 +279,8 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) state[I915_TEXREG_SS2] |= ((minFilt << SS2_MIN_FILTER_SHIFT) | (mipFilt << SS2_MIP_FILTER_SHIFT) | - (magFilt << SS2_MAG_FILTER_SHIFT)); + (magFilt << SS2_MAG_FILTER_SHIFT) | + aniso); } { @@ -313,21 +324,26 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) state[I915_TEXREG_SS3] |= (unit << SS3_TEXTUREMAP_INDEX_SHIFT); } + /* convert border color from float to ubyte */ + CLAMPED_FLOAT_TO_UBYTE(border[0], tObj->BorderColor[0]); + CLAMPED_FLOAT_TO_UBYTE(border[1], tObj->BorderColor[1]); + CLAMPED_FLOAT_TO_UBYTE(border[2], tObj->BorderColor[2]); + CLAMPED_FLOAT_TO_UBYTE(border[3], tObj->BorderColor[3]); if (firstImage->_BaseFormat == GL_DEPTH_COMPONENT) { /* GL specs that border color for depth textures is taken from the * R channel, while the hardware uses A. Spam R into all the channels * for safety. */ - state[I915_TEXREG_SS4] = INTEL_PACKCOLOR8888(tObj->_BorderChan[0], - tObj->_BorderChan[0], - tObj->_BorderChan[0], - tObj->_BorderChan[0]); + state[I915_TEXREG_SS4] = INTEL_PACKCOLOR8888(border[0], + border[0], + border[0], + border[0]); } else { - state[I915_TEXREG_SS4] = INTEL_PACKCOLOR8888(tObj->_BorderChan[0], - tObj->_BorderChan[1], - tObj->_BorderChan[2], - tObj->_BorderChan[3]); + state[I915_TEXREG_SS4] = INTEL_PACKCOLOR8888(border[0], + border[1], + border[2], + border[3]); } diff --git a/i915/i915_vtbl.c b/i915/i915_vtbl.c index 3f6d282..1150046 100644 --- a/i915/i915_vtbl.c +++ b/i915/i915_vtbl.c @@ -32,6 +32,7 @@ #include "main/imports.h" #include "main/macros.h" #include "main/colormac.h" +#include "main/texformat.h" #include "tnl/t_context.h" #include "tnl/t_vertex.h" @@ -40,6 +41,7 @@ #include "intel_tex.h" #include "intel_regions.h" #include "intel_tris.h" +#include "intel_fbo.h" #include "i915_reg.h" #include "i915_context.h" @@ -542,6 +544,8 @@ i915_state_draw_region(struct intel_context *intel, { struct i915_context *i915 = i915_context(&intel->ctx); GLcontext *ctx = &intel->ctx; + struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[0]; + struct intel_renderbuffer *irb = intel_renderbuffer(rb); GLuint value; ASSERT(state == &i915->state || state == &i915->meta); @@ -580,12 +584,26 @@ i915_state_draw_region(struct intel_context *intel, value = (DSTORG_HORT_BIAS(0x8) | /* .5 */ DSTORG_VERT_BIAS(0x8) | /* .5 */ LOD_PRECLAMP_OGL | TEX_DEFAULT_COLOR_OGL); - if (color_region && color_region->cpp == 4) { - value |= DV_PF_8888; - } - else { - value |= (DITHER_FULL_ALWAYS | DV_PF_565); + if (irb != NULL) { + switch (irb->texformat->MesaFormat) { + case MESA_FORMAT_ARGB8888: + value |= DV_PF_8888; + break; + case MESA_FORMAT_RGB565: + value |= DV_PF_565 | DITHER_FULL_ALWAYS; + break; + case MESA_FORMAT_ARGB1555: + value |= DV_PF_1555 | DITHER_FULL_ALWAYS; + break; + case MESA_FORMAT_ARGB4444: + value |= DV_PF_4444 | DITHER_FULL_ALWAYS; + break; + default: + _mesa_problem(ctx, "Bad renderbuffer format: %d\n", + irb->texformat->MesaFormat); + } } + if (depth_region && depth_region->cpp == 4) { value |= DEPTH_FRMT_24_FIXED_8_OTHER; } diff --git a/i915/intel_tris.c b/i915/intel_tris.c index a857803..1d39278 100644 --- a/i915/intel_tris.c +++ b/i915/intel_tris.c @@ -89,8 +89,8 @@ intel_flush_inline_primitive(struct intel_context *intel) static void intel_start_inline(struct intel_context *intel, uint32_t prim) { - BATCH_LOCALS; uint32_t batch_flags = LOOP_CLIPRECTS; + BATCH_LOCALS; intel->vtbl.emit_state(intel); @@ -201,10 +201,10 @@ uint32_t *intel_get_prim_space(struct intel_context *intel, unsigned int count) /** Dispatches the accumulated primitive to the batchbuffer. */ void intel_flush_prim(struct intel_context *intel) { - BATCH_LOCALS; dri_bo *aper_array[2]; dri_bo *vb_bo; unsigned int offset, count; + BATCH_LOCALS; /* Must be called after an intel_start_prim. */ assert(intel->prim.primitive != ~0); diff --git a/i965/Makefile.am b/i965/Makefile.am index 69abe6c..5a118af 100644 --- a/i965/Makefile.am +++ b/i965/Makefile.am @@ -15,7 +15,7 @@ i965_dri_la_SOURCES = \ ../shared/intel_clear.c \ ../shared/intel_context.c \ ../shared/intel_decode.c \ - ../shared/intel_depthstencil.c \ + ../shared/intel_extensions.c \ ../shared/intel_fbo.c \ ../shared/intel_mipmap_tree.c \ ../shared/intel_regions.c \ @@ -25,7 +25,7 @@ i965_dri_la_SOURCES = \ ../shared/intel_pixel_bitmap.c \ ../shared/intel_pixel_copy.c \ ../shared/intel_pixel_draw.c \ - intel_state.c \ + ../shared/intel_state.c \ ../shared/intel_swapbuffers.c \ ../shared/intel_tex.c \ ../shared/intel_tex_copy.c \ diff --git a/i965/brw_cc.c b/i965/brw_cc.c index 8237016..c724218 100644 --- a/i965/brw_cc.c +++ b/i965/brw_cc.c @@ -88,7 +88,7 @@ cc_unit_populate_key(struct brw_context *brw, struct brw_cc_unit_key *key) memset(key, 0, sizeof(*key)); - key->stencil = ctx->Stencil.Enabled; + key->stencil = ctx->Stencil._Enabled; key->stencil_two_side = ctx->Stencil._TestTwoSide; if (key->stencil) { diff --git a/i965/brw_context.c b/i965/brw_context.c index 4357100..4dbe551 100644 --- a/i965/brw_context.c +++ b/i965/brw_context.c @@ -111,14 +111,15 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis, ctx->Const.MaxTextureImageUnits); ctx->Const.MaxVertexTextureImageUnits = 0; /* no vertex shader textures */ - /* Advertise the full hardware capabilities. The new memory - * manager should cope much better with overload situations: + /* Mesa limits textures to 4kx4k; it would be nice to fix that someday */ - ctx->Const.MaxTextureLevels = 12; + ctx->Const.MaxTextureLevels = 13; ctx->Const.Max3DTextureLevels = 9; ctx->Const.MaxCubeTextureLevels = 12; - ctx->Const.MaxTextureRectSize = (1<<11); + ctx->Const.MaxTextureRectSize = (1<<12); + ctx->Const.MaxTextureMaxAnisotropy = 16.0; + /* if conformance mode is set, swrast can handle any size AA point */ ctx->Const.MaxPointSizeAA = 255.0; diff --git a/i965/brw_context.h b/i965/brw_context.h index df90c20..577497b 100644 --- a/i965/brw_context.h +++ b/i965/brw_context.h @@ -46,7 +46,7 @@ * * CURBE - constant URB entry. An urb region (entry) used to hold * constant values which the fixed function units can be instructed to - * preload into the GRF when spawining a thread. + * preload into the GRF when spawning a thread. * * VUE - vertex URB entry. An urb entry holding a vertex and usually * a vertex header. The header contains control information and @@ -63,7 +63,7 @@ * special and may be overwritten. * * MRF - message register file. Threads communicate (and terminate) - * by sending messages. Message parameters are placed in contigous + * by sending messages. Message parameters are placed in contiguous * MRF registers. All program output is via these messages. URB * entries are populated by sending a message to the shared URB * function containing the new data, together with a control word, @@ -141,7 +141,8 @@ struct brw_context; #define BRW_NEW_BATCH 0x10000 /** brw->depth_region updated */ #define BRW_NEW_DEPTH_BUFFER 0x20000 -#define BRW_NEW_NR_SURFACES 0x40000 +#define BRW_NEW_NR_WM_SURFACES 0x40000 +#define BRW_NEW_NR_VS_SURFACES 0x80000 struct brw_state_flags { /** State update flags signalled by mesa internals */ @@ -154,19 +155,25 @@ struct brw_state_flags { GLuint cache; }; + +/** Subclass of Mesa vertex program */ struct brw_vertex_program { struct gl_vertex_program program; GLuint id; + dri_bo *const_buffer; /** Program constant buffer/surface */ + GLboolean use_const_buffer; }; - +/** Subclass of Mesa fragment program */ struct brw_fragment_program { struct gl_fragment_program program; - GLuint id; -}; - + GLuint id; /**< serial no. to identify frag progs, never re-used */ + GLboolean isGLSL; /**< really, any IF/LOOP/CONT/BREAK instructions */ + dri_bo *const_buffer; /** Program constant buffer/surface */ + GLboolean use_const_buffer; +}; /* Data about a particular attempt to compile a program. Note that @@ -182,7 +189,7 @@ struct brw_wm_prog_data { GLuint total_grf; GLuint total_scratch; - GLuint nr_params; + GLuint nr_params; /**< number of float params/constants */ GLboolean error; /* Pointer to tracked values (only valid once @@ -221,6 +228,7 @@ struct brw_vs_prog_data { GLuint urb_read_length; GLuint total_grf; GLuint outputs_written; + GLuint nr_params; /**< number of float params/constants */ GLuint inputs_read; @@ -237,8 +245,35 @@ struct brw_vs_ouput_sizes { }; +/** Number of texture sampler units */ #define BRW_MAX_TEX_UNIT 16 -#define BRW_WM_MAX_SURF BRW_MAX_TEX_UNIT + MAX_DRAW_BUFFERS + +/** + * Size of our surface binding table for the WM. + * This contains pointers to the drawing surfaces and current texture + * objects and shader constant buffers (+2). + */ +#define BRW_WM_MAX_SURF (MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 1) + +/** + * Helpers to convert drawing buffers, textures and constant buffers + * to surface binding table indexes, for WM. + */ +#define SURF_INDEX_DRAW(d) (d) +#define SURF_INDEX_FRAG_CONST_BUFFER (MAX_DRAW_BUFFERS) +#define SURF_INDEX_TEXTURE(t) (MAX_DRAW_BUFFERS + 1 + (t)) + +/** + * Size of surface binding table for the VS. + * Only one constant buffer for now. + */ +#define BRW_VS_MAX_SURF 1 + +/** + * Only a VS constant buffer + */ +#define SURF_INDEX_VERT_CONST_BUFFER 0 + enum brw_cache_id { BRW_CC_VP, @@ -418,8 +453,8 @@ struct brw_context struct brw_tracked_state **atoms; GLuint nr_atoms; - GLuint nr_draw_regions; - struct intel_region *draw_regions[MAX_DRAW_BUFFERS]; + GLuint nr_color_regions; + struct intel_region *color_regions[MAX_DRAW_BUFFERS]; struct intel_region *depth_region; /** @@ -512,8 +547,8 @@ struct brw_context /* BRW_NEW_CURBE_OFFSETS: */ struct { - GLuint wm_start; - GLuint wm_size; + GLuint wm_start; /**< pos of first wm const in CURBE buffer */ + GLuint wm_size; /**< number of float[4] consts, multiple of 16 */ GLuint clip_start; GLuint clip_size; GLuint vs_start; @@ -545,6 +580,11 @@ struct brw_context dri_bo *prog_bo; dri_bo *state_bo; + + /** Binding table of pointers to surf_bo entries */ + dri_bo *bind_bo; + dri_bo *surf_bo[BRW_VS_MAX_SURF]; + GLuint nr_surfaces; } vs; struct { @@ -576,9 +616,10 @@ struct brw_context struct brw_wm_prog_data *prog_data; struct brw_wm_compile *compile_data; - /* Input sizes, calculated from active vertex program: + /** Input sizes, calculated from active vertex program. + * One bit per fragment program input attribute. */ - GLuint input_size_masks[4]; + GLbitfield input_size_masks[4]; /** Array of surface default colors (texture border color) */ dri_bo *sdc_bo[BRW_MAX_TEX_UNIT]; @@ -587,7 +628,7 @@ struct brw_context GLuint nr_surfaces; GLuint max_threads; - dri_bo *scratch_buffer; + dri_bo *scratch_bo; GLuint sampler_count; dri_bo *sampler_bo; @@ -627,8 +668,6 @@ struct brw_context * brw_vtbl.c */ void brwInitVtbl( struct brw_context *brw ); -void brw_do_flush( struct brw_context *brw, - GLuint flags ); /*====================================================================== * brw_context.c @@ -670,7 +709,9 @@ void brwInitFragProgFuncs( struct dd_function_table *functions ); */ void brw_upload_urb_fence(struct brw_context *brw); -void brw_upload_constant_buffer_state(struct brw_context *brw); +/* brw_curbe.c + */ +void brw_upload_cs_urb_state(struct brw_context *brw); /*====================================================================== @@ -683,6 +724,32 @@ brw_context( GLcontext *ctx ) return (struct brw_context *)ctx; } +static INLINE struct brw_vertex_program * +brw_vertex_program(struct gl_vertex_program *p) +{ + return (struct brw_vertex_program *) p; +} + +static INLINE const struct brw_vertex_program * +brw_vertex_program_const(const struct gl_vertex_program *p) +{ + return (const struct brw_vertex_program *) p; +} + +static INLINE struct brw_fragment_program * +brw_fragment_program(struct gl_fragment_program *p) +{ + return (struct brw_fragment_program *) p; +} + +static INLINE const struct brw_fragment_program * +brw_fragment_program_const(const struct gl_fragment_program *p) +{ + return (const struct brw_fragment_program *) p; +} + + + #define DO_SETUP_BITS ((1<<(FRAG_ATTRIB_MAX)) - 1) #endif diff --git a/i965/brw_curbe.c b/i965/brw_curbe.c index 4eaaa5f..9197fed 100644 --- a/i965/brw_curbe.c +++ b/i965/brw_curbe.c @@ -38,23 +38,28 @@ #include "shader/prog_parameter.h" #include "shader/prog_statevars.h" #include "intel_batchbuffer.h" +#include "intel_regions.h" #include "brw_context.h" #include "brw_defines.h" #include "brw_state.h" #include "brw_util.h" -/* Partition the CURBE between the various users of constant values: +/** + * Partition the CURBE between the various users of constant values: + * Note that vertex and fragment shaders can now fetch constants out + * of constant buffers. We no longer allocatea block of the GRF for + * constants. That greatly reduces the demand for space in the CURBE. + * Some of the comments within are dated... */ static void calculate_curbe_offsets( struct brw_context *brw ) { GLcontext *ctx = &brw->intel.ctx; /* CACHE_NEW_WM_PROG */ - GLuint nr_fp_regs = (brw->wm.prog_data->nr_params + 15) / 16; + const GLuint nr_fp_regs = (brw->wm.prog_data->nr_params + 15) / 16; /* BRW_NEW_VERTEX_PROGRAM */ - struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program; - GLuint nr_vp_regs = (vp->program.Base.Parameters->NumParameters * 4 + 15) / 16; + const GLuint nr_vp_regs = (brw->vs.prog_data->nr_params + 15) / 16; GLuint nr_clip_regs = 0; GLuint total_regs; @@ -138,24 +143,24 @@ const struct brw_tracked_state brw_curbe_offsets = { * fixed-function hardware in a double-buffering scheme to avoid a * pipeline stall each time the contents of the curbe is changed. */ -void brw_upload_constant_buffer_state(struct brw_context *brw) +void brw_upload_cs_urb_state(struct brw_context *brw) { - struct brw_constant_buffer_state cbs; - memset(&cbs, 0, sizeof(cbs)); + struct brw_cs_urb_state cs_urb; + memset(&cs_urb, 0, sizeof(cs_urb)); /* It appears that this is the state packet for the CS unit, ie. the * urb entries detailed here are housed in the CS range from the * URB_FENCE command. */ - cbs.header.opcode = CMD_CONST_BUFFER_STATE; - cbs.header.length = sizeof(cbs)/4 - 2; + cs_urb.header.opcode = CMD_CS_URB_STATE; + cs_urb.header.length = sizeof(cs_urb)/4 - 2; /* BRW_NEW_URB_FENCE */ - cbs.bits0.nr_urb_entries = brw->urb.nr_cs_entries; - cbs.bits0.urb_entry_size = brw->urb.csize - 1; + cs_urb.bits0.nr_urb_entries = brw->urb.nr_cs_entries; + cs_urb.bits0.urb_entry_size = brw->urb.csize - 1; assert(brw->urb.nr_cs_entries); - BRW_CACHED_BATCH_STRUCT(brw, &cbs); + BRW_CACHED_BATCH_STRUCT(brw, &cs_urb); } static GLfloat fixed_plane[6][4] = { @@ -174,10 +179,12 @@ static GLfloat fixed_plane[6][4] = { static void prepare_constant_buffer(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; - struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program; - struct brw_fragment_program *fp = (struct brw_fragment_program *)brw->fragment_program; - GLuint sz = brw->curbe.total_size; - GLuint bufsz = sz * 16 * sizeof(GLfloat); + const struct brw_vertex_program *vp = + brw_vertex_program_const(brw->vertex_program); + const struct brw_fragment_program *fp = + brw_fragment_program_const(brw->fragment_program); + const GLuint sz = brw->curbe.total_size; + const GLuint bufsz = sz * 16 * sizeof(GLfloat); GLfloat *buf; GLuint i; @@ -189,27 +196,25 @@ static void prepare_constant_buffer(struct brw_context *brw) brw->curbe.tracked_state.dirty.mesa |= fp->program.Base.Parameters->StateFlags; if (sz == 0) { - if (brw->curbe.last_buf) { free(brw->curbe.last_buf); brw->curbe.last_buf = NULL; brw->curbe.last_bufsz = 0; } - return; } - buf = (GLfloat *)malloc(bufsz); - - memset(buf, 0, bufsz); + buf = (GLfloat *) _mesa_calloc(bufsz); + /* fragment shader constants */ if (brw->curbe.wm_size) { GLuint offset = brw->curbe.wm_start * 16; _mesa_load_state_parameters(ctx, fp->program.Base.Parameters); + /* copy float constants */ for (i = 0; i < brw->wm.prog_data->nr_params; i++) - buf[offset + i] = brw->wm.prog_data->param[i][0]; + buf[offset + i] = *brw->wm.prog_data->param[i]; } @@ -244,18 +249,20 @@ static void prepare_constant_buffer(struct brw_context *brw) } } - + /* vertex shader constants */ if (brw->curbe.vs_size) { GLuint offset = brw->curbe.vs_start * 16; - GLuint nr = vp->program.Base.Parameters->NumParameters; + GLuint nr = brw->vs.prog_data->nr_params / 4; _mesa_load_state_parameters(ctx, vp->program.Base.Parameters); + /* XXX just use a memcpy here */ for (i = 0; i < nr; i++) { - buf[offset + i * 4 + 0] = vp->program.Base.Parameters->ParameterValues[i][0]; - buf[offset + i * 4 + 1] = vp->program.Base.Parameters->ParameterValues[i][1]; - buf[offset + i * 4 + 2] = vp->program.Base.Parameters->ParameterValues[i][2]; - buf[offset + i * 4 + 3] = vp->program.Base.Parameters->ParameterValues[i][3]; + const GLfloat *value = vp->program.Base.Parameters->ParameterValues[i]; + buf[offset + i * 4 + 0] = value[0]; + buf[offset + i * 4 + 1] = value[1]; + buf[offset + i * 4 + 2] = value[2]; + buf[offset + i * 4 + 3] = value[3]; } } @@ -274,11 +281,14 @@ static void prepare_constant_buffer(struct brw_context *brw) brw->curbe.last_buf && bufsz == brw->curbe.last_bufsz && memcmp(buf, brw->curbe.last_buf, bufsz) == 0) { - free(buf); + /* constants have not changed */ + _mesa_free(buf); } else { + /* constants have changed */ if (brw->curbe.last_buf) - free(brw->curbe.last_buf); + _mesa_free(brw->curbe.last_buf); + brw->curbe.last_buf = buf; brw->curbe.last_bufsz = bufsz; @@ -326,11 +336,77 @@ static void prepare_constant_buffer(struct brw_context *brw) } +/** + * Copy Mesa program parameters into given constant buffer. + */ +static void +update_constant_buffer(struct brw_context *brw, + const struct gl_program_parameter_list *params, + dri_bo *const_buffer) +{ + struct intel_context *intel = &brw->intel; + const int size = params->NumParameters * 4 * sizeof(GLfloat); + + /* copy Mesa program constants into the buffer */ + if (const_buffer && size > 0) { + + assert(const_buffer); + assert(const_buffer->size >= size); + + if (intel->intelScreen->kernel_exec_fencing) { + drm_intel_gem_bo_map_gtt(const_buffer); + memcpy(const_buffer->virtual, params->ParameterValues, size); + drm_intel_gem_bo_unmap_gtt(const_buffer); + } + else { + dri_bo_subdata(const_buffer, 0, size, params->ParameterValues); + } + + if (0) { + int i; + for (i = 0; i < params->NumParameters; i++) { + float *p = params->ParameterValues[i]; + printf("%d: %f %f %f %f\n", i, p[0], p[1], p[2], p[3]); + } + } + } +} + + +/** Copy current vertex program's parameters into the constant buffer */ +static void +update_vertex_constant_buffer(struct brw_context *brw) +{ + struct brw_vertex_program *vp = + (struct brw_vertex_program *) brw->vertex_program; + if (0) { + printf("update VS constants in buffer %p\n", vp->const_buffer); + printf("program %u\n", vp->program.Base.Id); + } + if (vp->use_const_buffer) + update_constant_buffer(brw, vp->program.Base.Parameters, vp->const_buffer); +} + + +/** Copy current fragment program's parameters into the constant buffer */ +static void +update_fragment_constant_buffer(struct brw_context *brw) +{ + struct brw_fragment_program *fp = + (struct brw_fragment_program *) brw->fragment_program; + if (fp->use_const_buffer) + update_constant_buffer(brw, fp->program.Base.Parameters, fp->const_buffer); +} + + static void emit_constant_buffer(struct brw_context *brw) { struct intel_context *intel = &brw->intel; GLuint sz = brw->curbe.total_size; + update_vertex_constant_buffer(brw); + update_fragment_constant_buffer(brw); + BEGIN_BATCH(2, IGNORE_CLIPRECTS); if (sz == 0) { OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2)); diff --git a/i965/brw_defines.h b/i965/brw_defines.h index 39c3225..98fc909 100644 --- a/i965/brw_defines.h +++ b/i965/brw_defines.h @@ -225,6 +225,24 @@ #define BRW_RASTRULE_UPPER_LEFT 0 #define BRW_RASTRULE_UPPER_RIGHT 1 +/* These are listed as "Reserved, but not seen as useful" + * in Intel documentation (page 212, "Point Rasterization Rule", + * section 7.4 "SF Pipeline State Summary", of document + * "Intel® 965 Express Chipset Family and Intel® G35 Express + * Chipset Graphics Controller Programmer's Reference Manual, + * Volume 2: 3D/Media", Revision 1.0b as of January 2008, + * available at + * http://intellinuxgraphics.org/documentation.html + * at the time of this writing). + * + * These appear to be supported on at least some + * i965-family devices, and the BRW_RASTRULE_LOWER_RIGHT + * is useful when using OpenGL to render to a FBO + * (which has the pixel coordinate Y orientation inverted + * with respect to the normal OpenGL pixel coordinate system). + */ +#define BRW_RASTRULE_LOWER_LEFT 2 +#define BRW_RASTRULE_LOWER_RIGHT 3 #define BRW_RENDERTARGET_CLAMPRANGE_UNORM 0 #define BRW_RENDERTARGET_CLAMPRANGE_SNORM 1 @@ -349,9 +367,10 @@ #define BRW_SURFACEFORMAT_L8A8_UNORM 0x114 #define BRW_SURFACEFORMAT_I16_FLOAT 0x115 #define BRW_SURFACEFORMAT_L16_FLOAT 0x116 -#define BRW_SURFACEFORMAT_A16_FLOAT 0x117 -#define BRW_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119 -#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A +#define BRW_SURFACEFORMAT_A16_FLOAT 0x117 +#define BRW_SURFACEFORMAT_L8A8_UNORM_SRGB 0x118 +#define BRW_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119 +#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A #define BRW_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B #define BRW_SURFACEFORMAT_R8G8_SSCALED 0x11C #define BRW_SURFACEFORMAT_R8G8_USCALED 0x11D @@ -368,6 +387,7 @@ #define BRW_SURFACEFORMAT_A4P4_UNORM 0x148 #define BRW_SURFACEFORMAT_R8_SSCALED 0x149 #define BRW_SURFACEFORMAT_R8_USCALED 0x14A +#define BRW_SURFACEFORMAT_L8_UNORM_SRGB 0x14C #define BRW_SURFACEFORMAT_R1_UINT 0x181 #define BRW_SURFACEFORMAT_YCRCB_NORMAL 0x182 #define BRW_SURFACEFORMAT_YCRCB_SWAPUVY 0x183 @@ -734,7 +754,7 @@ #define CMD_URB_FENCE 0x6000 -#define CMD_CONST_BUFFER_STATE 0x6001 +#define CMD_CS_URB_STATE 0x6001 #define CMD_CONST_BUFFER 0x6002 #define CMD_STATE_BASE_ADDRESS 0x6101 diff --git a/i965/brw_draw.c b/i965/brw_draw.c index 0b64999..5342622 100644 --- a/i965/brw_draw.c +++ b/i965/brw_draw.c @@ -127,6 +127,7 @@ static void brw_emit_prim(struct brw_context *brw, uint32_t hw_prim) { struct brw_3d_primitive prim_packet; + struct intel_context *intel = &brw->intel; if (INTEL_DEBUG & DEBUG_PRIMS) _mesa_printf("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode), @@ -146,10 +147,27 @@ static void brw_emit_prim(struct brw_context *brw, /* Can't wrap here, since we rely on the validated state. */ brw->no_batch_wrap = GL_TRUE; + + /* If we're set to always flush, do it before and after the primitive emit. + * We want to catch both missed flushes that hurt instruction/state cache + * and missed flushes of the render cache as it heads to other parts of + * the besides the draw code. + */ + if (intel->always_flush_cache) { + BEGIN_BATCH(1, IGNORE_CLIPRECTS); + OUT_BATCH(intel->vtbl.flush_cmd()); + ADVANCE_BATCH(); + } if (prim_packet.verts_per_instance) { intel_batchbuffer_data( brw->intel.batch, &prim_packet, sizeof(prim_packet), LOOP_CLIPRECTS); } + if (intel->always_flush_cache) { + BEGIN_BATCH(1, IGNORE_CLIPRECTS); + OUT_BATCH(intel->vtbl.flush_cmd()); + ADVANCE_BATCH(); + } + brw->no_batch_wrap = GL_FALSE; } @@ -393,6 +411,8 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, retval = GL_TRUE; } + if (intel->always_flush_batch) + intel_batchbuffer_flush(intel->batch); out: UNLOCK_HARDWARE(intel); diff --git a/i965/brw_draw_upload.c b/i965/brw_draw_upload.c index 73d6dea..b91b20b 100644 --- a/i965/brw_draw_upload.c +++ b/i965/brw_draw_upload.c @@ -156,7 +156,13 @@ static GLuint byte_types_scale[5] = { }; -static GLuint get_surface_type( GLenum type, GLuint size, GLboolean normalized ) +/** + * Given vertex array type/size/format/normalized info, return + * the appopriate hardware surface type. + * Format will be GL_RGBA or possibly GL_BGRA for GLubyte[4] color arrays. + */ +static GLuint get_surface_type( GLenum type, GLuint size, + GLenum format, GLboolean normalized ) { if (INTEL_DEBUG & DEBUG_VERTS) _mesa_printf("type %s size %d normalized %d\n", @@ -171,11 +177,20 @@ static GLuint get_surface_type( GLenum type, GLuint size, GLboolean normalized ) case GL_BYTE: return byte_types_norm[size]; case GL_UNSIGNED_INT: return uint_types_norm[size]; case GL_UNSIGNED_SHORT: return ushort_types_norm[size]; - case GL_UNSIGNED_BYTE: return ubyte_types_norm[size]; + case GL_UNSIGNED_BYTE: + if (format == GL_BGRA) { + /* See GL_EXT_vertex_array_bgra */ + assert(size == 4); + return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + } + else { + return ubyte_types_norm[size]; + } default: assert(0); return 0; } } else { + assert(format == GL_RGBA); /* sanity check */ switch (type) { case GL_DOUBLE: return double_types[size]; case GL_FLOAT: return float_types[size]; @@ -262,6 +277,7 @@ copy_array_to_vbo_array( struct brw_context *brw, struct brw_vertex_element *element, GLuint dst_stride) { + struct intel_context *intel = &brw->intel; GLuint size = element->count * dst_stride; get_space(brw, size, &element->bo, &element->offset); @@ -274,29 +290,52 @@ copy_array_to_vbo_array( struct brw_context *brw, } if (dst_stride == element->glarray->StrideB) { - dri_bo_subdata(element->bo, - element->offset, - size, - element->glarray->Ptr); + if (intel->intelScreen->kernel_exec_fencing) { + drm_intel_gem_bo_map_gtt(element->bo); + memcpy((char *)element->bo->virtual + element->offset, + element->glarray->Ptr, size); + drm_intel_gem_bo_unmap_gtt(element->bo); + } else { + dri_bo_subdata(element->bo, + element->offset, + size, + element->glarray->Ptr); + } } else { - void *data; char *dest; - const char *src = element->glarray->Ptr; + const unsigned char *src = element->glarray->Ptr; int i; - data = _mesa_malloc(dst_stride * element->count); - dest = data; - for (i = 0; i < element->count; i++) { - memcpy(dest, src, dst_stride); - src += element->glarray->StrideB; - dest += dst_stride; - } + if (intel->intelScreen->kernel_exec_fencing) { + drm_intel_gem_bo_map_gtt(element->bo); + dest = element->bo->virtual; + dest += element->offset; - dri_bo_subdata(element->bo, - element->offset, - size, - data); - _mesa_free(data); + for (i = 0; i < element->count; i++) { + memcpy(dest, src, dst_stride); + src += element->glarray->StrideB; + dest += dst_stride; + } + + drm_intel_gem_bo_unmap_gtt(element->bo); + } else { + void *data; + + data = _mesa_malloc(dst_stride * element->count); + dest = data; + for (i = 0; i < element->count; i++) { + memcpy(dest, src, dst_stride); + src += element->glarray->StrideB; + dest += dst_stride; + } + + dri_bo_subdata(element->bo, + element->offset, + size, + data); + + _mesa_free(data); + } } } @@ -484,6 +523,7 @@ static void brw_emit_vertices(struct brw_context *brw) struct brw_vertex_element *input = enabled[i]; uint32_t format = get_surface_type(input->glarray->Type, input->glarray->Size, + input->glarray->Format, input->glarray->Normalized); uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC; uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC; @@ -547,9 +587,15 @@ static void brw_prepare_indices(struct brw_context *brw) /* Straight upload */ - dri_bo_subdata(bo, offset, ib_size, index_buffer->ptr); + if (intel->intelScreen->kernel_exec_fencing) { + drm_intel_gem_bo_map_gtt(bo); + memcpy((char *)bo->virtual + offset, index_buffer->ptr, ib_size); + drm_intel_gem_bo_unmap_gtt(bo); + } else { + dri_bo_subdata(bo, offset, ib_size, index_buffer->ptr); + } } else { - offset = (GLuint)index_buffer->ptr; + offset = (GLuint) (unsigned long) index_buffer->ptr; /* If the index buffer isn't aligned to its element size, we have to * rebase it into a temporary. diff --git a/i965/brw_eu.h b/i965/brw_eu.h index b36a197..003332f 100644 --- a/i965/brw_eu.h +++ b/i965/brw_eu.h @@ -97,7 +97,7 @@ struct brw_glsl_call; #define BRW_EU_MAX_INSN_STACK 5 -#define BRW_EU_MAX_INSN 1200 +#define BRW_EU_MAX_INSN 4000 struct brw_compile { struct brw_instruction store[BRW_EU_MAX_INSN]; @@ -170,6 +170,13 @@ static INLINE struct brw_reg brw_reg( GLuint file, GLuint writemask ) { struct brw_reg reg; + if (type == BRW_GENERAL_REGISTER_FILE) + assert(nr < 128); + else if (type == BRW_MESSAGE_REGISTER_FILE) + assert(nr < 9); + else if (type == BRW_ARCHITECTURE_REGISTER_FILE) + assert(nr <= BRW_ARF_IP); + reg.type = type; reg.file = file; reg.nr = nr; @@ -723,6 +730,13 @@ static INLINE struct brw_indirect brw_indirect( GLuint addr_subnr, GLint offset return ptr; } +/** Do two brw_regs refer to the same register? */ +static INLINE GLboolean +brw_same_reg(struct brw_reg r1, struct brw_reg r2) +{ + return r1.file == r2.file && r1.nr == r2.nr; +} + static INLINE struct brw_instruction *current_insn( struct brw_compile *p) { return &p->store[p->nr_insn]; @@ -841,12 +855,24 @@ void brw_math( struct brw_compile *p, void brw_dp_READ_16( struct brw_compile *p, struct brw_reg dest, - GLuint msg_reg_nr, GLuint scratch_offset ); +void brw_dp_READ_4( struct brw_compile *p, + struct brw_reg dest, + GLboolean relAddr, + GLuint location, + GLuint bind_table_index ); + +void brw_dp_READ_4_vs( struct brw_compile *p, + struct brw_reg dest, + GLuint oword, + GLboolean relAddr, + struct brw_reg addrReg, + GLuint location, + GLuint bind_table_index ); + void brw_dp_WRITE_16( struct brw_compile *p, struct brw_reg src, - GLuint msg_reg_nr, GLuint scratch_offset ); /* If/else/endif. Works by manipulating the execution flags on each diff --git a/i965/brw_eu_debug.c b/i965/brw_eu_debug.c index 91dbbd5..29f3f6d 100644 --- a/i965/brw_eu_debug.c +++ b/i965/brw_eu_debug.c @@ -65,6 +65,7 @@ void brw_print_reg( struct brw_reg hwreg ) hwreg.width == BRW_WIDTH_8 && hwreg.hstride == BRW_HORIZONTAL_STRIDE_1 && hwreg.type == BRW_REGISTER_TYPE_F) { + /* vector register */ _mesa_printf("vec%d", hwreg.nr); } else if (hwreg.file == BRW_GENERAL_REGISTER_FILE && @@ -72,8 +73,12 @@ void brw_print_reg( struct brw_reg hwreg ) hwreg.width == BRW_WIDTH_1 && hwreg.hstride == BRW_HORIZONTAL_STRIDE_0 && hwreg.type == BRW_REGISTER_TYPE_F) { + /* "scalar" register */ _mesa_printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4); } + else if (hwreg.file == BRW_IMMEDIATE_VALUE) { + _mesa_printf("imm %f", hwreg.dw1.f); + } else { _mesa_printf("%s%d.%d<%d;%d,%d>:%s", file[hwreg.file], diff --git a/i965/brw_eu_emit.c b/i965/brw_eu_emit.c index 4e099b5..2a147fb 100644 --- a/i965/brw_eu_emit.c +++ b/i965/brw_eu_emit.c @@ -55,6 +55,9 @@ static void guess_execution_size( struct brw_instruction *insn, static void brw_set_dest( struct brw_instruction *insn, struct brw_reg dest ) { + if (dest.type != BRW_ARCHITECTURE_REGISTER_FILE) + assert(dest.nr < 128); + insn->bits1.da1.dest_reg_file = dest.file; insn->bits1.da1.dest_reg_type = dest.type; insn->bits1.da1.dest_address_mode = dest.address_mode; @@ -96,10 +99,13 @@ static void brw_set_dest( struct brw_instruction *insn, } static void brw_set_src0( struct brw_instruction *insn, - struct brw_reg reg ) + struct brw_reg reg ) { assert(reg.file != BRW_MESSAGE_REGISTER_FILE); + if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE) + assert(reg.nr < 128); + insn->bits1.da1.src0_reg_file = reg.file; insn->bits1.da1.src0_reg_type = reg.type; insn->bits2.da1.src0_abs = reg.abs; @@ -169,10 +175,12 @@ static void brw_set_src0( struct brw_instruction *insn, void brw_set_src1( struct brw_instruction *insn, - struct brw_reg reg ) + struct brw_reg reg ) { assert(reg.file != BRW_MESSAGE_REGISTER_FILE); + assert(reg.nr < 128); + insn->bits1.da1.src1_reg_file = reg.file; insn->bits1.da1.src1_reg_type = reg.type; insn->bits3.da1.src1_abs = reg.abs; @@ -312,24 +320,25 @@ static void brw_set_dp_read_message( struct brw_instruction *insn, { brw_set_src1(insn, brw_imm_d(0)); - insn->bits3.dp_read.binding_table_index = binding_table_index; - insn->bits3.dp_read.msg_control = msg_control; - insn->bits3.dp_read.msg_type = msg_type; - insn->bits3.dp_read.target_cache = target_cache; - insn->bits3.dp_read.response_length = response_length; - insn->bits3.dp_read.msg_length = msg_length; - insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; - insn->bits3.dp_read.end_of_thread = end_of_thread; + insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/ + insn->bits3.dp_read.msg_control = msg_control; /*8:11*/ + insn->bits3.dp_read.msg_type = msg_type; /*12:13*/ + insn->bits3.dp_read.target_cache = target_cache; /*14:15*/ + insn->bits3.dp_read.response_length = response_length; /*16:19*/ + insn->bits3.dp_read.msg_length = msg_length; /*20:23*/ + insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/ + insn->bits3.dp_read.pad1 = 0; /*28:30*/ + insn->bits3.dp_read.end_of_thread = end_of_thread; /*31*/ } static void brw_set_sampler_message(struct brw_context *brw, - struct brw_instruction *insn, - GLuint binding_table_index, - GLuint sampler, - GLuint msg_type, - GLuint response_length, - GLuint msg_length, - GLboolean eot) + struct brw_instruction *insn, + GLuint binding_table_index, + GLuint sampler, + GLuint msg_type, + GLuint response_length, + GLuint msg_length, + GLboolean eot) { brw_set_src1(insn, brw_imm_d(0)); @@ -407,7 +416,7 @@ static struct brw_instruction *brw_alu2(struct brw_compile *p, * Convenience routines. */ #define ALU1(OP) \ -struct brw_instruction *brw_##OP(struct brw_compile *p, \ +struct brw_instruction *brw_##OP(struct brw_compile *p, \ struct brw_reg dest, \ struct brw_reg src0) \ { \ @@ -415,7 +424,7 @@ struct brw_instruction *brw_##OP(struct brw_compile *p, \ } #define ALU2(OP) \ -struct brw_instruction *brw_##OP(struct brw_compile *p, \ +struct brw_instruction *brw_##OP(struct brw_compile *p, \ struct brw_reg dest, \ struct brw_reg src0, \ struct brw_reg src1) \ @@ -469,9 +478,9 @@ void brw_NOP(struct brw_compile *p) */ struct brw_instruction *brw_JMPI(struct brw_compile *p, - struct brw_reg dest, - struct brw_reg src0, - struct brw_reg src1) + struct brw_reg dest, + struct brw_reg src0, + struct brw_reg src1) { struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1); @@ -674,7 +683,7 @@ struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size) struct brw_instruction *brw_WHILE(struct brw_compile *p, - struct brw_instruction *do_insn) + struct brw_instruction *do_insn) { struct brw_instruction *insn; @@ -762,7 +771,7 @@ void brw_CMP(struct brw_compile *p, * Helpers for the various SEND message types: */ -/* Invert 8 values +/** Extended math function, float[8]. */ void brw_math( struct brw_compile *p, struct brw_reg dest, @@ -794,7 +803,9 @@ void brw_math( struct brw_compile *p, data_type); } -/* Use 2 send instructions to invert 16 elements +/** + * Extended math function, float[16]. + * Use 2 send instructions. */ void brw_math_16( struct brw_compile *p, struct brw_reg dest, @@ -847,22 +858,26 @@ void brw_math_16( struct brw_compile *p, } - - +/** + * Write block of 16 dwords/floats to the data port Render Cache scratch buffer. + * Scratch offset should be a multiple of 64. + * Used for register spilling. + */ void brw_dp_WRITE_16( struct brw_compile *p, struct brw_reg src, - GLuint msg_reg_nr, GLuint scratch_offset ) { + GLuint msg_reg_nr = 1; { brw_push_insn_state(p); brw_set_mask_control(p, BRW_MASK_DISABLE); brw_set_compression_control(p, BRW_COMPRESSION_NONE); + /* set message header global offset field (reg 0, element 2) */ brw_MOV(p, retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D), brw_imm_d(scratch_offset)); - + brw_pop_insn_state(p); } @@ -879,7 +894,7 @@ void brw_dp_WRITE_16( struct brw_compile *p, brw_set_src0(insn, src); brw_set_dp_write_message(insn, - 255, /* bti */ + 255, /* binding table index (255=stateless) */ BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */ BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */ msg_length, @@ -887,24 +902,29 @@ void brw_dp_WRITE_16( struct brw_compile *p, 0, /* response_length */ 0); /* eot */ } - } +/** + * Read block of 16 dwords/floats from the data port Render Cache scratch buffer. + * Scratch offset should be a multiple of 64. + * Used for register spilling. + */ void brw_dp_READ_16( struct brw_compile *p, struct brw_reg dest, - GLuint msg_reg_nr, GLuint scratch_offset ) { + GLuint msg_reg_nr = 1; { brw_push_insn_state(p); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_set_mask_control(p, BRW_MASK_DISABLE); + /* set message header global offset field (reg 0, element 2) */ brw_MOV(p, retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D), brw_imm_d(scratch_offset)); - + brw_pop_insn_state(p); } @@ -919,10 +939,10 @@ void brw_dp_READ_16( struct brw_compile *p, brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); brw_set_dp_read_message(insn, - 255, /* bti */ - 3, /* msg_control */ + 255, /* binding table index (255=stateless) */ + 3, /* msg_control (3 means 4 Owords) */ BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ - 1, /* target cache */ + 1, /* target cache (render/scratch) */ 1, /* msg_length */ 2, /* response_length */ 0); /* eot */ @@ -930,14 +950,143 @@ void brw_dp_READ_16( struct brw_compile *p, } +/** + * Read a float[4] vector from the data port Data Cache (const buffer). + * Location (in buffer) should be a multiple of 16. + * Used for fetching shader constants. + * If relAddr is true, we'll do an indirect fetch using the address register. + */ +void brw_dp_READ_4( struct brw_compile *p, + struct brw_reg dest, + GLboolean relAddr, + GLuint location, + GLuint bind_table_index ) +{ + /* XXX: relAddr not implemented */ + GLuint msg_reg_nr = 1; + { + struct brw_reg b; + brw_push_insn_state(p); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + + /* Setup MRF[1] with location/offset into const buffer */ + b = brw_message_reg(msg_reg_nr); + b = retype(b, BRW_REGISTER_TYPE_UD); + /* XXX I think we're setting all the dwords of MRF[1] to 'location'. + * when the docs say only dword[2] should be set. Hmmm. But it works. + */ + brw_MOV(p, b, brw_imm_ud(location)); + brw_pop_insn_state(p); + } + + { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = BRW_PREDICATE_NONE; + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.mask_control = BRW_MASK_DISABLE; + + /* cast dest to a uword[8] vector */ + dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW); + + brw_set_dest(insn, dest); + brw_set_src0(insn, brw_null_reg()); + + brw_set_dp_read_message(insn, + bind_table_index, + 0, /* msg_control (0 means 1 Oword) */ + BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ + 0, /* source cache = data cache */ + 1, /* msg_length */ + 1, /* response_length (1 Oword) */ + 0); /* eot */ + } +} + + +/** + * Read float[4] constant(s) from VS constant buffer. + * For relative addressing, two float[4] constants will be read into 'dest'. + * Otherwise, one float[4] constant will be read into the lower half of 'dest'. + */ +void brw_dp_READ_4_vs(struct brw_compile *p, + struct brw_reg dest, + GLuint oword, + GLboolean relAddr, + struct brw_reg addrReg, + GLuint location, + GLuint bind_table_index) +{ + GLuint msg_reg_nr = 1; + + assert(oword < 2); + /* + printf("vs const read msg, location %u, msg_reg_nr %d\n", + location, msg_reg_nr); + */ + + /* Setup MRF[1] with location/offset into const buffer */ + { + struct brw_reg b; + + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + /*brw_set_access_mode(p, BRW_ALIGN_16);*/ + + /* XXX I think we're setting all the dwords of MRF[1] to 'location'. + * when the docs say only dword[2] should be set. Hmmm. But it works. + */ + b = brw_message_reg(msg_reg_nr); + b = retype(b, BRW_REGISTER_TYPE_UD); + /*b = get_element_ud(b, 2);*/ + if (relAddr) { + brw_ADD(p, b, addrReg, brw_imm_ud(location)); + } + else { + brw_MOV(p, b, brw_imm_ud(location)); + } + + brw_pop_insn_state(p); + } + + { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = BRW_PREDICATE_NONE; + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.mask_control = BRW_MASK_DISABLE; + /*insn->header.access_mode = BRW_ALIGN_16;*/ + + brw_set_dest(insn, dest); + brw_set_src0(insn, brw_null_reg()); + + brw_set_dp_read_message(insn, + bind_table_index, + oword, /* 0 = lower Oword, 1 = upper Oword */ + BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ + 0, /* source cache = data cache */ + 1, /* msg_length */ + 1, /* response_length (1 Oword) */ + 0); /* eot */ + } +} + + + void brw_fb_WRITE(struct brw_compile *p, - struct brw_reg dest, - GLuint msg_reg_nr, - struct brw_reg src0, - GLuint binding_table_index, - GLuint msg_length, - GLuint response_length, - GLboolean eot) + struct brw_reg dest, + GLuint msg_reg_nr, + struct brw_reg src0, + GLuint binding_table_index, + GLuint msg_length, + GLuint response_length, + GLboolean eot) { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); @@ -958,7 +1107,11 @@ void brw_fb_WRITE(struct brw_compile *p, } - +/** + * Texture sample instruction. + * Note: the msg_type plus msg_length values determine exactly what kind + * of sampling operation is performed. See volume 4, page 161 of docs. + */ void brw_SAMPLE(struct brw_compile *p, struct brw_reg dest, GLuint msg_reg_nr, @@ -973,8 +1126,8 @@ void brw_SAMPLE(struct brw_compile *p, { GLboolean need_stall = 0; - if(writemask == 0) { -/* _mesa_printf("%s: zero writemask??\n", __FUNCTION__); */ + if (writemask == 0) { + /*_mesa_printf("%s: zero writemask??\n", __FUNCTION__); */ return; } @@ -1006,7 +1159,7 @@ void brw_SAMPLE(struct brw_compile *p, if (newmask != writemask) { need_stall = 1; -/* _mesa_printf("need stall %x %x\n", newmask , writemask); */ + /* _mesa_printf("need stall %x %x\n", newmask , writemask); */ } else { struct brw_reg m1 = brw_message_reg(msg_reg_nr); @@ -1047,14 +1200,13 @@ void brw_SAMPLE(struct brw_compile *p, eot); } - if (need_stall) - { + if (need_stall) { struct brw_reg reg = vec8(offset(dest, response_length-1)); /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 } */ brw_push_insn_state(p); - brw_set_compression_control(p, GL_FALSE); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MOV(p, reg, reg); brw_pop_insn_state(p); } diff --git a/i965/brw_fallback.c b/i965/brw_fallback.c index 5f4f2d5..2993574 100644 --- a/i965/brw_fallback.c +++ b/i965/brw_fallback.c @@ -75,7 +75,7 @@ static GLboolean do_check_fallback(struct brw_context *brw) /* _NEW_STENCIL */ - if (ctx->Stencil.Enabled && + if (ctx->Stencil._Enabled && (ctx->DrawBuffer->Name == 0 && !brw->intel.hw_stencil)) { DBG("FALLBACK: stencil\n"); return GL_TRUE; diff --git a/i965/brw_misc_state.c b/i965/brw_misc_state.c index f311663..9bc5c35 100644 --- a/i965/brw_misc_state.c +++ b/i965/brw_misc_state.c @@ -101,6 +101,7 @@ const struct brw_tracked_state brw_drawing_rect = { static void prepare_binding_table_pointers(struct brw_context *brw) { + brw_add_validated_bo(brw, brw->vs.bind_bo); brw_add_validated_bo(brw, brw->wm.bind_bo); } @@ -117,13 +118,11 @@ static void upload_binding_table_pointers(struct brw_context *brw) BEGIN_BATCH(6, IGNORE_CLIPRECTS); OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2)); - OUT_BATCH(0); /* vs */ + OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* vs */ OUT_BATCH(0); /* gs */ OUT_BATCH(0); /* clip */ OUT_BATCH(0); /* sf */ - OUT_RELOC(brw->wm.bind_bo, - I915_GEM_DOMAIN_SAMPLER, 0, - 0); + OUT_RELOC(brw->wm.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* wm/ps */ ADVANCE_BATCH(); } @@ -178,7 +177,7 @@ static void upload_psp_urb_cbs(struct brw_context *brw ) { upload_pipelined_state_pointers(brw); brw_upload_urb_fence(brw); - brw_upload_constant_buffer_state(brw); + brw_upload_cs_urb_state(brw); } const struct brw_tracked_state brw_psp_urb_cbs = { diff --git a/i965/brw_program.c b/i965/brw_program.c index 0c86911..bac6918 100644 --- a/i965/brw_program.c +++ b/i965/brw_program.c @@ -38,6 +38,7 @@ #include "brw_context.h" #include "brw_util.h" +#include "brw_wm.h" static void brwBindProgram( GLcontext *ctx, GLenum target, @@ -94,7 +95,12 @@ static struct gl_program *brwNewProgram( GLcontext *ctx, static void brwDeleteProgram( GLcontext *ctx, struct gl_program *prog ) { - + if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) { + struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog; + struct brw_fragment_program *brw_fprog = brw_fragment_program(fprog); + dri_bo_unreference(brw_fprog->const_buffer); + } + _mesa_delete_program( ctx, prog ); } @@ -110,30 +116,36 @@ static void brwProgramStringNotify( GLcontext *ctx, GLenum target, struct gl_program *prog ) { + struct brw_context *brw = brw_context(ctx); + if (target == GL_FRAGMENT_PROGRAM_ARB) { struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog; - struct brw_context *brw = brw_context(ctx); - struct brw_fragment_program *p = (struct brw_fragment_program *)prog; - struct brw_fragment_program *fp = (struct brw_fragment_program *)brw->fragment_program; + struct brw_fragment_program *newFP = brw_fragment_program(fprog); + const struct brw_fragment_program *curFP = + brw_fragment_program_const(brw->fragment_program); + if (fprog->FogOption) { _mesa_append_fog_code(ctx, fprog); fprog->FogOption = GL_NONE; } - if (p == fp) + if (newFP == curFP) brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM; - p->id = brw->program_id++; + newFP->id = brw->program_id++; + newFP->isGLSL = brw_wm_is_glsl(fprog); } else if (target == GL_VERTEX_PROGRAM_ARB) { - struct brw_context *brw = brw_context(ctx); - struct brw_vertex_program *p = (struct brw_vertex_program *)prog; - struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program; - if (p == vp) + struct gl_vertex_program *vprog = (struct gl_vertex_program *) prog; + struct brw_vertex_program *newVP = brw_vertex_program(vprog); + const struct brw_vertex_program *curVP = + brw_vertex_program_const(brw->vertex_program); + + if (newVP == curVP) brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM; - if (p->program.IsPositionInvariant) { - _mesa_insert_mvp_code(ctx, &p->program); + if (newVP->program.IsPositionInvariant) { + _mesa_insert_mvp_code(ctx, &newVP->program); } - p->id = brw->program_id++; + newVP->id = brw->program_id++; /* Also tell tnl about it: */ diff --git a/i965/brw_sf_state.c b/i965/brw_sf_state.c index e22d080..c999187 100644 --- a/i965/brw_sf_state.c +++ b/i965/brw_sf_state.c @@ -43,10 +43,12 @@ static void upload_sf_vp(struct brw_context *brw) const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF; struct brw_sf_viewport sfv; GLfloat y_scale, y_bias; + const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0); + const GLfloat *v = ctx->Viewport._WindowMap.m; memset(&sfv, 0, sizeof(sfv)); - if (intel_rendering_to_texture(ctx)) { + if (render_to_fbo) { y_scale = 1.0; y_bias = 0; } @@ -57,8 +59,6 @@ static void upload_sf_vp(struct brw_context *brw) /* _NEW_VIEWPORT */ - const GLfloat *v = ctx->Viewport._WindowMap.m; - sfv.viewport.m00 = v[MAT_SX]; sfv.viewport.m11 = v[MAT_SY] * y_scale; sfv.viewport.m22 = v[MAT_SZ] * depth_scale; @@ -66,7 +66,9 @@ static void upload_sf_vp(struct brw_context *brw) sfv.viewport.m31 = v[MAT_TY] * y_scale + y_bias; sfv.viewport.m32 = v[MAT_TZ] * depth_scale; - /* _NEW_SCISSOR */ + /* _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT + * for DrawBuffer->_[XY]{min,max} + */ /* The scissor only needs to handle the intersection of drawable and * scissor rect. Clipping to the boundaries of static shared buffers @@ -75,7 +77,7 @@ static void upload_sf_vp(struct brw_context *brw) * Note that the hardware's coordinates are inclusive, while Mesa's min is * inclusive but max is exclusive. */ - if (intel_rendering_to_texture(ctx)) { + if (render_to_fbo) { /* texmemory: Y=0=bottom */ sfv.scissor.xmin = ctx->DrawBuffer->_Xmin; sfv.scissor.xmax = ctx->DrawBuffer->_Xmax - 1; @@ -97,7 +99,8 @@ static void upload_sf_vp(struct brw_context *brw) const struct brw_tracked_state brw_sf_vp = { .dirty = { .mesa = (_NEW_VIEWPORT | - _NEW_SCISSOR), + _NEW_SCISSOR | + _NEW_BUFFERS), .brw = 0, .cache = 0 }, @@ -111,10 +114,13 @@ struct brw_sf_unit_key { unsigned int nr_urb_entries, urb_size, sfsize; GLenum front_face, cull_face; - GLboolean scissor, line_smooth, point_sprite, point_attenuated; + unsigned scissor:1; + unsigned line_smooth:1; + unsigned point_sprite:1; + unsigned point_attenuated:1; + unsigned render_to_fbo:1; float line_width; float point_size; - GLboolean render_to_texture; }; static void @@ -144,10 +150,10 @@ sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key) key->line_smooth = ctx->Line.SmoothFlag; key->point_sprite = ctx->Point.PointSprite; - key->point_size = ctx->Point.Size; + key->point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize); key->point_attenuated = ctx->Point._Attenuated; - key->render_to_texture = intel_rendering_to_texture(&brw->intel.ctx); + key->render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0; } static dri_bo * @@ -194,10 +200,10 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, else sf.sf5.front_winding = BRW_FRONTWINDING_CW; - /* The viewport is inverted for rendering to texture, and that inverts + /* The viewport is inverted for rendering to a FBO, and that inverts * polygon front/back orientation. */ - sf.sf5.front_winding ^= key->render_to_texture; + sf.sf5.front_winding ^= key->render_to_fbo; switch (key->cull_face) { case GL_FRONT: @@ -228,7 +234,33 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, sf.sf6.line_width = 0; /* _NEW_POINT */ - sf.sf6.point_rast_rule = BRW_RASTRULE_UPPER_RIGHT; /* opengl conventions */ + key->render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0; + if (!key->render_to_fbo) { + /* Rendering to an OpenGL window */ + sf.sf6.point_rast_rule = BRW_RASTRULE_UPPER_RIGHT; + } + else { + /* If rendering to an FBO, the pixel coordinate system is + * inverted with respect to the normal OpenGL coordinate + * system, so BRW_RASTRULE_LOWER_RIGHT is correct. + * But this value is listed as "Reserved, but not seen as useful" + * in Intel documentation (page 212, "Point Rasterization Rule", + * section 7.4 "SF Pipeline State Summary", of document + * "Intel® 965 Express Chipset Family and Intel® G35 Express + * Chipset Graphics Controller Programmer's Reference Manual, + * Volume 2: 3D/Media", Revision 1.0b as of January 2008, + * available at + * http://intellinuxgraphics.org/documentation.html + * at the time of this writing). + * + * It does work on at least some devices, if not all; + * if devices that don't support it can be identified, + * the likely failure case is that points are rasterized + * incorrectly, which is no worse than occurs without + * the value, so we're using it here. + */ + sf.sf6.point_rast_rule = BRW_RASTRULE_LOWER_RIGHT; + } /* XXX clamp max depends on AA vs. non-AA */ sf.sf7.sprite_point = key->point_sprite; diff --git a/i965/brw_state.h b/i965/brw_state.h index df839c5..81b0a45 100644 --- a/i965/brw_state.h +++ b/i965/brw_state.h @@ -52,7 +52,6 @@ const struct brw_tracked_state brw_cc_vp; const struct brw_tracked_state brw_check_fallback; const struct brw_tracked_state brw_clip_prog; const struct brw_tracked_state brw_clip_unit; -const struct brw_tracked_state brw_constant_buffer_state; const struct brw_tracked_state brw_constant_buffer; const struct brw_tracked_state brw_curbe_offsets; const struct brw_tracked_state brw_invarient_state; diff --git a/i965/brw_state_batch.c b/i965/brw_state_batch.c index dc87859..811940e 100644 --- a/i965/brw_state_batch.c +++ b/i965/brw_state_batch.c @@ -97,8 +97,6 @@ void brw_clear_batch_cache_flush( struct brw_context *brw ) { clear_batch_cache(brw); -/* brw_do_flush(brw, BRW_FLUSH_STATE_CACHE|BRW_FLUSH_READ_CACHE); */ - brw->state.dirty.mesa |= ~0; brw->state.dirty.brw |= ~0; brw->state.dirty.cache |= ~0; diff --git a/i965/brw_state_dump.c b/i965/brw_state_dump.c index b28c57c..a713262 100644 --- a/i965/brw_state_dump.c +++ b/i965/brw_state_dump.c @@ -84,6 +84,19 @@ get_965_surfacetype(unsigned int surfacetype) } } +static const char * +get_965_surface_format(unsigned int surface_format) +{ + switch (surface_format) { + case 0x000: return "r32g32b32a32_float"; + case 0x0c1: return "b8g8r8a8_unorm"; + case 0x100: return "b5g6r5_unorm"; + case 0x102: return "b5g5r5a1_unorm"; + case 0x104: return "b4g4r4a4_unorm"; + default: return "unknown"; + } +} + static void dump_wm_surface_state(struct brw_context *brw) { int i; @@ -95,7 +108,7 @@ static void dump_wm_surface_state(struct brw_context *brw) char name[20]; if (surf_bo == NULL) { - fprintf(stderr, "WM SS%d: NULL\n", i); + fprintf(stderr, " WM SS%d: NULL\n", i); continue; } dri_bo_map(surf_bo, GL_FALSE); @@ -103,8 +116,9 @@ static void dump_wm_surface_state(struct brw_context *brw) surf = (struct brw_surface_state *)(surf_bo->virtual); sprintf(name, "WM SS%d", i); - state_out(name, surf, surfoff, 0, "%s\n", - get_965_surfacetype(surf->ss0.surface_type)); + state_out(name, surf, surfoff, 0, "%s %s\n", + get_965_surfacetype(surf->ss0.surface_type), + get_965_surface_format(surf->ss0.surface_format)); state_out(name, surf, surfoff, 1, "offset\n"); state_out(name, surf, surfoff, 2, "%dx%d size, %d mips\n", surf->ss2.width + 1, surf->ss2.height + 1, surf->ss2.mip_count); @@ -162,6 +176,14 @@ static void brw_debug_prog(const char *name, dri_bo *prog) fprintf(stderr, "%8s: 0x%08x: 0x%08x 0x%08x 0x%08x 0x%08x\n", name, (unsigned int)prog->offset + i * 4 * 4, data[i * 4], data[i * 4 + 1], data[i * 4 + 2], data[i * 4 + 3]); + /* Stop at the end of the program. It'd be nice to keep track of the actual + * intended program size instead of guessing like this. + */ + if (data[i * 4 + 0] == 0 && + data[i * 4 + 1] == 0 && + data[i * 4 + 2] == 0 && + data[i * 4 + 3] == 0) + break; } dri_bo_unmap(prog); diff --git a/i965/brw_structs.h b/i965/brw_structs.h index 4e577d0..89e2981 100644 --- a/i965/brw_structs.h +++ b/i965/brw_structs.h @@ -439,7 +439,7 @@ struct brw_urb_fence } bits1; }; -struct brw_constant_buffer_state /* previously brw_command_streamer */ +struct brw_cs_urb_state { struct header header; @@ -1031,10 +1031,10 @@ struct brw_surface_state GLuint writedisable_green:1; GLuint writedisable_red:1; GLuint writedisable_alpha:1; - GLuint surface_format:9; + GLuint surface_format:9; /**< BRW_SURFACEFORMAT_x */ GLuint data_return_format:1; GLuint pad0:1; - GLuint surface_type:3; + GLuint surface_type:3; /**< BRW_SURFACE_1D/2D/3D/CUBE */ } ss0; struct { diff --git a/i965/brw_tex.c b/i965/brw_tex.c index ef99e9c..71bff16 100644 --- a/i965/brw_tex.c +++ b/i965/brw_tex.c @@ -32,21 +32,12 @@ #include "main/glheader.h" #include "main/mtypes.h" -#include "main/imports.h" -#include "main/simple_list.h" -#include "main/enums.h" -#include "main/image.h" #include "main/teximage.h" -#include "main/texstore.h" -#include "main/texformat.h" - -#include "texmem.h" #include "intel_context.h" #include "intel_regions.h" #include "intel_tex.h" #include "brw_context.h" -#include "brw_defines.h" void brw_FrameBufferTexInit( struct brw_context *brw, diff --git a/i965/brw_vs.h b/i965/brw_vs.h index 99d0e93..1e4f660 100644 --- a/i965/brw_vs.h +++ b/i965/brw_vs.h @@ -75,6 +75,11 @@ struct brw_vs_compile { struct brw_reg userplane[6]; + /** we may need up to 3 constants per instruction (if use_const_buffer) */ + struct { + GLint index; + struct brw_reg reg; + } current_const[3]; }; void brw_vs_emit( struct brw_vs_compile *c ); diff --git a/i965/brw_vs_constval.c b/i965/brw_vs_constval.c index 9977677..249a800 100644 --- a/i965/brw_vs_constval.c +++ b/i965/brw_vs_constval.c @@ -39,8 +39,8 @@ */ struct tracker { GLboolean twoside; - GLubyte active[PROGRAM_OUTPUT+1][128]; - GLuint size_masks[4]; + GLubyte active[PROGRAM_OUTPUT+1][MAX_PROGRAM_TEMPS]; + GLbitfield size_masks[4]; /**< one bit per fragment program input attrib */ }; @@ -53,8 +53,10 @@ static void set_active_component( struct tracker *t, case PROGRAM_TEMPORARY: case PROGRAM_INPUT: case PROGRAM_OUTPUT: + assert(file < PROGRAM_OUTPUT + 1); + assert(index < Elements(t->active[0])); t->active[file][index] |= active; - + break; default: break; } @@ -96,7 +98,7 @@ static GLubyte get_active( struct tracker *t, struct prog_src_register src ) { GLuint i; - GLubyte active = src.NegateBase; /* NOTE! */ + GLubyte active = src.Negate; /* NOTE! */ if (src.RelAddr) return 0xf; @@ -108,10 +110,15 @@ static GLubyte get_active( struct tracker *t, return active; } +/** + * Return the size (1,2,3 or 4) of the output/result for VERT_RESULT_idx. + */ static GLubyte get_output_size( struct tracker *t, GLuint idx ) { - GLubyte active = t->active[PROGRAM_OUTPUT][idx]; + GLubyte active; + assert(idx < VERT_RESULT_MAX); + active = t->active[PROGRAM_OUTPUT][idx]; if (active & (1<<3)) return 4; if (active & (1<<2)) return 3; if (active & (1<<1)) return 2; @@ -123,7 +130,7 @@ static GLubyte get_output_size( struct tracker *t, */ static void calc_sizes( struct tracker *t ) { - GLuint i; + GLint vertRes; if (t->twoside) { t->active[PROGRAM_OUTPUT][VERT_RESULT_COL0] |= @@ -133,12 +140,27 @@ static void calc_sizes( struct tracker *t ) t->active[PROGRAM_OUTPUT][VERT_RESULT_BFC1]; } - for (i = 0; i < FRAG_ATTRIB_MAX; i++) { - switch (get_output_size(t, i)) { - case 4: t->size_masks[4-1] |= 1<<i; - case 3: t->size_masks[3-1] |= 1<<i; - case 2: t->size_masks[2-1] |= 1<<i; - case 1: t->size_masks[1-1] |= 1<<i; + /* Examine vertex program output sizes to set the size_masks[] info + * which describes the fragment program input sizes. + */ + for (vertRes = VERT_RESULT_TEX0; vertRes < VERT_RESULT_MAX; vertRes++) { + GLint fragAttrib; + + /* map vertex program output index to fragment program input index */ + if (vertRes <= VERT_RESULT_TEX7) + fragAttrib = FRAG_ATTRIB_TEX0 + vertRes - VERT_RESULT_TEX0; + else if (vertRes >= VERT_RESULT_VAR0) + fragAttrib = FRAG_ATTRIB_VAR0 + vertRes - VERT_RESULT_VAR0; + else + continue; + assert(fragAttrib >= FRAG_ATTRIB_TEX0); + assert(fragAttrib <= FRAG_ATTRIB_MAX); + + switch (get_output_size(t, vertRes)) { + case 4: t->size_masks[4-1] |= 1 << fragAttrib; + case 3: t->size_masks[3-1] |= 1 << fragAttrib; + case 2: t->size_masks[2-1] |= 1 << fragAttrib; + case 1: t->size_masks[1-1] |= 1 << fragAttrib; break; } } @@ -170,8 +192,8 @@ static void calc_wm_input_sizes( struct brw_context *brw ) { GLcontext *ctx = &brw->intel.ctx; /* BRW_NEW_VERTEX_PROGRAM */ - struct brw_vertex_program *vp = - (struct brw_vertex_program *)brw->vertex_program; + const struct brw_vertex_program *vp = + brw_vertex_program_const(brw->vertex_program); /* BRW_NEW_INPUT_DIMENSIONS */ struct tracker t; GLuint insn; diff --git a/i965/brw_vs_emit.c b/i965/brw_vs_emit.c index 235f826..b69616d 100644 --- a/i965/brw_vs_emit.c +++ b/i965/brw_vs_emit.c @@ -38,18 +38,49 @@ #include "brw_vs.h" +static struct brw_reg get_tmp( struct brw_vs_compile *c ) +{ + struct brw_reg tmp = brw_vec8_grf(c->last_tmp, 0); + + if (++c->last_tmp > c->prog_data.total_grf) + c->prog_data.total_grf = c->last_tmp; + + return tmp; +} -/* Do things as simply as possible. Allocate and populate all regs +static void release_tmp( struct brw_vs_compile *c, struct brw_reg tmp ) +{ + if (tmp.nr == c->last_tmp-1) + c->last_tmp--; +} + +static void release_tmps( struct brw_vs_compile *c ) +{ + c->last_tmp = c->first_tmp; +} + + +/** + * Preallocate GRF register before code emit. + * Do things as simply as possible. Allocate and populate all regs * ahead of time. */ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) { GLuint i, reg = 0, mrf; - GLuint nr_params; + +#if 0 + if (c->vp->program.Base.Parameters->NumParameters >= 6) + c->vp->use_const_buffer = 1; + else +#endif + c->vp->use_const_buffer = GL_FALSE; + /*printf("use_const_buffer = %d\n", c->use_const_buffer);*/ /* r0 -- reserved as usual */ - c->r0 = brw_vec8_grf(reg, 0); reg++; + c->r0 = brw_vec8_grf(reg, 0); + reg++; /* User clip planes from curbe: */ @@ -60,24 +91,33 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) /* Deal with curbe alignment: */ - reg += ((6+c->key.nr_userclip+3)/4)*2; + reg += ((6 + c->key.nr_userclip + 3) / 4) * 2; } /* Vertex program parameters from curbe: */ - nr_params = c->vp->program.Base.Parameters->NumParameters; - for (i = 0; i < nr_params; i++) { - c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1); - } - reg += (nr_params+1)/2; + if (c->vp->use_const_buffer) { + /* get constants from a real constant buffer */ + c->prog_data.curb_read_length = 0; + c->prog_data.nr_params = 4; /* XXX 0 causes a bug elsewhere... */ + } + else { + /* use a section of the GRF for constants */ + GLuint nr_params = c->vp->program.Base.Parameters->NumParameters; + for (i = 0; i < nr_params; i++) { + c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1); + } + reg += (nr_params + 1) / 2; + c->prog_data.curb_read_length = reg - 1; - c->prog_data.curb_read_length = reg - 1; + c->prog_data.nr_params = nr_params * 4; + } /* Allocate input regs: */ c->nr_inputs = 0; for (i = 0; i < VERT_ATTRIB_MAX; i++) { - if (c->prog_data.inputs_read & (1<<i)) { + if (c->prog_data.inputs_read & (1 << i)) { c->nr_inputs++; c->regs[PROGRAM_INPUT][i] = brw_vec8_grf(reg, 0); reg++; @@ -91,7 +131,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) c->first_output = reg; mrf = 4; for (i = 0; i < VERT_RESULT_MAX; i++) { - if (c->prog_data.outputs_written & (1<<i)) { + if (c->prog_data.outputs_written & (1 << i)) { c->nr_outputs++; if (i == VERT_RESULT_HPOS) { c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0); @@ -132,17 +172,24 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) reg++; } + if (c->vp->use_const_buffer) { + for (i = 0; i < 3; i++) { + c->current_const[i].index = -1; + c->current_const[i].reg = brw_vec8_grf(reg, 0); + reg++; + } + } + for (i = 0; i < 128; i++) { - if (c->output_regs[i].used_in_src) { - c->output_regs[i].reg = brw_vec8_grf(reg, 0); - reg++; - } + if (c->output_regs[i].used_in_src) { + c->output_regs[i].reg = brw_vec8_grf(reg, 0); + reg++; + } } c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0); reg += 2; - - + /* Some opcodes need an internal temporary: */ c->first_tmp = reg; @@ -152,35 +199,23 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) * urb_read_length is the number of registers read from *each* * vertex urb, so is half the amount: */ - c->prog_data.urb_read_length = (c->nr_inputs+1)/2; + c->prog_data.urb_read_length = (c->nr_inputs + 1) / 2; - c->prog_data.urb_entry_size = (c->nr_outputs+2+3)/4; + c->prog_data.urb_entry_size = (c->nr_outputs + 2 + 3) / 4; c->prog_data.total_grf = reg; -} - - -static struct brw_reg get_tmp( struct brw_vs_compile *c ) -{ - struct brw_reg tmp = brw_vec8_grf(c->last_tmp, 0); - if (++c->last_tmp > c->prog_data.total_grf) - c->prog_data.total_grf = c->last_tmp; - - return tmp; -} - -static void release_tmp( struct brw_vs_compile *c, struct brw_reg tmp ) -{ - if (tmp.nr == c->last_tmp-1) - c->last_tmp--; -} - -static void release_tmps( struct brw_vs_compile *c ) -{ - c->last_tmp = c->first_tmp; + if (INTEL_DEBUG & DEBUG_VS) { + _mesa_printf("%s NumAddrRegs %d\n", __FUNCTION__, c->vp->program.Base.NumAddressRegs); + _mesa_printf("%s NumTemps %d\n", __FUNCTION__, c->vp->program.Base.NumTemporaries); + _mesa_printf("%s reg = %d\n", __FUNCTION__, reg); + } } +/** + * If an instruction uses a temp reg both as a src and the dest, we + * sometimes need to allocate an intermediate temporary. + */ static void unalias1( struct brw_vs_compile *c, struct brw_reg dst, struct brw_reg arg0, @@ -200,6 +235,10 @@ static void unalias1( struct brw_vs_compile *c, } } +/** + * \sa unalias2 + * Checkes if 2-operand instruction needs an intermediate temporary. + */ static void unalias2( struct brw_vs_compile *c, struct brw_reg dst, struct brw_reg arg0, @@ -222,6 +261,10 @@ static void unalias2( struct brw_vs_compile *c, } } +/** + * \sa unalias2 + * Checkes if 3-operand instruction needs an intermediate temporary. + */ static void unalias3( struct brw_vs_compile *c, struct brw_reg dst, struct brw_reg arg0, @@ -615,6 +658,8 @@ static void emit_lit_noalias( struct brw_vs_compile *c, } brw_ENDIF(p, if_insn); + + release_tmp(c, tmp); } static void emit_lrp_noalias(struct brw_vs_compile *c, @@ -655,13 +700,83 @@ static void emit_nrm( struct brw_vs_compile *c, } +static struct brw_reg +get_constant(struct brw_vs_compile *c, + const struct prog_instruction *inst, + GLuint argIndex) +{ + const struct prog_src_register *src = &inst->SrcReg[argIndex]; + struct brw_compile *p = &c->func; + struct brw_reg const_reg; + struct brw_reg const2_reg; + + assert(argIndex < 3); + + if (c->current_const[argIndex].index != src->Index || src->RelAddr) { + struct brw_reg addrReg = c->regs[PROGRAM_ADDRESS][0]; + + c->current_const[argIndex].index = src->Index; + +#if 0 + printf(" fetch const[%d] for arg %d into reg %d\n", + src->Index, argIndex, c->current_const[argIndex].reg.nr); +#endif + /* need to fetch the constant now */ + brw_dp_READ_4_vs(p, + c->current_const[argIndex].reg,/* writeback dest */ + 0, /* oword */ + src->RelAddr, /* relative indexing? */ + addrReg, /* address register */ + 16 * src->Index, /* byte offset */ + SURF_INDEX_VERT_CONST_BUFFER /* binding table index */ + ); + + if (src->RelAddr) { + /* second read */ + const2_reg = get_tmp(c); + + /* use upper half of address reg for second read */ + addrReg = stride(addrReg, 0, 4, 0); + addrReg.subnr = 16; + + brw_dp_READ_4_vs(p, + const2_reg, /* writeback dest */ + 1, /* oword */ + src->RelAddr, /* relative indexing? */ + addrReg, /* address register */ + 16 * src->Index, /* byte offset */ + SURF_INDEX_VERT_CONST_BUFFER + ); + } + } + + const_reg = c->current_const[argIndex].reg; + + if (src->RelAddr) { + /* merge the two Owords into the constant register */ + /* const_reg[7..4] = const2_reg[7..4] */ + brw_MOV(p, + suboffset(stride(const_reg, 0, 4, 1), 4), + suboffset(stride(const2_reg, 0, 4, 1), 4)); + release_tmp(c, const2_reg); + } + else { + /* replicate lower four floats into upper half (to get XYZWXYZW) */ + const_reg = stride(const_reg, 0, 4, 0); + const_reg.subnr = 0; + } + + return const_reg; +} + + + /* TODO: relative addressing! */ static struct brw_reg get_reg( struct brw_vs_compile *c, - GLuint file, + gl_register_file file, GLuint index ) { - switch (file) { case PROGRAM_TEMPORARY: case PROGRAM_INPUT: @@ -690,13 +805,17 @@ static struct brw_reg get_reg( struct brw_vs_compile *c, } +/** + * Indirect addressing: get reg[[arg] + offset]. + */ static struct brw_reg deref( struct brw_vs_compile *c, struct brw_reg arg, GLint offset) { struct brw_compile *p = &c->func; struct brw_reg tmp = vec4(get_tmp(c)); - struct brw_reg vp_address = retype(vec1(get_reg(c, PROGRAM_ADDRESS, 0)), BRW_REGISTER_TYPE_UW); + struct brw_reg addr_reg = c->regs[PROGRAM_ADDRESS][0]; + struct brw_reg vp_address = retype(vec1(addr_reg), BRW_REGISTER_TYPE_UW); GLuint byte_offset = arg.nr * 32 + arg.subnr + offset * 16; struct brw_reg indirect = brw_vec4_indirect(0,0); @@ -717,10 +836,67 @@ static struct brw_reg deref( struct brw_vs_compile *c, brw_pop_insn_state(p); } + /* NOTE: tmp not released */ return vec8(tmp); } +/** + * Get brw reg corresponding to the instruction's [argIndex] src reg. + * TODO: relative addressing! + */ +static struct brw_reg +get_src_reg( struct brw_vs_compile *c, + const struct prog_instruction *inst, + GLuint argIndex ) +{ + const GLuint file = inst->SrcReg[argIndex].File; + const GLint index = inst->SrcReg[argIndex].Index; + const GLboolean relAddr = inst->SrcReg[argIndex].RelAddr; + + switch (file) { + case PROGRAM_TEMPORARY: + case PROGRAM_INPUT: + case PROGRAM_OUTPUT: + if (relAddr) { + return deref(c, c->regs[file][0], index); + } + else { + assert(c->regs[file][index].nr != 0); + return c->regs[file][index]; + } + + case PROGRAM_STATE_VAR: + case PROGRAM_CONSTANT: + case PROGRAM_UNIFORM: + if (c->vp->use_const_buffer) { + return get_constant(c, inst, argIndex); + } + else if (relAddr) { + return deref(c, c->regs[PROGRAM_STATE_VAR][0], index); + } + else { + assert(c->regs[PROGRAM_STATE_VAR][index].nr != 0); + return c->regs[PROGRAM_STATE_VAR][index]; + } + case PROGRAM_ADDRESS: + assert(index == 0); + return c->regs[file][index]; + + case PROGRAM_UNDEFINED: + /* this is a normal case since we loop over all three src args */ + return brw_null_reg(); + + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_WRITE_ONLY: + default: + assert(0); + return brw_null_reg(); + } +} + + static void emit_arl( struct brw_vs_compile *c, struct brw_reg dst, struct brw_reg arg0 ) @@ -732,30 +908,31 @@ static void emit_arl( struct brw_vs_compile *c, if (need_tmp) tmp = get_tmp(c); - brw_RNDD(p, tmp, arg0); - brw_MUL(p, dst, tmp, brw_imm_d(16)); + brw_RNDD(p, tmp, arg0); /* tmp = round(arg0) */ + brw_MUL(p, dst, tmp, brw_imm_d(16)); /* dst = tmp * 16 */ if (need_tmp) release_tmp(c, tmp); } -/* Will return mangled results for SWZ op. The emit_swz() function +/** + * Return the brw reg for the given instruction's src argument. + * Will return mangled results for SWZ op. The emit_swz() function * ignores this result and recalculates taking extended swizzles into * account. */ static struct brw_reg get_arg( struct brw_vs_compile *c, - struct prog_src_register *src ) + const struct prog_instruction *inst, + GLuint argIndex ) { + const struct prog_src_register *src = &inst->SrcReg[argIndex]; struct brw_reg reg; if (src->File == PROGRAM_UNDEFINED) return brw_null_reg(); - if (src->RelAddr) - reg = deref(c, c->regs[PROGRAM_STATE_VAR][0], src->Index); - else - reg = get_reg(c, src->File, src->Index); + reg = get_src_reg(c, inst, argIndex); /* Convert 3-bit swizzle to 2-bit. */ @@ -766,16 +943,38 @@ static struct brw_reg get_arg( struct brw_vs_compile *c, /* Note this is ok for non-swizzle instructions: */ - reg.negate = src->NegateBase ? 1 : 0; + reg.negate = src->Negate ? 1 : 0; return reg; } +/** + * Get brw register for the given program dest register. + */ static struct brw_reg get_dst( struct brw_vs_compile *c, struct prog_dst_register dst ) { - struct brw_reg reg = get_reg(c, dst.File, dst.Index); + struct brw_reg reg; + + switch (dst.File) { + case PROGRAM_TEMPORARY: + case PROGRAM_OUTPUT: + assert(c->regs[dst.File][dst.Index].nr != 0); + reg = c->regs[dst.File][dst.Index]; + break; + case PROGRAM_ADDRESS: + assert(dst.Index == 0); + reg = c->regs[dst.File][dst.Index]; + break; + case PROGRAM_UNDEFINED: + /* we may hit this for OPCODE_END, OPCODE_KIL, etc */ + reg = brw_null_reg(); + break; + default: + assert(0); + reg = brw_null_reg(); + } reg.dw1.bits.writemask = dst.WriteMask; @@ -785,14 +984,16 @@ static struct brw_reg get_dst( struct brw_vs_compile *c, static void emit_swz( struct brw_vs_compile *c, struct brw_reg dst, - struct prog_src_register src ) + const struct prog_instruction *inst) { + const GLuint argIndex = 0; + const struct prog_src_register src = inst->SrcReg[argIndex]; struct brw_compile *p = &c->func; GLuint zeros_mask = 0; GLuint ones_mask = 0; GLuint src_mask = 0; GLubyte src_swz[4]; - GLboolean need_tmp = (src.NegateBase && + GLboolean need_tmp = (src.Negate && dst.file != BRW_GENERAL_REGISTER_FILE); struct brw_reg tmp = dst; GLuint i; @@ -826,10 +1027,7 @@ static void emit_swz( struct brw_vs_compile *c, if (src_mask) { struct brw_reg arg0; - if (src.RelAddr) - arg0 = deref(c, c->regs[PROGRAM_STATE_VAR][0], src.Index); - else - arg0 = get_reg(c, src.File, src.Index); + arg0 = get_src_reg(c, inst, argIndex); arg0 = brw_swizzle(arg0, src_swz[0], src_swz[1], @@ -844,8 +1042,8 @@ static void emit_swz( struct brw_vs_compile *c, if (ones_mask) brw_MOV(p, brw_writemask(tmp, ones_mask), brw_imm_f(1)); - if (src.NegateBase) - brw_MOV(p, brw_writemask(tmp, src.NegateBase), negate(tmp)); + if (src.Negate) + brw_MOV(p, brw_writemask(tmp, src.Negate), negate(tmp)); if (need_tmp) { brw_MOV(p, dst, tmp); @@ -975,7 +1173,7 @@ post_vs_emit( struct brw_vs_compile *c, } -/* Emit the fragment program instructions here. +/* Emit the vertex program instructions here. */ void brw_vs_emit(struct brw_vs_compile *c ) { @@ -1025,6 +1223,11 @@ void brw_vs_emit(struct brw_vs_compile *c ) struct brw_reg args[3], dst; GLuint i; +#if 0 + printf("%d: ", insn); + _mesa_print_instruction(inst); +#endif + /* Get argument regs. SWZ is special and does this itself. */ if (inst->Opcode != OPCODE_SWZ) @@ -1032,10 +1235,10 @@ void brw_vs_emit(struct brw_vs_compile *c ) struct prog_src_register *src = &inst->SrcReg[i]; index = src->Index; file = src->File; - if (file == PROGRAM_OUTPUT&&c->output_regs[index].used_in_src) + if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src) args[i] = c->output_regs[index].reg; else - args[i] = get_arg(c, src); + args[i] = get_arg(c, inst, i); } /* Get dest regs. Note that it is possible for a reg to be both @@ -1163,7 +1366,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) /* The args[0] value can't be used here as it won't have * correctly encoded the full swizzle: */ - emit_swz(c, dst, inst->SrcReg[0] ); + emit_swz(c, dst, inst); break; case OPCODE_TRUNC: /* round toward zero */ diff --git a/i965/brw_vs_state.c b/i965/brw_vs_state.c index 1a63766..3d29538 100644 --- a/i965/brw_vs_state.c +++ b/i965/brw_vs_state.c @@ -44,6 +44,8 @@ struct brw_vs_unit_key { unsigned int curbe_offset; unsigned int nr_urb_entries, urb_size; + + unsigned int nr_surfaces; }; static void @@ -62,6 +64,9 @@ vs_unit_populate_key(struct brw_context *brw, struct brw_vs_unit_key *key) key->nr_urb_entries = brw->urb.nr_vs_entries; key->urb_size = brw->urb.vsize; + /* BRW_NEW_NR_VS_SURFACES */ + key->nr_surfaces = brw->vs.nr_surfaces; + /* BRW_NEW_CURBE_OFFSETS, _NEW_TRANSFORM */ if (ctx->Transform.ClipPlanesEnabled) { /* Note that we read in the userclip planes as well, hence @@ -92,6 +97,8 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) * brw_urb_WRITE() results. */ vs.thread1.single_program_flow = 0; + vs.thread1.binding_table_entry_count = key->nr_surfaces; + vs.thread3.urb_entry_read_length = key->urb_entry_read_length; vs.thread3.const_urb_entry_read_length = key->curb_entry_read_length; vs.thread3.dispatch_grf_start_reg = 1; @@ -158,6 +165,7 @@ const struct brw_tracked_state brw_vs_unit = { .dirty = { .mesa = _NEW_TRANSFORM, .brw = (BRW_NEW_CURBE_OFFSETS | + BRW_NEW_NR_VS_SURFACES | BRW_NEW_URB_FENCE), .cache = CACHE_NEW_VS_PROG }, diff --git a/i965/brw_vtbl.c b/i965/brw_vtbl.c index e69d4c5..ba03afd 100644 --- a/i965/brw_vtbl.c +++ b/i965/brw_vtbl.c @@ -23,14 +23,12 @@ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - - +**********************************************************************/ +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ #include "main/glheader.h" #include "main/mtypes.h" @@ -44,12 +42,11 @@ #include "brw_context.h" #include "brw_defines.h" #include "brw_state.h" - #include "brw_draw.h" #include "brw_state.h" #include "brw_fallback.h" #include "brw_vs.h" -#include <stdarg.h> + static void dri_bo_release(dri_bo **bo) @@ -58,7 +55,9 @@ dri_bo_release(dri_bo **bo) *bo = NULL; } -/* called from intelDestroyContext() + +/** + * called from intelDestroyContext() */ static void brw_destroy_context( struct intel_context *intel ) { @@ -68,16 +67,19 @@ static void brw_destroy_context( struct intel_context *intel ) brw_destroy_state(brw); brw_draw_destroy( brw ); + _mesa_free(brw->wm.compile_data); + brw_FrameBufferTexDestroy( brw ); - for (i = 0; i < brw->state.nr_draw_regions; i++) - intel_region_release(&brw->state.draw_regions[i]); - brw->state.nr_draw_regions = 0; + for (i = 0; i < brw->state.nr_color_regions; i++) + intel_region_release(&brw->state.color_regions[i]); + brw->state.nr_color_regions = 0; intel_region_release(&brw->state.depth_region); dri_bo_release(&brw->curbe.curbe_bo); dri_bo_release(&brw->vs.prog_bo); dri_bo_release(&brw->vs.state_bo); + dri_bo_release(&brw->vs.bind_bo); dri_bo_release(&brw->gs.prog_bo); dri_bo_release(&brw->gs.state_bo); dri_bo_release(&brw->clip.prog_bo); @@ -91,6 +93,7 @@ static void brw_destroy_context( struct intel_context *intel ) dri_bo_release(&brw->wm.bind_bo); for (i = 0; i < BRW_WM_MAX_SURF; i++) dri_bo_release(&brw->wm.surf_bo[i]); + dri_bo_release(&brw->wm.sampler_bo); dri_bo_release(&brw->wm.prog_bo); dri_bo_release(&brw->wm.state_bo); dri_bo_release(&brw->cc.prog_bo); @@ -98,37 +101,46 @@ static void brw_destroy_context( struct intel_context *intel ) dri_bo_release(&brw->cc.vp_bo); } -/* called from intelDrawBuffer() + +/** + * called from intelDrawBuffer() */ static void brw_set_draw_region( struct intel_context *intel, - struct intel_region *draw_regions[], - struct intel_region *depth_region, - GLuint num_regions) + struct intel_region *color_regions[], + struct intel_region *depth_region, + GLuint num_color_regions) { struct brw_context *brw = brw_context(&intel->ctx); - int i; + GLuint i; + + /* release old color/depth regions */ if (brw->state.depth_region != depth_region) brw->state.dirty.brw |= BRW_NEW_DEPTH_BUFFER; - for (i = 0; i < brw->state.nr_draw_regions; i++) - intel_region_release(&brw->state.draw_regions[i]); + for (i = 0; i < brw->state.nr_color_regions; i++) + intel_region_release(&brw->state.color_regions[i]); intel_region_release(&brw->state.depth_region); - for (i = 0; i < num_regions; i++) - intel_region_reference(&brw->state.draw_regions[i], draw_regions[i]); + + /* reference new color/depth regions */ + for (i = 0; i < num_color_regions; i++) + intel_region_reference(&brw->state.color_regions[i], color_regions[i]); intel_region_reference(&brw->state.depth_region, depth_region); - brw->state.nr_draw_regions = num_regions; + brw->state.nr_color_regions = num_color_regions; } -/* called from intel_batchbuffer_flush and children before sending a + +/** + * called from intel_batchbuffer_flush and children before sending a * batchbuffer off. */ static void brw_finish_batch(struct intel_context *intel) { struct brw_context *brw = brw_context(&intel->ctx); - brw_emit_query_end(brw); } -/* called from intelFlushBatchLocked + +/** + * called from intelFlushBatchLocked */ static void brw_new_batch( struct intel_context *intel ) { @@ -159,39 +171,20 @@ static void brw_new_batch( struct intel_context *intel ) } } -static void brw_note_fence( struct intel_context *intel, - GLuint fence ) + +static void brw_note_fence( struct intel_context *intel, GLuint fence ) { brw_context(&intel->ctx)->state.dirty.brw |= BRW_NEW_FENCE; } - + + static void brw_note_unlock( struct intel_context *intel ) { struct brw_context *brw = brw_context(&intel->ctx); - brw_state_cache_check_size(brw); } -void brw_do_flush( struct brw_context *brw, - GLuint flags ) -{ - struct brw_mi_flush flush; - memset(&flush, 0, sizeof(flush)); - flush.opcode = CMD_MI_FLUSH; - flush.flags = flags; - BRW_BATCH_STRUCT(brw, &flush); -} - - -static void brw_emit_flush( struct intel_context *intel, - GLuint unused ) -{ - brw_do_flush(brw_context(&intel->ctx), - BRW_FLUSH_STATE_CACHE|BRW_FLUSH_READ_CACHE); -} - - /* called from intelWaitForIdle() and intelFlush() * * For now, just flush everything. Could be smarter later. @@ -205,6 +198,7 @@ static GLuint brw_flush_cmd( void ) return *(GLuint *)&flush; } + static void brw_invalidate_state( struct intel_context *intel, GLuint new_state ) { /* nothing */ @@ -214,20 +208,18 @@ static void brw_invalidate_state( struct intel_context *intel, GLuint new_state void brwInitVtbl( struct brw_context *brw ) { brw->intel.vtbl.check_vertex_size = 0; - brw->intel.vtbl.emit_state = 0; - brw->intel.vtbl.reduced_primitive_state = 0; + brw->intel.vtbl.emit_state = 0; + brw->intel.vtbl.reduced_primitive_state = 0; brw->intel.vtbl.render_start = 0; - brw->intel.vtbl.update_texture_state = 0; + brw->intel.vtbl.update_texture_state = 0; - brw->intel.vtbl.invalidate_state = brw_invalidate_state; - brw->intel.vtbl.note_fence = brw_note_fence; - brw->intel.vtbl.note_unlock = brw_note_unlock; + brw->intel.vtbl.invalidate_state = brw_invalidate_state; + brw->intel.vtbl.note_fence = brw_note_fence; + brw->intel.vtbl.note_unlock = brw_note_unlock; brw->intel.vtbl.new_batch = brw_new_batch; brw->intel.vtbl.finish_batch = brw_finish_batch; brw->intel.vtbl.destroy = brw_destroy_context; brw->intel.vtbl.set_draw_region = brw_set_draw_region; brw->intel.vtbl.flush_cmd = brw_flush_cmd; - brw->intel.vtbl.emit_flush = brw_emit_flush; brw->intel.vtbl.debug_batch = brw_debug_batch; } - diff --git a/i965/brw_wm.c b/i965/brw_wm.c index c6791da..8a3b7df 100644 --- a/i965/brw_wm.c +++ b/i965/brw_wm.c @@ -82,6 +82,58 @@ GLuint brw_wm_is_scalar_result( GLuint opcode ) } +/** + * Do GPU code generation for non-GLSL shader. non-GLSL shaders have + * no flow control instructions so we can more readily do SSA-style + * optimizations. + */ +static void +brw_wm_non_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) +{ + /* Augment fragment program. Add instructions for pre- and + * post-fragment-program tasks such as interpolation and fogging. + */ + brw_wm_pass_fp(c); + + /* Translate to intermediate representation. Build register usage + * chains. + */ + brw_wm_pass0(c); + + /* Dead code removal. + */ + brw_wm_pass1(c); + + /* Register allocation. + * Divide by two because we operate on 16 pixels at a time and require + * two GRF entries for each logical shader register. + */ + c->grf_limit = BRW_WM_MAX_GRF / 2; + + brw_wm_pass2(c); + + /* how many general-purpose registers are used */ + c->prog_data.total_grf = c->max_wm_grf; + + /* Scratch space is used for register spilling */ + if (c->last_scratch) { + c->prog_data.total_scratch = c->last_scratch + 0x40; + } + else { + c->prog_data.total_scratch = 0; + } + + /* Emit GEN4 code. + */ + brw_wm_emit(c); +} + + +/** + * All Mesa program -> GPU code generation goes through this function. + * Depending on the instructions used (i.e. flow control instructions) + * we'll use one of two code generators. + */ static void do_wm_prog( struct brw_context *brw, struct brw_fragment_program *fp, struct brw_wm_prog_key *key) @@ -92,52 +144,39 @@ static void do_wm_prog( struct brw_context *brw, c = brw->wm.compile_data; if (c == NULL) { - brw->wm.compile_data = calloc(1, sizeof(*brw->wm.compile_data)); - c = brw->wm.compile_data; + brw->wm.compile_data = calloc(1, sizeof(*brw->wm.compile_data)); + c = brw->wm.compile_data; + if (c == NULL) { + /* Ouch - big out of memory problem. Can't continue + * without triggering a segfault, no way to signal, + * so just return. + */ + return; + } } else { - memset(c, 0, sizeof(*brw->wm.compile_data)); + memset(c, 0, sizeof(*brw->wm.compile_data)); } memcpy(&c->key, key, sizeof(*key)); c->fp = fp; c->env_param = brw->intel.ctx.FragmentProgram.Parameters; - brw_init_compile(brw, &c->func); - if (brw_wm_is_glsl(&c->fp->program)) { - brw_wm_glsl_emit(brw, c); - } else { - /* Augment fragment program. Add instructions for pre- and - * post-fragment-program tasks such as interpolation and fogging. - */ - brw_wm_pass_fp(c); - - /* Translate to intermediate representation. Build register usage - * chains. - */ - brw_wm_pass0(c); - - /* Dead code removal. - */ - brw_wm_pass1(c); - - /* Register allocation. - */ - c->grf_limit = BRW_WM_MAX_GRF/2; - - brw_wm_pass2(c); - - c->prog_data.total_grf = c->max_wm_grf; - if (c->last_scratch) { - c->prog_data.total_scratch = - c->last_scratch + 0x40; - } else { - c->prog_data.total_scratch = 0; - } - - /* Emit GEN4 code. - */ - brw_wm_emit(c); + brw_init_compile(brw, &c->func); + + /* temporary sanity check assertion */ + ASSERT(fp->isGLSL == brw_wm_is_glsl(&c->fp->program)); + + /* + * Shader which use GLSL features such as flow control are handled + * differently from "simple" shaders. + */ + if (fp->isGLSL) { + brw_wm_glsl_emit(brw, c); } + else { + brw_wm_non_glsl_emit(brw, c); + } + if (INTEL_DEBUG & DEBUG_WM) fprintf(stderr, "\n"); @@ -161,7 +200,7 @@ static void brw_wm_populate_key( struct brw_context *brw, { GLcontext *ctx = &brw->intel.ctx; /* BRW_NEW_FRAGMENT_PROGRAM */ - struct brw_fragment_program *fp = + const struct brw_fragment_program *fp = (struct brw_fragment_program *)brw->fragment_program; GLuint lookup = 0; GLuint line_aa; @@ -176,7 +215,7 @@ static void brw_wm_populate_key( struct brw_context *brw, ctx->Color.AlphaEnabled) lookup |= IZ_PS_KILL_ALPHATEST_BIT; - if (fp->program.Base.OutputsWritten & (1<<FRAG_RESULT_DEPR)) + if (fp->program.Base.OutputsWritten & (1<<FRAG_RESULT_DEPTH)) lookup |= IZ_PS_COMPUTES_DEPTH_BIT; /* _NEW_DEPTH */ @@ -188,7 +227,7 @@ static void brw_wm_populate_key( struct brw_context *brw, lookup |= IZ_DEPTH_WRITE_ENABLE_BIT; /* _NEW_STENCIL */ - if (ctx->Stencil.Enabled) { + if (ctx->Stencil._Enabled) { lookup |= IZ_STENCIL_TEST_ENABLE_BIT; if (ctx->Stencil.WriteMask[0] || @@ -228,7 +267,7 @@ static void brw_wm_populate_key( struct brw_context *brw, /* BRW_NEW_WM_INPUT_DIMENSIONS */ - key->projtex_mask = brw->wm.input_size_masks[4-1] >> (FRAG_ATTRIB_TEX0 - FRAG_ATTRIB_WPOS); + key->proj_attrib_mask = brw->wm.input_size_masks[4-1]; /* _NEW_LIGHT */ key->flat_shade = (ctx->Light.ShadeModel == GL_FLAT); @@ -245,6 +284,11 @@ static void brw_wm_populate_key( struct brw_context *brw, if (img->TexFormat->MesaFormat == MESA_FORMAT_YCBCR) key->yuvtex_swap_mask |= 1 << i; } + + key->tex_swizzles[i] = t->_Swizzle; + } + else { + key->tex_swizzles[i] = SWIZZLE_NOOP; } } @@ -275,10 +319,11 @@ static void brw_wm_populate_key( struct brw_context *brw, key->drawable_height = brw->intel.driDrawable->h; } - /* Extra info: - */ - key->program_string_id = fp->id; + /* CACHE_NEW_VS_PROG */ + key->vp_outputs_written = brw->vs.prog_data->outputs_written & DO_SETUP_BITS; + /* The unique fragment program ID */ + key->program_string_id = fp->id; } @@ -302,8 +347,6 @@ static void brw_prepare_wm_prog(struct brw_context *brw) } -/* See brw_wm.c: - */ const struct brw_tracked_state brw_wm_prog = { .dirty = { .mesa = (_NEW_COLOR | @@ -317,7 +360,7 @@ const struct brw_tracked_state brw_wm_prog = { .brw = (BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_WM_INPUT_DIMENSIONS | BRW_NEW_REDUCED_PRIMITIVE), - .cache = 0 + .cache = CACHE_NEW_VS_PROG, }, .prepare = brw_prepare_wm_prog }; diff --git a/i965/brw_wm.h b/i965/brw_wm.h index 3cbdf81..295fed8 100644 --- a/i965/brw_wm.h +++ b/i965/brw_wm.h @@ -65,15 +65,17 @@ struct brw_wm_prog_key { GLuint flat_shade:1; GLuint runtime_check_aads_emit:1; - GLuint projtex_mask:16; + GLbitfield proj_attrib_mask; /**< one bit per fragment program attribute */ GLuint shadowtex_mask:16; GLuint yuvtex_mask:16; GLuint yuvtex_swap_mask:16; /* UV swaped */ - // GLuint pad1:16; + + GLuint tex_swizzles[BRW_MAX_TEX_UNIT]; GLuint program_string_id:32; GLuint origin_x, origin_y; GLuint drawable_height; + GLuint vp_outputs_written; }; @@ -142,13 +144,12 @@ struct brw_wm_instruction { GLuint writemask:4; GLuint tex_unit:4; /* texture unit for TEX, TXD, TXP instructions */ GLuint tex_idx:3; /* TEXTURE_1D,2D,3D,CUBE,RECT_INDEX source target */ + GLuint tex_shadow:1; /* do shadow comparison? */ GLuint eot:1; /* End of thread indicator for FB_WRITE*/ GLuint target:10; /* target binding table index for FB_WRITE*/ }; -#define PROGRAM_INTERNAL_PARAM - #define BRW_WM_MAX_INSN (MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS*3 + FRAG_ATTRIB_MAX + 3) #define BRW_WM_MAX_GRF 128 /* hardware limit */ #define BRW_WM_MAX_VREG (BRW_WM_MAX_INSN * 4) @@ -240,17 +241,25 @@ struct brw_wm_compile { GLuint max_wm_grf; GLuint last_scratch; + /** Mapping from Mesa registers to hardware registers */ struct { - GLboolean inited; - struct brw_reg reg; + GLboolean inited; + struct brw_reg reg; } wm_regs[PROGRAM_PAYLOAD+1][256][4]; + struct brw_reg stack; struct brw_reg emit_mask_reg; - GLuint reg_index; + GLuint reg_index; /**< Index of next free GRF register */ GLuint tmp_regs[BRW_WM_MAX_GRF]; GLuint tmp_index; GLuint tmp_max; GLuint subroutines[BRW_WM_MAX_SUBROUTINE]; + + /** we may need up to 3 constants per instruction (if use_const_buffer) */ + struct { + GLint index; + struct brw_reg reg; + } current_const[3]; }; diff --git a/i965/brw_wm_emit.c b/i965/brw_wm_emit.c index bc8e8c9..a870e75 100644 --- a/i965/brw_wm_emit.c +++ b/i965/brw_wm_emit.c @@ -699,7 +699,6 @@ static void emit_tex( struct brw_wm_compile *c, { struct brw_compile *p = &c->func; GLuint msgLength, responseLength; - GLboolean shadow = (c->key.shadowtex_mask & (1<<inst->tex_unit)) ? 1 : 0; GLuint i, nr; GLuint emit; @@ -721,7 +720,7 @@ static void emit_tex( struct brw_wm_compile *c, break; } - if (shadow) { + if (inst->tex_shadow) { nr = 4; emit |= WRITEMASK_W; } @@ -743,10 +742,10 @@ static void emit_tex( struct brw_wm_compile *c, retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW), 1, retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW), - inst->tex_unit + MAX_DRAW_BUFFERS, /* surface */ + SURF_INDEX_TEXTURE(inst->tex_unit), inst->tex_unit, /* sampler */ inst->writemask, - (shadow ? + (inst->tex_shadow ? BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE : BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE), responseLength, @@ -792,7 +791,7 @@ static void emit_txb( struct brw_wm_compile *c, retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW), 1, retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW), - inst->tex_unit + MAX_DRAW_BUFFERS, /* surface */ + SURF_INDEX_TEXTURE(inst->tex_unit), inst->tex_unit, /* sampler */ inst->writemask, BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS, @@ -914,6 +913,9 @@ static void emit_aa( struct brw_wm_compile *c, /* Post-fragment-program processing. Send the results to the * framebuffer. + * \param arg0 the fragment color + * \param arg1 the pass-through depth value + * \param arg2 the shader-computed depth value */ static void emit_fb_write( struct brw_wm_compile *c, struct brw_reg *arg0, @@ -1022,8 +1024,8 @@ static void emit_fb_write( struct brw_wm_compile *c, } -/* Post-fragment-program processing. Send the results to the - * framebuffer. +/** + * Move a GPR to scratch memory. */ static void emit_spill( struct brw_wm_compile *c, struct brw_reg reg, @@ -1042,11 +1044,13 @@ static void emit_spill( struct brw_wm_compile *c, */ brw_dp_WRITE_16(p, retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW), - 1, slot); } +/** + * Load a GPR from scratch memory. + */ static void emit_unspill( struct brw_wm_compile *c, struct brw_reg reg, GLuint slot ) @@ -1067,13 +1071,13 @@ static void emit_unspill( struct brw_wm_compile *c, brw_dp_READ_16(p, retype(vec16(reg), BRW_REGISTER_TYPE_UW), - 1, slot); } /** - * Retrieve upto 4 GEN4 register pairs for the given wm reg: + * Retrieve up to 4 GEN4 register pairs for the given wm reg: + * Args with unspill_reg != 0 will be loaded from scratch memory. */ static void get_argument_regs( struct brw_wm_compile *c, struct brw_wm_ref *arg[], @@ -1083,13 +1087,12 @@ static void get_argument_regs( struct brw_wm_compile *c, for (i = 0; i < 4; i++) { if (arg[i]) { - - if (arg[i]->unspill_reg) - emit_unspill(c, + if (arg[i]->unspill_reg) + emit_unspill(c, brw_vec8_grf(arg[i]->unspill_reg, 0), arg[i]->value->spill_slot); - regs[i] = arg[i]->hw_reg; + regs[i] = arg[i]->hw_reg; } else { regs[i] = brw_null_reg(); @@ -1098,6 +1101,9 @@ static void get_argument_regs( struct brw_wm_compile *c, } +/** + * For values that have a spill_slot!=0, write those regs to scratch memory. + */ static void spill_values( struct brw_wm_compile *c, struct brw_wm_value *values, GLuint nr ) diff --git a/i965/brw_wm_fp.c b/i965/brw_wm_fp.c index 7ebe5b9..49aad28 100644 --- a/i965/brw_wm_fp.c +++ b/i965/brw_wm_fp.c @@ -80,9 +80,8 @@ static struct prog_src_register src_reg(GLuint file, GLuint idx) reg.Index = idx; reg.Swizzle = SWIZZLE_NOOP; reg.RelAddr = 0; - reg.NegateBase = 0; + reg.Negate = NEGATE_NONE; reg.Abs = 0; - reg.NegateAbs = 0; return reg; } @@ -112,6 +111,12 @@ static struct prog_src_register src_swizzle1( struct prog_src_register reg, int return src_swizzle(reg, x, x, x, x); } +static struct prog_src_register src_swizzle4( struct prog_src_register reg, uint swizzle ) +{ + reg.Swizzle = swizzle; + return reg; +} + /*********************************************************************** * Dest regs @@ -187,6 +192,7 @@ static struct prog_instruction * emit_tex_op(struct brw_wm_compile *c, GLuint saturate, GLuint tex_src_unit, GLuint tex_src_target, + GLuint tex_shadow, struct prog_src_register src0, struct prog_src_register src1, struct prog_src_register src2 ) @@ -200,6 +206,7 @@ static struct prog_instruction * emit_tex_op(struct brw_wm_compile *c, inst->SaturateMode = saturate; inst->TexSrcUnit = tex_src_unit; inst->TexSrcTarget = tex_src_target; + inst->TexShadow = tex_shadow; inst->SrcReg[0] = src0; inst->SrcReg[1] = src1; inst->SrcReg[2] = src2; @@ -216,7 +223,7 @@ static struct prog_instruction * emit_op(struct brw_wm_compile *c, struct prog_src_register src2 ) { return emit_tex_op(c, op, dest, saturate, - 0, 0, /* tex unit, target */ + 0, 0, 0, /* tex unit, target, shadow */ src0, src1, src2); } @@ -282,8 +289,7 @@ static struct prog_src_register get_pixel_w( struct brw_wm_compile *c ) struct prog_dst_register pixel_w = get_temp(c); struct prog_src_register deltas = get_delta_xy(c); struct prog_src_register interp_wpos = src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS); - - + /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x */ emit_op(c, @@ -548,7 +554,6 @@ static void precalc_dst( struct brw_wm_compile *c, src_undef()); } - if (dst.WriteMask & WRITEMASK_XZ) { struct prog_instruction *swz; GLuint z = GET_SWZ(src0.Swizzle, Z); @@ -563,7 +568,7 @@ static void precalc_dst( struct brw_wm_compile *c, src_undef(), src_undef()); /* Avoid letting negation flag of src0 affect our 1 constant. */ - swz->SrcReg[0].NegateBase &= ~NEGATE_X; + swz->SrcReg[0].Negate &= ~NEGATE_X; } if (dst.WriteMask & WRITEMASK_W) { /* dst.w = mov src1.w @@ -598,10 +603,9 @@ static void precalc_lit( struct brw_wm_compile *c, src_undef(), src_undef()); /* Avoid letting the negation flag of src0 affect our 1 constant. */ - swz->SrcReg[0].NegateBase = 0; + swz->SrcReg[0].Negate = NEGATE_NONE; } - if (dst.WriteMask & WRITEMASK_YZ) { emit_op(c, OPCODE_LIT, @@ -646,7 +650,7 @@ static void precalc_tex( struct brw_wm_compile *c, src0, src_undef(), src_undef()); - out->SrcReg[0].NegateBase = 0; + out->SrcReg[0].Negate = NEGATE_NONE; out->SrcReg[0].Abs = 1; /* tmp0 = MAX(coord.X, coord.Y) */ @@ -745,6 +749,7 @@ static void precalc_tex( struct brw_wm_compile *c, inst->SaturateMode, unit, inst->TexSrcTarget, + inst->TexShadow, coord, src_undef(), src_undef()); @@ -805,21 +810,40 @@ static void precalc_tex( struct brw_wm_compile *c, inst->SaturateMode, unit, inst->TexSrcTarget, + inst->TexShadow, coord, src_undef(), src_undef()); } + /* For GL_EXT_texture_swizzle: */ + if (c->key.tex_swizzles[unit] != SWIZZLE_NOOP) { + /* swizzle the result of the TEX instruction */ + struct prog_src_register tmpsrc = src_reg_from_dst(inst->DstReg); + emit_op(c, OPCODE_SWZ, + inst->DstReg, + SATURATE_OFF, /* saturate already done above */ + src_swizzle4(tmpsrc, c->key.tex_swizzles[unit]), + src_undef(), + src_undef()); + } + if ((inst->TexSrcTarget == TEXTURE_RECT_INDEX) || (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)) release_temp(c, tmpcoord); } +/** + * Check if the given TXP instruction really needs the divide-by-W step. + */ static GLboolean projtex( struct brw_wm_compile *c, const struct prog_instruction *inst ) { - struct prog_src_register src = inst->SrcReg[0]; + const struct prog_src_register src = inst->SrcReg[0]; + GLboolean retVal; + + assert(inst->Opcode == OPCODE_TXP); /* Only try to detect the simplest cases. Could detect (later) * cases where we are trying to emit code like RCP {1.0}, MUL x, @@ -829,16 +853,21 @@ static GLboolean projtex( struct brw_wm_compile *c, * user-provided fragment programs anyway: */ if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) - return 0; /* ut2004 gun rendering !?! */ + retVal = GL_FALSE; /* ut2004 gun rendering !?! */ else if (src.File == PROGRAM_INPUT && GET_SWZ(src.Swizzle, W) == W && - (c->key.projtex_mask & (1<<(src.Index + FRAG_ATTRIB_WPOS - FRAG_ATTRIB_TEX0))) == 0) - return 0; + (c->key.proj_attrib_mask & (1 << src.Index)) == 0) + retVal = GL_FALSE; else - return 1; + retVal = GL_TRUE; + + return retVal; } +/** + * Emit code for TXP. + */ static void precalc_txp( struct brw_wm_compile *c, const struct prog_instruction *inst ) { @@ -889,42 +918,41 @@ static void precalc_txp( struct brw_wm_compile *c, static void emit_fb_write( struct brw_wm_compile *c ) { struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH); - struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPR); + struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPTH); struct prog_src_register outcolor; GLuint i; struct prog_instruction *inst, *last_inst; struct brw_context *brw = c->func.brw; - /* inst->Sampler is not used by backend, - use it for fb write target and eot */ - - if (brw->state.nr_draw_regions > 1) { - for (i = 0 ; i < brw->state.nr_draw_regions; i++) { - outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i); - last_inst = inst = emit_op(c, - WM_FB_WRITE, dst_mask(dst_undef(),0), 0, - outcolor, payload_r0_depth, outdepth); - inst->Sampler = (i<<1); - if (c->fp_fragcolor_emitted) { - outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLR); - last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0), - 0, outcolor, payload_r0_depth, outdepth); - inst->Sampler = (i<<1); - } - } - last_inst->Sampler |= 1; //eot + /* The inst->Aux field is used for FB write target and the EOT marker */ + + if (brw->state.nr_color_regions > 1) { + for (i = 0 ; i < brw->state.nr_color_regions; i++) { + outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i); + last_inst = inst = emit_op(c, + WM_FB_WRITE, dst_mask(dst_undef(),0), 0, + outcolor, payload_r0_depth, outdepth); + inst->Aux = (i<<1); + if (c->fp_fragcolor_emitted) { + outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR); + last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0), + 0, outcolor, payload_r0_depth, outdepth); + inst->Aux = (i<<1); + } + } + last_inst->Aux |= 1; //eot } else { /* if gl_FragData[0] is written, use it, else use gl_FragColor */ if (c->fp->program.Base.OutputsWritten & (1 << FRAG_RESULT_DATA0)) outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0); else - outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLR); + outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR); - inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0), - 0, outcolor, payload_r0_depth, outdepth); - inst->Sampler = 1|(0<<1); + inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0), + 0, outcolor, payload_r0_depth, outdepth); + inst->Aux = 1|(0<<1); } } @@ -955,9 +983,9 @@ static void validate_dst_regs( struct brw_wm_compile *c, const struct prog_instruction *inst ) { if (inst->DstReg.File == PROGRAM_OUTPUT) { - GLuint idx = inst->DstReg.Index; - if (idx == FRAG_RESULT_COLR) - c->fp_fragcolor_emitted = 1; + GLuint idx = inst->DstReg.Index; + if (idx == FRAG_RESULT_COLOR) + c->fp_fragcolor_emitted = 1; } } @@ -981,6 +1009,11 @@ static void print_insns( const struct prog_instruction *insn, } } + +/** + * Initial pass for fragment program code generation. + * This function is used by both the GLSL and non-GLSL paths. + */ void brw_wm_pass_fp( struct brw_wm_compile *c ) { struct brw_fragment_program *fp = c->fp; @@ -997,15 +1030,19 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) c->pixel_w = src_undef(); c->nr_fp_insns = 0; - /* Emit preamble instructions: + /* Emit preamble instructions. This is where special instructions such as + * WM_CINTERP, WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to + * compute shader inputs from varying vars. */ - - for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) { const struct prog_instruction *inst = &fp->program.Base.Instructions[insn]; validate_src_regs(c, inst); validate_dst_regs(c, inst); } + + /* Loop over all instructions doing assorted simplifications and + * transformations. + */ for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) { const struct prog_instruction *inst = &fp->program.Base.Instructions[insn]; struct prog_instruction *out; @@ -1014,7 +1051,6 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) * necessary: */ - switch (inst->Opcode) { case OPCODE_SWZ: out = emit_insn(c, inst); @@ -1024,14 +1060,14 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) case OPCODE_ABS: out = emit_insn(c, inst); out->Opcode = OPCODE_MOV; - out->SrcReg[0].NegateBase = 0; + out->SrcReg[0].Negate = NEGATE_NONE; out->SrcReg[0].Abs = 1; break; case OPCODE_SUB: out = emit_insn(c, inst); out->Opcode = OPCODE_ADD; - out->SrcReg[1].NegateBase ^= 0xf; + out->SrcReg[1].Negate ^= NEGATE_XYZW; break; case OPCODE_SCS: @@ -1094,9 +1130,9 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) } if (INTEL_DEBUG & DEBUG_WM) { - _mesa_printf("pass_fp:\n"); - print_insns( c->prog_instructions, c->nr_fp_insns ); - _mesa_printf("\n"); + _mesa_printf("pass_fp:\n"); + print_insns( c->prog_instructions, c->nr_fp_insns ); + _mesa_printf("\n"); } } diff --git a/i965/brw_wm_glsl.c b/i965/brw_wm_glsl.c index 5a5497e..a907e1b 100644 --- a/i965/brw_wm_glsl.c +++ b/i965/brw_wm_glsl.c @@ -8,12 +8,17 @@ enum _subroutine { SUB_NOISE1, SUB_NOISE2, SUB_NOISE3, SUB_NOISE4 }; -/* Only guess, need a flag in gl_fragment_program later */ + +/** + * Determine if the given fragment program uses GLSL features such + * as flow conditionals, loops, subroutines. + * Some GLSL shaders may use these features, others might not. + */ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp) { int i; for (i = 0; i < fp->Base.NumInstructions; i++) { - struct prog_instruction *inst = &fp->Base.Instructions[i]; + const struct prog_instruction *inst = &fp->Base.Instructions[i]; switch (inst->Opcode) { case OPCODE_IF: case OPCODE_TRUNC: @@ -36,6 +41,10 @@ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp) return GL_FALSE; } + +/** + * Record the mapping of a Mesa register to a hardware register. + */ static void set_reg(struct brw_wm_compile *c, int file, int index, int component, struct brw_reg reg) { @@ -43,7 +52,11 @@ static void set_reg(struct brw_wm_compile *c, int file, int index, c->wm_regs[file][index][component].inited = GL_TRUE; } -static int get_scalar_dst_index(struct prog_instruction *inst) +/** + * Examine instruction's write mask to find index of first component + * enabled for writing. + */ +static int get_scalar_dst_index(const struct prog_instruction *inst) { int i; for (i = 0; i < 4; i++) @@ -62,6 +75,10 @@ static struct brw_reg alloc_tmp(struct brw_wm_compile *c) return reg; } +/** + * Save current temp register info. + * There must be a matching call to release_tmps(). + */ static int mark_tmps(struct brw_wm_compile *c) { return c->tmp_index; @@ -77,8 +94,22 @@ static void release_tmps(struct brw_wm_compile *c, int mark) c->tmp_index = mark; } +/** + * Convert Mesa src register to brw register. + * + * Since we're running in SOA mode each Mesa register corresponds to four + * hardware registers. We allocate the hardware registers as needed here. + * + * \param file register file, one of PROGRAM_x + * \param index register number + * \param component src component (X=0, Y=1, Z=2, W=3) + * \param nr not used?!? + * \param neg negate value? + * \param abs take absolute value? + */ static struct brw_reg -get_reg(struct brw_wm_compile *c, int file, int index, int component, int nr, GLuint neg, GLuint abs) +get_reg(struct brw_wm_compile *c, int file, int index, int component, + int nr, GLuint neg, GLuint abs) { struct brw_reg reg; switch (file) { @@ -99,12 +130,18 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component, int nr, GL return brw_null_reg(); } - if(c->wm_regs[file][index][component].inited) + /* see if we've already allocated a HW register for this Mesa register */ + if (c->wm_regs[file][index][component].inited) { + /* yes, re-use */ reg = c->wm_regs[file][index][component].reg; - else + } + else { + /* no, allocate new register */ reg = brw_vec8_grf(c->reg_index, 0); + } - if(!c->wm_regs[file][index][component].inited) { + /* if this is a new register allocation, record it in the table */ + if (!c->wm_regs[file][index][component].inited) { set_reg(c, file, index, component, reg); c->reg_index++; } @@ -113,7 +150,7 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component, int nr, GL /* ran out of temporary registers! */ #if 1 /* This is a big hack for now. - * Return bad register index, but don't just crash hange the GPU. + * Return bad register index, just don't hang the GPU. */ _mesa_fprintf(stderr, "out of regs %d\n", c->reg_index); c->reg_index = BRW_WM_MAX_GRF - 13; @@ -130,78 +167,273 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component, int nr, GL return reg; } + +/** + * Preallocate registers. This sets up the Mesa to hardware register + * mapping for certain registers, such as constants (uniforms/state vars) + * and shader inputs. + */ static void prealloc_reg(struct brw_wm_compile *c) { int i, j; struct brw_reg reg; - int nr_interp_regs = 0; + int urb_read_length = 0; GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted | c->fp_deriv_emitted; for (i = 0; i < 4; i++) { - reg = (i < c->key.nr_depth_regs) - ? brw_vec8_grf(i*2, 0) : brw_vec8_grf(0, 0); + if (i < c->key.nr_depth_regs) + reg = brw_vec8_grf(i * 2, 0); + else + reg = brw_vec8_grf(0, 0); set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg); } - c->reg_index += 2*c->key.nr_depth_regs; + c->reg_index += 2 * c->key.nr_depth_regs; + + /* constants */ { - int nr_params = c->fp->program.Base.Parameters->NumParameters; - struct gl_program_parameter_list *plist = - c->fp->program.Base.Parameters; - int index = 0; - c->prog_data.nr_params = 4*nr_params; - for (i = 0; i < nr_params; i++) { - for (j = 0; j < 4; j++, index++) { - reg = brw_vec1_grf(c->reg_index + index/8, - index%8); - c->prog_data.param[index] = - &plist->ParameterValues[i][j]; - set_reg(c, PROGRAM_STATE_VAR, i, j, reg); - } - } - c->nr_creg = 2*((4*nr_params+15)/16); - c->reg_index += c->nr_creg; + const int nr_params = c->fp->program.Base.Parameters->NumParameters; + + /* use a real constant buffer, or just use a section of the GRF? */ + c->fp->use_const_buffer = GL_FALSE; /* (nr_params > 8);*/ + + if (c->fp->use_const_buffer) { + /* We'll use a real constant buffer and fetch constants from + * it with a dataport read message. + */ + + /* number of float constants in CURBE */ + c->prog_data.nr_params = 0; + } + else { + const struct gl_program_parameter_list *plist = + c->fp->program.Base.Parameters; + int index = 0; + + /* number of float constants in CURBE */ + c->prog_data.nr_params = 4 * nr_params; + + /* loop over program constants (float[4]) */ + for (i = 0; i < nr_params; i++) { + /* loop over XYZW channels */ + for (j = 0; j < 4; j++, index++) { + reg = brw_vec1_grf(c->reg_index + index / 8, index % 8); + /* Save pointer to parameter/constant value. + * Constants will be copied in prepare_constant_buffer() + */ + c->prog_data.param[index] = &plist->ParameterValues[i][j]; + set_reg(c, PROGRAM_STATE_VAR, i, j, reg); + } + } + /* number of constant regs used (each reg is float[8]) */ + c->nr_creg = 2 * ((4 * nr_params + 15) / 16); + c->reg_index += c->nr_creg; + } } - for (i = 0; i < FRAG_ATTRIB_MAX; i++) { - if (inputs & (1<<i)) { - nr_interp_regs++; - reg = brw_vec8_grf(c->reg_index, 0); - for (j = 0; j < 4; j++) - set_reg(c, PROGRAM_PAYLOAD, i, j, reg); - c->reg_index += 2; - } + /* fragment shader inputs */ + for (i = 0; i < VERT_RESULT_MAX; i++) { + int fp_input; + + if (i >= VERT_RESULT_VAR0) + fp_input = i - VERT_RESULT_VAR0 + FRAG_ATTRIB_VAR0; + else if (i <= VERT_RESULT_TEX7) + fp_input = i; + else + fp_input = -1; + + if (fp_input >= 0 && inputs & (1 << fp_input)) { + urb_read_length = c->reg_index; + reg = brw_vec8_grf(c->reg_index, 0); + for (j = 0; j < 4; j++) + set_reg(c, PROGRAM_PAYLOAD, fp_input, j, reg); + } + if (c->key.vp_outputs_written & (1 << i)) { + c->reg_index += 2; + } } + c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2; - c->prog_data.urb_read_length = nr_interp_regs * 2; + c->prog_data.urb_read_length = urb_read_length; c->prog_data.curb_read_length = c->nr_creg; c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); c->reg_index++; c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); c->reg_index += 2; + + /* An instruction may reference up to three constants. + * They'll be found in these registers. + * XXX alloc these on demand! + */ + if (c->fp->use_const_buffer) { + for (i = 0; i < 3; i++) { + c->current_const[i].index = -1; + c->current_const[i].reg = alloc_tmp(c); + } + } +#if 0 + printf("USE CONST BUFFER? %d\n", c->fp->use_const_buffer); + printf("AFTER PRE_ALLOC, reg_index = %d\n", c->reg_index); +#endif } + +/** + * Check if any of the instruction's src registers are constants, uniforms, + * or statevars. If so, fetch any constants that we don't already have in + * the three GRF slots. + */ +static void fetch_constants(struct brw_wm_compile *c, + const struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + GLuint i; + + /* loop over instruction src regs */ + for (i = 0; i < 3; i++) { + const struct prog_src_register *src = &inst->SrcReg[i]; + if (src->File == PROGRAM_STATE_VAR || + src->File == PROGRAM_CONSTANT || + src->File == PROGRAM_UNIFORM) { + c->current_const[i].index = src->Index; + +#if 0 + printf(" fetch const[%d] for arg %d into reg %d\n", + src->Index, i, c->current_const[i].reg.nr); +#endif + + /* need to fetch the constant now */ + brw_dp_READ_4(p, + c->current_const[i].reg, /* writeback dest */ + src->RelAddr, /* relative indexing? */ + 16 * src->Index, /* byte offset */ + SURF_INDEX_FRAG_CONST_BUFFER/* binding table index */ + ); + } + } +} + + +/** + * Convert Mesa dst register to brw register. + */ static struct brw_reg get_dst_reg(struct brw_wm_compile *c, - struct prog_instruction *inst, int component, int nr) + const struct prog_instruction *inst, + GLuint component) { + const int nr = 1; return get_reg(c, inst->DstReg.File, inst->DstReg.Index, component, nr, 0, 0); } + +static struct brw_reg +get_src_reg_const(struct brw_wm_compile *c, + const struct prog_instruction *inst, + GLuint srcRegIndex, GLuint component) +{ + /* We should have already fetched the constant from the constant + * buffer in fetch_constants(). Now we just have to return a + * register description that extracts the needed component and + * smears it across all eight vector components. + */ + const struct prog_src_register *src = &inst->SrcReg[srcRegIndex]; + struct brw_reg const_reg; + + assert(component < 4); + assert(srcRegIndex < 3); + assert(c->current_const[srcRegIndex].index != -1); + const_reg = c->current_const[srcRegIndex].reg; + + /* extract desired float from the const_reg, and smear */ + const_reg = stride(const_reg, 0, 1, 0); + const_reg.subnr = component * 4; + + if (src->Negate & (1 << component)) + const_reg = negate(const_reg); + if (src->Abs) + const_reg = brw_abs(const_reg); + +#if 0 + printf(" form const[%d].%d for arg %d, reg %d\n", + c->current_const[srcRegIndex].index, + component, + srcRegIndex, + const_reg.nr); +#endif + + return const_reg; +} + + +/** + * Convert Mesa src register to brw register. + */ static struct brw_reg get_src_reg(struct brw_wm_compile *c, - struct prog_src_register *src, int index, int nr) -{ - int component = GET_SWZ(src->Swizzle, index); - return get_reg(c, src->File, src->Index, component, nr, - src->NegateBase, src->Abs); + const struct prog_instruction *inst, + GLuint srcRegIndex, GLuint channel) +{ + const struct prog_src_register *src = &inst->SrcReg[srcRegIndex]; + const GLuint nr = 1; + const GLuint component = GET_SWZ(src->Swizzle, channel); + + if (c->fp->use_const_buffer && + (src->File == PROGRAM_STATE_VAR || + src->File == PROGRAM_CONSTANT || + src->File == PROGRAM_UNIFORM)) { + return get_src_reg_const(c, inst, srcRegIndex, component); + } + else { + /* other type of source register */ + return get_reg(c, src->File, src->Index, component, nr, + src->Negate, src->Abs); + } } -/* Subroutines are minimal support for resusable instruction sequences. - They are implemented as simply as possible to minimise overhead: there - is no explicit support for communication between the caller and callee - other than saving the return address in a temporary register, nor is - there any automatic local storage. This implies that great care is - required before attempting reentrancy or any kind of nested - subroutine invocations. */ + +/** + * Same as \sa get_src_reg() but if the register is a literal, emit + * a brw_reg encoding the literal. + * Note that a brw instruction only allows one src operand to be a literal. + * For instructions with more than one operand, only the second can be a + * literal. This means that we treat some literals as constants/uniforms + * (which why PROGRAM_CONSTANT is checked in fetch_constants()). + * + */ +static struct brw_reg get_src_reg_imm(struct brw_wm_compile *c, + const struct prog_instruction *inst, + GLuint srcRegIndex, GLuint channel) +{ + const struct prog_src_register *src = &inst->SrcReg[srcRegIndex]; + if (src->File == PROGRAM_CONSTANT) { + /* a literal */ + const int component = GET_SWZ(src->Swizzle, channel); + const GLfloat *param = + c->fp->program.Base.Parameters->ParameterValues[src->Index]; + GLfloat value = param[component]; + if (src->Negate & (1 << channel)) + value = -value; + if (src->Abs) + value = FABSF(value); +#if 0 + printf(" form immed value %f for chan %d\n", value, channel); +#endif + return brw_imm_f(value); + } + else { + return get_src_reg(c, inst, srcRegIndex, channel); + } +} + + +/** + * Subroutines are minimal support for resusable instruction sequences. + * They are implemented as simply as possible to minimise overhead: there + * is no explicit support for communication between the caller and callee + * other than saving the return address in a temporary register, nor is + * there any automatic local storage. This implies that great care is + * required before attempting reentrancy or any kind of nested + * subroutine invocations. + */ static void invoke_subroutine( struct brw_wm_compile *c, enum _subroutine subroutine, void (*emit)( struct brw_wm_compile * ) ) @@ -258,7 +490,7 @@ static void invoke_subroutine( struct brw_wm_compile *c, } static void emit_abs( struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { int i; struct brw_compile *p = &c->func; @@ -266,8 +498,8 @@ static void emit_abs( struct brw_wm_compile *c, for (i = 0; i < 4; i++) { if (inst->DstReg.WriteMask & (1<<i)) { struct brw_reg src, dst; - dst = get_dst_reg(c, inst, i, 1); - src = get_src_reg(c, &inst->SrcReg[0], i, 1); + dst = get_dst_reg(c, inst, i); + src = get_src_reg(c, inst, 0, i); brw_MOV(p, dst, brw_abs(src)); } } @@ -275,7 +507,7 @@ static void emit_abs( struct brw_wm_compile *c, } static void emit_trunc( struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { int i; struct brw_compile *p = &c->func; @@ -284,8 +516,8 @@ static void emit_trunc( struct brw_wm_compile *c, for (i = 0; i < 4; i++) { if (mask & (1<<i)) { struct brw_reg src, dst; - dst = get_dst_reg(c, inst, i, 1) ; - src = get_src_reg(c, &inst->SrcReg[0], i, 1); + dst = get_dst_reg(c, inst, i); + src = get_src_reg(c, inst, 0, i); brw_RNDZ(p, dst, src); } } @@ -293,7 +525,7 @@ static void emit_trunc( struct brw_wm_compile *c, } static void emit_mov( struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { int i; struct brw_compile *p = &c->func; @@ -302,8 +534,10 @@ static void emit_mov( struct brw_wm_compile *c, for (i = 0; i < 4; i++) { if (mask & (1<<i)) { struct brw_reg src, dst; - dst = get_dst_reg(c, inst, i, 1); - src = get_src_reg(c, &inst->SrcReg[0], i, 1); + dst = get_dst_reg(c, inst, i); + /* XXX some moves from immediate value don't work reliably!!! */ + /*src = get_src_reg_imm(c, inst, 0, i);*/ + src = get_src_reg(c, inst, 0, i); brw_MOV(p, dst, src); } } @@ -311,7 +545,7 @@ static void emit_mov( struct brw_wm_compile *c, } static void emit_pixel_xy(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_reg r1 = brw_vec1_grf(1, 0); struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW); @@ -320,8 +554,8 @@ static void emit_pixel_xy(struct brw_wm_compile *c, struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; - dst0 = get_dst_reg(c, inst, 0, 1); - dst1 = get_dst_reg(c, inst, 1, 1); + dst0 = get_dst_reg(c, inst, 0); + dst1 = get_dst_reg(c, inst, 1); /* Calculate pixel centers by adding 1 or 0 to each of the * micro-tile coordinates passed in r1. */ @@ -338,21 +572,20 @@ static void emit_pixel_xy(struct brw_wm_compile *c, stride(suboffset(r1_uw, 5), 2, 4, 0), brw_imm_v(0x11001100)); } - } static void emit_delta_xy(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_reg r1 = brw_vec1_grf(1, 0); struct brw_reg dst0, dst1, src0, src1; struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; - dst0 = get_dst_reg(c, inst, 0, 1); - dst1 = get_dst_reg(c, inst, 1, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - src1 = get_src_reg(c, &inst->SrcReg[0], 1, 1); + dst0 = get_dst_reg(c, inst, 0); + dst1 = get_dst_reg(c, inst, 1); + src0 = get_src_reg(c, inst, 0, 0); + src1 = get_src_reg(c, inst, 0, 1); /* Calc delta X,Y by subtracting origin in r1 from the pixel * centers. */ @@ -370,10 +603,8 @@ static void emit_delta_xy(struct brw_wm_compile *c, negate(suboffset(r1,1))); } - } - static void fire_fb_write( struct brw_wm_compile *c, GLuint base_reg, GLuint nr, @@ -404,7 +635,7 @@ static void fire_fb_write( struct brw_wm_compile *c, } static void emit_fb_write(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; int nr = 2; @@ -416,38 +647,64 @@ static void emit_fb_write(struct brw_wm_compile *c, */ if (c->key.aa_dest_stencil_reg) nr += 1; - { - brw_push_insn_state(p); - for (channel = 0; channel < 4; channel++) { - src0 = get_src_reg(c, &inst->SrcReg[0], channel, 1); - /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */ - /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */ - brw_MOV(p, brw_message_reg(nr + channel), src0); - } - /* skip over the regs populated above: */ - nr += 8; - brw_pop_insn_state(p); + + brw_push_insn_state(p); + for (channel = 0; channel < 4; channel++) { + src0 = get_src_reg(c, inst, 0, channel); + /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */ + /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */ + brw_MOV(p, brw_message_reg(nr + channel), src0); } + /* skip over the regs populated above: */ + nr += 8; + brw_pop_insn_state(p); - if (c->key.source_depth_to_render_target) - { - if (c->key.computes_depth) { - src0 = get_src_reg(c, &inst->SrcReg[2], 2, 1); - brw_MOV(p, brw_message_reg(nr), src0); - } else { - src0 = get_src_reg(c, &inst->SrcReg[1], 1, 1); - brw_MOV(p, brw_message_reg(nr), src0); - } + if (c->key.source_depth_to_render_target) { + if (c->key.computes_depth) { + src0 = get_src_reg(c, inst, 2, 2); + brw_MOV(p, brw_message_reg(nr), src0); + } + else { + src0 = get_src_reg(c, inst, 1, 1); + brw_MOV(p, brw_message_reg(nr), src0); + } + + nr += 2; + } - nr += 2; + if (c->key.dest_depth_reg) { + GLuint comp = c->key.dest_depth_reg / 2; + GLuint off = c->key.dest_depth_reg % 2; + + assert(comp == 1); + assert(off == 0); +#if 0 + /* XXX do we need this code? comp always 1, off always 0, it seems */ + if (off != 0) { + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1)); + /* 2nd half? */ + brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]); + brw_pop_insn_state(p); + } + else +#endif + { + struct brw_reg src = get_src_reg(c, inst, 1, 1); + brw_MOV(p, brw_message_reg(nr), src); + } + nr += 2; } - target = inst->Sampler >> 1; - eot = inst->Sampler & 1; + + target = inst->Aux >> 1; + eot = inst->Aux & 1; fire_fb_write(c, 0, nr, target, eot); } static void emit_pixel_w( struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -455,10 +712,10 @@ static void emit_pixel_w( struct brw_wm_compile *c, struct brw_reg dst, src0, delta0, delta1; struct brw_reg interp3; - dst = get_dst_reg(c, inst, 3, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1); - delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1); + dst = get_dst_reg(c, inst, 3); + src0 = get_src_reg(c, inst, 0, 0); + delta0 = get_src_reg(c, inst, 1, 0); + delta1 = get_src_reg(c, inst, 1, 1); interp3 = brw_vec1_grf(src0.nr+1, 4); /* Calc 1/w - just linterp wpos[3] optimized by putting the @@ -477,19 +734,19 @@ static void emit_pixel_w( struct brw_wm_compile *c, } static void emit_linterp(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; struct brw_reg interp[4]; struct brw_reg dst, delta0, delta1; struct brw_reg src0; + GLuint nr, i; - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1); - delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1); - GLuint nr = src0.nr; - int i; + src0 = get_src_reg(c, inst, 0, 0); + delta0 = get_src_reg(c, inst, 1, 0); + delta1 = get_src_reg(c, inst, 1, 1); + nr = src0.nr; interp[0] = brw_vec1_grf(nr, 0); interp[1] = brw_vec1_grf(nr, 4); @@ -498,7 +755,7 @@ static void emit_linterp(struct brw_wm_compile *c, for(i = 0; i < 4; i++ ) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); + dst = get_dst_reg(c, inst, i); brw_LINE(p, brw_null_reg(), interp[i], delta0); brw_MAC(p, dst, suboffset(interp[i],1), delta1); } @@ -506,17 +763,17 @@ static void emit_linterp(struct brw_wm_compile *c, } static void emit_cinterp(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; struct brw_reg interp[4]; struct brw_reg dst, src0; + GLuint nr, i; - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - GLuint nr = src0.nr; - int i; + src0 = get_src_reg(c, inst, 0, 0); + nr = src0.nr; interp[0] = brw_vec1_grf(nr, 0); interp[1] = brw_vec1_grf(nr, 4); @@ -525,14 +782,14 @@ static void emit_cinterp(struct brw_wm_compile *c, for(i = 0; i < 4; i++ ) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); + dst = get_dst_reg(c, inst, i); brw_MOV(p, dst, suboffset(interp[i],3)); } } } static void emit_pinterp(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -540,13 +797,13 @@ static void emit_pinterp(struct brw_wm_compile *c, struct brw_reg interp[4]; struct brw_reg dst, delta0, delta1; struct brw_reg src0, w; + GLuint nr, i; - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1); - delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1); - w = get_src_reg(c, &inst->SrcReg[2], 3, 1); - GLuint nr = src0.nr; - int i; + src0 = get_src_reg(c, inst, 0, 0); + delta0 = get_src_reg(c, inst, 1, 0); + delta1 = get_src_reg(c, inst, 1, 1); + w = get_src_reg(c, inst, 2, 3); + nr = src0.nr; interp[0] = brw_vec1_grf(nr, 0); interp[1] = brw_vec1_grf(nr, 4); @@ -555,7 +812,7 @@ static void emit_pinterp(struct brw_wm_compile *c, for(i = 0; i < 4; i++ ) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); + dst = get_dst_reg(c, inst, i); brw_LINE(p, brw_null_reg(), interp[i], delta0); brw_MAC(p, dst, suboffset(interp[i],1), delta1); @@ -566,7 +823,7 @@ static void emit_pinterp(struct brw_wm_compile *c, /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */ static void emit_frontfacing(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD); @@ -576,7 +833,7 @@ static void emit_frontfacing(struct brw_wm_compile *c, for (i = 0; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); + dst = get_dst_reg(c, inst, i); brw_MOV(p, dst, brw_imm_f(0.0)); } } @@ -587,7 +844,7 @@ static void emit_frontfacing(struct brw_wm_compile *c, brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, r1_6ud, brw_imm_ud(1 << 31)); for (i = 0; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); + dst = get_dst_reg(c, inst, i); brw_MOV(p, dst, brw_imm_f(1.0)); } } @@ -595,7 +852,7 @@ static void emit_frontfacing(struct brw_wm_compile *c, } static void emit_xpd(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { int i; struct brw_compile *p = &c->func; @@ -605,12 +862,12 @@ static void emit_xpd(struct brw_wm_compile *c, GLuint i1 = (i+1)%3; if (mask & (1<<i)) { struct brw_reg src0, src1, dst; - dst = get_dst_reg(c, inst, i, 1); - src0 = negate(get_src_reg(c, &inst->SrcReg[0], i2, 1)); - src1 = get_src_reg(c, &inst->SrcReg[1], i1, 1); + dst = get_dst_reg(c, inst, i); + src0 = negate(get_src_reg(c, inst, 0, i2)); + src1 = get_src_reg_imm(c, inst, 1, i1); brw_MUL(p, brw_null_reg(), src0, src1); - src0 = get_src_reg(c, &inst->SrcReg[0], i1, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i2, 1); + src0 = get_src_reg(c, inst, 0, i1); + src1 = get_src_reg_imm(c, inst, 1, i2); brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); brw_MAC(p, dst, src0, src1); brw_set_saturate(p, 0); @@ -620,17 +877,17 @@ static void emit_xpd(struct brw_wm_compile *c, } static void emit_dp3(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_reg src0[3], src1[3], dst; int i; struct brw_compile *p = &c->func; for (i = 0; i < 3; i++) { - src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1); + src0[i] = get_src_reg(c, inst, 0, i); + src1[i] = get_src_reg_imm(c, inst, 1, i); } - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); brw_MUL(p, brw_null_reg(), src0[0], src1[0]); brw_MAC(p, brw_null_reg(), src0[1], src1[1]); brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); @@ -639,16 +896,16 @@ static void emit_dp3(struct brw_wm_compile *c, } static void emit_dp4(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_reg src0[4], src1[4], dst; int i; struct brw_compile *p = &c->func; for (i = 0; i < 4; i++) { - src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1); + src0[i] = get_src_reg(c, inst, 0, i); + src1[i] = get_src_reg_imm(c, inst, 1, i); } - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); brw_MUL(p, brw_null_reg(), src0[0], src1[0]); brw_MAC(p, brw_null_reg(), src0[1], src1[1]); brw_MAC(p, brw_null_reg(), src0[2], src1[2]); @@ -658,16 +915,16 @@ static void emit_dp4(struct brw_wm_compile *c, } static void emit_dph(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_reg src0[4], src1[4], dst; int i; struct brw_compile *p = &c->func; for (i = 0; i < 4; i++) { - src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1); + src0[i] = get_src_reg(c, inst, 0, i); + src1[i] = get_src_reg_imm(c, inst, 1, i); } - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); brw_MUL(p, brw_null_reg(), src0[0], src1[0]); brw_MAC(p, brw_null_reg(), src0[1], src1[1]); brw_MAC(p, dst, src0[2], src1[2]); @@ -682,7 +939,7 @@ static void emit_dph(struct brw_wm_compile *c, * register's X, Y, Z and W channels (subject to writemasking of course). */ static void emit_math1(struct brw_wm_compile *c, - struct prog_instruction *inst, GLuint func) + const struct prog_instruction *inst, GLuint func) { struct brw_compile *p = &c->func; struct brw_reg src0, dst, tmp; @@ -692,7 +949,7 @@ static void emit_math1(struct brw_wm_compile *c, tmp = alloc_tmp(c); /* Get first component of source register */ - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); + src0 = get_src_reg(c, inst, 0, 0); /* tmp = func(src0) */ brw_MOV(p, brw_message_reg(2), src0); @@ -710,7 +967,7 @@ static void emit_math1(struct brw_wm_compile *c, /* replicate tmp value across enabled dest channels */ for (i = 0; i < 4; i++) { if (inst->DstReg.WriteMask & (1 << i)) { - dst = get_dst_reg(c, inst, i, 1); + dst = get_dst_reg(c, inst, i); brw_MOV(p, dst, tmp); } } @@ -719,43 +976,43 @@ static void emit_math1(struct brw_wm_compile *c, } static void emit_rcp(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_INV); } static void emit_rsq(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_RSQ); } static void emit_sin(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_SIN); } static void emit_cos(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_COS); } static void emit_ex2(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_EXP); } static void emit_lg2(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_LOG); } static void emit_add(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg src0, src1, dst; @@ -764,17 +1021,30 @@ static void emit_add(struct brw_wm_compile *c, brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); for (i = 0 ; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, inst, 0, i); + src1 = get_src_reg_imm(c, inst, 1, i); brw_ADD(p, dst, src0, src1); } } brw_set_saturate(p, 0); } +static void emit_arl(struct brw_wm_compile *c, + const struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, addr_reg; + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + addr_reg = brw_uw8_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_ADDRESS, 0); + src0 = get_src_reg(c, inst, 0, 0); /* channel 0 */ + brw_MOV(p, addr_reg, src0); + brw_set_saturate(p, 0); +} + static void emit_sub(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg src0, src1, dst; @@ -783,9 +1053,9 @@ static void emit_sub(struct brw_wm_compile *c, brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); for (i = 0 ; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, inst, 0, i); + src1 = get_src_reg_imm(c, inst, 1, i); brw_ADD(p, dst, src0, negate(src1)); } } @@ -793,7 +1063,7 @@ static void emit_sub(struct brw_wm_compile *c, } static void emit_mul(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg src0, src1, dst; @@ -802,9 +1072,9 @@ static void emit_mul(struct brw_wm_compile *c, brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); for (i = 0 ; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, inst, 0, i); + src1 = get_src_reg_imm(c, inst, 1, i); brw_MUL(p, dst, src0, src1); } } @@ -812,7 +1082,7 @@ static void emit_mul(struct brw_wm_compile *c, } static void emit_frc(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg src0, dst; @@ -821,8 +1091,8 @@ static void emit_frc(struct brw_wm_compile *c, brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); for (i = 0 ; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg_imm(c, inst, 0, i); brw_FRC(p, dst, src0); } } @@ -831,7 +1101,7 @@ static void emit_frc(struct brw_wm_compile *c, } static void emit_flr(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg src0, dst; @@ -840,78 +1110,71 @@ static void emit_flr(struct brw_wm_compile *c, brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); for (i = 0 ; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg_imm(c, inst, 0, i); brw_RNDD(p, dst, src0); } } brw_set_saturate(p, 0); } -static void emit_max(struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - struct brw_reg src0, src1, dst; - int i; - brw_push_insn_state(p); - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_MOV(p, dst, src0); - brw_set_saturate(p, 0); - - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src0, src1); - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); - brw_MOV(p, dst, src1); - brw_set_saturate(p, 0); - brw_set_predicate_control_flag_value(p, 0xff); - } - } - brw_pop_insn_state(p); -} -static void emit_min(struct brw_wm_compile *c, - struct prog_instruction *inst) +static void emit_min_max(struct brw_wm_compile *c, + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - struct brw_reg src0, src1, dst; + const GLuint mask = inst->DstReg.WriteMask; + const int mark = mark_tmps(c); int i; brw_push_insn_state(p); for (i = 0; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + struct brw_reg real_dst = get_dst_reg(c, inst, i); + struct brw_reg src0 = get_src_reg(c, inst, 0, i); + struct brw_reg src1 = get_src_reg(c, inst, 1, i); + struct brw_reg dst; + /* if dst==src0 or dst==src1 we need to use a temp reg */ + GLboolean use_temp = brw_same_reg(dst, src0) || + brw_same_reg(dst, src1); + if (use_temp) + dst = alloc_tmp(c); + else + dst = real_dst; + + /* + printf(" Min/max: dst %d src0 %d src1 %d\n", + dst.nr, src0.nr, src1.nr); + */ brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); brw_MOV(p, dst, src0); brw_set_saturate(p, 0); - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0); + if (inst->Opcode == OPCODE_MIN) + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0); + else + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, src1, src0); + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); brw_MOV(p, dst, src1); brw_set_saturate(p, 0); brw_set_predicate_control_flag_value(p, 0xff); + if (use_temp) + brw_MOV(p, real_dst, dst); } } brw_pop_insn_state(p); + release_tmps(c, mark); } static void emit_pow(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg dst, src0, src1; - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], 0, 1); + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); + src0 = get_src_reg_imm(c, inst, 0, 0); + src1 = get_src_reg_imm(c, inst, 1, 0); brw_MOV(p, brw_message_reg(2), src0); brw_MOV(p, brw_message_reg(3), src1); @@ -927,7 +1190,7 @@ static void emit_pow(struct brw_wm_compile *c, } static void emit_lrp(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -936,10 +1199,10 @@ static void emit_lrp(struct brw_wm_compile *c, int mark = mark_tmps(c); for (i = 0; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, inst, 0, i); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + src1 = get_src_reg_imm(c, inst, 1, i); if (src1.nr == dst.nr) { tmp1 = alloc_tmp(c); @@ -947,7 +1210,7 @@ static void emit_lrp(struct brw_wm_compile *c, } else tmp1 = src1; - src2 = get_src_reg(c, &inst->SrcReg[2], i, 1); + src2 = get_src_reg(c, inst, 2, i); if (src2.nr == dst.nr) { tmp2 = alloc_tmp(c); brw_MOV(p, tmp2, src2); @@ -980,7 +1243,7 @@ static void emit_kil(struct brw_wm_compile *c) } static void emit_mad(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -989,10 +1252,10 @@ static void emit_mad(struct brw_wm_compile *c, for (i = 0; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); - src2 = get_src_reg(c, &inst->SrcReg[2], i, 1); + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, inst, 0, i); + src1 = get_src_reg_imm(c, inst, 1, i); + src2 = get_src_reg_imm(c, inst, 2, i); brw_MUL(p, dst, src0, src1); brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); @@ -1003,7 +1266,7 @@ static void emit_mad(struct brw_wm_compile *c, } static void emit_sop(struct brw_wm_compile *c, - struct prog_instruction *inst, GLuint cond) + const struct prog_instruction *inst, GLuint cond) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -1012,9 +1275,9 @@ static void emit_sop(struct brw_wm_compile *c, for (i = 0; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, inst, 0, i); + src1 = get_src_reg_imm(c, inst, 1, i); brw_push_insn_state(p); brw_CMP(p, brw_null_reg(), cond, src0, src1); brw_set_predicate_control(p, BRW_PREDICATE_NONE); @@ -1027,43 +1290,43 @@ static void emit_sop(struct brw_wm_compile *c, } static void emit_slt(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_sop(c, inst, BRW_CONDITIONAL_L); } static void emit_sle(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_sop(c, inst, BRW_CONDITIONAL_LE); } static void emit_sgt(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_sop(c, inst, BRW_CONDITIONAL_G); } static void emit_sge(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_sop(c, inst, BRW_CONDITIONAL_GE); } static void emit_seq(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_sop(c, inst, BRW_CONDITIONAL_EQ); } static void emit_sne(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_sop(c, inst, BRW_CONDITIONAL_NEQ); } static void emit_ddx(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -1071,8 +1334,8 @@ static void emit_ddx(struct brw_wm_compile *c, struct brw_reg dst; struct brw_reg src0, w; GLuint nr, i; - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - w = get_src_reg(c, &inst->SrcReg[1], 3, 1); + src0 = get_src_reg(c, inst, 0, 0); + w = get_src_reg(c, inst, 1, 3); nr = src0.nr; interp[0] = brw_vec1_grf(nr, 0); interp[1] = brw_vec1_grf(nr, 4); @@ -1081,7 +1344,7 @@ static void emit_ddx(struct brw_wm_compile *c, brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); for(i = 0; i < 4; i++ ) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); + dst = get_dst_reg(c, inst, i); brw_MOV(p, dst, interp[i]); brw_MUL(p, dst, dst, w); } @@ -1090,7 +1353,7 @@ static void emit_ddx(struct brw_wm_compile *c, } static void emit_ddy(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -1099,9 +1362,9 @@ static void emit_ddy(struct brw_wm_compile *c, struct brw_reg src0, w; GLuint nr, i; - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); + src0 = get_src_reg(c, inst, 0, 0); nr = src0.nr; - w = get_src_reg(c, &inst->SrcReg[1], 3, 1); + w = get_src_reg(c, inst, 1, 3); interp[0] = brw_vec1_grf(nr, 0); interp[1] = brw_vec1_grf(nr, 4); interp[2] = brw_vec1_grf(nr+1, 0); @@ -1109,7 +1372,7 @@ static void emit_ddy(struct brw_wm_compile *c, brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); for(i = 0; i < 4; i++ ) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); + dst = get_dst_reg(c, inst, i); brw_MOV(p, dst, suboffset(interp[i], 1)); brw_MUL(p, dst, dst, w); } @@ -1117,23 +1380,23 @@ static void emit_ddy(struct brw_wm_compile *c, brw_set_saturate(p, 0); } -static __inline struct brw_reg high_words( struct brw_reg reg ) +static INLINE struct brw_reg high_words( struct brw_reg reg ) { return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_W ), 1 ), 0, 8, 2 ); } -static __inline struct brw_reg low_words( struct brw_reg reg ) +static INLINE struct brw_reg low_words( struct brw_reg reg ) { return stride( retype( reg, BRW_REGISTER_TYPE_W ), 0, 8, 2 ); } -static __inline struct brw_reg even_bytes( struct brw_reg reg ) +static INLINE struct brw_reg even_bytes( struct brw_reg reg ) { return stride( retype( reg, BRW_REGISTER_TYPE_B ), 0, 16, 2 ); } -static __inline struct brw_reg odd_bytes( struct brw_reg reg ) +static INLINE struct brw_reg odd_bytes( struct brw_reg reg ) { return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_B ), 1 ), 0, 16, 2 ); @@ -1233,7 +1496,7 @@ static void noise1_sub( struct brw_wm_compile *c ) { } static void emit_noise1( struct brw_wm_compile *c, - struct prog_instruction *inst ) + const struct prog_instruction *inst ) { struct brw_compile *p = &c->func; struct brw_reg src, param, dst; @@ -1243,7 +1506,7 @@ static void emit_noise1( struct brw_wm_compile *c, assert( mark == 0 ); - src = get_src_reg( c, inst->SrcReg, 0, 1 ); + src = get_src_reg( c, inst, 0, 0 ); param = alloc_tmp( c ); @@ -1255,7 +1518,7 @@ static void emit_noise1( struct brw_wm_compile *c, brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); for (i = 0 ; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); + dst = get_dst_reg(c, inst, i); brw_MOV( p, dst, param ); } } @@ -1403,7 +1666,7 @@ static void noise2_sub( struct brw_wm_compile *c ) { } static void emit_noise2( struct brw_wm_compile *c, - struct prog_instruction *inst ) + const struct prog_instruction *inst ) { struct brw_compile *p = &c->func; struct brw_reg src0, src1, param0, param1, dst; @@ -1413,8 +1676,8 @@ static void emit_noise2( struct brw_wm_compile *c, assert( mark == 0 ); - src0 = get_src_reg( c, inst->SrcReg, 0, 1 ); - src1 = get_src_reg( c, inst->SrcReg, 1, 1 ); + src0 = get_src_reg( c, inst, 0, 0 ); + src1 = get_src_reg( c, inst, 0, 1 ); param0 = alloc_tmp( c ); param1 = alloc_tmp( c ); @@ -1428,7 +1691,7 @@ static void emit_noise2( struct brw_wm_compile *c, brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); for (i = 0 ; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); + dst = get_dst_reg(c, inst, i); brw_MOV( p, dst, param0 ); } } @@ -1438,9 +1701,11 @@ static void emit_noise2( struct brw_wm_compile *c, release_tmps( c, mark ); } -/* The three-dimensional case is much like the one- and two- versions above, - but since the number of corners is rapidly growing we now pack 16 16-bit - hashes into each register to extract more parallelism from the EUs. */ +/** + * The three-dimensional case is much like the one- and two- versions above, + * but since the number of corners is rapidly growing we now pack 16 16-bit + * hashes into each register to extract more parallelism from the EUs. + */ static void noise3_sub( struct brw_wm_compile *c ) { struct brw_compile *p = &c->func; @@ -1704,7 +1969,7 @@ static void noise3_sub( struct brw_wm_compile *c ) { } static void emit_noise3( struct brw_wm_compile *c, - struct prog_instruction *inst ) + const struct prog_instruction *inst ) { struct brw_compile *p = &c->func; struct brw_reg src0, src1, src2, param0, param1, param2, dst; @@ -1714,9 +1979,9 @@ static void emit_noise3( struct brw_wm_compile *c, assert( mark == 0 ); - src0 = get_src_reg( c, inst->SrcReg, 0, 1 ); - src1 = get_src_reg( c, inst->SrcReg, 1, 1 ); - src2 = get_src_reg( c, inst->SrcReg, 2, 1 ); + src0 = get_src_reg( c, inst, 0, 0 ); + src1 = get_src_reg( c, inst, 0, 1 ); + src2 = get_src_reg( c, inst, 0, 2 ); param0 = alloc_tmp( c ); param1 = alloc_tmp( c ); @@ -1732,7 +1997,7 @@ static void emit_noise3( struct brw_wm_compile *c, brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); for (i = 0 ; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); + dst = get_dst_reg(c, inst, i); brw_MOV( p, dst, param0 ); } } @@ -1742,13 +2007,15 @@ static void emit_noise3( struct brw_wm_compile *c, release_tmps( c, mark ); } -/* For the four-dimensional case, the little micro-optimisation benefits - we obtain by unrolling all the loops aren't worth the massive bloat it - now causes. Instead, we loop twice around performing a similar operation - to noise3, once for the w=0 cube and once for the w=1, with a bit more - code to glue it all together. */ -static void noise4_sub( struct brw_wm_compile *c ) { - +/** + * For the four-dimensional case, the little micro-optimisation benefits + * we obtain by unrolling all the loops aren't worth the massive bloat it + * now causes. Instead, we loop twice around performing a similar operation + * to noise3, once for the w=0 cube and once for the w=1, with a bit more + * code to glue it all together. + */ +static void noise4_sub( struct brw_wm_compile *c ) +{ struct brw_compile *p = &c->func; struct brw_reg param[ 4 ], x0y0, x0y1, x1y0, x1y1, /* gradients at four of the corners */ @@ -2125,7 +2392,7 @@ static void noise4_sub( struct brw_wm_compile *c ) { } static void emit_noise4( struct brw_wm_compile *c, - struct prog_instruction *inst ) + const struct prog_instruction *inst ) { struct brw_compile *p = &c->func; struct brw_reg src0, src1, src2, src3, param0, param1, param2, param3, dst; @@ -2135,10 +2402,10 @@ static void emit_noise4( struct brw_wm_compile *c, assert( mark == 0 ); - src0 = get_src_reg( c, inst->SrcReg, 0, 1 ); - src1 = get_src_reg( c, inst->SrcReg, 1, 1 ); - src2 = get_src_reg( c, inst->SrcReg, 2, 1 ); - src3 = get_src_reg( c, inst->SrcReg, 3, 1 ); + src0 = get_src_reg( c, inst, 0, 0 ); + src1 = get_src_reg( c, inst, 0, 1 ); + src2 = get_src_reg( c, inst, 0, 2 ); + src3 = get_src_reg( c, inst, 0, 3 ); param0 = alloc_tmp( c ); param1 = alloc_tmp( c ); @@ -2156,7 +2423,7 @@ static void emit_noise4( struct brw_wm_compile *c, brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); for (i = 0 ; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); + dst = get_dst_reg(c, inst, i); brw_MOV( p, dst, param0 ); } } @@ -2167,17 +2434,17 @@ static void emit_noise4( struct brw_wm_compile *c, } static void emit_wpos_xy(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; struct brw_reg src0[2], dst[2]; - dst[0] = get_dst_reg(c, inst, 0, 1); - dst[1] = get_dst_reg(c, inst, 1, 1); + dst[0] = get_dst_reg(c, inst, 0); + dst[1] = get_dst_reg(c, inst, 1); - src0[0] = get_src_reg(c, &inst->SrcReg[0], 0, 1); - src0[1] = get_src_reg(c, &inst->SrcReg[0], 1, 1); + src0[0] = get_src_reg(c, inst, 0, 0); + src0[1] = get_src_reg(c, inst, 0, 1); /* Calculate the pixel offset from window bottom left into destination * X and Y channels. @@ -2200,27 +2467,28 @@ static void emit_wpos_xy(struct brw_wm_compile *c, } /* TODO - BIAS on SIMD8 not workind yet... + BIAS on SIMD8 not working yet... */ static void emit_txb(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg dst[4], src[4], payload_reg; GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]; - GLuint i; + payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0); + for (i = 0; i < 4; i++) - dst[i] = get_dst_reg(c, inst, i, 1); + dst[i] = get_dst_reg(c, inst, i); for (i = 0; i < 4; i++) - src[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); + src[i] = get_src_reg(c, inst, 0, i); switch (inst->TexSrcTarget) { case TEXTURE_1D_INDEX: - brw_MOV(p, brw_message_reg(2), src[0]); - brw_MOV(p, brw_message_reg(3), brw_imm_f(0)); - brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); + brw_MOV(p, brw_message_reg(2), src[0]); /* s coord */ + brw_MOV(p, brw_message_reg(3), brw_imm_f(0)); /* t coord */ + brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); /* r coord */ break; case TEXTURE_2D_INDEX: case TEXTURE_RECT_INDEX: @@ -2234,28 +2502,28 @@ static void emit_txb(struct brw_wm_compile *c, brw_MOV(p, brw_message_reg(4), src[2]); break; } - brw_MOV(p, brw_message_reg(5), src[3]); - brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); + brw_MOV(p, brw_message_reg(5), src[3]); /* bias */ + brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); /* ref (unused?) */ brw_SAMPLE(p, - retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), - 1, - retype(payload_reg, BRW_REGISTER_TYPE_UW), - unit + MAX_DRAW_BUFFERS, /* surface */ - unit, /* sampler */ - inst->DstReg.WriteMask, - BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS, - 4, - 4, - 0); + retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */ + 1, /* msg_reg_nr */ + retype(payload_reg, BRW_REGISTER_TYPE_UW), /* src0 */ + SURF_INDEX_TEXTURE(unit), + unit, /* sampler */ + inst->DstReg.WriteMask, /* writemask */ + BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS, /* msg_type */ + 4, /* response_length */ + 4, /* msg_length */ + 0); /* eot */ } + static void emit_tex(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg dst[4], src[4], payload_reg; GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]; - GLuint msg_len; GLuint i, nr; GLuint emit; @@ -2264,10 +2532,9 @@ static void emit_tex(struct brw_wm_compile *c, payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0); for (i = 0; i < 4; i++) - dst[i] = get_dst_reg(c, inst, i, 1); + dst[i] = get_dst_reg(c, inst, i); for (i = 0; i < 4; i++) - src[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); - + src[i] = get_src_reg(c, inst, 0, i); switch (inst->TexSrcTarget) { case TEXTURE_1D_INDEX: @@ -2286,6 +2553,7 @@ static void emit_tex(struct brw_wm_compile *c, } msg_len = 1; + /* move/load S, T, R coords */ for (i = 0; i < nr; i++) { static const GLuint swz[4] = {0,1,2,2}; if (emit & (1<<i)) @@ -2296,26 +2564,27 @@ static void emit_tex(struct brw_wm_compile *c, } if (shadow) { - brw_MOV(p, brw_message_reg(5), brw_imm_f(0)); - brw_MOV(p, brw_message_reg(6), src[2]); + brw_MOV(p, brw_message_reg(5), brw_imm_f(0)); /* lod / bias */ + brw_MOV(p, brw_message_reg(6), src[2]); /* ref value / R coord */ } brw_SAMPLE(p, - retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), - 1, - retype(payload_reg, BRW_REGISTER_TYPE_UW), - unit + MAX_DRAW_BUFFERS, /* surface */ - unit, /* sampler */ - inst->DstReg.WriteMask, - BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE, - 4, - shadow ? 6 : 4, - 0); + retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */ + 1, /* msg_reg_nr */ + retype(payload_reg, BRW_REGISTER_TYPE_UW), /* src0 */ + SURF_INDEX_TEXTURE(unit), + unit, /* sampler */ + inst->DstReg.WriteMask, /* writemask */ + BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE, /* msg_type */ + 4, /* response_length */ + shadow ? 6 : 4, /* msg_length */ + 0); /* eot */ if (shadow) brw_MOV(p, dst[3], brw_imm_f(1.0)); } + /** * Resolve subroutine calls after code emit is done. */ @@ -2340,7 +2609,16 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack)); for (i = 0; i < c->nr_fp_insns; i++) { - struct prog_instruction *inst = &c->prog_instructions[i]; + const struct prog_instruction *inst = &c->prog_instructions[i]; + +#if 0 + _mesa_printf("Inst %d: ", i); + _mesa_print_instruction(inst); +#endif + + /* fetch any constants that this instruction needs */ + if (c->fp->use_const_buffer) + fetch_constants(c, inst); if (inst->CondUpdate) brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); @@ -2381,6 +2659,9 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) case OPCODE_ADD: emit_add(c, inst); break; + case OPCODE_ARL: + emit_arl(c, inst); + break; case OPCODE_SUB: emit_sub(c, inst); break; @@ -2397,6 +2678,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) emit_trunc(c, inst); break; case OPCODE_MOV: + case OPCODE_SWZ: emit_mov(c, inst); break; case OPCODE_DP3: @@ -2429,11 +2711,9 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) case OPCODE_LG2: emit_lg2(c, inst); break; - case OPCODE_MAX: - emit_max(c, inst); - break; case OPCODE_MIN: - emit_min(c, inst); + case OPCODE_MAX: + emit_min_max(c, inst); break; case OPCODE_DDX: emit_ddx(c, inst); @@ -2531,6 +2811,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) break; case OPCODE_BGNLOOP: + /* XXX may need to invalidate the current_constant regs */ loop_inst[loop_insn++] = brw_DO(p, BRW_EXECUTE_8); break; case OPCODE_BRK: @@ -2574,10 +2855,27 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) } } + +/** + * Do GPU code generation for shaders that use GLSL features such as + * flow control. Other shaders will be compiled with the + */ void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) { + if (INTEL_DEBUG & DEBUG_WM) { + _mesa_printf("brw_wm_glsl_emit:\n"); + } + + /* initial instruction translation/simplification */ brw_wm_pass_fp(c); + + /* actual code generation */ brw_wm_emit_glsl(brw, c); + + if (INTEL_DEBUG & DEBUG_WM) { + brw_wm_print_program(c, "brw_wm_glsl_emit done"); + } + c->prog_data.total_grf = c->reg_index; c->prog_data.total_scratch = 0; } diff --git a/i965/brw_wm_pass0.c b/i965/brw_wm_pass0.c index fca7b7a..9214276 100644 --- a/i965/brw_wm_pass0.c +++ b/i965/brw_wm_pass0.c @@ -51,6 +51,7 @@ static struct brw_wm_value *get_value( struct brw_wm_compile *c) return &c->vreg[c->nr_vreg++]; } +/** return pointer to a newly allocated instruction */ static struct brw_wm_instruction *get_instruction( struct brw_wm_compile *c ) { assert(c->nr_insns < BRW_WM_MAX_INSN); @@ -60,6 +61,7 @@ static struct brw_wm_instruction *get_instruction( struct brw_wm_compile *c ) /*********************************************************************** */ +/** Init the "undef" register */ static void pass0_init_undef( struct brw_wm_compile *c) { struct brw_wm_ref *ref = &c->undef_ref; @@ -69,6 +71,7 @@ static void pass0_init_undef( struct brw_wm_compile *c) ref->prevuse = NULL; } +/** Set a FP register to a value */ static void pass0_set_fpreg_value( struct brw_wm_compile *c, GLuint file, GLuint idx, @@ -83,6 +86,7 @@ static void pass0_set_fpreg_value( struct brw_wm_compile *c, c->pass0_fp_reg[file][idx][component] = ref; } +/** Set a FP register to a ref */ static void pass0_set_fpreg_ref( struct brw_wm_compile *c, GLuint file, GLuint idx, @@ -115,12 +119,13 @@ static const struct brw_wm_ref *get_param_ref( struct brw_wm_compile *c, ref->value = &c->creg[i/16]; ref->insn = 0; ref->prevuse = NULL; - + return ref; } } +/** Return a ref to a constant/literal value */ static const struct brw_wm_ref *get_const_ref( struct brw_wm_compile *c, const GLfloat *constval ) { @@ -142,7 +147,7 @@ static const struct brw_wm_ref *get_const_ref( struct brw_wm_compile *c, */ c->constref[i].constval = *constval; c->constref[i].ref = get_param_ref(c, constval); - + return c->constref[i].ref; } else { @@ -187,7 +192,7 @@ static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c, /* There's something really hokey about parameters parsed in * arb programs - they all end up in here, whether they be - * state values, paramters or constants. This duplicates the + * state values, parameters or constants. This duplicates the * structure above & also seems to subvert the limits set for * each type of constant/param. */ @@ -198,7 +203,7 @@ static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c, */ ref = get_const_ref(c, &plist->ParameterValues[idx][component]); break; - + case PROGRAM_STATE_VAR: case PROGRAM_UNIFORM: /* These may change from run to run: @@ -229,14 +234,13 @@ static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c, - /*********************************************************************** * Straight translation to internal instruction format */ static void pass0_set_dst( struct brw_wm_compile *c, - struct brw_wm_instruction *out, - const struct prog_instruction *inst, + struct brw_wm_instruction *out, + const struct prog_instruction *inst, GLuint writemask ) { const struct prog_dst_register *dst = &inst->DstReg; @@ -245,18 +249,17 @@ static void pass0_set_dst( struct brw_wm_compile *c, for (i = 0; i < 4; i++) { if (writemask & (1<<i)) { out->dst[i] = get_value(c); - pass0_set_fpreg_value(c, dst->File, dst->Index, i, out->dst[i]); } } - + out->writemask = writemask; } static void pass0_set_dst_scalar( struct brw_wm_compile *c, - struct brw_wm_instruction *out, - const struct prog_instruction *inst, + struct brw_wm_instruction *out, + const struct prog_instruction *inst, GLuint writemask ) { if (writemask) { @@ -282,7 +285,6 @@ static void pass0_set_dst_scalar( struct brw_wm_compile *c, } - static const struct brw_wm_ref *get_fp_src_reg_ref( struct brw_wm_compile *c, struct prog_src_register src, GLuint i ) @@ -292,14 +294,13 @@ static const struct brw_wm_ref *get_fp_src_reg_ref( struct brw_wm_compile *c, static const GLfloat const_zero = 0.0; static const GLfloat const_one = 1.0; - if (component == SWIZZLE_ZERO) src_ref = get_const_ref(c, &const_zero); else if (component == SWIZZLE_ONE) src_ref = get_const_ref(c, &const_one); else src_ref = pass0_get_reg(c, src.File, src.Index, component); - + return src_ref; } @@ -311,19 +312,19 @@ static struct brw_wm_ref *get_new_ref( struct brw_wm_compile *c, { const struct brw_wm_ref *ref = get_fp_src_reg_ref(c, src, i); struct brw_wm_ref *newref = get_ref(c); - + newref->value = ref->value; newref->hw_reg = ref->hw_reg; - if (insn) { + if (insn) { newref->insn = insn - c->instruction; newref->prevuse = newref->value->lastuse; newref->value->lastuse = newref; } - if (src.NegateBase & (1<<i)) + if (src.Negate & (1 << i)) newref->hw_reg.negate ^= 1; - + if (src.Abs) { newref->hw_reg.negate = 0; newref->hw_reg.abs = 1; @@ -333,9 +334,9 @@ static struct brw_wm_ref *get_new_ref( struct brw_wm_compile *c, } - -static struct brw_wm_instruction *translate_insn( struct brw_wm_compile *c, - const struct prog_instruction *inst ) +static void +translate_insn(struct brw_wm_compile *c, + const struct prog_instruction *inst) { struct brw_wm_instruction *out = get_instruction(c); GLuint writemask = inst->DstReg.WriteMask; @@ -348,8 +349,9 @@ static struct brw_wm_instruction *translate_insn( struct brw_wm_compile *c, out->saturate = (inst->SaturateMode != SATURATE_OFF); out->tex_unit = inst->TexSrcUnit; out->tex_idx = inst->TexSrcTarget; - out->eot = inst->Sampler & 1; - out->target = inst->Sampler>>1; + out->tex_shadow = inst->TexShadow; + out->eot = inst->Aux & 1; + out->target = inst->Aux >> 1; /* Args: */ @@ -365,8 +367,6 @@ static struct brw_wm_instruction *translate_insn( struct brw_wm_compile *c, pass0_set_dst_scalar(c, out, inst, writemask); else pass0_set_dst(c, out, inst, writemask); - - return out; } @@ -426,6 +426,7 @@ static void pass0_init_payload( struct brw_wm_compile *c ) &c->payload.input_interp[i] ); } + /*********************************************************************** * PASS 0 * @@ -448,7 +449,6 @@ void brw_wm_pass0( struct brw_wm_compile *c ) for (insn = 0; insn < c->nr_fp_insns; insn++) { const struct prog_instruction *inst = &c->prog_instructions[insn]; - /* Optimize away moves, otherwise emit translated instruction: */ switch (inst->Opcode) { @@ -461,8 +461,6 @@ void brw_wm_pass0( struct brw_wm_compile *c ) translate_insn(c, inst); } break; - - default: translate_insn(c, inst); break; @@ -473,4 +471,3 @@ void brw_wm_pass0( struct brw_wm_compile *c ) brw_wm_print_program(c, "pass0"); } } - diff --git a/i965/brw_wm_pass1.c b/i965/brw_wm_pass1.c index a1fea6f..ab9aa2f 100644 --- a/i965/brw_wm_pass1.c +++ b/i965/brw_wm_pass1.c @@ -58,7 +58,8 @@ static void unlink_ref(struct brw_wm_ref *ref) if (ref == value->lastuse) { value->lastuse = ref->prevuse; - } else { + } + else { struct brw_wm_ref *i = value->lastuse; while (i->prevuse != ref) i = i->prevuse; i->prevuse = ref->prevuse; @@ -75,8 +76,9 @@ static void track_arg(struct brw_wm_compile *c, for (i = 0; i < 4; i++) { struct brw_wm_ref *ref = inst->src[arg][i]; if (ref) { - if (readmask & (1<<i)) + if (readmask & (1<<i)) { ref->value->contributes_to_output = 1; + } else { unlink_ref(ref); inst->src[arg][i] = NULL; @@ -88,15 +90,21 @@ static void track_arg(struct brw_wm_compile *c, static GLuint get_texcoord_mask( GLuint tex_idx ) { switch (tex_idx) { - case TEXTURE_1D_INDEX: return WRITEMASK_X; - case TEXTURE_2D_INDEX: return WRITEMASK_XY; - case TEXTURE_3D_INDEX: return WRITEMASK_XYZ; - case TEXTURE_CUBE_INDEX: return WRITEMASK_XYZ; - case TEXTURE_RECT_INDEX: return WRITEMASK_XY; + case TEXTURE_1D_INDEX: + return WRITEMASK_X; + case TEXTURE_2D_INDEX: + return WRITEMASK_XY; + case TEXTURE_3D_INDEX: + return WRITEMASK_XYZ; + case TEXTURE_CUBE_INDEX: + return WRITEMASK_XYZ; + case TEXTURE_RECT_INDEX: + return WRITEMASK_XY; default: return 0; } } + /* Step two: Basically this is dead code elimination. * * Iterate backwards over instructions, noting which values @@ -202,9 +210,10 @@ void brw_wm_pass1( struct brw_wm_compile *c ) break; case OPCODE_TEX: + case OPCODE_TXP: read0 = get_texcoord_mask(inst->tex_idx); - if (c->key.shadowtex_mask & (1<<inst->tex_unit)) + if (inst->tex_shadow) read0 |= WRITEMASK_Z; break; @@ -259,7 +268,6 @@ void brw_wm_pass1( struct brw_wm_compile *c ) break; case OPCODE_DST: - case OPCODE_TXP: case WM_FRONTFACING: default: break; @@ -274,6 +282,3 @@ void brw_wm_pass1( struct brw_wm_compile *c ) brw_wm_print_program(c, "pass1"); } } - - - diff --git a/i965/brw_wm_pass2.c b/i965/brw_wm_pass2.c index 6fca9ad..6faea01 100644 --- a/i965/brw_wm_pass2.c +++ b/i965/brw_wm_pass2.c @@ -69,8 +69,6 @@ static void prealloc_reg(struct brw_wm_compile *c, */ static void init_registers( struct brw_wm_compile *c ) { - struct brw_context *brw = c->func.brw; - GLuint inputs = (brw->vs.prog_data->outputs_written & DO_SETUP_BITS); GLuint nr_interp_regs = 0; GLuint i = 0; GLuint j; @@ -84,18 +82,22 @@ static void init_registers( struct brw_wm_compile *c ) for (j = 0; j < c->nr_creg; j++) prealloc_reg(c, &c->creg[j], i++); - for (j = 0; j < FRAG_ATTRIB_MAX; j++) - if (inputs & (1<<j)) { - /* index for vs output and ps input are not the same - in shader varying */ - GLuint index; - if (j > FRAG_ATTRIB_VAR0) - index = j - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0); + for (j = 0; j < FRAG_ATTRIB_MAX; j++) { + if (c->key.vp_outputs_written & (1<<j)) { + int fp_index; + + if (j >= VERT_RESULT_VAR0) + fp_index = j - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0); + else if (j <= VERT_RESULT_TEX7) + fp_index = j; else - index = j; + fp_index = -1; + nr_interp_regs++; - prealloc_reg(c, &c->payload.input_interp[index], i++); + if (fp_index >= 0) + prealloc_reg(c, &c->payload.input_interp[fp_index], i++); } + } assert(nr_interp_regs >= 1); @@ -120,7 +122,7 @@ static void update_register_usage(struct brw_wm_compile *c, /* Only search those which can change: */ if (grf->nextuse < thisinsn) { - struct brw_wm_ref *ref = grf->value->lastuse; + const struct brw_wm_ref *ref = grf->value->lastuse; /* Has last use of value been passed? */ @@ -148,7 +150,7 @@ static void spill_value(struct brw_wm_compile *c, /* Allocate a spill slot. Note that allocations start from 0x40 - * the first slot is reserved to mean "undef" in brw_wm_emit.c */ - if (!value->spill_slot) { + if (!value->spill_slot) { c->last_scratch += 0x40; value->spill_slot = c->last_scratch; } @@ -189,7 +191,7 @@ static GLuint search_contiguous_regs(struct brw_wm_compile *c, if (grf[i+j].nextuse < group_nextuse) group_nextuse = grf[i+j].nextuse; } - + if (group_nextuse > furthest) { furthest = group_nextuse; reg = i; @@ -197,7 +199,7 @@ static GLuint search_contiguous_regs(struct brw_wm_compile *c, } assert(furthest != thisinsn); - + /* Any non-empty regs will need to be spilled: */ for (j = 0; j < nr; j++) @@ -243,7 +245,7 @@ static void alloc_contiguous_dest(struct brw_wm_compile *c, static void load_args(struct brw_wm_compile *c, struct brw_wm_instruction *inst) -{ +{ GLuint thisinsn = inst - c->instruction; GLuint i,j; @@ -258,17 +260,17 @@ static void load_args(struct brw_wm_compile *c, * register allocation and mark the ref as requiring a fill. */ GLuint reg = search_contiguous_regs(c, 1, thisinsn); - + c->pass2_grf[reg].value = ref->value; c->pass2_grf[reg].nextuse = thisinsn; - + ref->value->resident = &c->pass2_grf[reg]; /* Note that a fill is required: */ ref->unspill_reg = reg*2; } - + /* Adjust the hw_reg to point at the value's current location: */ assert(ref->value == ref->value->resident->value); @@ -294,7 +296,7 @@ void brw_wm_pass2( struct brw_wm_compile *c ) for (insn = 0; insn < c->nr_insns; insn++) { struct brw_wm_instruction *inst = &c->instruction[insn]; - + /* Update registers' nextuse values: */ update_register_usage(c, insn); @@ -322,11 +324,11 @@ void brw_wm_pass2( struct brw_wm_compile *c ) break; } - if (TEST_DST_SPILLS && inst->opcode != WM_PIXELXY) + if (TEST_DST_SPILLS && inst->opcode != WM_PIXELXY) { for (i = 0; i < 4; i++) if (inst->dst[i]) spill_value(c, inst->dst[i]); - + } } if (INTEL_DEBUG & DEBUG_WM) { @@ -339,6 +341,3 @@ void brw_wm_pass2( struct brw_wm_compile *c ) brw_wm_print_program(c, "pass2/done"); } } - - - diff --git a/i965/brw_wm_sampler_state.c b/i965/brw_wm_sampler_state.c index 68a9296..3fc18ff 100644 --- a/i965/brw_wm_sampler_state.c +++ b/i965/brw_wm_sampler_state.c @@ -152,7 +152,7 @@ static void brw_update_sampler_state(struct wm_sampler_entry *key, sampler->ss0.mag_filter = BRW_MAPFILTER_ANISOTROPIC; if (key->max_aniso > 2.0) { - sampler->ss3.max_aniso = MAX2((key->max_aniso - 2) / 2, + sampler->ss3.max_aniso = MIN2((key->max_aniso - 2) / 2, BRW_ANISORATIO_16); } } @@ -178,6 +178,16 @@ static void brw_update_sampler_state(struct wm_sampler_entry *key, sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CUBE; sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CUBE; } + else if (key->tex_target == GL_TEXTURE_1D) { + /* There's a bug in 1D texture sampling - it actually pays + * attention to the wrap_t value, though it should not. + * Override the wrap_t value here to GL_REPEAT to keep + * any nonexistent border pixels from floating in. + */ + sampler->ss1.r_wrap_mode = translate_wrap_mode(key->wrap_r); + sampler->ss1.s_wrap_mode = translate_wrap_mode(key->wrap_s); + sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP; + } else { sampler->ss1.r_wrap_mode = translate_wrap_mode(key->wrap_r); sampler->ss1.s_wrap_mode = translate_wrap_mode(key->wrap_s); @@ -217,6 +227,7 @@ static void brw_update_sampler_state(struct wm_sampler_entry *key, sampler->ss2.default_color_pointer = sdc_bo->offset >> 5; /* reloc */ } + /** Sets up the cache key for sampler state for all texture units */ static void brw_wm_sampler_populate_key(struct brw_context *brw, diff --git a/i965/brw_wm_state.c b/i965/brw_wm_state.c index 1844eba..67b4117 100644 --- a/i965/brw_wm_state.c +++ b/i965/brw_wm_state.c @@ -62,6 +62,7 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) { GLcontext *ctx = &brw->intel.ctx; const struct gl_fragment_program *fp = brw->fragment_program; + const struct brw_fragment_program *bfp = (struct brw_fragment_program *) fp; struct intel_context *intel = &brw->intel; memset(key, 0, sizeof(*key)); @@ -103,11 +104,14 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) /* as far as we can tell */ key->computes_depth = - (fp->Base.OutputsWritten & (1 << FRAG_RESULT_DEPR)) != 0; + (fp->Base.OutputsWritten & (1 << FRAG_RESULT_DEPTH)) != 0; /* _NEW_COLOR */ key->uses_kill = fp->UsesKill || ctx->Color.AlphaEnabled; - key->is_glsl = brw_wm_is_glsl(fp); + key->is_glsl = bfp->isGLSL; + + /* temporary sanity check assertion */ + ASSERT(bfp->isGLSL == brw_wm_is_glsl(fp)); /* _NEW_DEPTH */ key->stats_wm = intel->stats_wm; @@ -121,6 +125,9 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) key->offset_factor = ctx->Polygon.OffsetFactor; } +/** + * Setup wm hardware state. See page 225 of Volume 2 + */ static dri_bo * wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, dri_bo **reloc_bufs) @@ -138,7 +145,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, if (key->total_scratch != 0) { wm.thread2.scratch_space_base_pointer = - brw->wm.scratch_buffer->offset >> 10; /* reloc */ + brw->wm.scratch_bo->offset >> 10; /* reloc */ wm.thread2.per_thread_scratch_space = key->total_scratch / 1024 - 1; } else { wm.thread2.scratch_space_base_pointer = 0; @@ -147,9 +154,9 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, wm.thread3.dispatch_grf_start_reg = key->dispatch_grf_start_reg; wm.thread3.urb_entry_read_length = key->urb_entry_read_length; + wm.thread3.urb_entry_read_offset = 0; wm.thread3.const_urb_entry_read_length = key->curb_entry_read_length; wm.thread3.const_urb_entry_read_offset = key->curbe_offset * 2; - wm.thread3.urb_entry_read_offset = 0; wm.wm4.sampler_count = (key->sampler_count + 1) / 4; if (brw->wm.sampler_bo != NULL) { @@ -216,7 +223,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, 0, 0, wm.thread2.per_thread_scratch_space, offsetof(struct brw_wm_unit_state, thread2), - brw->wm.scratch_buffer); + brw->wm.scratch_bo); } /* Emit sampler state relocation */ @@ -247,20 +254,20 @@ static void upload_wm_unit( struct brw_context *brw ) if (key.total_scratch) { GLuint total = key.total_scratch * key.max_threads; - if (brw->wm.scratch_buffer && total > brw->wm.scratch_buffer->size) { - dri_bo_unreference(brw->wm.scratch_buffer); - brw->wm.scratch_buffer = NULL; + if (brw->wm.scratch_bo && total > brw->wm.scratch_bo->size) { + dri_bo_unreference(brw->wm.scratch_bo); + brw->wm.scratch_bo = NULL; } - if (brw->wm.scratch_buffer == NULL) { - brw->wm.scratch_buffer = dri_bo_alloc(intel->bufmgr, - "wm scratch", - total, - 4096); + if (brw->wm.scratch_bo == NULL) { + brw->wm.scratch_bo = dri_bo_alloc(intel->bufmgr, + "wm scratch", + total, + 4096); } } reloc_bufs[0] = brw->wm.prog_bo; - reloc_bufs[1] = brw->wm.scratch_buffer; + reloc_bufs[1] = brw->wm.scratch_bo; reloc_bufs[2] = brw->wm.sampler_bo; dri_bo_unreference(brw->wm.state_bo); @@ -283,7 +290,7 @@ const struct brw_tracked_state brw_wm_unit = { .brw = (BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_CURBE_OFFSETS | - BRW_NEW_NR_SURFACES), + BRW_NEW_NR_WM_SURFACES), .cache = (CACHE_NEW_WM_PROG | CACHE_NEW_SAMPLER) diff --git a/i965/brw_wm_surface_state.c b/i965/brw_wm_surface_state.c index 3487b85..805df8a 100644 --- a/i965/brw_wm_surface_state.c +++ b/i965/brw_wm_surface_state.c @@ -33,11 +33,12 @@ #include "main/mtypes.h" #include "main/texformat.h" #include "main/texstore.h" +#include "shader/prog_parameter.h" #include "intel_mipmap_tree.h" #include "intel_batchbuffer.h" #include "intel_tex.h" - +#include "intel_fbo.h" #include "brw_context.h" #include "brw_state.h" @@ -69,7 +70,8 @@ static GLuint translate_tex_target( GLenum target ) } -static GLuint translate_tex_format( GLuint mesa_format, GLenum depth_mode ) +static GLuint translate_tex_format( GLuint mesa_format, GLenum internal_format, + GLenum depth_mode ) { switch( mesa_format ) { case MESA_FORMAT_L8: @@ -89,10 +91,16 @@ static GLuint translate_tex_format( GLuint mesa_format, GLenum depth_mode ) return BRW_SURFACEFORMAT_R8G8B8_UNORM; case MESA_FORMAT_ARGB8888: - return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + if (internal_format == GL_RGB) + return BRW_SURFACEFORMAT_B8G8R8X8_UNORM; + else + return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; case MESA_FORMAT_RGBA8888_REV: - return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; + if (internal_format == GL_RGB) + return BRW_SURFACEFORMAT_R8G8B8X8_UNORM; + else + return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; case MESA_FORMAT_RGB565: return BRW_SURFACEFORMAT_B5G6R5_UNORM; @@ -133,13 +141,34 @@ static GLuint translate_tex_format( GLuint mesa_format, GLenum depth_mode ) case MESA_FORMAT_RGBA_DXT5: return BRW_SURFACEFORMAT_BC3_UNORM; - case MESA_FORMAT_SRGBA8: - return BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB; + case MESA_FORMAT_SARGB8: + return BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB; + + case MESA_FORMAT_SLA8: + return BRW_SURFACEFORMAT_L8A8_UNORM_SRGB; + + case MESA_FORMAT_SL8: + return BRW_SURFACEFORMAT_L8_UNORM_SRGB; + case MESA_FORMAT_SRGB_DXT1: return BRW_SURFACEFORMAT_BC1_UNORM_SRGB; case MESA_FORMAT_S8_Z24: - return BRW_SURFACEFORMAT_I24X8_UNORM; + /* XXX: these different surface formats don't seem to + * make any difference for shadow sampler/compares. + */ + if (depth_mode == GL_INTENSITY) + return BRW_SURFACEFORMAT_I24X8_UNORM; + else if (depth_mode == GL_ALPHA) + return BRW_SURFACEFORMAT_A24X8_UNORM; + else + return BRW_SURFACEFORMAT_L24X8_UNORM; + + case MESA_FORMAT_DUDV8: + return BRW_SURFACEFORMAT_R8G8_SNORM; + + case MESA_FORMAT_SIGNED_RGBA8888_REV: + return BRW_SURFACEFORMAT_R8G8B8A8_SNORM; default: assert(0); @@ -147,10 +176,14 @@ static GLuint translate_tex_format( GLuint mesa_format, GLenum depth_mode ) } } -struct brw_wm_surface_key { + +/** + * Use same key for WM and VS surfaces. + */ +struct brw_surface_key { GLenum target, depthmode; dri_bo *bo; - GLint format; + GLint format, internal_format; GLint first_level, last_level; GLint width, height, depth; GLint pitch, cpp; @@ -158,6 +191,7 @@ struct brw_wm_surface_key { GLuint offset; }; + static void brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling) { @@ -179,7 +213,7 @@ brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling) static dri_bo * brw_create_texture_surface( struct brw_context *brw, - struct brw_wm_surface_key *key ) + struct brw_surface_key *key ) { struct brw_surface_state surf; dri_bo *bo; @@ -188,9 +222,11 @@ brw_create_texture_surface( struct brw_context *brw, surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; surf.ss0.surface_type = translate_tex_target(key->target); - - if (key->bo) - surf.ss0.surface_format = translate_tex_format(key->format, key->depthmode); + if (key->bo) { + surf.ss0.surface_format = translate_tex_format(key->format, + key->internal_format, + key->depthmode); + } else { switch (key->depth) { case 32: @@ -256,7 +292,8 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current; struct intel_texture_object *intelObj = intel_texture_object(tObj); struct gl_texture_image *firstImage = tObj->Image[0][intelObj->firstLevel]; - struct brw_wm_surface_key key; + struct brw_surface_key key; + const GLuint surf = SURF_INDEX_TEXTURE(unit); memset(&key, 0, sizeof(key)); @@ -267,6 +304,7 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) key.offset = intelObj->textureOffset; } else { key.format = firstImage->TexFormat->MesaFormat; + key.internal_format = firstImage->InternalFormat; key.pitch = intelObj->mt->pitch; key.depth = firstImage->Depth; key.bo = intelObj->mt->region->buffer; @@ -282,34 +320,207 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) key.cpp = intelObj->mt->cpp; key.tiling = intelObj->mt->region->tiling; - dri_bo_unreference(brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS]); - brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, - &key, sizeof(key), - &key.bo, key.bo ? 1 : 0, - NULL); - if (brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS] == NULL) { - brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS] = brw_create_texture_surface(brw, &key); + dri_bo_unreference(brw->wm.surf_bo[surf]); + brw->wm.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, + &key, sizeof(key), + &key.bo, key.bo ? 1 : 0, + NULL); + if (brw->wm.surf_bo[surf] == NULL) { + brw->wm.surf_bo[surf] = brw_create_texture_surface(brw, &key); } } + + +/** + * Create the constant buffer surface. Vertex/fragment shader constants will be + * read from this buffer with Data Port Read instructions/messages. + */ +static dri_bo * +brw_create_constant_surface( struct brw_context *brw, + struct brw_surface_key *key ) +{ + const GLint w = key->width - 1; + struct brw_surface_state surf; + dri_bo *bo; + + memset(&surf, 0, sizeof(surf)); + + surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; + surf.ss0.surface_type = BRW_SURFACE_BUFFER; + surf.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; + + assert(key->bo); + if (key->bo) + surf.ss1.base_addr = key->bo->offset; /* reloc */ + else + surf.ss1.base_addr = key->offset; + + surf.ss2.width = w & 0x7f; /* bits 6:0 of size or width */ + surf.ss2.height = (w >> 7) & 0x1fff; /* bits 19:7 of size or width */ + surf.ss3.depth = (w >> 20) & 0x7f; /* bits 26:20 of size or width */ + surf.ss3.pitch = (key->pitch * key->cpp) - 1; /* ignored?? */ + brw_set_surface_tiling(&surf, key->tiling); /* tiling now allowed */ + + bo = brw_upload_cache(&brw->cache, BRW_SS_SURFACE, + key, sizeof(*key), + &key->bo, key->bo ? 1 : 0, + &surf, sizeof(surf), + NULL, NULL); + + if (key->bo) { + /* Emit relocation to surface contents */ + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_SAMPLER, 0, + 0, + offsetof(struct brw_surface_state, ss1), + key->bo); + } + + return bo; +} + + +/** + * Update the surface state for a WM constant buffer. + * The constant buffer will be (re)allocated here if needed. + */ +static dri_bo * +brw_update_wm_constant_surface( GLcontext *ctx, + GLuint surf, + dri_bo *const_buffer, + const struct gl_program_parameter_list *params) +{ + struct brw_context *brw = brw_context(ctx); + struct brw_surface_key key; + struct intel_context *intel = &brw->intel; + const int size = params->NumParameters * 4 * sizeof(GLfloat); + + /* free old const buffer if too small */ + if (const_buffer && const_buffer->size < size) { + dri_bo_unreference(const_buffer); + const_buffer = NULL; + } + + /* alloc new buffer if needed */ + if (!const_buffer) { + const_buffer = + drm_intel_bo_alloc(intel->bufmgr, "fp_const_buffer", size, 64); + } + + memset(&key, 0, sizeof(key)); + + key.format = MESA_FORMAT_RGBA_FLOAT32; + key.internal_format = GL_RGBA; + key.bo = const_buffer; + key.depthmode = GL_NONE; + key.pitch = params->NumParameters; + key.width = params->NumParameters; + key.height = 1; + key.depth = 1; + key.cpp = 16; + + /* + printf("%s:\n", __FUNCTION__); + printf(" width %d height %d depth %d cpp %d pitch %d\n", + key.width, key.height, key.depth, key.cpp, key.pitch); + */ + + dri_bo_unreference(brw->wm.surf_bo[surf]); + brw->wm.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, + &key, sizeof(key), + &key.bo, key.bo ? 1 : 0, + NULL); + if (brw->wm.surf_bo[surf] == NULL) { + brw->wm.surf_bo[surf] = brw_create_constant_surface(brw, &key); + } + + return const_buffer; +} + + +/** + * Update the surface state for a VS constant buffer. + * The constant buffer will be (re)allocated here if needed. + */ +static dri_bo * +brw_update_vs_constant_surface( GLcontext *ctx, + GLuint surf, + dri_bo *const_buffer, + const struct gl_program_parameter_list *params) +{ + struct brw_context *brw = brw_context(ctx); + struct brw_surface_key key; + struct intel_context *intel = &brw->intel; + const int size = params->NumParameters * 4 * sizeof(GLfloat); + + assert(surf == 0); + + /* free old const buffer if too small */ + if (const_buffer && const_buffer->size < size) { + dri_bo_unreference(const_buffer); + const_buffer = NULL; + } + + /* alloc new buffer if needed */ + if (!const_buffer) { + const_buffer = + drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer", size, 64); + } + + memset(&key, 0, sizeof(key)); + + key.format = MESA_FORMAT_RGBA_FLOAT32; + key.internal_format = GL_RGBA; + key.bo = const_buffer; + key.depthmode = GL_NONE; + key.pitch = params->NumParameters; + key.width = params->NumParameters; + key.height = 1; + key.depth = 1; + key.cpp = 16; + + /* + printf("%s:\n", __FUNCTION__); + printf(" width %d height %d depth %d cpp %d pitch %d\n", + key.width, key.height, key.depth, key.cpp, key.pitch); + */ + + dri_bo_unreference(brw->vs.surf_bo[surf]); + brw->vs.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, + &key, sizeof(key), + &key.bo, key.bo ? 1 : 0, + NULL); + if (brw->vs.surf_bo[surf] == NULL) { + brw->vs.surf_bo[surf] = brw_create_constant_surface(brw, &key); + } + + return const_buffer; +} + + /** * Sets up a surface state structure to point at the given region. * While it is only used for the front/back buffer currently, it should be * usable for further buffers when doing ARB_draw_buffer support. */ static void -brw_update_region_surface(struct brw_context *brw, struct intel_region *region, - unsigned int unit, GLboolean cached) +brw_update_renderbuffer_surface(struct brw_context *brw, + struct gl_renderbuffer *rb, + unsigned int unit, GLboolean cached) { GLcontext *ctx = &brw->intel.ctx; dri_bo *region_bo = NULL; + struct intel_renderbuffer *irb = intel_renderbuffer(rb); + struct intel_region *region = irb ? irb->region : NULL; struct { unsigned int surface_type; unsigned int surface_format; - unsigned int width, height, cpp; + unsigned int width, height, pitch, cpp; GLubyte color_mask[4]; GLboolean color_blend; uint32_t tiling; + uint32_t draw_offset; } key; memset(&key, 0, sizeof(key)); @@ -318,14 +529,29 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region, region_bo = region->buffer; key.surface_type = BRW_SURFACE_2D; - if (region->cpp == 4) + switch (irb->texformat->MesaFormat) { + case MESA_FORMAT_ARGB8888: key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; - else + break; + case MESA_FORMAT_RGB565: key.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM; + break; + case MESA_FORMAT_ARGB1555: + key.surface_format = BRW_SURFACEFORMAT_B5G5R5A1_UNORM; + break; + case MESA_FORMAT_ARGB4444: + key.surface_format = BRW_SURFACEFORMAT_B4G4R4A4_UNORM; + break; + default: + _mesa_problem(ctx, "Bad renderbuffer format: %d\n", + irb->texformat->MesaFormat); + } key.tiling = region->tiling; - key.width = region->pitch; /* XXX: not really! */ + key.width = region->width; key.height = region->height; + key.pitch = region->pitch; key.cpp = region->cpp; + key.draw_offset = region->draw_offset; /* cur 3d or cube face offset */ } else { key.surface_type = BRW_SURFACE_NULL; key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; @@ -333,6 +559,7 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region, key.width = 1; key.height = 1; key.cpp = 4; + key.draw_offset = 0; } memcpy(key.color_mask, ctx->Color.ColorMask, sizeof(key.color_mask)); @@ -354,13 +581,14 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region, surf.ss0.surface_format = key.surface_format; surf.ss0.surface_type = key.surface_type; + surf.ss1.base_addr = key.draw_offset; if (region_bo != NULL) - surf.ss1.base_addr = region_bo->offset; /* reloc */ + surf.ss1.base_addr += region_bo->offset; /* reloc */ surf.ss2.width = key.width - 1; surf.ss2.height = key.height - 1; brw_set_surface_tiling(&surf, key.tiling); - surf.ss3.pitch = (key.width * key.cpp) - 1; + surf.ss3.pitch = (key.pitch * key.cpp) - 1; /* _NEW_COLOR */ surf.ss0.color_blend = key.color_blend; @@ -371,7 +599,7 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region, /* Key size will never match key size for textures, so we're safe. */ brw->wm.surf_bo[unit] = brw_upload_cache(&brw->cache, BRW_SS_SURFACE, - &key, sizeof(key), + &key, sizeof(key), ®ion_bo, 1, &surf, sizeof(surf), NULL, NULL); @@ -380,12 +608,12 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region, * them both. We might be able to figure out from other state * a more restrictive relocation to emit. */ - dri_bo_emit_reloc(brw->wm.surf_bo[unit], - I915_GEM_DOMAIN_RENDER, - I915_GEM_DOMAIN_RENDER, - 0, - offsetof(struct brw_surface_state, ss1), - region_bo); + drm_intel_bo_emit_reloc(brw->wm.surf_bo[unit], + offsetof(struct brw_surface_state, ss1), + region_bo, + key.draw_offset, + I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER); } } } @@ -400,6 +628,8 @@ brw_wm_get_binding_table(struct brw_context *brw) { dri_bo *bind_bo; + assert(brw->wm.nr_surfaces <= BRW_WM_MAX_SURF); + bind_bo = brw_search_cache(&brw->cache, BRW_SS_SURF_BIND, NULL, 0, brw->wm.surf_bo, brw->wm.nr_surfaces, @@ -446,54 +676,159 @@ static void prepare_wm_surfaces(struct brw_context *brw ) GLuint i; int old_nr_surfaces; - if (brw->state.nr_draw_regions > 1) { - for (i = 0; i < brw->state.nr_draw_regions; i++) { - brw_update_region_surface(brw, brw->state.draw_regions[i], i, - GL_FALSE); + /* _NEW_BUFFERS */ + /* Update surfaces for drawing buffers */ + if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) { + for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { + brw_update_renderbuffer_surface(brw, + ctx->DrawBuffer->_ColorDrawBuffers[i], + i, + GL_FALSE); } - }else { - brw_update_region_surface(brw, brw->state.draw_regions[0], 0, GL_TRUE); + } else { + brw_update_renderbuffer_surface(brw, NULL, 0, GL_TRUE); } old_nr_surfaces = brw->wm.nr_surfaces; brw->wm.nr_surfaces = MAX_DRAW_BUFFERS; + /* Update surface / buffer for fragment shader constant buffer */ + { + const GLuint surf = SURF_INDEX_FRAG_CONST_BUFFER; + struct brw_fragment_program *fp = + (struct brw_fragment_program *) brw->fragment_program; + fp->const_buffer = + brw_update_wm_constant_surface(ctx, surf, fp->const_buffer, + fp->program.Base.Parameters); + + brw->wm.nr_surfaces = surf + 1; + } + + /* Update surfaces for textures */ for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { - struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i]; + const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i]; + const GLuint surf = SURF_INDEX_TEXTURE(i); /* _NEW_TEXTURE, BRW_NEW_TEXDATA */ - if(texUnit->_ReallyEnabled) { + if (texUnit->_ReallyEnabled) { if (texUnit->_Current == intel->frame_buffer_texobj) { - dri_bo_unreference(brw->wm.surf_bo[i+MAX_DRAW_BUFFERS]); - brw->wm.surf_bo[i+MAX_DRAW_BUFFERS] = brw->wm.surf_bo[0]; - dri_bo_reference(brw->wm.surf_bo[i+MAX_DRAW_BUFFERS]); - brw->wm.nr_surfaces = i + MAX_DRAW_BUFFERS + 1; + /* render to texture */ + dri_bo_unreference(brw->wm.surf_bo[surf]); + brw->wm.surf_bo[surf] = brw->wm.surf_bo[0]; + dri_bo_reference(brw->wm.surf_bo[surf]); + brw->wm.nr_surfaces = surf + 1; } else { + /* regular texture */ brw_update_texture_surface(ctx, i); - brw->wm.nr_surfaces = i + MAX_DRAW_BUFFERS + 1; + brw->wm.nr_surfaces = surf + 1; } } else { - dri_bo_unreference(brw->wm.surf_bo[i+MAX_DRAW_BUFFERS]); - brw->wm.surf_bo[i+MAX_DRAW_BUFFERS] = NULL; + dri_bo_unreference(brw->wm.surf_bo[surf]); + brw->wm.surf_bo[surf] = NULL; } - } dri_bo_unreference(brw->wm.bind_bo); brw->wm.bind_bo = brw_wm_get_binding_table(brw); if (brw->wm.nr_surfaces != old_nr_surfaces) - brw->state.dirty.brw |= BRW_NEW_NR_SURFACES; + brw->state.dirty.brw |= BRW_NEW_NR_WM_SURFACES; +} + + +/** + * Constructs the binding table for the VS surface state. + */ +static dri_bo * +brw_vs_get_binding_table(struct brw_context *brw) +{ + dri_bo *bind_bo; + + assert(brw->vs.nr_surfaces <= BRW_VS_MAX_SURF); + + bind_bo = brw_search_cache(&brw->cache, BRW_SS_SURF_BIND, + NULL, 0, + brw->vs.surf_bo, brw->vs.nr_surfaces, + NULL); + + if (bind_bo == NULL) { + GLuint data_size = brw->vs.nr_surfaces * sizeof(GLuint); + uint32_t *data = malloc(data_size); + int i; + + for (i = 0; i < brw->vs.nr_surfaces; i++) + if (brw->vs.surf_bo[i]) + data[i] = brw->vs.surf_bo[i]->offset; + else + data[i] = 0; + + bind_bo = brw_upload_cache( &brw->cache, BRW_SS_SURF_BIND, + NULL, 0, + brw->vs.surf_bo, brw->vs.nr_surfaces, + data, data_size, + NULL, NULL); + + /* Emit binding table relocations to surface state */ + for (i = 0; i < BRW_VS_MAX_SURF; i++) { + if (brw->vs.surf_bo[i] != NULL) { + dri_bo_emit_reloc(bind_bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0, + i * sizeof(GLuint), + brw->vs.surf_bo[i]); + } + } + + free(data); + } + + return bind_bo; +} + + +/** + * Vertex shader surfaces. Just constant buffer for now. Could add vertex + * shader textures in the future. + */ +static void prepare_vs_surfaces(struct brw_context *brw ) +{ + GLcontext *ctx = &brw->intel.ctx; + + /* Update surface / buffer for vertex shader constant buffer */ + { + const GLuint surf = SURF_INDEX_VERT_CONST_BUFFER; + struct brw_vertex_program *vp = + (struct brw_vertex_program *) brw->vertex_program; + vp->const_buffer = + brw_update_vs_constant_surface(ctx, surf, vp->const_buffer, + vp->program.Base.Parameters); + + brw->vs.nr_surfaces = 1; + } + + dri_bo_unreference(brw->vs.bind_bo); + brw->vs.bind_bo = brw_vs_get_binding_table(brw); + + if (1) + brw->state.dirty.brw |= BRW_NEW_NR_VS_SURFACES; +} + + +static void +prepare_surfaces(struct brw_context *brw) +{ + prepare_wm_surfaces(brw); + prepare_vs_surfaces(brw); } const struct brw_tracked_state brw_wm_surfaces = { .dirty = { - .mesa = _NEW_COLOR | _NEW_TEXTURE | _NEW_BUFFERS, + .mesa = _NEW_COLOR | _NEW_TEXTURE | _NEW_BUFFERS | _NEW_PROGRAM, .brw = BRW_NEW_CONTEXT, .cache = 0 }, - .prepare = prepare_wm_surfaces, + .prepare = prepare_surfaces, }; diff --git a/i965/intel_state.c b/i965/intel_state.c deleted file mode 100644 index 0c9c670..0000000 --- a/i965/intel_state.c +++ /dev/null @@ -1,233 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include "main/glheader.h" -#include "main/context.h" -#include "main/macros.h" -#include "main/enums.h" -#include "main/colormac.h" -#include "main/dd.h" - -#include "intel_screen.h" -#include "intel_context.h" -#include "intel_regions.h" -#include "swrast/swrast.h" - -int -intel_translate_shadow_compare_func( GLenum func ) -{ - switch(func) { - case GL_NEVER: - return COMPAREFUNC_ALWAYS; - case GL_LESS: - return COMPAREFUNC_LEQUAL; - case GL_LEQUAL: - return COMPAREFUNC_LESS; - case GL_GREATER: - return COMPAREFUNC_GEQUAL; - case GL_GEQUAL: - return COMPAREFUNC_GREATER; - case GL_NOTEQUAL: - return COMPAREFUNC_EQUAL; - case GL_EQUAL: - return COMPAREFUNC_NOTEQUAL; - case GL_ALWAYS: - return COMPAREFUNC_NEVER; - } - - fprintf(stderr, "Unknown value in %s: %x\n", __FUNCTION__, func); - return COMPAREFUNC_NEVER; -} - -int -intel_translate_compare_func( GLenum func ) -{ - switch(func) { - case GL_NEVER: - return COMPAREFUNC_NEVER; - case GL_LESS: - return COMPAREFUNC_LESS; - case GL_LEQUAL: - return COMPAREFUNC_LEQUAL; - case GL_GREATER: - return COMPAREFUNC_GREATER; - case GL_GEQUAL: - return COMPAREFUNC_GEQUAL; - case GL_NOTEQUAL: - return COMPAREFUNC_NOTEQUAL; - case GL_EQUAL: - return COMPAREFUNC_EQUAL; - case GL_ALWAYS: - return COMPAREFUNC_ALWAYS; - } - - fprintf(stderr, "Unknown value in %s: %x\n", __FUNCTION__, func); - return COMPAREFUNC_ALWAYS; -} - -int -intel_translate_stencil_op( GLenum op ) -{ - switch(op) { - case GL_KEEP: - return STENCILOP_KEEP; - case GL_ZERO: - return STENCILOP_ZERO; - case GL_REPLACE: - return STENCILOP_REPLACE; - case GL_INCR: - return STENCILOP_INCRSAT; - case GL_DECR: - return STENCILOP_DECRSAT; - case GL_INCR_WRAP: - return STENCILOP_INCR; - case GL_DECR_WRAP: - return STENCILOP_DECR; - case GL_INVERT: - return STENCILOP_INVERT; - default: - return STENCILOP_ZERO; - } -} - -int -intel_translate_blend_factor( GLenum factor ) -{ - switch(factor) { - case GL_ZERO: - return BLENDFACT_ZERO; - case GL_SRC_ALPHA: - return BLENDFACT_SRC_ALPHA; - case GL_ONE: - return BLENDFACT_ONE; - case GL_SRC_COLOR: - return BLENDFACT_SRC_COLR; - case GL_ONE_MINUS_SRC_COLOR: - return BLENDFACT_INV_SRC_COLR; - case GL_DST_COLOR: - return BLENDFACT_DST_COLR; - case GL_ONE_MINUS_DST_COLOR: - return BLENDFACT_INV_DST_COLR; - case GL_ONE_MINUS_SRC_ALPHA: - return BLENDFACT_INV_SRC_ALPHA; - case GL_DST_ALPHA: - return BLENDFACT_DST_ALPHA; - case GL_ONE_MINUS_DST_ALPHA: - return BLENDFACT_INV_DST_ALPHA; - case GL_SRC_ALPHA_SATURATE: - return BLENDFACT_SRC_ALPHA_SATURATE; - case GL_CONSTANT_COLOR: - return BLENDFACT_CONST_COLOR; - case GL_ONE_MINUS_CONSTANT_COLOR: - return BLENDFACT_INV_CONST_COLOR; - case GL_CONSTANT_ALPHA: - return BLENDFACT_CONST_ALPHA; - case GL_ONE_MINUS_CONSTANT_ALPHA: - return BLENDFACT_INV_CONST_ALPHA; - } - - fprintf(stderr, "Unknown value in %s: %x\n", __FUNCTION__, factor); - return BLENDFACT_ZERO; -} - -int -intel_translate_logic_op( GLenum opcode ) -{ - switch(opcode) { - case GL_CLEAR: - return LOGICOP_CLEAR; - case GL_AND: - return LOGICOP_AND; - case GL_AND_REVERSE: - return LOGICOP_AND_RVRSE; - case GL_COPY: - return LOGICOP_COPY; - case GL_COPY_INVERTED: - return LOGICOP_COPY_INV; - case GL_AND_INVERTED: - return LOGICOP_AND_INV; - case GL_NOOP: - return LOGICOP_NOOP; - case GL_XOR: - return LOGICOP_XOR; - case GL_OR: - return LOGICOP_OR; - case GL_OR_INVERTED: - return LOGICOP_OR_INV; - case GL_NOR: - return LOGICOP_NOR; - case GL_EQUIV: - return LOGICOP_EQUIV; - case GL_INVERT: - return LOGICOP_INV; - case GL_OR_REVERSE: - return LOGICOP_OR_RVRSE; - case GL_NAND: - return LOGICOP_NAND; - case GL_SET: - return LOGICOP_SET; - default: - return LOGICOP_SET; - } -} - - -static void -intelClearColor(GLcontext *ctx, const GLfloat color[4]) -{ - struct intel_context *intel = intel_context(ctx); - GLubyte clear[4]; - - CLAMPED_FLOAT_TO_UBYTE(clear[0], color[0]); - CLAMPED_FLOAT_TO_UBYTE(clear[1], color[1]); - CLAMPED_FLOAT_TO_UBYTE(clear[2], color[2]); - CLAMPED_FLOAT_TO_UBYTE(clear[3], color[3]); - - /* compute both 32 and 16-bit clear values */ - intel->ClearColor8888 = INTEL_PACKCOLOR8888(clear[0], clear[1], - clear[2], clear[3]); - intel->ClearColor565 = INTEL_PACKCOLOR565(clear[0], clear[1], clear[2]); -} - - -/* Fallback to swrast for select and feedback. - */ -static void -intelRenderMode( GLcontext *ctx, GLenum mode ) -{ - struct intel_context *intel = intel_context(ctx); - FALLBACK( intel, INTEL_FALLBACK_RENDERMODE, (mode != GL_RENDER) ); -} - - -void -intelInitStateFuncs( struct dd_function_table *functions ) -{ - functions->RenderMode = intelRenderMode; - functions->ClearColor = intelClearColor; -} diff --git a/shared/intel_batchbuffer.c b/shared/intel_batchbuffer.c index 9d99372..29dc05c 100644 --- a/shared/intel_batchbuffer.c +++ b/shared/intel_batchbuffer.c @@ -207,7 +207,7 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file, used); /* Emit a flush if the bufmgr doesn't do it for us. */ - if (!intel->ttm) { + if (intel->always_flush_cache || !intel->ttm) { *(GLuint *) (batch->ptr) = intel->vtbl.flush_cmd(); batch->ptr += 4; used = batch->ptr - batch->map; diff --git a/shared/intel_blit.c b/shared/intel_blit.c index e160957..4919828 100644 --- a/shared/intel_blit.c +++ b/shared/intel_blit.c @@ -32,6 +32,8 @@ #include "main/mtypes.h" #include "main/context.h" #include "main/enums.h" +#include "main/texformat.h" +#include "main/colormac.h" #include "intel_blit.h" #include "intel_buffers.h" @@ -98,11 +100,11 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv, ASSERT(src->cpp == dst->cpp); if (cpp == 2) { - BR13 = (0xCC << 16) | (1 << 24); + BR13 = (0xCC << 16) | BR13_565; CMD = XY_SRC_COPY_BLT_CMD; } else { - BR13 = (0xCC << 16) | (1 << 24) | (1 << 25); + BR13 = (0xCC << 16) | BR13_8888; CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; } @@ -194,13 +196,15 @@ intelEmitFillBlit(struct intel_context *intel, switch (cpp) { case 1: + BR13 = (0xF0 << 16); + CMD = XY_COLOR_BLT_CMD; + break; case 2: - case 3: - BR13 = (0xF0 << 16) | (1 << 24); + BR13 = (0xF0 << 16) | BR13_565; CMD = XY_COLOR_BLT_CMD; break; case 4: - BR13 = (0xF0 << 16) | (1 << 24) | (1 << 25); + BR13 = (0xF0 << 16) | BR13_8888; CMD = XY_COLOR_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; break; default: @@ -335,12 +339,11 @@ intelEmitCopyBlit(struct intel_context *intel, CMD = XY_SRC_COPY_BLT_CMD; break; case 2: - case 3: - BR13 |= (1 << 24); + BR13 |= BR13_565; CMD = XY_SRC_COPY_BLT_CMD; break; case 4: - BR13 |= (1 << 24) | (1 << 25); + BR13 |= BR13_8888; CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; break; default: @@ -483,10 +486,9 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) const GLbitfield bufBit = 1 << buf; if ((clearMask & bufBit) && !(bufBit & skipBuffers)) { /* OK, clear this renderbuffer */ - struct intel_region *irb_region = - intel_get_rb_region(fb, buf); + struct intel_renderbuffer *irb = intel_get_renderbuffer(fb, buf); dri_bo *write_buffer = - intel_region_buffer(intel, irb_region, + intel_region_buffer(intel, irb->region, all ? INTEL_WRITE_FULL : INTEL_WRITE_PART); @@ -494,15 +496,13 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) GLint pitch, cpp; GLuint BR13, CMD; - ASSERT(irb_region); - - pitch = irb_region->pitch; - cpp = irb_region->cpp; + pitch = irb->region->pitch; + cpp = irb->region->cpp; DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", __FUNCTION__, - irb_region->buffer, (pitch * cpp), - irb_region->draw_offset, + irb->region->buffer, (pitch * cpp), + irb->region->draw_offset, b.x1, b.y1, b.x2 - b.x1, b.y2 - b.y1); BR13 = 0xf0 << 16; @@ -510,7 +510,7 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) /* Setup the blit command */ if (cpp == 4) { - BR13 |= (1 << 24) | (1 << 25); + BR13 |= BR13_8888; if (buf == BUFFER_DEPTH || buf == BUFFER_STENCIL) { if (clearMask & BUFFER_BIT_DEPTH) CMD |= XY_BLT_WRITE_RGB; @@ -523,12 +523,12 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) } } else { - ASSERT(cpp == 2 || cpp == 0); - BR13 |= (1 << 24); + ASSERT(cpp == 2); + BR13 |= BR13_565; } #ifndef I915 - if (irb_region->tiling != I915_TILING_NONE) { + if (irb->region->tiling != I915_TILING_NONE) { CMD |= XY_DST_TILED; pitch /= 4; } @@ -539,9 +539,36 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) clearVal = clear_depth; } else { - clearVal = (cpp == 4) - ? intel->ClearColor8888 : intel->ClearColor565; - } + uint8_t clear[4]; + GLclampf *color = ctx->Color.ClearColor; + + CLAMPED_FLOAT_TO_UBYTE(clear[0], color[0]); + CLAMPED_FLOAT_TO_UBYTE(clear[1], color[1]); + CLAMPED_FLOAT_TO_UBYTE(clear[2], color[2]); + CLAMPED_FLOAT_TO_UBYTE(clear[3], color[3]); + + switch (irb->texformat->MesaFormat) { + case MESA_FORMAT_ARGB8888: + clearVal = intel->ClearColor8888; + break; + case MESA_FORMAT_RGB565: + clearVal = intel->ClearColor565; + break; + case MESA_FORMAT_ARGB4444: + clearVal = PACK_COLOR_4444(clear[3], clear[0], + clear[1], clear[2]); + break; + case MESA_FORMAT_ARGB1555: + clearVal = PACK_COLOR_1555(clear[3], clear[0], + clear[1], clear[2]); + break; + default: + _mesa_problem(ctx, "Unexpected renderbuffer format: %d\n", + irb->texformat->MesaFormat); + clearVal = 0; + } + } + /* _mesa_debug(ctx, "hardware blit clear buf %d rb id %d\n", buf, irb->Base.Name); @@ -557,14 +584,13 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) OUT_BATCH((b.y2 << 16) | b.x2); OUT_RELOC(write_buffer, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - irb_region->draw_offset); + irb->region->draw_offset); OUT_BATCH(clearVal); ADVANCE_BATCH(); clearMask &= ~bufBit; /* turn off bit, for faster loop exit */ } } } - intel_batchbuffer_emit_mi_flush(intel->batch); } UNLOCK_HARDWARE(intel); diff --git a/shared/intel_buffer_objects.c b/shared/intel_buffer_objects.c index 60d7bb3..2e6b778 100644 --- a/shared/intel_buffer_objects.c +++ b/shared/intel_buffer_objects.c @@ -35,9 +35,6 @@ #include "intel_batchbuffer.h" #include "intel_regions.h" -static GLboolean intel_bufferobj_unmap(GLcontext * ctx, - GLenum target, - struct gl_buffer_object *obj); /** Allocates a new dri_bo to store the data for the buffer object. */ static void @@ -103,13 +100,9 @@ intel_bufferobj_free(GLcontext * ctx, struct gl_buffer_object *obj) struct intel_buffer_object *intel_obj = intel_buffer_object(obj); assert(intel_obj); + assert(!obj->Pointer); /* Mesa should have unmapped it */ - /* Buffer objects are automatically unmapped when deleting according - * to the spec. - */ - if (obj->Pointer) - intel_bufferobj_unmap(ctx, 0, obj); - + _mesa_free(intel_obj->sys_buffer); if (intel_obj->region) { intel_bufferobj_release_region(intel, intel_obj); } @@ -141,11 +134,7 @@ intel_bufferobj_data(GLcontext * ctx, intel_obj->Base.Size = size; intel_obj->Base.Usage = usage; - /* Buffer objects are automatically unmapped when creating new data buffers - * according to the spec. - */ - if (obj->Pointer) - intel_bufferobj_unmap(ctx, 0, obj); + assert(!obj->Pointer); /* Mesa should have unmapped it */ if (intel_obj->region) intel_bufferobj_release_region(intel, intel_obj); @@ -154,7 +143,23 @@ intel_bufferobj_data(GLcontext * ctx, dri_bo_unreference(intel_obj->buffer); intel_obj->buffer = NULL; } + _mesa_free(intel_obj->sys_buffer); + intel_obj->sys_buffer = NULL; + if (size != 0) { +#ifdef I915 + /* On pre-965, stick VBOs in system memory, as we're always doing swtnl + * with their contents anyway. + */ + if (target == GL_ARRAY_BUFFER || target == GL_ELEMENT_ARRAY_BUFFER) { + intel_obj->sys_buffer = _mesa_malloc(size); + if (intel_obj->sys_buffer != NULL) { + if (data != NULL) + memcpy(intel_obj->sys_buffer, data, size); + return; + } + } +#endif intel_bufferobj_alloc_buffer(intel, intel_obj); if (data != NULL) @@ -184,7 +189,10 @@ intel_bufferobj_subdata(GLcontext * ctx, if (intel_obj->region) intel_bufferobj_cow(intel, intel_obj); - dri_bo_subdata(intel_obj->buffer, offset, size, data); + if (intel_obj->sys_buffer) + memcpy((char *)intel_obj->sys_buffer + offset, data, size); + else + dri_bo_subdata(intel_obj->buffer, offset, size, data); } @@ -216,11 +224,16 @@ intel_bufferobj_map(GLcontext * ctx, { struct intel_context *intel = intel_context(ctx); struct intel_buffer_object *intel_obj = intel_buffer_object(obj); + GLboolean read_only = (access == GL_READ_ONLY_ARB); + GLboolean write_only = (access == GL_WRITE_ONLY_ARB); - /* XXX: Translate access to flags arg below: - */ assert(intel_obj); + if (intel_obj->sys_buffer) { + obj->Pointer = intel_obj->sys_buffer; + return obj->Pointer; + } + if (intel_obj->region) intel_bufferobj_cow(intel, intel_obj); @@ -229,7 +242,14 @@ intel_bufferobj_map(GLcontext * ctx, return NULL; } - dri_bo_map(intel_obj->buffer, GL_TRUE); + if (write_only && intel->intelScreen->kernel_exec_fencing) { + drm_intel_gem_bo_map_gtt(intel_obj->buffer); + intel_obj->mapped_gtt = GL_TRUE; + } else { + drm_intel_bo_map(intel_obj->buffer, !read_only); + intel_obj->mapped_gtt = GL_FALSE; + } + obj->Pointer = intel_obj->buffer->virtual; return obj->Pointer; } @@ -245,9 +265,16 @@ intel_bufferobj_unmap(GLcontext * ctx, struct intel_buffer_object *intel_obj = intel_buffer_object(obj); assert(intel_obj); - if (intel_obj->buffer != NULL) { + if (intel_obj->sys_buffer != NULL) { assert(obj->Pointer); - dri_bo_unmap(intel_obj->buffer); + obj->Pointer = NULL; + } else if (intel_obj->buffer != NULL) { + assert(obj->Pointer); + if (intel_obj->mapped_gtt) { + drm_intel_gem_bo_unmap_gtt(intel_obj->buffer); + } else { + drm_intel_bo_unmap(intel_obj->buffer); + } obj->Pointer = NULL; } return GL_TRUE; @@ -266,6 +293,18 @@ intel_bufferobj_buffer(struct intel_context *intel, } } + if (intel_obj->buffer == NULL) { + intel_bufferobj_alloc_buffer(intel, intel_obj); + intel_bufferobj_subdata(&intel->ctx, + GL_ARRAY_BUFFER_ARB, + 0, + intel_obj->Base.Size, + intel_obj->sys_buffer, + &intel_obj->Base); + _mesa_free(intel_obj->sys_buffer); + intel_obj->sys_buffer = NULL; + } + return intel_obj->buffer; } diff --git a/shared/intel_buffer_objects.h b/shared/intel_buffer_objects.h index bf6dbd5..0431015 100644 --- a/shared/intel_buffer_objects.h +++ b/shared/intel_buffer_objects.h @@ -42,10 +42,13 @@ struct intel_buffer_object { struct gl_buffer_object Base; dri_bo *buffer; /* the low-level buffer manager's buffer handle */ + /** System memory buffer data, if not using a BO to store the data. */ + void *sys_buffer; struct intel_region *region; /* Is there a zero-copy texture associated with this (pixel) buffer object? */ + GLboolean mapped_gtt; }; diff --git a/shared/intel_buffers.c b/shared/intel_buffers.c index f1908cb..d2fad9e 100644 --- a/shared/intel_buffers.c +++ b/shared/intel_buffers.c @@ -154,7 +154,7 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb) return; } - /* Do this here, note core Mesa, since this function is called from + /* Do this here, not core Mesa, since this function is called from * many places within the driver. */ if (ctx->NewState & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) { @@ -172,9 +172,6 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb) return; } - if (fb->Name) - intel_validate_paired_depth_stencil(ctx, fb); - /* * How many color buffers are we drawing into? */ @@ -182,7 +179,8 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb) /* writing to 0 */ colorRegions[0] = NULL; intel->constant_cliprect = GL_TRUE; - } else if (fb->_NumColorDrawBuffers > 1) { + } + else if (fb->_NumColorDrawBuffers > 1) { int i; struct intel_renderbuffer *irb; @@ -204,6 +202,8 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb) intel_batchbuffer_flush(intel->batch); intel->front_cliprects = GL_TRUE; colorRegions[0] = intel_get_rb_region(fb, BUFFER_FRONT_LEFT); + + intel->front_buffer_dirty = GL_TRUE; } else { if (!intel->constant_cliprect && intel->front_cliprects) @@ -221,14 +221,6 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb) } } - /* Update culling direction which changes depending on the - * orientation of the buffer: - */ - if (ctx->Driver.FrontFace) - ctx->Driver.FrontFace(ctx, ctx->Polygon.FrontFace); - else - ctx->NewState |= _NEW_POLYGON; - if (!colorRegions[0]) { FALLBACK(intel, INTEL_FALLBACK_DRAW_BUFFER, GL_TRUE); } @@ -260,50 +252,43 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb) /*** *** Stencil buffer *** This can only be hardware accelerated if we're using a - *** combined DEPTH_STENCIL buffer (for now anyway). + *** combined DEPTH_STENCIL buffer. ***/ if (fb->_StencilBuffer && fb->_StencilBuffer->Wrapped) { irbStencil = intel_renderbuffer(fb->_StencilBuffer->Wrapped); if (irbStencil && irbStencil->region) { ASSERT(irbStencil->Base._ActualFormat == GL_DEPTH24_STENCIL8_EXT); FALLBACK(intel, INTEL_FALLBACK_STENCIL_BUFFER, GL_FALSE); - /* need to re-compute stencil hw state */ - if (ctx->Driver.Enable != NULL) - ctx->Driver.Enable(ctx, GL_STENCIL_TEST, ctx->Stencil.Enabled); - else - ctx->NewState |= _NEW_STENCIL; - if (!depthRegion) - depthRegion = irbStencil->region; } else { FALLBACK(intel, INTEL_FALLBACK_STENCIL_BUFFER, GL_TRUE); } } else { - /* XXX FBO: instead of FALSE, pass ctx->Stencil.Enabled ??? */ + /* XXX FBO: instead of FALSE, pass ctx->Stencil._Enabled ??? */ FALLBACK(intel, INTEL_FALLBACK_STENCIL_BUFFER, GL_FALSE); - /* need to re-compute stencil hw state */ - if (ctx->Driver.Enable != NULL) - ctx->Driver.Enable(ctx, GL_STENCIL_TEST, ctx->Stencil.Enabled); - else - ctx->NewState |= _NEW_STENCIL; } /* - * Update depth test state + * Update depth and stencil test state */ if (ctx->Driver.Enable) { - if (ctx->Depth.Test && fb->Visual.depthBits > 0) { - ctx->Driver.Enable(ctx, GL_DEPTH_TEST, GL_TRUE); - } else { - ctx->Driver.Enable(ctx, GL_DEPTH_TEST, GL_FALSE); - } - } else { - ctx->NewState |= _NEW_DEPTH; + ctx->Driver.Enable(ctx, GL_DEPTH_TEST, + (ctx->Depth.Test && fb->Visual.depthBits > 0)); + ctx->Driver.Enable(ctx, GL_STENCIL_TEST, + (ctx->Stencil.Enabled && fb->Visual.stencilBits > 0)); + } + else { + /* Mesa's Stencil._Enabled field is updated when + * _NEW_BUFFERS | _NEW_STENCIL, but i965 code assumes that the value + * only changes with _NEW_STENCIL (which seems sensible). So flag it + * here since this is the _NEW_BUFFERS path. + */ + ctx->NewState |= (_NEW_DEPTH | _NEW_STENCIL); } intel->vtbl.set_draw_region(intel, colorRegions, depthRegion, - fb->_NumColorDrawBuffers); + fb->_NumColorDrawBuffers); /* update viewport since it depends on window size */ #ifdef I915 @@ -322,12 +307,37 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb) ctx->Driver.DepthRange(ctx, ctx->Viewport.Near, ctx->Viewport.Far); + + /* Update culling direction which changes depending on the + * orientation of the buffer: + */ + if (ctx->Driver.FrontFace) + ctx->Driver.FrontFace(ctx, ctx->Polygon.FrontFace); + else + ctx->NewState |= _NEW_POLYGON; } static void intelDrawBuffer(GLcontext * ctx, GLenum mode) { + if ((ctx->DrawBuffer != NULL) && (ctx->DrawBuffer->Name == 0)) { + struct intel_context *const intel = intel_context(ctx); + const GLboolean was_front_buffer_rendering = + intel->is_front_buffer_rendering; + + intel->is_front_buffer_rendering = (mode == GL_FRONT_LEFT) + || (mode == GL_FRONT); + + /* If we weren't front-buffer rendering before but we are now, make sure + * that the front-buffer has actually been allocated. + */ + if (!was_front_buffer_rendering && intel->is_front_buffer_rendering) { + intel_update_renderbuffers(intel->driContext, + intel->driContext->driDrawablePriv); + } + } + intel_draw_buffer(ctx, ctx->DrawBuffer); } diff --git a/shared/intel_clear.c b/shared/intel_clear.c index c3ba50f..19f4763 100644 --- a/shared/intel_clear.c +++ b/shared/intel_clear.c @@ -30,6 +30,7 @@ #include "main/enums.h" #include "main/image.h" #include "main/mtypes.h" +#include "main/arrayobj.h" #include "main/attrib.h" #include "main/blend.h" #include "main/bufferobj.h" @@ -38,6 +39,7 @@ #include "main/enable.h" #include "main/macros.h" #include "main/matrix.h" +#include "main/polygon.h" #include "main/texstate.h" #include "main/shaders.h" #include "main/stencil.h" @@ -65,6 +67,45 @@ BUFFER_BIT_COLOR6 | \ BUFFER_BIT_COLOR7) + +/** + * Per-context one-time init of things for intl_clear_tris(). + * Basically set up a private array object for vertex/color arrays. + */ +static void +init_clear(GLcontext *ctx) +{ + struct intel_context *intel = intel_context(ctx); + struct gl_array_object *arraySave = NULL; + const GLuint arrayBuffer = ctx->Array.ArrayBufferObj->Name; + const GLuint elementBuffer = ctx->Array.ElementArrayBufferObj->Name; + + /* create new array object */ + intel->clear.arrayObj = _mesa_new_array_object(ctx, ~0); + + /* save current array object, bind new one */ + _mesa_reference_array_object(ctx, &arraySave, ctx->Array.ArrayObj); + _mesa_reference_array_object(ctx, &ctx->Array.ArrayObj, intel->clear.arrayObj); + + /* one-time setup of vertex arrays (pos, color) */ + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, 0); + _mesa_BindBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB, 0); + _mesa_ColorPointer(4, GL_FLOAT, 4 * sizeof(GLfloat), intel->clear.color); + _mesa_VertexPointer(3, GL_FLOAT, 3 * sizeof(GLfloat), intel->clear.vertices); + _mesa_Enable(GL_COLOR_ARRAY); + _mesa_Enable(GL_VERTEX_ARRAY); + + /* restore original array object */ + _mesa_reference_array_object(ctx, &ctx->Array.ArrayObj, arraySave); + _mesa_reference_array_object(ctx, &arraySave, NULL); + + /* restore original buffer objects */ + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, arrayBuffer); + _mesa_BindBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB, elementBuffer); +} + + + /** * Perform glClear where mask contains only color, depth, and/or stencil. * @@ -77,14 +118,16 @@ void intel_clear_tris(GLcontext *ctx, GLbitfield mask) { struct intel_context *intel = intel_context(ctx); - GLfloat vertices[4][3]; - GLfloat color[4][4]; GLfloat dst_z; struct gl_framebuffer *fb = ctx->DrawBuffer; int i; GLboolean saved_fp_enable = GL_FALSE, saved_vp_enable = GL_FALSE; GLuint saved_shader_program = 0; unsigned int saved_active_texture; + struct gl_array_object *arraySave = NULL; + + if (!intel->clear.arrayObj) + init_clear(ctx); assert((mask & ~(TRI_CLEAR_COLOR_BITS | BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL)) == 0); @@ -93,10 +136,10 @@ intel_clear_tris(GLcontext *ctx, GLbitfield mask) GL_CURRENT_BIT | GL_DEPTH_BUFFER_BIT | GL_ENABLE_BIT | + GL_POLYGON_BIT | GL_STENCIL_BUFFER_BIT | GL_TRANSFORM_BIT | GL_CURRENT_BIT); - _mesa_PushClientAttrib(GL_CLIENT_VERTEX_ARRAY_BIT); saved_active_texture = ctx->Texture.CurrentUnit; /* Disable existing GL state we don't want to apply to a clear. */ @@ -114,6 +157,7 @@ intel_clear_tris(GLcontext *ctx, GLbitfield mask) _mesa_Disable(GL_CLIP_PLANE3); _mesa_Disable(GL_CLIP_PLANE4); _mesa_Disable(GL_CLIP_PLANE5); + _mesa_PolygonMode(GL_FRONT_AND_BACK, GL_FILL); if (ctx->Extensions.ARB_fragment_program && ctx->FragmentProgram.Enabled) { saved_fp_enable = GL_TRUE; _mesa_Disable(GL_FRAGMENT_PROGRAM_ARB); @@ -146,13 +190,14 @@ intel_clear_tris(GLcontext *ctx, GLbitfield mask) } } + /* save current array object, bind our private one */ + _mesa_reference_array_object(ctx, &arraySave, ctx->Array.ArrayObj); + _mesa_reference_array_object(ctx, &ctx->Array.ArrayObj, intel->clear.arrayObj); + intel_meta_set_passthrough_transform(intel); for (i = 0; i < 4; i++) { - color[i][0] = ctx->Color.ClearColor[0]; - color[i][1] = ctx->Color.ClearColor[1]; - color[i][2] = ctx->Color.ClearColor[2]; - color[i][3] = ctx->Color.ClearColor[3]; + COPY_4FV(intel->clear.color[i], ctx->Color.ClearColor); } /* convert clear Z from [0,1] to NDC coord in [-1,1] */ @@ -161,23 +206,18 @@ intel_clear_tris(GLcontext *ctx, GLbitfield mask) /* Prepare the vertices, which are the same regardless of which buffer we're * drawing to. */ - vertices[0][0] = fb->_Xmin; - vertices[0][1] = fb->_Ymin; - vertices[0][2] = dst_z; - vertices[1][0] = fb->_Xmax; - vertices[1][1] = fb->_Ymin; - vertices[1][2] = dst_z; - vertices[2][0] = fb->_Xmax; - vertices[2][1] = fb->_Ymax; - vertices[2][2] = dst_z; - vertices[3][0] = fb->_Xmin; - vertices[3][1] = fb->_Ymax; - vertices[3][2] = dst_z; - - _mesa_ColorPointer(4, GL_FLOAT, 4 * sizeof(GLfloat), &color); - _mesa_VertexPointer(3, GL_FLOAT, 3 * sizeof(GLfloat), &vertices); - _mesa_Enable(GL_COLOR_ARRAY); - _mesa_Enable(GL_VERTEX_ARRAY); + intel->clear.vertices[0][0] = fb->_Xmin; + intel->clear.vertices[0][1] = fb->_Ymin; + intel->clear.vertices[0][2] = dst_z; + intel->clear.vertices[1][0] = fb->_Xmax; + intel->clear.vertices[1][1] = fb->_Ymin; + intel->clear.vertices[1][2] = dst_z; + intel->clear.vertices[2][0] = fb->_Xmax; + intel->clear.vertices[2][1] = fb->_Ymax; + intel->clear.vertices[2][2] = dst_z; + intel->clear.vertices[3][0] = fb->_Xmin; + intel->clear.vertices[3][1] = fb->_Ymax; + intel->clear.vertices[3][2] = dst_z; while (mask != 0) { GLuint this_mask = 0; @@ -215,14 +255,16 @@ intel_clear_tris(GLcontext *ctx, GLbitfield mask) /* Control writing of the stencil clear value to stencil. */ if (this_mask & BUFFER_BIT_STENCIL) { _mesa_Enable(GL_STENCIL_TEST); - _mesa_StencilOp(GL_REPLACE, GL_REPLACE, GL_REPLACE); - _mesa_StencilFuncSeparate(GL_FRONT, GL_ALWAYS, ctx->Stencil.Clear, + _mesa_StencilOpSeparate(GL_FRONT_AND_BACK, + GL_REPLACE, GL_REPLACE, GL_REPLACE); + _mesa_StencilFuncSeparate(GL_FRONT_AND_BACK, GL_ALWAYS, + ctx->Stencil.Clear, ctx->Stencil.WriteMask[0]); } else { _mesa_Disable(GL_STENCIL_TEST); } - CALL_DrawArrays(ctx->Exec, (GL_TRIANGLE_FAN, 0, 4)); + _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4); mask &= ~this_mask; } @@ -238,8 +280,11 @@ intel_clear_tris(GLcontext *ctx, GLbitfield mask) if (saved_shader_program) _mesa_UseProgramObjectARB(saved_shader_program); - _mesa_PopClientAttrib(); _mesa_PopAttrib(); + + /* restore current array object */ + _mesa_reference_array_object(ctx, &ctx->Array.ArrayObj, arraySave); + _mesa_reference_array_object(ctx, &arraySave, NULL); } static const char *buffer_names[] = { @@ -247,13 +292,10 @@ static const char *buffer_names[] = { [BUFFER_BACK_LEFT] = "back", [BUFFER_FRONT_RIGHT] = "front right", [BUFFER_BACK_RIGHT] = "back right", - [BUFFER_AUX0] = "aux0", - [BUFFER_AUX1] = "aux1", - [BUFFER_AUX2] = "aux2", - [BUFFER_AUX3] = "aux3", [BUFFER_DEPTH] = "depth", [BUFFER_STENCIL] = "stencil", [BUFFER_ACCUM] = "accum", + [BUFFER_AUX0] = "aux0", [BUFFER_COLOR0] = "color0", [BUFFER_COLOR1] = "color1", [BUFFER_COLOR2] = "color2", diff --git a/shared/intel_context.c b/shared/intel_context.c index 2e76e93..cfd983d 100644 --- a/shared/intel_context.c +++ b/shared/intel_context.c @@ -28,8 +28,7 @@ #include "main/glheader.h" #include "main/context.h" -#include "main/matrix.h" -#include "main/simple_list.h" +#include "main/arrayobj.h" #include "main/extensions.h" #include "main/framebuffer.h" #include "main/imports.h" @@ -38,68 +37,42 @@ #include "swrast/swrast.h" #include "swrast_setup/swrast_setup.h" #include "tnl/tnl.h" - -#include "tnl/t_pipeline.h" -#include "tnl/t_vertex.h" - #include "drivers/common/driverfuncs.h" -#include "intel_screen.h" - #include "i830_dri.h" #include "intel_chipset.h" #include "intel_buffers.h" #include "intel_tex.h" #include "intel_batchbuffer.h" -#include "intel_blit.h" #include "intel_clear.h" +#include "intel_extensions.h" #include "intel_pixel.h" #include "intel_regions.h" #include "intel_buffer_objects.h" #include "intel_fbo.h" #include "intel_decode.h" #include "intel_bufmgr.h" +#include "intel_screen.h" #include "intel_swapbuffers.h" #include "drirenderbuffer.h" #include "vblank.h" #include "utils.h" #include "xmlpool.h" /* for symbolic values of enum-type options */ + + #ifndef INTEL_DEBUG int INTEL_DEBUG = (0); #endif -#define need_GL_ARB_multisample -#define need_GL_ARB_occlusion_query -#define need_GL_ARB_point_parameters -#define need_GL_ARB_shader_objects -#define need_GL_ARB_texture_compression -#define need_GL_ARB_vertex_buffer_object -#define need_GL_ARB_vertex_program -#define need_GL_ARB_vertex_shader -#define need_GL_ARB_window_pos -#define need_GL_EXT_blend_color -#define need_GL_EXT_blend_equation_separate -#define need_GL_EXT_blend_func_separate -#define need_GL_EXT_blend_minmax -#define need_GL_EXT_cull_vertex -#define need_GL_EXT_fog_coord -#define need_GL_EXT_framebuffer_object -#define need_GL_EXT_multi_draw_arrays -#define need_GL_EXT_point_parameters -#define need_GL_EXT_secondary_color -#define need_GL_ATI_separate_stencil -#define need_GL_NV_point_sprite -#define need_GL_NV_vertex_program -#define need_GL_VERSION_2_0 -#define need_GL_VERSION_2_1 - -#include "extension_helper.h" - -#define DRIVER_DATE "20090326 2009Q1 RC2" + +#define DRIVER_DATE "20090712 2009Q2 RC3" #define DRIVER_DATE_GEM "GEM " DRIVER_DATE + +static void intel_flush(GLcontext *ctx, GLboolean needs_mi_flush); + static const GLubyte * intelGetString(GLcontext * ctx, GLenum name) { @@ -203,6 +176,24 @@ intelGetString(GLcontext * ctx, GLenum name) } } +static unsigned +intel_bits_per_pixel(const struct intel_renderbuffer *rb) +{ + switch (rb->Base._ActualFormat) { + case GL_RGB5: + case GL_DEPTH_COMPONENT16: + return 16; + case GL_RGB8: + case GL_RGBA8: + case GL_DEPTH_COMPONENT24: + case GL_DEPTH24_STENCIL8_EXT: + case GL_STENCIL_INDEX8_EXT: + return 32; + default: + return 0; + } +} + void intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) { @@ -210,7 +201,7 @@ intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) struct intel_renderbuffer *rb; struct intel_region *region, *depth_region; struct intel_context *intel = context->driverPrivate; - __DRIbuffer *buffers; + __DRIbuffer *buffers = NULL; __DRIscreen *screen; int i, count; unsigned int attachments[10]; @@ -222,22 +213,63 @@ intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) screen = intel->intelScreen->driScrnPriv; - i = 0; - if (intel_fb->color_rb[0]) - attachments[i++] = __DRI_BUFFER_FRONT_LEFT; - if (intel_fb->color_rb[1]) - attachments[i++] = __DRI_BUFFER_BACK_LEFT; - if (intel_get_renderbuffer(&intel_fb->Base, BUFFER_DEPTH)) - attachments[i++] = __DRI_BUFFER_DEPTH; - if (intel_get_renderbuffer(&intel_fb->Base, BUFFER_STENCIL)) - attachments[i++] = __DRI_BUFFER_STENCIL; - - buffers = (*screen->dri2.loader->getBuffers)(drawable, - &drawable->w, - &drawable->h, - attachments, i, - &count, - drawable->loaderPrivate); + if (screen->dri2.loader + && (screen->dri2.loader->base.version > 2) + && (screen->dri2.loader->getBuffersWithFormat != NULL)) { + struct intel_renderbuffer *depth_rb; + struct intel_renderbuffer *stencil_rb; + + i = 0; + if ((intel->is_front_buffer_rendering || !intel_fb->color_rb[1]) + && intel_fb->color_rb[0]) { + attachments[i++] = __DRI_BUFFER_FRONT_LEFT; + attachments[i++] = intel_bits_per_pixel(intel_fb->color_rb[0]); + } + + if (intel_fb->color_rb[1]) { + attachments[i++] = __DRI_BUFFER_BACK_LEFT; + attachments[i++] = intel_bits_per_pixel(intel_fb->color_rb[1]); + } + + depth_rb = intel_get_renderbuffer(&intel_fb->Base, BUFFER_DEPTH); + stencil_rb = intel_get_renderbuffer(&intel_fb->Base, BUFFER_STENCIL); + + if ((depth_rb != NULL) && (stencil_rb != NULL)) { + attachments[i++] = __DRI_BUFFER_DEPTH_STENCIL; + attachments[i++] = intel_bits_per_pixel(depth_rb); + } else if (depth_rb != NULL) { + attachments[i++] = __DRI_BUFFER_DEPTH; + attachments[i++] = intel_bits_per_pixel(depth_rb); + } else if (stencil_rb != NULL) { + attachments[i++] = __DRI_BUFFER_STENCIL; + attachments[i++] = intel_bits_per_pixel(stencil_rb); + } + + buffers = + (*screen->dri2.loader->getBuffersWithFormat)(drawable, + &drawable->w, + &drawable->h, + attachments, i / 2, + &count, + drawable->loaderPrivate); + } else if (screen->dri2.loader) { + i = 0; + if (intel_fb->color_rb[0]) + attachments[i++] = __DRI_BUFFER_FRONT_LEFT; + if (intel_fb->color_rb[1]) + attachments[i++] = __DRI_BUFFER_BACK_LEFT; + if (intel_get_renderbuffer(&intel_fb->Base, BUFFER_DEPTH)) + attachments[i++] = __DRI_BUFFER_DEPTH; + if (intel_get_renderbuffer(&intel_fb->Base, BUFFER_STENCIL)) + attachments[i++] = __DRI_BUFFER_STENCIL; + + buffers = (*screen->dri2.loader->getBuffers)(drawable, + &drawable->w, + &drawable->h, + attachments, i, + &count, + drawable->loaderPrivate); + } if (buffers == NULL) return; @@ -265,6 +297,11 @@ intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) region_name = "dri2 front buffer"; break; + case __DRI_BUFFER_FAKE_FRONT_LEFT: + rb = intel_fb->color_rb[0]; + region_name = "dri2 fake front buffer"; + break; + case __DRI_BUFFER_BACK_LEFT: rb = intel_fb->color_rb[1]; region_name = "dri2 back buffer"; @@ -275,6 +312,11 @@ intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) region_name = "dri2 depth buffer"; break; + case __DRI_BUFFER_DEPTH_STENCIL: + rb = intel_get_renderbuffer(&intel_fb->Base, BUFFER_DEPTH); + region_name = "dri2 depth / stencil buffer"; + break; + case __DRI_BUFFER_STENCIL: rb = intel_get_renderbuffer(&intel_fb->Base, BUFFER_STENCIL); region_name = "dri2 stencil buffer"; @@ -321,6 +363,23 @@ intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) intel_renderbuffer_set_region(rb, region); intel_region_release(®ion); + + if (buffers[i].attachment == __DRI_BUFFER_DEPTH_STENCIL) { + rb = intel_get_renderbuffer(&intel_fb->Base, BUFFER_STENCIL); + if (rb != NULL) { + struct intel_region *stencil_region = NULL; + + if (rb->region) { + dri_bo_flink(rb->region->buffer, &name); + if (name == buffers[i].name) + continue; + } + + intel_region_reference(&stencil_region, region); + intel_renderbuffer_set_region(rb, stencil_region); + intel_region_release(&stencil_region); + } + } } driUpdateFramebufferSize(&intel->ctx, drawable); @@ -337,9 +396,20 @@ intel_viewport(GLcontext *ctx, GLint x, GLint y, GLsizei w, GLsizei h) if (!driContext->driScreenPriv->dri2.enabled) return; - intel_update_renderbuffers(driContext, driContext->driDrawablePriv); - if (driContext->driDrawablePriv != driContext->driReadablePriv) - intel_update_renderbuffers(driContext, driContext->driReadablePriv); + if (!intel->internal_viewport_call && ctx->DrawBuffer->Name == 0) { + /* If we're rendering to the fake front buffer, make sure all the pending + * drawing has landed on the real front buffer. Otherwise when we + * eventually get to DRI2GetBuffersWithFormat the stale real front + * buffer contents will get copied to the new fake front buffer. + */ + if (intel->is_front_buffer_rendering) { + intel_flush(ctx, GL_FALSE); + } + + intel_update_renderbuffers(driContext, driContext->driDrawablePriv); + if (driContext->driDrawablePriv != driContext->driReadablePriv) + intel_update_renderbuffers(driContext, driContext->driReadablePriv); + } old_viewport = ctx->Driver.Viewport; ctx->Driver.Viewport = NULL; @@ -349,112 +419,6 @@ intel_viewport(GLcontext *ctx, GLint x, GLint y, GLsizei w, GLsizei h) ctx->Driver.Viewport = old_viewport; } -/** - * Extension strings exported by the intel driver. - * - * Extensions supported by all chips supported by i830_dri, i915_dri, or - * i965_dri. - */ -static const struct dri_extension card_extensions[] = { - { "GL_ARB_multisample", GL_ARB_multisample_functions }, - { "GL_ARB_multitexture", NULL }, - { "GL_ARB_point_parameters", GL_ARB_point_parameters_functions }, - { "GL_ARB_texture_border_clamp", NULL }, - { "GL_ARB_texture_compression", GL_ARB_texture_compression_functions }, - { "GL_ARB_texture_cube_map", NULL }, - { "GL_ARB_texture_env_add", NULL }, - { "GL_ARB_texture_env_combine", NULL }, - { "GL_ARB_texture_env_crossbar", NULL }, - { "GL_ARB_texture_env_dot3", NULL }, - { "GL_ARB_texture_mirrored_repeat", NULL }, - { "GL_ARB_texture_rectangle", NULL }, - { "GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions }, - { "GL_ARB_vertex_program", GL_ARB_vertex_program_functions }, - { "GL_ARB_window_pos", GL_ARB_window_pos_functions }, - { "GL_EXT_blend_color", GL_EXT_blend_color_functions }, - { "GL_EXT_blend_equation_separate", GL_EXT_blend_equation_separate_functions }, - { "GL_EXT_blend_func_separate", GL_EXT_blend_func_separate_functions }, - { "GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions }, - { "GL_EXT_blend_logic_op", NULL }, - { "GL_EXT_blend_subtract", NULL }, - { "GL_EXT_cull_vertex", GL_EXT_cull_vertex_functions }, - { "GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, - { "GL_EXT_multi_draw_arrays", GL_EXT_multi_draw_arrays_functions }, - { "GL_EXT_packed_depth_stencil", NULL }, - { "GL_EXT_secondary_color", GL_EXT_secondary_color_functions }, - { "GL_EXT_stencil_wrap", NULL }, - { "GL_EXT_texture_edge_clamp", NULL }, - { "GL_EXT_texture_env_combine", NULL }, - { "GL_EXT_texture_env_dot3", NULL }, - { "GL_EXT_texture_filter_anisotropic", NULL }, - { "GL_EXT_texture_lod_bias", NULL }, - { "GL_3DFX_texture_compression_FXT1", NULL }, - { "GL_APPLE_client_storage", NULL }, - { "GL_MESA_pack_invert", NULL }, - { "GL_MESA_ycbcr_texture", NULL }, - { "GL_NV_blend_square", NULL }, - { "GL_NV_point_sprite", GL_NV_point_sprite_functions }, - { "GL_NV_vertex_program", GL_NV_vertex_program_functions }, - { "GL_NV_vertex_program1_1", NULL }, - { "GL_SGIS_generate_mipmap", NULL }, - { NULL, NULL } -}; - -static const struct dri_extension brw_extensions[] = { - { "GL_ARB_depth_texture", NULL }, - { "GL_ARB_draw_buffers", NULL }, - { "GL_ARB_fragment_program", NULL }, - { "GL_ARB_fragment_program_shadow", NULL }, - { "GL_ARB_fragment_shader", NULL }, - { "GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions }, - { "GL_ARB_point_sprite", NULL }, - { "GL_ARB_shader_objects", GL_ARB_shader_objects_functions }, - { "GL_ARB_shading_language_100", GL_VERSION_2_0_functions }, -#if 0 - /* Support for GLSL 1.20 is currently broken in core Mesa. - */ - { "GL_ARB_shading_language_120", GL_VERSION_2_1_functions }, -#endif - { "GL_ARB_shadow", NULL }, - { "GL_ARB_texture_non_power_of_two", NULL }, - { "GL_ARB_vertex_shader", GL_ARB_vertex_shader_functions }, - { "GL_EXT_shadow_funcs", NULL }, - { "GL_EXT_texture_sRGB", NULL }, - { "GL_ATI_separate_stencil", GL_ATI_separate_stencil_functions }, - { "GL_ATI_texture_env_combine3", NULL }, - { NULL, NULL } -}; - -static const struct dri_extension arb_oq_extensions[] = { - { NULL, NULL } -}; - -static const struct dri_extension ttm_extensions[] = { - { "GL_ARB_pixel_buffer_object", NULL }, - { "GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions }, - { NULL, NULL } -}; - -/** - * Initializes potential list of extensions if ctx == NULL, or actually enables - * extensions for a context. - */ -void intelInitExtensions(GLcontext *ctx, GLboolean enable_imaging) -{ - struct intel_context *intel = ctx?intel_context(ctx):NULL; - - /* Disable imaging extension until convolution is working in teximage paths. - */ - enable_imaging = GL_FALSE; - - driInitExtensions(ctx, card_extensions, enable_imaging); - - if (intel == NULL || intel->ttm) - driInitExtensions(ctx, ttm_extensions, GL_FALSE); - - if (intel == NULL || IS_965(intel->intelScreen->deviceID)) - driInitExtensions(ctx, brw_extensions, GL_FALSE); -} static const struct dri_debug_control debug_control[] = { { "tex", DEBUG_TEXTURE}, @@ -525,6 +489,27 @@ intel_flush(GLcontext *ctx, GLboolean needs_mi_flush) if (intel->batch->map != intel->batch->ptr) intel_batchbuffer_flush(intel->batch); + + if ((ctx->DrawBuffer->Name == 0) && intel->front_buffer_dirty) { + __DRIscreen *const screen = intel->intelScreen->driScrnPriv; + + if (screen->dri2.loader && + (screen->dri2.loader->base.version >= 2) + && (screen->dri2.loader->flushFrontBuffer != NULL)) { + (*screen->dri2.loader->flushFrontBuffer)(intel->driDrawable, + intel->driDrawable->loaderPrivate); + + /* Only clear the dirty bit if front-buffer rendering is no longer + * enabled. This is done so that the dirty bit can only be set in + * glDrawBuffer. Otherwise the dirty bit would have to be set at + * each of N places that do rendering. This has worse performances, + * but it is much easier to get correct. + */ + if (intel->is_front_buffer_rendering) { + intel->front_buffer_dirty = GL_FALSE; + } + } + } } void @@ -533,7 +518,7 @@ intelFlush(GLcontext * ctx) intel_flush(ctx, GL_FALSE); } -void +static void intel_glFlush(GLcontext *ctx) { intel_flush(ctx, GL_TRUE); @@ -552,7 +537,7 @@ intelFinish(GLcontext * ctx) irb = intel_renderbuffer(fb->_ColorDrawBuffers[i]); - if (irb->region) + if (irb && irb->region) dri_bo_wait_rendering(irb->region->buffer); } if (fb->_DepthBuffer) { @@ -636,8 +621,6 @@ intelInitContext(struct intel_context *intel, } } - ctx->Const.MaxTextureMaxAnisotropy = 2.0; - /* This doesn't yet catch all non-conformant rendering, but it's a * start. */ @@ -719,8 +702,6 @@ intelInitContext(struct intel_context *intel, intel->do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS); - _math_matrix_ctr(&intel->ViewportMatrix); - if (IS_965(intelScreen->deviceID) && !intel->intelScreen->irq_active) { _mesa_printf("IRQs not active. Exiting\n"); exit(1); @@ -756,6 +737,16 @@ intelInitContext(struct intel_context *intel, intel->no_rast = 1; } + if (driQueryOptionb(&intel->optionCache, "always_flush_batch")) { + fprintf(stderr, "flushing batchbuffer before/after each draw call\n"); + intel->always_flush_batch = 1; + } + + if (driQueryOptionb(&intel->optionCache, "always_flush_cache")) { + fprintf(stderr, "flushing GPU caches before/after each draw call\n"); + intel->always_flush_cache = 1; + } + /* Disable all hardware rendering (skip emitting batches and fences/waits * to the kernel) */ @@ -776,6 +767,9 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv) INTEL_FIREVERTICES(intel); + if (intel->clear.arrayObj) + _mesa_delete_array_object(&intel->ctx, intel->clear.arrayObj); + intel->vtbl.destroy(intel); release_texture_heaps = (intel->ctx.Shared->RefCount == 1); diff --git a/shared/intel_context.h b/shared/intel_context.h index 8a8e59f..b5cf7e6 100644 --- a/shared/intel_context.h +++ b/shared/intel_context.h @@ -48,6 +48,8 @@ #define DV_PF_555 (1<<8) #define DV_PF_565 (2<<8) #define DV_PF_8888 (3<<8) +#define DV_PF_4444 (8<<8) +#define DV_PF_1555 (9<<8) struct intel_region; struct intel_context; @@ -100,7 +102,6 @@ struct intel_context GLuint num_regions); GLuint (*flush_cmd) (void); - void (*emit_flush) (struct intel_context *intel, GLuint unused); void (*reduced_primitive_state) (struct intel_context * intel, GLenum rprim); @@ -181,6 +182,7 @@ struct intel_context struct intel_region *front_region; struct intel_region *back_region; struct intel_region *depth_region; + GLboolean internal_viewport_call; /** * This value indicates that the kernel memory manager is being used @@ -213,6 +215,14 @@ struct intel_context GLuint ClearColor565; GLuint ClearColor8888; + /* info for intel_clear_tris() */ + struct + { + struct gl_array_object *arrayObj; + GLfloat vertices[4][3]; + GLfloat color[4][4]; + } clear; + /* Offsets of fields within the current vertex: */ GLuint coloroffset; @@ -229,6 +239,8 @@ struct intel_context GLboolean hw_stipple; GLboolean depth_buffer_is_float; GLboolean no_rast; + GLboolean always_flush_batch; + GLboolean always_flush_cache; /* 0 - nonconformant, best performance; * 1 - fallback to sw for known conformance bugs @@ -260,11 +272,29 @@ struct intel_context * flush time while the lock is held. */ GLboolean constant_cliprect; + /** * In !constant_cliprect mode, set to true if the front cliprects should be * used instead of back. */ GLboolean front_cliprects; + + /** + * Set if rendering has occured to the drawable's front buffer. + * + * This is used in the DRI2 case to detect that glFlush should also copy + * the contents of the fake front buffer to the real front buffer. + */ + GLboolean front_buffer_dirty; + + /** + * Track whether front-buffer rendering is currently enabled + * + * A separate flag is used to track this in order to support MRT more + * easily. + */ + GLboolean is_front_buffer_rendering; + drm_clip_rect_t fboRect; /**< cliprect for FBO rendering */ int perf_boxes; @@ -317,6 +347,7 @@ extern char *__progname; #define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) #define ALIGN(value, alignment) ((value + alignment - 1) & ~(alignment - 1)) +#define IS_POWER_OF_TWO(val) (((val) & (val - 1)) == 0) #define INTEL_FIREVERTICES(intel) \ do { \ @@ -440,10 +471,8 @@ extern void intelGetLock(struct intel_context *intel, GLuint flags); extern void intelFinish(GLcontext * ctx); extern void intelFlush(GLcontext * ctx); -extern void intel_glFlush(GLcontext *ctx); extern void intelInitDriverFunctions(struct dd_function_table *functions); -extern void intelInitExtensions(GLcontext *ctx, GLboolean enable_imaging); /* ================================================================ @@ -530,4 +559,10 @@ intel_context(GLcontext * ctx) return (struct intel_context *) ctx; } +static INLINE GLboolean +is_power_of_two(uint32_t value) +{ + return (value & (value - 1)) == 0; +} + #endif diff --git a/shared/intel_decode.c b/shared/intel_decode.c index 136221c..a9dfe28 100644 --- a/shared/intel_decode.c +++ b/shared/intel_decode.c @@ -800,6 +800,7 @@ static int decode_3d_1d(uint32_t *data, int count, uint32_t hw_offset, int *failures, int i830) { unsigned int len, i, c, opcode, word, map, sampler, instr; + char *format; struct { uint32_t opcode; @@ -1001,6 +1002,35 @@ decode_3d_1d(uint32_t *data, int count, uint32_t hw_offset, int *failures, int i (*failures)++; } return len; + case 0x85: + len = (data[0] & 0x0000000f) + 2; + + if (len != 2) + fprintf(out, "Bad count in 3DSTATE_DEST_BUFFER_VARIABLES\n"); + if (count < 2) + BUFFER_FAIL(count, len, "3DSTATE_DEST_BUFFER_VARIABLES"); + + instr_out(data, hw_offset, 0, + "3DSTATE_DEST_BUFFER_VARIABLES\n"); + + switch ((data[1] >> 8) & 0xf) { + case 0x0: format = "g8"; break; + case 0x1: format = "x1r5g5b5"; break; + case 0x2: format = "r5g6b5"; break; + case 0x3: format = "a8r8g8b8"; break; + case 0x4: format = "ycrcb_swapy"; break; + case 0x5: format = "ycrcb_normal"; break; + case 0x6: format = "ycrcb_swapuv"; break; + case 0x7: format = "ycrcb_swapuvy"; break; + case 0x8: format = "a4r4g4b4"; break; + case 0x9: format = "a1r5g5b5"; break; + case 0xa: format = "a2r10g10b10"; break; + default: format = "BAD"; break; + } + instr_out(data, hw_offset, 1, "%s format, early Z %sabled\n", + format, + (data[1] & (1 << 31)) ? "en" : "dis"); + return len; } for (opcode = 0; opcode < sizeof(opcodes_3d_1d) / sizeof(opcodes_3d_1d[0]); @@ -1513,7 +1543,7 @@ decode_3d_965(uint32_t *data, int count, uint32_t hw_offset, int *failures) for (i = 1; i < len;) { instr_out(data, hw_offset, i, "buffer %d: %svalid, type 0x%04x, " - "src offset 0x%04xd bytes\n", + "src offset 0x%04x bytes\n", data[i] >> 27, data[i] & (1 << 26) ? "" : "in", (data[i] >> 16) & 0x1ff, @@ -1595,7 +1625,7 @@ decode_3d_965(uint32_t *data, int count, uint32_t hw_offset, int *failures) "3DPRIMITIVE: %s %s\n", get_965_prim_type(data[0]), (data[0] & (1 << 15)) ? "random" : "sequential"); - instr_out(data, hw_offset, 1, "primitive count\n"); + instr_out(data, hw_offset, 1, "vertex count\n"); instr_out(data, hw_offset, 2, "start vertex\n"); instr_out(data, hw_offset, 3, "instance count\n"); instr_out(data, hw_offset, 4, "start instance\n"); diff --git a/shared/intel_depthstencil.c b/shared/intel_depthstencil.c deleted file mode 100644 index 354b3bf..0000000 --- a/shared/intel_depthstencil.c +++ /dev/null @@ -1,261 +0,0 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "main/glheader.h" -#include "main/imports.h" -#include "main/context.h" -#include "main/depthstencil.h" -#include "main/fbobject.h" -#include "main/framebuffer.h" -#include "main/hash.h" -#include "main/mtypes.h" -#include "main/renderbuffer.h" - -#include "intel_context.h" -#include "intel_fbo.h" -#include "intel_depthstencil.h" -#include "intel_regions.h" -#include "intel_span.h" - -/** - * The GL_EXT_framebuffer_object allows the user to create their own - * framebuffer objects consisting of color renderbuffers (0 or more), - * depth renderbuffers (0 or 1) and stencil renderbuffers (0 or 1). - * - * The spec considers depth and stencil renderbuffers to be totally independent - * buffers. In reality, most graphics hardware today uses a combined - * depth+stencil buffer (one 32-bit pixel = 24 bits of Z + 8 bits of stencil). - * - * This causes difficulty because the user may create some number of depth - * renderbuffers and some number of stencil renderbuffers and bind them - * together in framebuffers in any combination. - * - * This code manages all that. - * - * 1. Depth renderbuffers are always allocated in hardware as 32bpp - * GL_DEPTH24_STENCIL8 buffers. - * - * 2. Stencil renderbuffers are initially allocated in software as 8bpp - * GL_STENCIL_INDEX8 buffers. - * - * 3. Depth and Stencil renderbuffers use the PairedStencil and PairedDepth - * fields (respectively) to indicate if the buffer's currently paired - * with another stencil or depth buffer (respectively). - * - * 4. When a depth and stencil buffer are initially both attached to the - * current framebuffer, we merge the stencil buffer values into the - * depth buffer (really a depth+stencil buffer). The then hardware uses - * the combined buffer. - * - * 5. Whenever a depth or stencil buffer is reallocated (with - * glRenderbufferStorage) we undo the pairing and copy the stencil values - * from the combined depth/stencil buffer back to the stencil-only buffer. - * - * 6. We also undo the pairing when we find a change in buffer bindings. - * - * 7. If a framebuffer is only using a depth renderbuffer (no stencil), we - * just use the combined depth/stencil buffer and ignore the stencil values. - * - * 8. If a framebuffer is only using a stencil renderbuffer (no depth) we have - * to promote the 8bpp software stencil buffer to a 32bpp hardware - * depth+stencil buffer. - * - */ - -/** - * Undo the pairing/interleaving between depth and stencil buffers. - * irb should be a depth/stencil or stencil renderbuffer. - */ -void -intel_unpair_depth_stencil(GLcontext *ctx, struct intel_renderbuffer *irb) -{ - struct intel_context *intel = intel_context(ctx); - struct gl_renderbuffer *rb = &irb->Base; - - if (irb->PairedStencil) { - /* irb is a depth/stencil buffer */ - struct gl_renderbuffer *stencilRb; - struct intel_renderbuffer *stencilIrb; - - ASSERT(rb->_ActualFormat == GL_DEPTH24_STENCIL8_EXT); - - stencilRb = _mesa_lookup_renderbuffer(ctx, irb->PairedStencil); - stencilIrb = intel_renderbuffer(stencilRb); - if (stencilIrb) { - /* need to extract stencil values from the depth buffer */ - ASSERT(stencilIrb->PairedDepth == rb->Name); - intel_renderbuffer_map(intel, rb); - intel_renderbuffer_map(intel, stencilRb); -#if 0 - /* disable for now */ - _mesa_extract_stencil(ctx, rb, stencilRb); -#endif - intel_renderbuffer_unmap(intel, stencilRb); - intel_renderbuffer_unmap(intel, rb); - stencilIrb->PairedDepth = 0; - } - irb->PairedStencil = 0; - } - else if (irb->PairedDepth) { - /* irb is a stencil buffer */ - struct gl_renderbuffer *depthRb; - struct intel_renderbuffer *depthIrb; - - ASSERT(rb->_ActualFormat == GL_STENCIL_INDEX8_EXT || - rb->_ActualFormat == GL_DEPTH24_STENCIL8_EXT); - - depthRb = _mesa_lookup_renderbuffer(ctx, irb->PairedDepth); - depthIrb = intel_renderbuffer(depthRb); - if (depthIrb) { - /* need to extract stencil values from the depth buffer */ - ASSERT(depthIrb->PairedStencil == rb->Name); - intel_renderbuffer_map(intel, rb); - intel_renderbuffer_map(intel, depthRb); -#if 0 - /* disable for now */ - _mesa_extract_stencil(ctx, depthRb, rb); -#endif - intel_renderbuffer_unmap(intel, depthRb); - intel_renderbuffer_unmap(intel, rb); - depthIrb->PairedStencil = 0; - } - irb->PairedDepth = 0; - } - else { - _mesa_problem(ctx, "Problem in undo_depth_stencil_pairing"); - } - - ASSERT(irb->PairedStencil == 0); - ASSERT(irb->PairedDepth == 0); -} - - -/** - * Examine the depth and stencil renderbuffers which are attached to the - * framebuffer. If both depth and stencil are attached, make sure that the - * renderbuffers are 'paired' (combined). If only depth or only stencil is - * attached, undo any previous pairing. - * - * Must be called if NewState & _NEW_BUFFER (when renderbuffer attachments - * change, for example). - */ -void -intel_validate_paired_depth_stencil(GLcontext * ctx, - struct gl_framebuffer *fb) -{ - struct intel_context *intel = intel_context(ctx); - struct intel_renderbuffer *depthRb, *stencilRb; - - depthRb = intel_get_renderbuffer(fb, BUFFER_DEPTH); - stencilRb = intel_get_renderbuffer(fb, BUFFER_STENCIL); - - if (depthRb && stencilRb) { - if (depthRb == stencilRb) { - /* Using a user-created combined depth/stencil buffer. - * Nothing to do. - */ - ASSERT(depthRb->Base._BaseFormat == GL_DEPTH_STENCIL_EXT); - ASSERT(depthRb->Base._ActualFormat == GL_DEPTH24_STENCIL8_EXT); - } - else { - /* Separate depth/stencil buffers, need to interleave now */ - ASSERT(depthRb->Base._BaseFormat == GL_DEPTH_COMPONENT || - depthRb->Base._BaseFormat == GL_DEPTH_STENCIL); - ASSERT(stencilRb->Base._BaseFormat == GL_STENCIL_INDEX || - stencilRb->Base._BaseFormat == GL_DEPTH_STENCIL); - - /* may need to interleave depth/stencil now */ - if (depthRb->PairedStencil == stencilRb->Base.Name) { - /* OK, the depth and stencil buffers are already interleaved */ - ASSERT(stencilRb->PairedDepth == depthRb->Base.Name); - } - else { - /* need to setup new pairing/interleaving */ - if (depthRb->PairedStencil) { - intel_unpair_depth_stencil(ctx, depthRb); - } - if (stencilRb->PairedDepth) { - intel_unpair_depth_stencil(ctx, stencilRb); - } - - ASSERT(depthRb->Base._ActualFormat == GL_DEPTH24_STENCIL8_EXT); - ASSERT(stencilRb->Base._ActualFormat == GL_STENCIL_INDEX8_EXT || - stencilRb->Base._ActualFormat == GL_DEPTH24_STENCIL8_EXT); - - /* establish new pairing: interleave stencil into depth buffer */ - intel_renderbuffer_map(intel, &depthRb->Base); - intel_renderbuffer_map(intel, &stencilRb->Base); - _mesa_insert_stencil(ctx, &depthRb->Base, &stencilRb->Base); - intel_renderbuffer_unmap(intel, &stencilRb->Base); - intel_renderbuffer_unmap(intel, &depthRb->Base); - depthRb->PairedStencil = stencilRb->Base.Name; - stencilRb->PairedDepth = depthRb->Base.Name; - } - - } - } - else if (depthRb) { - /* Depth buffer but no stencil buffer. - * We'll use a GL_DEPTH24_STENCIL8 buffer and ignore the stencil bits. - */ - /* can't assert this until storage is allocated: - ASSERT(depthRb->Base._ActualFormat == GL_DEPTH24_STENCIL8_EXT); - */ - /* intel_undo any previous pairing */ - if (depthRb->PairedStencil) { - intel_unpair_depth_stencil(ctx, depthRb); - } - } - else if (stencilRb) { - /* Stencil buffer but no depth buffer. - * Since h/w doesn't typically support just 8bpp stencil w/out Z, - * we'll use a GL_DEPTH24_STENCIL8 buffer and ignore the depth bits. - */ - /* undo any previous pairing */ - if (stencilRb->PairedDepth) { - intel_unpair_depth_stencil(ctx, stencilRb); - } - if (stencilRb->Base._ActualFormat == GL_STENCIL_INDEX8_EXT) { - /* promote buffer to GL_DEPTH24_STENCIL8 for hw rendering */ - _mesa_promote_stencil(ctx, &stencilRb->Base); - ASSERT(stencilRb->Base._ActualFormat == GL_DEPTH24_STENCIL8_EXT); - } - } - - /* Finally, update the fb->_DepthBuffer and fb->_StencilBuffer fields */ - _mesa_update_depth_buffer(ctx, fb, BUFFER_DEPTH); - if (depthRb && depthRb->PairedStencil) - _mesa_update_stencil_buffer(ctx, fb, BUFFER_DEPTH); - else - _mesa_update_stencil_buffer(ctx, fb, BUFFER_STENCIL); - - - /* The hardware should use fb->Attachment[BUFFER_DEPTH].Renderbuffer - * first, if present, then fb->Attachment[BUFFER_STENCIL].Renderbuffer - * if present. - */ -} diff --git a/shared/intel_depthstencil.h b/shared/intel_depthstencil.h deleted file mode 100644 index 740eb0d..0000000 --- a/shared/intel_depthstencil.h +++ /dev/null @@ -1,15 +0,0 @@ - -#ifndef INTEL_DEPTH_STENCIL_H -#define INTEL_DEPTH_STENCIL_H - -#include "intel_fbo.h" - -extern void -intel_unpair_depth_stencil(GLcontext * ctx, struct intel_renderbuffer *irb); - -extern void -intel_validate_paired_depth_stencil(GLcontext * ctx, - struct gl_framebuffer *fb); - - -#endif /* INTEL_DEPTH_STENCIL_H */ diff --git a/shared/intel_depthtmp.h b/shared/intel_depthtmp.h new file mode 100644 index 0000000..16d7708 --- /dev/null +++ b/shared/intel_depthtmp.h @@ -0,0 +1,54 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +/** + * Wrapper around the depthtmp.h macrofest to generate spans code for + * all the tiling styles. + */ + +#define VALUE_TYPE INTEL_VALUE_TYPE +#define WRITE_DEPTH(_x, _y, d) INTEL_WRITE_DEPTH(NO_TILE(_x, _y), d) +#define READ_DEPTH(d, _x, _y) d = INTEL_READ_DEPTH(NO_TILE(_x, _y)) +#define TAG(x) INTEL_TAG(intel##x) +#include "depthtmp.h" + +#define VALUE_TYPE INTEL_VALUE_TYPE +#define WRITE_DEPTH(_x, _y, d) INTEL_WRITE_DEPTH(X_TILE(_x, _y), d) +#define READ_DEPTH(d, _x, _y) d = INTEL_READ_DEPTH(X_TILE(_x, _y)) +#define TAG(x) INTEL_TAG(intel_XTile_##x) +#include "depthtmp.h" + +#define VALUE_TYPE INTEL_VALUE_TYPE +#define WRITE_DEPTH(_x, _y, d) INTEL_WRITE_DEPTH(Y_TILE(_x, _y), d) +#define READ_DEPTH(d, _x, _y) d = INTEL_READ_DEPTH(Y_TILE(_x, _y)) +#define TAG(x) INTEL_TAG(intel_YTile_##x) +#include "depthtmp.h" + +#undef INTEL_VALUE_TYPE +#undef INTEL_WRITE_DEPTH +#undef INTEL_READ_DEPTH +#undef INTEL_TAG diff --git a/shared/intel_extensions.c b/shared/intel_extensions.c new file mode 100644 index 0000000..9ec1b4e --- /dev/null +++ b/shared/intel_extensions.c @@ -0,0 +1,188 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "intel_chipset.h" +#include "intel_context.h" +#include "intel_extensions.h" + + +#define need_GL_ARB_framebuffer_object +#define need_GL_ARB_occlusion_query +#define need_GL_ARB_point_parameters +#define need_GL_ARB_shader_objects +#define need_GL_ARB_vertex_program +#define need_GL_ARB_vertex_shader +#define need_GL_ARB_window_pos +#define need_GL_EXT_blend_color +#define need_GL_EXT_blend_equation_separate +#define need_GL_EXT_blend_func_separate +#define need_GL_EXT_blend_minmax +#define need_GL_EXT_cull_vertex +#define need_GL_EXT_fog_coord +#define need_GL_EXT_framebuffer_object +#define need_GL_EXT_framebuffer_blit +#define need_GL_EXT_point_parameters +#define need_GL_EXT_secondary_color +#define need_GL_EXT_stencil_two_side +#define need_GL_ATI_separate_stencil +#define need_GL_ATI_envmap_bumpmap +#define need_GL_NV_point_sprite +#define need_GL_NV_vertex_program +#define need_GL_VERSION_2_0 +#define need_GL_VERSION_2_1 + +#include "extension_helper.h" + + +/** + * Extension strings exported by the intel driver. + * + * Extensions supported by all chips supported by i830_dri, i915_dri, or + * i965_dri. + */ +static const struct dri_extension card_extensions[] = { + { "GL_ARB_multitexture", NULL }, + { "GL_ARB_point_parameters", GL_ARB_point_parameters_functions }, + { "GL_ARB_texture_border_clamp", NULL }, + { "GL_ARB_texture_cube_map", NULL }, + { "GL_ARB_texture_env_add", NULL }, + { "GL_ARB_texture_env_combine", NULL }, + { "GL_ARB_texture_env_crossbar", NULL }, + { "GL_ARB_texture_env_dot3", NULL }, + { "GL_ARB_texture_mirrored_repeat", NULL }, + { "GL_ARB_texture_rectangle", NULL }, + { "GL_ARB_vertex_program", GL_ARB_vertex_program_functions }, + { "GL_ARB_window_pos", GL_ARB_window_pos_functions }, + { "GL_EXT_blend_color", GL_EXT_blend_color_functions }, + { "GL_EXT_blend_equation_separate", GL_EXT_blend_equation_separate_functions }, + { "GL_EXT_blend_func_separate", GL_EXT_blend_func_separate_functions }, + { "GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions }, + { "GL_EXT_blend_logic_op", NULL }, + { "GL_EXT_blend_subtract", NULL }, + { "GL_EXT_cull_vertex", GL_EXT_cull_vertex_functions }, + { "GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, + { "GL_EXT_packed_depth_stencil", NULL }, + { "GL_EXT_secondary_color", GL_EXT_secondary_color_functions }, + { "GL_EXT_stencil_wrap", NULL }, + { "GL_EXT_texture_edge_clamp", NULL }, + { "GL_EXT_texture_env_combine", NULL }, + { "GL_EXT_texture_env_dot3", NULL }, + { "GL_EXT_texture_filter_anisotropic", NULL }, + { "GL_EXT_texture_lod_bias", NULL }, + { "GL_3DFX_texture_compression_FXT1", NULL }, + { "GL_APPLE_client_storage", NULL }, + { "GL_MESA_pack_invert", NULL }, + { "GL_MESA_ycbcr_texture", NULL }, + { "GL_NV_blend_square", NULL }, + { "GL_NV_point_sprite", GL_NV_point_sprite_functions }, + { "GL_NV_vertex_program", GL_NV_vertex_program_functions }, + { "GL_NV_vertex_program1_1", NULL }, + { "GL_SGIS_generate_mipmap", NULL }, + { NULL, NULL } +}; + + +/** i915 / i945-only extensions */ +static const struct dri_extension i915_extensions[] = { + { "GL_ARB_depth_texture", NULL }, + { "GL_ARB_fragment_program", NULL }, + { "GL_ARB_shadow", NULL }, + { "GL_ARB_texture_non_power_of_two", NULL }, + { "GL_ATI_texture_env_combine3", NULL }, + { "GL_EXT_shadow_funcs", NULL }, + { "GL_NV_texture_env_combine4", NULL }, + { NULL, NULL } +}; + + +/** i965-only extensions */ +static const struct dri_extension brw_extensions[] = { + { "GL_ARB_depth_texture", NULL }, + { "GL_ARB_fragment_program", NULL }, + { "GL_ARB_fragment_program_shadow", NULL }, + { "GL_ARB_fragment_shader", NULL }, + { "GL_ARB_framebuffer_object", GL_ARB_framebuffer_object_functions}, + { "GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions }, + { "GL_ARB_point_sprite", NULL }, + { "GL_ARB_shader_objects", GL_ARB_shader_objects_functions }, + { "GL_ARB_shading_language_100", GL_VERSION_2_0_functions }, + { "GL_ARB_shading_language_120", GL_VERSION_2_1_functions }, + { "GL_ARB_shadow", NULL }, + { "GL_MESA_texture_signed_rgba", NULL }, + { "GL_ARB_texture_non_power_of_two", NULL }, + { "GL_ARB_vertex_shader", GL_ARB_vertex_shader_functions }, + { "GL_EXT_shadow_funcs", NULL }, + { "GL_EXT_stencil_two_side", GL_EXT_stencil_two_side_functions }, + { "GL_EXT_texture_sRGB", NULL }, + { "GL_EXT_texture_swizzle", NULL }, + { "GL_EXT_vertex_array_bgra", NULL }, + { "GL_ATI_envmap_bumpmap", GL_ATI_envmap_bumpmap_functions }, + { "GL_ATI_separate_stencil", GL_ATI_separate_stencil_functions }, + { "GL_ATI_texture_env_combine3", NULL }, + { "GL_NV_texture_env_combine4", NULL }, + { NULL, NULL } +}; + + +static const struct dri_extension arb_oq_extensions[] = { + { NULL, NULL } +}; + + +static const struct dri_extension ttm_extensions[] = { + { "GL_ARB_pixel_buffer_object", NULL }, + { "GL_EXT_framebuffer_blit", GL_EXT_framebuffer_blit_functions }, + { "GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions }, + { NULL, NULL } +}; + + +/** + * Initializes potential list of extensions if ctx == NULL, or actually enables + * extensions for a context. + */ +void +intelInitExtensions(GLcontext *ctx, GLboolean enable_imaging) +{ + struct intel_context *intel = ctx?intel_context(ctx):NULL; + + /* Disable imaging extension until convolution is working in teximage paths. + */ + enable_imaging = GL_FALSE; + + driInitExtensions(ctx, card_extensions, enable_imaging); + + if (intel == NULL || intel->ttm) + driInitExtensions(ctx, ttm_extensions, GL_FALSE); + + if (intel == NULL || IS_965(intel->intelScreen->deviceID)) + driInitExtensions(ctx, brw_extensions, GL_FALSE); + + if (intel == NULL || IS_915(intel->intelScreen->deviceID) + || IS_945(intel->intelScreen->deviceID)) + driInitExtensions(ctx, i915_extensions, GL_FALSE); +} diff --git a/i915/i915_tex.c b/shared/intel_extensions.h index e38d8fe..97147ec 100644 --- a/i915/i915_tex.c +++ b/shared/intel_extensions.h @@ -25,54 +25,12 @@ * **************************************************************************/ -#include "main/glheader.h" -#include "main/mtypes.h" -#include "main/imports.h" -#include "main/simple_list.h" -#include "main/enums.h" -#include "main/image.h" -#include "main/mm.h" -#include "main/texstore.h" -#include "main/texformat.h" -#include "swrast/swrast.h" +#ifndef INTEL_EXTENSIONS_H +#define INTEL_EXTENSIONS_H -#include "texmem.h" -#include "i915_context.h" -#include "i915_reg.h" +extern void +intelInitExtensions(GLcontext *ctx, GLboolean enable_imaging); - -static void -i915TexEnv(GLcontext * ctx, GLenum target, - GLenum pname, const GLfloat * param) -{ - struct i915_context *i915 = I915_CONTEXT(ctx); - - switch (pname) { - case GL_TEXTURE_LOD_BIAS:{ - GLuint unit = ctx->Texture.CurrentUnit; - GLint b = (int) ((*param) * 16.0); - if (b > 255) - b = 255; - if (b < -256) - b = -256; - I915_STATECHANGE(i915, I915_UPLOAD_TEX(unit)); - i915->lodbias_ss2[unit] = - ((b << SS2_LOD_BIAS_SHIFT) & SS2_LOD_BIAS_MASK); - break; - } - - default: - break; - } -} - - -void -i915InitTextureFuncs(struct dd_function_table *functions) -{ -/* - functions->TexEnv = i915TexEnv; -*/ -} +#endif diff --git a/shared/intel_fbo.c b/shared/intel_fbo.c index 54f6038..30f58b1 100644 --- a/shared/intel_fbo.c +++ b/shared/intel_fbo.c @@ -27,6 +27,7 @@ #include "main/imports.h" +#include "main/macros.h" #include "main/mtypes.h" #include "main/fbobject.h" #include "main/framebuffer.h" @@ -37,58 +38,13 @@ #include "intel_context.h" #include "intel_buffers.h" -#include "intel_depthstencil.h" #include "intel_fbo.h" #include "intel_mipmap_tree.h" #include "intel_regions.h" -#include "intel_span.h" #define FILE_DEBUG_FLAG DEBUG_FBO -#define INTEL_RB_CLASS 0x12345678 - - -/* XXX FBO: move this to intel_context.h (inlined) */ -/** - * Return a gl_renderbuffer ptr casted to intel_renderbuffer. - * NULL will be returned if the rb isn't really an intel_renderbuffer. - * This is determiend by checking the ClassID. - */ -struct intel_renderbuffer * -intel_renderbuffer(struct gl_renderbuffer *rb) -{ - struct intel_renderbuffer *irb = (struct intel_renderbuffer *) rb; - if (irb && irb->Base.ClassID == INTEL_RB_CLASS) { - /*_mesa_warning(NULL, "Returning non-intel Rb\n");*/ - return irb; - } - else - return NULL; -} - - -struct intel_renderbuffer * -intel_get_renderbuffer(struct gl_framebuffer *fb, int attIndex) -{ - if (attIndex >= 0) - return intel_renderbuffer(fb->Attachment[attIndex].Renderbuffer); - else - return NULL; -} - -struct intel_region * -intel_get_rb_region(struct gl_framebuffer *fb, GLuint attIndex) -{ - struct intel_renderbuffer *irb = intel_get_renderbuffer(fb, attIndex); - - if (irb) - return irb->region; - else - return NULL; -} - - /** * Create a new framebuffer object. @@ -103,6 +59,7 @@ intel_new_framebuffer(GLcontext * ctx, GLuint name) } +/** Called by gl_renderbuffer::Delete() */ static void intel_delete_renderbuffer(struct gl_renderbuffer *rb) { @@ -112,10 +69,6 @@ intel_delete_renderbuffer(struct gl_renderbuffer *rb) ASSERT(irb); - if (irb->PairedStencil || irb->PairedDepth) { - intel_unpair_depth_stencil(ctx, irb); - } - if (irb->span_cache != NULL) _mesa_free(irb->span_cache); @@ -127,7 +80,6 @@ intel_delete_renderbuffer(struct gl_renderbuffer *rb) } - /** * Return a pointer to a specific pixel in a renderbuffer. */ @@ -142,7 +94,6 @@ intel_get_pointer(GLcontext * ctx, struct gl_renderbuffer *rb, } - /** * Called via glRenderbufferStorageEXT() to set the format and allocate * storage for a user-created renderbuffer. @@ -168,6 +119,7 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, rb->RedBits = 5; rb->GreenBits = 6; rb->BlueBits = 5; + irb->texformat = &_mesa_texformat_rgb565; cpp = 2; break; case GL_RGB: @@ -181,6 +133,7 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, rb->GreenBits = 8; rb->BlueBits = 8; rb->AlphaBits = 0; + irb->texformat = &_mesa_texformat_argb8888; /* XXX: Need xrgb8888 */ cpp = 4; break; case GL_RGBA: @@ -197,6 +150,7 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, rb->GreenBits = 8; rb->BlueBits = 8; rb->AlphaBits = 8; + irb->texformat = &_mesa_texformat_argb8888; cpp = 4; break; case GL_STENCIL_INDEX: @@ -209,20 +163,15 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, rb->DataType = GL_UNSIGNED_INT_24_8_EXT; rb->StencilBits = 8; cpp = 4; + irb->texformat = &_mesa_texformat_s8_z24; break; case GL_DEPTH_COMPONENT16: -#if 0 rb->_ActualFormat = GL_DEPTH_COMPONENT16; rb->DataType = GL_UNSIGNED_SHORT; rb->DepthBits = 16; cpp = 2; + irb->texformat = &_mesa_texformat_z16; break; -#else - /* fall-through. - * 16bpp depth renderbuffer can't be paired with a stencil buffer so - * always used combined depth/stencil format. - */ -#endif case GL_DEPTH_COMPONENT: case GL_DEPTH_COMPONENT24: case GL_DEPTH_COMPONENT32: @@ -230,6 +179,7 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, rb->DataType = GL_UNSIGNED_INT_24_8_EXT; rb->DepthBits = 24; cpp = 4; + irb->texformat = &_mesa_texformat_s8_z24; break; case GL_DEPTH_STENCIL_EXT: case GL_DEPTH24_STENCIL8_EXT: @@ -238,6 +188,7 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, rb->DepthBits = 24; rb->StencilBits = 8; cpp = 4; + irb->texformat = &_mesa_texformat_s8_z24; break; default: _mesa_problem(ctx, @@ -281,7 +232,6 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, } - /** * Called for each hardware renderbuffer when a _window_ is resized. * Just update fields. @@ -299,6 +249,7 @@ intel_alloc_window_storage(GLcontext * ctx, struct gl_renderbuffer *rb, return GL_TRUE; } + static void intel_resize_buffers(GLcontext *ctx, struct gl_framebuffer *fb, GLuint width, GLuint height) @@ -325,6 +276,8 @@ intel_resize_buffers(GLcontext *ctx, struct gl_framebuffer *fb, } } + +/** Dummy function for gl_renderbuffer::AllocStorage() */ static GLboolean intel_nop_alloc_storage(GLcontext * ctx, struct gl_renderbuffer *rb, GLenum internalFormat, GLuint width, GLuint height) @@ -344,10 +297,9 @@ intel_renderbuffer_set_region(struct intel_renderbuffer *rb, rb->region = NULL; intel_region_reference(&rb->region, region); intel_region_release(&old); - - rb->pfPitch = region->pitch; } + /** * Create a new intel_renderbuffer which corresponds to an on-screen window, * not a user-created renderbuffer. @@ -377,6 +329,17 @@ intel_create_renderbuffer(GLenum intFormat) irb->Base.GreenBits = 6; irb->Base.BlueBits = 5; irb->Base.DataType = GL_UNSIGNED_BYTE; + irb->texformat = &_mesa_texformat_rgb565; + break; + case GL_RGB8: + irb->Base._ActualFormat = GL_RGB8; + irb->Base._BaseFormat = GL_RGB; + irb->Base.RedBits = 8; + irb->Base.GreenBits = 8; + irb->Base.BlueBits = 8; + irb->Base.AlphaBits = 0; + irb->Base.DataType = GL_UNSIGNED_BYTE; + irb->texformat = &_mesa_texformat_argb8888; /* XXX: Need xrgb8888 */ break; case GL_RGBA8: irb->Base._ActualFormat = GL_RGBA8; @@ -386,24 +349,28 @@ intel_create_renderbuffer(GLenum intFormat) irb->Base.BlueBits = 8; irb->Base.AlphaBits = 8; irb->Base.DataType = GL_UNSIGNED_BYTE; + irb->texformat = &_mesa_texformat_argb8888; break; case GL_STENCIL_INDEX8_EXT: irb->Base._ActualFormat = GL_STENCIL_INDEX8_EXT; irb->Base._BaseFormat = GL_STENCIL_INDEX; irb->Base.StencilBits = 8; irb->Base.DataType = GL_UNSIGNED_BYTE; + irb->texformat = &_mesa_texformat_s8_z24; break; case GL_DEPTH_COMPONENT16: irb->Base._ActualFormat = GL_DEPTH_COMPONENT16; irb->Base._BaseFormat = GL_DEPTH_COMPONENT; irb->Base.DepthBits = 16; irb->Base.DataType = GL_UNSIGNED_SHORT; + irb->texformat = &_mesa_texformat_z16; break; case GL_DEPTH_COMPONENT24: irb->Base._ActualFormat = GL_DEPTH24_STENCIL8_EXT; irb->Base._BaseFormat = GL_DEPTH_COMPONENT; irb->Base.DepthBits = 24; irb->Base.DataType = GL_UNSIGNED_INT; + irb->texformat = &_mesa_texformat_s8_z24; break; case GL_DEPTH24_STENCIL8_EXT: irb->Base._ActualFormat = GL_DEPTH24_STENCIL8_EXT; @@ -411,6 +378,7 @@ intel_create_renderbuffer(GLenum intFormat) irb->Base.DepthBits = 24; irb->Base.StencilBits = 8; irb->Base.DataType = GL_UNSIGNED_INT_24_8_EXT; + irb->texformat = &_mesa_texformat_s8_z24; break; default: _mesa_problem(NULL, @@ -467,9 +435,6 @@ intel_bind_framebuffer(GLcontext * ctx, GLenum target, { if (target == GL_FRAMEBUFFER_EXT || target == GL_DRAW_FRAMEBUFFER_EXT) { intel_draw_buffer(ctx, fb); - /* Integer depth range depends on depth buffer bits */ - if (ctx->Driver.DepthRange != NULL) - ctx->Driver.DepthRange(ctx, ctx->Viewport.Near, ctx->Viewport.Far); } else { /* don't need to do anything if target == GL_READ_FRAMEBUFFER_EXT */ @@ -493,10 +458,13 @@ intel_framebuffer_renderbuffer(GLcontext * ctx, intel_draw_buffer(ctx, fb); } + static GLboolean intel_update_wrapper(GLcontext *ctx, struct intel_renderbuffer *irb, struct gl_texture_image *texImage) { + irb->texformat = texImage->TexFormat; + if (texImage->TexFormat == &_mesa_texformat_argb8888) { irb->Base._ActualFormat = GL_RGBA8; irb->Base._BaseFormat = GL_RGBA; @@ -506,9 +474,21 @@ intel_update_wrapper(GLcontext *ctx, struct intel_renderbuffer *irb, else if (texImage->TexFormat == &_mesa_texformat_rgb565) { irb->Base._ActualFormat = GL_RGB5; irb->Base._BaseFormat = GL_RGB; - irb->Base.DataType = GL_UNSIGNED_SHORT; + irb->Base.DataType = GL_UNSIGNED_BYTE; DBG("Render to RGB5 texture OK\n"); } + else if (texImage->TexFormat == &_mesa_texformat_argb1555) { + irb->Base._ActualFormat = GL_RGB5_A1; + irb->Base._BaseFormat = GL_RGBA; + irb->Base.DataType = GL_UNSIGNED_BYTE; + DBG("Render to ARGB1555 texture OK\n"); + } + else if (texImage->TexFormat == &_mesa_texformat_argb4444) { + irb->Base._ActualFormat = GL_RGBA4; + irb->Base._BaseFormat = GL_RGBA; + irb->Base.DataType = GL_UNSIGNED_BYTE; + DBG("Render to ARGB4444 texture OK\n"); + } else if (texImage->TexFormat == &_mesa_texformat_z16) { irb->Base._ActualFormat = GL_DEPTH_COMPONENT16; irb->Base._BaseFormat = GL_DEPTH_COMPONENT; @@ -535,15 +515,15 @@ intel_update_wrapper(GLcontext *ctx, struct intel_renderbuffer *irb, irb->Base.BlueBits = texImage->TexFormat->BlueBits; irb->Base.AlphaBits = texImage->TexFormat->AlphaBits; irb->Base.DepthBits = texImage->TexFormat->DepthBits; + irb->Base.StencilBits = texImage->TexFormat->StencilBits; irb->Base.Delete = intel_delete_renderbuffer; irb->Base.AllocStorage = intel_nop_alloc_storage; - irb->RenderToTexture = GL_TRUE; - return GL_TRUE; } + /** * When glFramebufferTexture[123]D is called this function sets up the * gl_renderbuffer wrapper around the texture image. @@ -552,7 +532,7 @@ intel_update_wrapper(GLcontext *ctx, struct intel_renderbuffer *irb, static struct intel_renderbuffer * intel_wrap_texture(GLcontext * ctx, struct gl_texture_image *texImage) { - const GLuint name = ~0; /* not significant, but distinct for debugging */ + const GLuint name = ~0; /* not significant, but distinct for debugging */ struct intel_renderbuffer *irb; /* make an intel_renderbuffer to wrap the texture image */ @@ -599,10 +579,11 @@ intel_render_texture(GLcontext * ctx, /* Fallback on drawing to a texture with a border, which won't have a * miptree. */ - _mesa_reference_renderbuffer(&att->Renderbuffer, NULL); - _mesa_render_texture(ctx, fb, att); - return; - } else if (!irb) { + _mesa_reference_renderbuffer(&att->Renderbuffer, NULL); + _mesa_render_texture(ctx, fb, att); + return; + } + else if (!irb) { irb = intel_wrap_texture(ctx, newImage); if (irb) { /* bind the wrapper to the attachment point */ @@ -613,7 +594,9 @@ intel_render_texture(GLcontext * ctx, _mesa_render_texture(ctx, fb, att); return; } - } if (!intel_update_wrapper(ctx, irb, newImage)) { + } + + if (!intel_update_wrapper(ctx, irb, newImage)) { _mesa_reference_renderbuffer(&att->Renderbuffer, NULL); _mesa_render_texture(ctx, fb, att); return; @@ -658,19 +641,123 @@ static void intel_finish_render_texture(GLcontext * ctx, struct gl_renderbuffer_attachment *att) { - struct intel_renderbuffer *irb = intel_renderbuffer(att->Renderbuffer); + /* no-op + * Previously we released the renderbuffer's intel_region but + * that's not necessary and actually caused problems when trying + * to do a glRead/CopyPixels from the renderbuffer later. + * The region will be released later if the texture is replaced + * or the renderbuffer deleted. + * + * The intention of this driver hook is more of a "done rendering + * to texture, please re-twiddle/etc if necessary". + */ +} - DBG("End render texture (tid %x) tex %u\n", _glthread_GetID(), att->Texture->Name); - if (irb) { - /* just release the region */ - intel_region_release(&irb->region); +/** + * Do additional "completeness" testing of a framebuffer object. + */ +static void +intel_validate_framebuffer(GLcontext *ctx, struct gl_framebuffer *fb) +{ + const struct intel_renderbuffer *depthRb = + intel_get_renderbuffer(fb, BUFFER_DEPTH); + const struct intel_renderbuffer *stencilRb = + intel_get_renderbuffer(fb, BUFFER_STENCIL); + int i; + + if (stencilRb && stencilRb != depthRb) { + /* we only support combined depth/stencil buffers, not separate + * stencil buffers. + */ + fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT; + } + + for (i = 0; i < ctx->Const.MaxDrawBuffers; i++) { + struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i]; + struct intel_renderbuffer *irb = intel_renderbuffer(rb); + + if (rb == NULL) + continue; + + switch (irb->texformat->MesaFormat) { + case MESA_FORMAT_ARGB8888: + case MESA_FORMAT_RGB565: + case MESA_FORMAT_ARGB1555: + case MESA_FORMAT_ARGB4444: + break; + default: + fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT; + } + } +} + + +/** + * Called from glBlitFramebuffer(). + * For now, we're doing an approximation with glCopyPixels(). + * XXX we need to bypass all the per-fragment operations, except scissor. + */ +static void +intel_blit_framebuffer(GLcontext *ctx, + GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, + GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, + GLbitfield mask, GLenum filter) +{ + const GLfloat xZoomSave = ctx->Pixel.ZoomX; + const GLfloat yZoomSave = ctx->Pixel.ZoomY; + GLsizei width, height; + GLfloat xFlip = 1.0F, yFlip = 1.0F; + + if (srcX1 < srcX0) { + GLint tmp = srcX1; + srcX1 = srcX0; + srcX0 = tmp; + xFlip = -1.0F; + } + + if (srcY1 < srcY0) { + GLint tmp = srcY1; + srcY1 = srcY0; + srcY0 = tmp; + yFlip = -1.0F; + } + + width = srcX1 - srcX0; + height = srcY1 - srcY0; + + ctx->Pixel.ZoomX = xFlip * (dstX1 - dstX0) / (srcX1 - srcY0); + ctx->Pixel.ZoomY = yFlip * (dstY1 - dstY0) / (srcY1 - srcY0); + + if (ctx->Pixel.ZoomX < 0.0F) { + dstX0 = MAX2(dstX0, dstX1); + } + else { + dstX0 = MIN2(dstX0, dstX1); + } + + if (ctx->Pixel.ZoomY < 0.0F) { + dstY0 = MAX2(dstY0, dstY1); + } + else { + dstY0 = MIN2(dstY0, dstY1); + } + + if (mask & GL_COLOR_BUFFER_BIT) { + ctx->Driver.CopyPixels(ctx, srcX0, srcY0, width, height, + dstX0, dstY0, GL_COLOR); + } + if (mask & GL_DEPTH_BUFFER_BIT) { + ctx->Driver.CopyPixels(ctx, srcX0, srcY0, width, height, + dstX0, dstY0, GL_DEPTH); } - else if (att->Renderbuffer) { - /* software fallback */ - _mesa_finish_render_texture(ctx, att); - /* XXX FBO: Need to unmap the buffer (or in intelSpanRenderStart???) */ + if (mask & GL_STENCIL_BUFFER_BIT) { + ctx->Driver.CopyPixels(ctx, srcX0, srcY0, width, height, + dstX0, dstY0, GL_STENCIL); } + + ctx->Pixel.ZoomX = xZoomSave; + ctx->Pixel.ZoomY = yZoomSave; } @@ -688,4 +775,6 @@ intel_fbo_init(struct intel_context *intel) intel->ctx.Driver.RenderTexture = intel_render_texture; intel->ctx.Driver.FinishRenderTexture = intel_finish_render_texture; intel->ctx.Driver.ResizeBuffers = intel_resize_buffers; + intel->ctx.Driver.ValidateFramebuffer = intel_validate_framebuffer; + intel->ctx.Driver.BlitFramebuffer = intel_blit_framebuffer; } diff --git a/shared/intel_fbo.h b/shared/intel_fbo.h index b7e9280..f0665af 100644 --- a/shared/intel_fbo.h +++ b/shared/intel_fbo.h @@ -55,19 +55,13 @@ struct intel_framebuffer /** * Intel renderbuffer, derived from gl_renderbuffer. - * Note: The PairedDepth and PairedStencil fields use renderbuffer IDs, - * not pointers because in some circumstances a deleted renderbuffer could - * result in a dangling pointer here. */ struct intel_renderbuffer { struct gl_renderbuffer Base; struct intel_region *region; - GLuint pfPitch; /* possibly paged flipped pitch */ - GLboolean RenderToTexture; /* RTT? */ - GLuint PairedDepth; /**< only used if this is a depth renderbuffer */ - GLuint PairedStencil; /**< only used if this is a stencil renderbuffer */ + const struct gl_texture_format *texformat; GLuint vbl_pending; /**< vblank sequence number of pending flip */ @@ -75,48 +69,70 @@ struct intel_renderbuffer unsigned long span_cache_offset; }; -extern struct intel_renderbuffer *intel_renderbuffer(struct gl_renderbuffer - *rb); + +/** + * gl_renderbuffer is a base class which we subclass. The Class field + * is used for simple run-time type checking. + */ +#define INTEL_RB_CLASS 0x12345678 + + +/** + * Return a gl_renderbuffer ptr casted to intel_renderbuffer. + * NULL will be returned if the rb isn't really an intel_renderbuffer. + * This is determined by checking the ClassID. + */ +static INLINE struct intel_renderbuffer * +intel_renderbuffer(struct gl_renderbuffer *rb) +{ + struct intel_renderbuffer *irb = (struct intel_renderbuffer *) rb; + if (irb && irb->Base.ClassID == INTEL_RB_CLASS) { + /*_mesa_warning(NULL, "Returning non-intel Rb\n");*/ + return irb; + } + else + return NULL; +} + + +/** + * Return a framebuffer's renderbuffer, named by a BUFFER_x index. + */ +static INLINE struct intel_renderbuffer * +intel_get_renderbuffer(struct gl_framebuffer *fb, int attIndex) +{ + if (attIndex >= 0) + return intel_renderbuffer(fb->Attachment[attIndex].Renderbuffer); + else + return NULL; +} + extern void intel_renderbuffer_set_region(struct intel_renderbuffer *irb, struct intel_region *region); + extern struct intel_renderbuffer * intel_create_renderbuffer(GLenum intFormat); -extern void intel_fbo_init(struct intel_context *intel); - - -/* XXX make inline or macro */ -extern struct intel_renderbuffer *intel_get_renderbuffer(struct gl_framebuffer - *fb, - int attIndex); - -extern void intel_flip_renderbuffers(struct intel_framebuffer *intel_fb); +extern void +intel_fbo_init(struct intel_context *intel); -/* XXX make inline or macro */ -extern struct intel_region *intel_get_rb_region(struct gl_framebuffer *fb, - GLuint attIndex); +extern void +intel_flip_renderbuffers(struct intel_framebuffer *intel_fb); -/** - * Are we currently rendering into a texture? - */ -static INLINE GLboolean -intel_rendering_to_texture(const GLcontext *ctx) +static INLINE struct intel_region * +intel_get_rb_region(struct gl_framebuffer *fb, GLuint attIndex) { - if (ctx->DrawBuffer->Name) { - /* User-created FBO */ - const struct intel_renderbuffer *irb = - intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[0]); - return irb && irb->RenderToTexture; - } - else { - return GL_FALSE; - } + struct intel_renderbuffer *irb = intel_get_renderbuffer(fb, attIndex); + if (irb) + return irb->region; + else + return NULL; } diff --git a/shared/intel_pixel.c b/shared/intel_pixel.c index f440a77..fc0ac0b 100644 --- a/shared/intel_pixel.c +++ b/shared/intel_pixel.c @@ -30,6 +30,7 @@ #include "main/context.h" #include "main/enable.h" #include "main/matrix.h" +#include "main/viewport.h" #include "swrast/swrast.h" #include "shader/arbprogram.h" #include "shader/program.h" @@ -112,7 +113,7 @@ intel_check_blit_fragment_ops(GLcontext * ctx, GLboolean src_alpha_is_one) return GL_FALSE; } - if (ctx->Stencil.Enabled) { + if (ctx->Stencil._Enabled) { DBG("fallback due to image stencil\n"); return GL_FALSE; } @@ -183,7 +184,9 @@ intel_meta_set_passthrough_transform(struct intel_context *intel) intel->meta.saved_vp_height = ctx->Viewport.Height; intel->meta.saved_matrix_mode = ctx->Transform.MatrixMode; - /* _mesa_Viewport(0, 0, ctx->DrawBuffer->Width, ctx->DrawBuffer->Height);*/ + intel->internal_viewport_call = GL_TRUE; + _mesa_Viewport(0, 0, ctx->DrawBuffer->Width, ctx->DrawBuffer->Height); + intel->internal_viewport_call = GL_FALSE; _mesa_MatrixMode(GL_PROJECTION); _mesa_PushMatrix(); @@ -205,8 +208,10 @@ intel_meta_restore_transform(struct intel_context *intel) _mesa_MatrixMode(intel->meta.saved_matrix_mode); - /* _mesa_Viewport(intel->meta.saved_vp_x, intel->meta.saved_vp_y, - intel->meta.saved_vp_width, intel->meta.saved_vp_height);*/ + intel->internal_viewport_call = GL_TRUE; + _mesa_Viewport(intel->meta.saved_vp_x, intel->meta.saved_vp_y, + intel->meta.saved_vp_width, intel->meta.saved_vp_height); + intel->internal_viewport_call = GL_FALSE; } /** diff --git a/shared/intel_pixel_bitmap.c b/shared/intel_pixel_bitmap.c index 1db7f55..a2ccae1 100644 --- a/shared/intel_pixel_bitmap.c +++ b/shared/intel_pixel_bitmap.c @@ -401,6 +401,14 @@ intel_texture_bitmap(GLcontext * ctx, return GL_FALSE; } + if (!ctx->Extensions.ARB_texture_non_power_of_two && + (!is_power_of_two(width) || !is_power_of_two(height))) { + if (INTEL_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, + "glBitmap() fallback: NPOT texture\n"); + return GL_FALSE; + } + /* Check that we can load in a texture this big. */ if (width > (1 << (ctx->Const.MaxTextureLevels - 1)) || height > (1 << (ctx->Const.MaxTextureLevels - 1))) { @@ -499,7 +507,7 @@ intel_texture_bitmap(GLcontext * ctx, _mesa_TexCoordPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), &texcoords); _mesa_Enable(GL_VERTEX_ARRAY); _mesa_Enable(GL_TEXTURE_COORD_ARRAY); - CALL_DrawArrays(ctx->Exec, (GL_TRIANGLE_FAN, 0, 4)); + _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4); intel_meta_restore_transform(intel); intel_meta_restore_fragment_program(intel); diff --git a/shared/intel_pixel_copy.c b/shared/intel_pixel_copy.c index 7c7aa60..d50dd68 100644 --- a/shared/intel_pixel_copy.c +++ b/shared/intel_pixel_copy.c @@ -87,7 +87,7 @@ intel_check_copypixel_blit_fragment_ops(GLcontext * ctx) ctx->Color.AlphaEnabled || ctx->Depth.Test || ctx->Fog.Enabled || - ctx->Stencil.Enabled || + ctx->Stencil._Enabled || !ctx->Color.ColorMask[0] || !ctx->Color.ColorMask[1] || !ctx->Color.ColorMask[2] || diff --git a/shared/intel_pixel_draw.c b/shared/intel_pixel_draw.c index 7be7ea8..d80069d 100644 --- a/shared/intel_pixel_draw.c +++ b/shared/intel_pixel_draw.c @@ -97,7 +97,7 @@ intel_texture_drawpixels(GLcontext * ctx, /* We don't have a way to generate fragments with stencil values which * will set the resulting stencil value. */ - if (format == GL_STENCIL_INDEX) + if (format == GL_STENCIL_INDEX || format == GL_DEPTH_STENCIL) return GL_FALSE; /* Check that we can load in a texture this big. */ @@ -120,6 +120,14 @@ intel_texture_drawpixels(GLcontext * ctx, return GL_FALSE; } + if (!ctx->Extensions.ARB_texture_non_power_of_two && + (!is_power_of_two(width) || !is_power_of_two(height))) { + if (INTEL_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, + "glDrawPixels() fallback: NPOT texture\n"); + return GL_FALSE; + } + _mesa_PushAttrib(GL_ENABLE_BIT | GL_TEXTURE_BIT | GL_CURRENT_BIT); _mesa_PushClientAttrib(GL_CLIENT_VERTEX_ARRAY_BIT); @@ -183,7 +191,7 @@ intel_texture_drawpixels(GLcontext * ctx, _mesa_TexCoordPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), &texcoords); _mesa_Enable(GL_VERTEX_ARRAY); _mesa_Enable(GL_TEXTURE_COORD_ARRAY); - CALL_DrawArrays(ctx->Exec, (GL_TRIANGLE_FAN, 0, 4)); + _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4); intel_meta_restore_transform(intel); @@ -233,7 +241,7 @@ intel_stencil_drawpixels(GLcontext * ctx, } /* We don't support stencil testing/ops here */ - if (ctx->Stencil.Enabled) + if (ctx->Stencil._Enabled) return GL_FALSE; /* We use FBOs for our wrapping of the depthbuffer into a color @@ -273,6 +281,14 @@ intel_stencil_drawpixels(GLcontext * ctx, return GL_FALSE; } + if (!ctx->Extensions.ARB_texture_non_power_of_two && + (!is_power_of_two(width) || !is_power_of_two(height))) { + if (INTEL_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, + "glDrawPixels(GL_STENCIL_INDEX) fallback: NPOT texture\n"); + return GL_FALSE; + } + _mesa_PushAttrib(GL_ENABLE_BIT | GL_TEXTURE_BIT | GL_CURRENT_BIT | GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); _mesa_PushClientAttrib(GL_CLIENT_VERTEX_ARRAY_BIT); @@ -367,7 +383,7 @@ intel_stencil_drawpixels(GLcontext * ctx, _mesa_TexCoordPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), &texcoords); _mesa_Enable(GL_VERTEX_ARRAY); _mesa_Enable(GL_TEXTURE_COORD_ARRAY); - CALL_DrawArrays(ctx->Exec, (GL_TRIANGLE_FAN, 0, 4)); + _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4); intel_meta_restore_transform(intel); diff --git a/shared/intel_regions.c b/shared/intel_regions.c index 7de1e2b..0aa5b8c 100644 --- a/shared/intel_regions.c +++ b/shared/intel_regions.c @@ -158,7 +158,7 @@ void intel_region_reference(struct intel_region **dst, struct intel_region *src) { if (src) - DBG("%s %d\n", __FUNCTION__, src->refcount); + DBG("%s %p %d\n", __FUNCTION__, src, src->refcount); assert(*dst == NULL); if (src) { @@ -175,7 +175,7 @@ intel_region_release(struct intel_region **region_handle) if (region == NULL) return; - DBG("%s %d\n", __FUNCTION__, region->refcount - 1); + DBG("%s %p %d\n", __FUNCTION__, region, region->refcount - 1); ASSERT(region->refcount > 0); region->refcount--; @@ -466,7 +466,8 @@ intel_recreate_static(struct intel_context *intel, else region->cpp = intel->ctx.Visual.rgbBits / 8; region->pitch = intelScreen->pitch; - region->height = intelScreen->height; /* needed? */ + region->width = intelScreen->width; + region->height = intelScreen->height; if (region->buffer != NULL) { dri_bo_unreference(region->buffer); diff --git a/shared/intel_screen.c b/shared/intel_screen.c index 4bd11dd..0f278b3 100644 --- a/shared/intel_screen.c +++ b/shared/intel_screen.c @@ -28,28 +28,27 @@ #include "main/glheader.h" #include "main/context.h" #include "main/framebuffer.h" -#include "main/matrix.h" #include "main/renderbuffer.h" -#include "main/simple_list.h" + #include "utils.h" #include "vblank.h" #include "xmlpool.h" - -#include "intel_screen.h" - +#include "intel_batchbuffer.h" #include "intel_buffers.h" -#include "intel_tex.h" -#include "intel_span.h" -#include "intel_fbo.h" +#include "intel_bufmgr.h" #include "intel_chipset.h" +#include "intel_extensions.h" +#include "intel_fbo.h" +#include "intel_regions.h" #include "intel_swapbuffers.h" +#include "intel_screen.h" +#include "intel_span.h" +#include "intel_tex.h" #include "i915_drm.h" #include "i830_dri.h" -#include "intel_regions.h" -#include "intel_batchbuffer.h" -#include "intel_bufmgr.h" + PUBLIC const char __driConfigOptions[] = DRI_CONF_BEGIN @@ -72,10 +71,12 @@ PUBLIC const char __driConfigOptions[] = DRI_CONF_SECTION_END DRI_CONF_SECTION_DEBUG DRI_CONF_NO_RAST(false) + DRI_CONF_ALWAYS_FLUSH_BATCH(false) + DRI_CONF_ALWAYS_FLUSH_CACHE(false) DRI_CONF_SECTION_END DRI_CONF_END; -const GLuint __driNConfigOptions = 6; +const GLuint __driNConfigOptions = 8; #ifdef USE_NEW_INTERFACE static PFNGLXCREATECONTEXTMODES create_context_modes = NULL; @@ -210,6 +211,7 @@ static const __DRItexOffsetExtension intelTexOffsetExtension = { static const __DRItexBufferExtension intelTexBufferExtension = { { __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION }, intelSetTexBuffer, + intelSetTexBuffer2, }; static const __DRIextension *intelScreenExtensions[] = { @@ -234,7 +236,7 @@ intel_get_param(__DRIscreenPrivate *psp, int param, int *value) ret = drmCommandWriteRead(psp->fd, DRM_I915_GETPARAM, &gp, sizeof(gp)); if (ret) { - fprintf(stderr, "drm_i915_getparam: %d\n", ret); + _mesa_warning(NULL, "drm_i915_getparam: %d", ret); return GL_FALSE; } @@ -303,6 +305,7 @@ intelDestroyScreen(__DRIscreenPrivate * sPriv) dri_bufmgr_destroy(intelScreen->bufmgr); intelUnmapScreenRegions(intelScreen); + driDestroyOptionCache(&intelScreen->optionCache); FREE(intelScreen); sPriv->private = NULL; @@ -323,7 +326,7 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv, else { GLboolean swStencil = (mesaVis->stencilBits > 0 && mesaVis->depthBits != 24); - GLenum rgbFormat = (mesaVis->redBits == 5 ? GL_RGB5 : GL_RGBA8); + GLenum rgbFormat; struct intel_framebuffer *intel_fb = CALLOC_STRUCT(intel_framebuffer); @@ -332,6 +335,13 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv, _mesa_initialize_framebuffer(&intel_fb->Base, mesaVis); + if (mesaVis->redBits == 5) + rgbFormat = GL_RGB5; + else if (mesaVis->alphaBits == 0) + rgbFormat = GL_RGB8; + else + rgbFormat = GL_RGBA8; + /* setup the hardware-based renderbuffers */ intel_fb->color_rb[0] = intel_create_renderbuffer(rgbFormat); _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_FRONT_LEFT, @@ -385,7 +395,31 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv, static void intelDestroyBuffer(__DRIdrawablePrivate * driDrawPriv) { - _mesa_unreference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate))); + struct intel_framebuffer *intel_fb = driDrawPriv->driverPrivate; + struct intel_renderbuffer *depth_rb; + struct intel_renderbuffer *stencil_rb; + + if (intel_fb) { + if (intel_fb->color_rb[0]) { + intel_renderbuffer_set_region(intel_fb->color_rb[0], NULL); + } + + if (intel_fb->color_rb[1]) { + intel_renderbuffer_set_region(intel_fb->color_rb[1], NULL); + } + + depth_rb = intel_get_renderbuffer(&intel_fb->Base, BUFFER_DEPTH); + if (depth_rb) { + intel_renderbuffer_set_region(depth_rb, NULL); + } + + stencil_rb = intel_get_renderbuffer(&intel_fb->Base, BUFFER_STENCIL); + if (stencil_rb) { + intel_renderbuffer_set_region(stencil_rb, NULL); + } + } + + _mesa_reference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate)), NULL); } @@ -466,8 +500,6 @@ intelFillInModes(__DRIscreenPrivate *psp, __GLcontextModes *m; unsigned depth_buffer_factor; unsigned back_buffer_factor; - GLenum fb_format; - GLenum fb_type; int i; /* GLX_SWAP_COPY_OML is only supported because the Intel driver doesn't @@ -479,6 +511,7 @@ intelFillInModes(__DRIscreenPrivate *psp, uint8_t depth_bits_array[3]; uint8_t stencil_bits_array[3]; + uint8_t msaa_samples_array[1]; depth_bits_array[0] = 0; depth_bits_array[1] = depth_bits; @@ -495,22 +528,39 @@ intelFillInModes(__DRIscreenPrivate *psp, stencil_bits_array[2] = (stencil_bits == 0) ? 8 : stencil_bits; + msaa_samples_array[0] = 0; + depth_buffer_factor = ((depth_bits != 0) || (stencil_bits != 0)) ? 3 : 1; back_buffer_factor = (have_back_buffer) ? 3 : 1; if (pixel_bits == 16) { - fb_format = GL_RGB; - fb_type = GL_UNSIGNED_SHORT_5_6_5; + configs = driCreateConfigs(GL_RGB, GL_UNSIGNED_SHORT_5_6_5, + depth_bits_array, stencil_bits_array, + depth_buffer_factor, back_buffer_modes, + back_buffer_factor, + msaa_samples_array, 1); } else { - fb_format = GL_BGRA; - fb_type = GL_UNSIGNED_INT_8_8_8_8_REV; + __DRIconfig **configs_a8r8g8b8; + __DRIconfig **configs_x8r8g8b8; + + configs_a8r8g8b8 = driCreateConfigs(GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, + depth_bits_array, + stencil_bits_array, + depth_buffer_factor, + back_buffer_modes, + back_buffer_factor, + msaa_samples_array, 1); + configs_x8r8g8b8 = driCreateConfigs(GL_BGR, GL_UNSIGNED_INT_8_8_8_8_REV, + depth_bits_array, + stencil_bits_array, + depth_buffer_factor, + back_buffer_modes, + back_buffer_factor, + msaa_samples_array, 1); + configs = driConcatConfigs(configs_a8r8g8b8, configs_x8r8g8b8); } - configs = driCreateConfigs(fb_format, fb_type, - depth_bits_array, stencil_bits_array, - depth_buffer_factor, back_buffer_modes, - back_buffer_factor); if (configs == NULL) { fprintf(stderr, "[%s:%u] Error creating FBConfig!\n", __func__, __LINE__); @@ -537,6 +587,7 @@ intel_init_bufmgr(intelScreenPrivate *intelScreen) GLboolean gem_supported; struct drm_i915_getparam gp; __DRIscreenPrivate *spriv = intelScreen->driScrnPriv; + int num_fences; intelScreen->no_hw = getenv("INTEL_NO_HW") != NULL; @@ -587,8 +638,10 @@ intel_init_bufmgr(intelScreenPrivate *intelScreen) &intelScreen->sarea->last_dispatch); } - /* XXX bufmgr should be per-screen, not per-context */ - intelScreen->ttm = intelScreen->ttm; + if (intel_get_param(spriv, I915_PARAM_NUM_FENCES_AVAIL, &num_fences)) + intelScreen->kernel_exec_fencing = !!num_fences; + else + intelScreen->kernel_exec_fencing = GL_FALSE; return GL_TRUE; } @@ -672,6 +725,17 @@ static const __DRIconfig **intelInitScreen2(__DRIscreenPrivate *psp) { intelScreenPrivate *intelScreen; + GLenum fb_format[3]; + GLenum fb_type[3]; + /* GLX_SWAP_COPY_OML is only supported because the Intel driver doesn't + * support pageflipping at all. + */ + static const GLenum back_buffer_modes[] = { + GLX_NONE, GLX_SWAP_UNDEFINED_OML, GLX_SWAP_COPY_OML + }; + uint8_t depth_bits[4], stencil_bits[4], msaa_samples_array[1]; + int color; + __DRIconfig **configs = NULL; /* Calling driInitExtensions here, with a NULL context pointer, * does not actually enable the extensions. It just makes sure @@ -711,8 +775,71 @@ __DRIconfig **intelInitScreen2(__DRIscreenPrivate *psp) intelScreen->irq_active = 1; psp->extensions = intelScreenExtensions; - return driConcatConfigs(intelFillInModes(psp, 16, 16, 0, 1), - intelFillInModes(psp, 32, 24, 8, 1)); + depth_bits[0] = 0; + stencil_bits[0] = 0; + depth_bits[1] = 16; + stencil_bits[1] = 0; + depth_bits[2] = 24; + stencil_bits[2] = 0; + depth_bits[3] = 24; + stencil_bits[3] = 8; + + msaa_samples_array[0] = 0; + + fb_format[0] = GL_RGB; + fb_type[0] = GL_UNSIGNED_SHORT_5_6_5; + + fb_format[1] = GL_BGR; + fb_type[1] = GL_UNSIGNED_INT_8_8_8_8_REV; + + fb_format[2] = GL_BGRA; + fb_type[2] = GL_UNSIGNED_INT_8_8_8_8_REV; + + depth_bits[0] = 0; + stencil_bits[0] = 0; + + for (color = 0; color < ARRAY_SIZE(fb_format); color++) { + __DRIconfig **new_configs; + int depth_factor; + + /* With DRI2 right now, GetBuffers always returns a depth/stencil buffer + * with the same cpp as the drawable. So we can't support depth cpp != + * color cpp currently. + */ + if (fb_type[color] == GL_UNSIGNED_SHORT_5_6_5) { + depth_bits[1] = 16; + stencil_bits[1] = 0; + + depth_factor = 2; + } else { + depth_bits[1] = 24; + stencil_bits[1] = 0; + depth_bits[2] = 24; + stencil_bits[2] = 8; + + depth_factor = 3; + } + new_configs = driCreateConfigs(fb_format[color], fb_type[color], + depth_bits, + stencil_bits, + depth_factor, + back_buffer_modes, + ARRAY_SIZE(back_buffer_modes), + msaa_samples_array, + ARRAY_SIZE(msaa_samples_array)); + if (configs == NULL) + configs = new_configs; + else + configs = driConcatConfigs(configs, new_configs); + } + + if (configs == NULL) { + fprintf(stderr, "[%s:%u] Error creating FBConfig!\n", __func__, + __LINE__); + return NULL; + } + + return (const __DRIconfig **)configs; } const struct __DriverAPIRec driDriverAPI = { diff --git a/shared/intel_screen.h b/shared/intel_screen.h index e1036de..a9b9e10 100644 --- a/shared/intel_screen.h +++ b/shared/intel_screen.h @@ -79,6 +79,7 @@ typedef struct GLboolean no_vbo; int ttm; dri_bufmgr *bufmgr; + GLboolean kernel_exec_fencing; /** * Configuration cache with default values for all contexts diff --git a/shared/intel_span.c b/shared/intel_span.c index d931504..34b78eb 100644 --- a/shared/intel_span.c +++ b/shared/intel_span.c @@ -29,6 +29,7 @@ #include "main/macros.h" #include "main/mtypes.h" #include "main/colormac.h" +#include "main/texformat.h" #include "intel_buffers.h" #include "intel_fbo.h" @@ -131,6 +132,18 @@ pwrite_8(struct intel_renderbuffer *irb, uint32_t offset, uint8_t val) dri_bo_subdata(irb->region->buffer, offset, 1, &val); } +static uint32_t +z24s8_to_s8z24(uint32_t val) +{ + return (val << 24) | (val >> 8); +} + +static uint32_t +s8z24_to_z24s8(uint32_t val) +{ + return (val >> 24) | (val << 8); +} + static uint32_t no_tile_swizzle(struct intel_renderbuffer *irb, int x, int y) { @@ -150,7 +163,7 @@ static uint32_t x_tile_swizzle(struct intel_renderbuffer *irb, int x_tile_number, y_tile_number; int tile_off, tile_base; - tile_stride = (irb->pfPitch * irb->region->cpp) << 3; + tile_stride = (irb->region->pitch * irb->region->cpp) << 3; xbyte = x * irb->region->cpp; @@ -190,7 +203,7 @@ static uint32_t x_tile_swizzle(struct intel_renderbuffer *irb, printf("(%d,%d) -> %d + %d = %d (pitch = %d, tstride = %d)\n", x, y, tile_off, tile_base, tile_off + tile_base, - irb->pfPitch, tile_stride); + irb->region->pitch, tile_stride); #endif return tile_base + tile_off; @@ -205,7 +218,7 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, int x_tile_number, y_tile_number; int tile_off, tile_base; - tile_stride = (irb->pfPitch * irb->region->cpp) << 5; + tile_stride = (irb->region->pitch * irb->region->cpp) << 5; xbyte = x * irb->region->cpp; @@ -255,8 +268,8 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, #define LOCAL_VARS \ struct intel_context *intel = intel_context(ctx); \ struct intel_renderbuffer *irb = intel_renderbuffer(rb); \ - const GLint yScale = irb->RenderToTexture ? 1 : -1; \ - const GLint yBias = irb->RenderToTexture ? 0 : irb->Base.Height - 1; \ + const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1; \ + const GLint yBias = ctx->DrawBuffer->Name ? 0 : irb->Base.Height - 1;\ unsigned int num_cliprects; \ struct drm_clip_rect *cliprects; \ int x_off, y_off; \ @@ -293,107 +306,51 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, #define X_TILE(_X, _Y) x_tile_swizzle(irb, (_X) + x_off, (_Y) + y_off) #define Y_TILE(_X, _Y) y_tile_swizzle(irb, (_X) + x_off, (_Y) + y_off) -/* 16 bit, RGB565 color spanline and pixel functions - */ -#define SPANTMP_PIXEL_FMT GL_RGB -#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5 - -#define TAG(x) intel##x##_RGB565 -#define TAG2(x,y) intel##x##_RGB565##y -#define GET_VALUE(X, Y) pread_16(irb, NO_TILE(X, Y)) -#define PUT_VALUE(X, Y, V) pwrite_16(irb, NO_TILE(X, Y), V) -#include "spantmp2.h" - -/* 32 bit, ARGB8888 color spanline and pixel functions - */ -#define SPANTMP_PIXEL_FMT GL_BGRA -#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV - -#define TAG(x) intel##x##_ARGB8888 -#define TAG2(x,y) intel##x##_ARGB8888##y -#define GET_VALUE(X, Y) pread_32(irb, NO_TILE(X, Y)) -#define PUT_VALUE(X, Y, V) pwrite_32(irb, NO_TILE(X, Y), V) -#include "spantmp2.h" - -/* 32 bit, xRGB8888 color spanline and pixel functions - */ -#define SPANTMP_PIXEL_FMT GL_BGRA -#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV - -#define TAG(x) intel##x##_xRGB8888 -#define TAG2(x,y) intel##x##_xRGB8888##y -#define GET_VALUE(X, Y) pread_xrgb8888(irb, NO_TILE(X, Y)) -#define PUT_VALUE(X, Y, V) pwrite_xrgb8888(irb, NO_TILE(X, Y), V) -#include "spantmp2.h" - -/* 16 bit RGB565 color tile spanline and pixel functions - */ - -#define SPANTMP_PIXEL_FMT GL_RGB -#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5 - -#define TAG(x) intel_XTile_##x##_RGB565 -#define TAG2(x,y) intel_XTile_##x##_RGB565##y -#define GET_VALUE(X, Y) pread_16(irb, X_TILE(X, Y)) -#define PUT_VALUE(X, Y, V) pwrite_16(irb, X_TILE(X, Y), V) -#include "spantmp2.h" - -#define SPANTMP_PIXEL_FMT GL_RGB -#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5 - -#define TAG(x) intel_YTile_##x##_RGB565 -#define TAG2(x,y) intel_YTile_##x##_RGB565##y -#define GET_VALUE(X, Y) pread_16(irb, Y_TILE(X, Y)) -#define PUT_VALUE(X, Y, V) pwrite_16(irb, Y_TILE(X, Y), V) -#include "spantmp2.h" - -/* 32 bit ARGB888 color tile spanline and pixel functions - */ - -#define SPANTMP_PIXEL_FMT GL_BGRA -#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV - -#define TAG(x) intel_XTile_##x##_ARGB8888 -#define TAG2(x,y) intel_XTile_##x##_ARGB8888##y -#define GET_VALUE(X, Y) pread_32(irb, X_TILE(X, Y)) -#define PUT_VALUE(X, Y, V) pwrite_32(irb, X_TILE(X, Y), V) -#include "spantmp2.h" - -#define SPANTMP_PIXEL_FMT GL_BGRA -#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV - -#define TAG(x) intel_YTile_##x##_ARGB8888 -#define TAG2(x,y) intel_YTile_##x##_ARGB8888##y -#define GET_VALUE(X, Y) pread_32(irb, Y_TILE(X, Y)) -#define PUT_VALUE(X, Y, V) pwrite_32(irb, Y_TILE(X, Y), V) -#include "spantmp2.h" - -/* 32 bit xRGB888 color tile spanline and pixel functions - */ - -#define SPANTMP_PIXEL_FMT GL_BGRA -#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV - -#define TAG(x) intel_XTile_##x##_xRGB8888 -#define TAG2(x,y) intel_XTile_##x##_xRGB8888##y -#define GET_VALUE(X, Y) pread_xrgb8888(irb, X_TILE(X, Y)) -#define PUT_VALUE(X, Y, V) pwrite_xrgb8888(irb, X_TILE(X, Y), V) -#include "spantmp2.h" - -#define SPANTMP_PIXEL_FMT GL_BGRA -#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV - -#define TAG(x) intel_YTile_##x##_xRGB8888 -#define TAG2(x,y) intel_YTile_##x##_xRGB8888##y -#define GET_VALUE(X, Y) pread_xrgb8888(irb, Y_TILE(X, Y)) -#define PUT_VALUE(X, Y, V) pwrite_xrgb8888(irb, Y_TILE(X, Y), V) -#include "spantmp2.h" +/* r5g6b5 color span and pixel functions */ +#define INTEL_PIXEL_FMT GL_RGB +#define INTEL_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5 +#define INTEL_READ_VALUE(offset) pread_16(irb, offset) +#define INTEL_WRITE_VALUE(offset, v) pwrite_16(irb, offset, v) +#define INTEL_TAG(x) x##_RGB565 +#include "intel_spantmp.h" + +/* a4r4g4b4 color span and pixel functions */ +#define INTEL_PIXEL_FMT GL_BGRA +#define INTEL_PIXEL_TYPE GL_UNSIGNED_SHORT_4_4_4_4_REV +#define INTEL_READ_VALUE(offset) pread_16(irb, offset) +#define INTEL_WRITE_VALUE(offset, v) pwrite_16(irb, offset, v) +#define INTEL_TAG(x) x##_ARGB4444 +#include "intel_spantmp.h" + +/* a1r5g5b5 color span and pixel functions */ +#define INTEL_PIXEL_FMT GL_BGRA +#define INTEL_PIXEL_TYPE GL_UNSIGNED_SHORT_1_5_5_5_REV +#define INTEL_READ_VALUE(offset) pread_16(irb, offset) +#define INTEL_WRITE_VALUE(offset, v) pwrite_16(irb, offset, v) +#define INTEL_TAG(x) x##_ARGB1555 +#include "intel_spantmp.h" + +/* a8r8g8b8 color span and pixel functions */ +#define INTEL_PIXEL_FMT GL_BGRA +#define INTEL_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV +#define INTEL_READ_VALUE(offset) pread_32(irb, offset) +#define INTEL_WRITE_VALUE(offset, v) pwrite_32(irb, offset, v) +#define INTEL_TAG(x) x##_ARGB8888 +#include "intel_spantmp.h" + +/* x8r8g8b8 color span and pixel functions */ +#define INTEL_PIXEL_FMT GL_BGRA +#define INTEL_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV +#define INTEL_READ_VALUE(offset) pread_xrgb8888(irb, offset) +#define INTEL_WRITE_VALUE(offset, v) pwrite_xrgb8888(irb, offset, v) +#define INTEL_TAG(x) x##_xRGB8888 +#include "intel_spantmp.h" #define LOCAL_DEPTH_VARS \ struct intel_context *intel = intel_context(ctx); \ struct intel_renderbuffer *irb = intel_renderbuffer(rb); \ - const GLint yScale = irb->RenderToTexture ? 1 : -1; \ - const GLint yBias = irb->RenderToTexture ? 0 : irb->Base.Height - 1; \ + const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1; \ + const GLint yBias = ctx->DrawBuffer->Name ? 0 : irb->Base.Height - 1;\ unsigned int num_cliprects; \ struct drm_clip_rect *cliprects; \ int x_off, y_off; \ @@ -402,98 +359,26 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, #define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS -/** - ** 16-bit depthbuffer functions. - **/ -#define VALUE_TYPE GLushort -#define WRITE_DEPTH(_x, _y, d) pwrite_16(irb, NO_TILE(_x, _y), d) -#define READ_DEPTH(d, _x, _y) d = pread_16(irb, NO_TILE(_x, _y)) -#define TAG(x) intel##x##_z16 -#include "depthtmp.h" - - -/** - ** 16-bit x tile depthbuffer functions. - **/ -#define VALUE_TYPE GLushort -#define WRITE_DEPTH(_x, _y, d) pwrite_16(irb, X_TILE(_x, _y), d) -#define READ_DEPTH(d, _x, _y) d = pread_16(irb, X_TILE(_x, _y)) -#define TAG(x) intel_XTile_##x##_z16 -#include "depthtmp.h" - -/** - ** 16-bit y tile depthbuffer functions. - **/ -#define VALUE_TYPE GLushort -#define WRITE_DEPTH(_x, _y, d) pwrite_16(irb, Y_TILE(_x, _y), d) -#define READ_DEPTH(d, _x, _y) d = pread_16(irb, Y_TILE(_x, _y)) -#define TAG(x) intel_YTile_##x##_z16 -#include "depthtmp.h" - - -/** - ** 24/8-bit interleaved depth/stencil functions - ** Note: we're actually reading back combined depth+stencil values. - ** The wrappers in main/depthstencil.c are used to extract the depth - ** and stencil values. - **/ -#define VALUE_TYPE GLuint - -/* Change ZZZS -> SZZZ */ -#define WRITE_DEPTH(_x, _y, d) \ - pwrite_32(irb, NO_TILE(_x, _y), ((d) >> 8) | ((d) << 24)) - -/* Change SZZZ -> ZZZS */ -#define READ_DEPTH( d, _x, _y ) { \ - GLuint tmp = pread_32(irb, NO_TILE(_x, _y)); \ - d = (tmp << 8) | (tmp >> 24); \ -} - -#define TAG(x) intel##x##_z24_s8 -#include "depthtmp.h" - - -/** - ** 24/8-bit x-tile interleaved depth/stencil functions - ** Note: we're actually reading back combined depth+stencil values. - ** The wrappers in main/depthstencil.c are used to extract the depth - ** and stencil values. - **/ -#define VALUE_TYPE GLuint +/* z16 depthbuffer functions. */ +#define INTEL_VALUE_TYPE GLushort +#define INTEL_WRITE_DEPTH(offset, d) pwrite_16(irb, offset, d) +#define INTEL_READ_DEPTH(offset) pread_16(irb, offset) +#define INTEL_TAG(name) name##_z16 +#include "intel_depthtmp.h" -/* Change ZZZS -> SZZZ */ -#define WRITE_DEPTH(_x, _y, d) \ - pwrite_32(irb, X_TILE(_x, _y), ((d) >> 8) | ((d) << 24)) +/* z24 depthbuffer functions. */ +#define INTEL_VALUE_TYPE GLuint +#define INTEL_WRITE_DEPTH(offset, d) pwrite_32(irb, offset, d) +#define INTEL_READ_DEPTH(offset) pread_32(irb, offset) +#define INTEL_TAG(name) name##_z24 +#include "intel_depthtmp.h" -/* Change SZZZ -> ZZZS */ -#define READ_DEPTH( d, _x, _y ) { \ - GLuint tmp = pread_32(irb, X_TILE(_x, _y)); \ - d = (tmp << 8) | (tmp >> 24); \ -} - -#define TAG(x) intel_XTile_##x##_z24_s8 -#include "depthtmp.h" - -/** - ** 24/8-bit y-tile interleaved depth/stencil functions - ** Note: we're actually reading back combined depth+stencil values. - ** The wrappers in main/depthstencil.c are used to extract the depth - ** and stencil values. - **/ -#define VALUE_TYPE GLuint - -/* Change ZZZS -> SZZZ */ -#define WRITE_DEPTH(_x, _y, d) \ - pwrite_32(irb, Y_TILE(_x, _y), ((d) >> 8) | ((d) << 24)) - -/* Change SZZZ -> ZZZS */ -#define READ_DEPTH( d, _x, _y ) { \ - GLuint tmp = pread_32(irb, Y_TILE(_x, _y)); \ - d = (tmp << 8) | (tmp >> 24); \ -} - -#define TAG(x) intel_YTile_##x##_z24_s8 -#include "depthtmp.h" +/* z24s8 depthbuffer functions. */ +#define INTEL_VALUE_TYPE GLuint +#define INTEL_WRITE_DEPTH(offset, d) pwrite_32(irb, offset, z24s8_to_s8z24(d)) +#define INTEL_READ_DEPTH(offset) s8z24_to_z24s8(pread_32(irb, offset)) +#define INTEL_TAG(name) name##_z24_s8 +#include "intel_depthtmp.h" /** @@ -528,8 +413,6 @@ intel_renderbuffer_map(struct intel_context *intel, struct gl_renderbuffer *rb) if (irb == NULL || irb->region == NULL) return; - irb->pfPitch = irb->region->pitch; - intel_set_span_functions(intel, rb); } @@ -543,7 +426,6 @@ intel_renderbuffer_unmap(struct intel_context *intel, return; clear_span_cache(irb); - irb->pfPitch = 0; rb->GetRow = NULL; rb->PutRow = NULL; @@ -696,8 +578,8 @@ intel_set_span_functions(struct intel_context *intel, else tiling = I915_TILING_NONE; - if (rb->_ActualFormat == GL_RGB5) { - /* 565 RGB */ + switch (irb->texformat->MesaFormat) { + case MESA_FORMAT_RGB565: switch (tiling) { case I915_TILING_NONE: default: @@ -710,38 +592,67 @@ intel_set_span_functions(struct intel_context *intel, intel_YTile_InitPointers_RGB565(rb); break; } - } - else if (rb->_ActualFormat == GL_RGB8) { - /* 8888 RGBx */ + break; + case MESA_FORMAT_ARGB4444: switch (tiling) { case I915_TILING_NONE: default: - intelInitPointers_xRGB8888(rb); + intelInitPointers_ARGB4444(rb); break; case I915_TILING_X: - intel_XTile_InitPointers_xRGB8888(rb); + intel_XTile_InitPointers_ARGB4444(rb); break; case I915_TILING_Y: - intel_YTile_InitPointers_xRGB8888(rb); + intel_YTile_InitPointers_ARGB4444(rb); break; } - } - else if (rb->_ActualFormat == GL_RGBA8) { - /* 8888 RGBA */ + break; + case MESA_FORMAT_ARGB1555: switch (tiling) { case I915_TILING_NONE: default: - intelInitPointers_ARGB8888(rb); + intelInitPointers_ARGB1555(rb); break; case I915_TILING_X: - intel_XTile_InitPointers_ARGB8888(rb); + intel_XTile_InitPointers_ARGB1555(rb); break; case I915_TILING_Y: - intel_YTile_InitPointers_ARGB8888(rb); + intel_YTile_InitPointers_ARGB1555(rb); break; } - } - else if (rb->_ActualFormat == GL_DEPTH_COMPONENT16) { + break; + case MESA_FORMAT_ARGB8888: + if (rb->AlphaBits == 0) { /* XXX: Need xRGB8888 Mesa format */ + /* 8888 RGBx */ + switch (tiling) { + case I915_TILING_NONE: + default: + intelInitPointers_xRGB8888(rb); + break; + case I915_TILING_X: + intel_XTile_InitPointers_xRGB8888(rb); + break; + case I915_TILING_Y: + intel_YTile_InitPointers_xRGB8888(rb); + break; + } + } else { + /* 8888 RGBA */ + switch (tiling) { + case I915_TILING_NONE: + default: + intelInitPointers_ARGB8888(rb); + break; + case I915_TILING_X: + intel_XTile_InitPointers_ARGB8888(rb); + break; + case I915_TILING_Y: + intel_YTile_InitPointers_ARGB8888(rb); + break; + } + } + break; + case MESA_FORMAT_Z16: switch (tiling) { case I915_TILING_NONE: default: @@ -754,38 +665,57 @@ intel_set_span_functions(struct intel_context *intel, intel_YTile_InitDepthPointers_z16(rb); break; } - } - else if (rb->_ActualFormat == GL_DEPTH_COMPONENT24 || /* XXX FBO remove */ - rb->_ActualFormat == GL_DEPTH24_STENCIL8_EXT) { - switch (tiling) { - case I915_TILING_NONE: - default: - intelInitDepthPointers_z24_s8(rb); - break; - case I915_TILING_X: - intel_XTile_InitDepthPointers_z24_s8(rb); - break; - case I915_TILING_Y: - intel_YTile_InitDepthPointers_z24_s8(rb); - break; + break; + case MESA_FORMAT_S8_Z24: + /* There are a few different ways SW asks us to access the S8Z24 data: + * Z24 depth-only depth reads + * S8Z24 depth reads + * S8Z24 stencil reads. + */ + if (rb->_ActualFormat == GL_DEPTH_COMPONENT24) { + switch (tiling) { + case I915_TILING_NONE: + default: + intelInitDepthPointers_z24(rb); + break; + case I915_TILING_X: + intel_XTile_InitDepthPointers_z24(rb); + break; + case I915_TILING_Y: + intel_YTile_InitDepthPointers_z24(rb); + break; + } + } else if (rb->_ActualFormat == GL_DEPTH24_STENCIL8_EXT) { + switch (tiling) { + case I915_TILING_NONE: + default: + intelInitDepthPointers_z24_s8(rb); + break; + case I915_TILING_X: + intel_XTile_InitDepthPointers_z24_s8(rb); + break; + case I915_TILING_Y: + intel_YTile_InitDepthPointers_z24_s8(rb); + break; + } + } else if (rb->_ActualFormat == GL_STENCIL_INDEX8_EXT) { + switch (tiling) { + case I915_TILING_NONE: + default: + intelInitStencilPointers_z24_s8(rb); + break; + case I915_TILING_X: + intel_XTile_InitStencilPointers_z24_s8(rb); + break; + case I915_TILING_Y: + intel_YTile_InitStencilPointers_z24_s8(rb); + break; + } } - } - else if (rb->_ActualFormat == GL_STENCIL_INDEX8_EXT) { - switch (tiling) { - case I915_TILING_NONE: - default: - intelInitStencilPointers_z24_s8(rb); - break; - case I915_TILING_X: - intel_XTile_InitStencilPointers_z24_s8(rb); - break; - case I915_TILING_Y: - intel_YTile_InitStencilPointers_z24_s8(rb); - break; - } - } - else { + break; + default: _mesa_problem(NULL, - "Unexpected _ActualFormat in intelSetSpanFunctions"); + "Unexpected MesaFormat in intelSetSpanFunctions"); + break; } } diff --git a/shared/intel_spantmp.h b/shared/intel_spantmp.h new file mode 100644 index 0000000..ead0b1c --- /dev/null +++ b/shared/intel_spantmp.h @@ -0,0 +1,61 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +/** + * Wrapper around the spantmp.h macrofest to generate spans code for + * all the tiling styles. + */ + +#define SPANTMP_PIXEL_FMT INTEL_PIXEL_FMT +#define SPANTMP_PIXEL_TYPE INTEL_PIXEL_TYPE +#define PUT_VALUE(_x, _y, v) INTEL_WRITE_VALUE(NO_TILE(_x, _y), v) +#define GET_VALUE(_x, _y) INTEL_READ_VALUE(NO_TILE(_x, _y)) +#define TAG(x) INTEL_TAG(intel##x) +#define TAG2(x, y) INTEL_TAG(intel##x)##y +#include "spantmp2.h" + +#define SPANTMP_PIXEL_FMT INTEL_PIXEL_FMT +#define SPANTMP_PIXEL_TYPE INTEL_PIXEL_TYPE +#define PUT_VALUE(_x, _y, v) INTEL_WRITE_VALUE(X_TILE(_x, _y), v) +#define GET_VALUE(_x, _y) INTEL_READ_VALUE(X_TILE(_x, _y)) +#define TAG(x) INTEL_TAG(intel_XTile_##x) +#define TAG2(x, y) INTEL_TAG(intel_XTile_##x)##y +#include "spantmp2.h" + +#define SPANTMP_PIXEL_FMT INTEL_PIXEL_FMT +#define SPANTMP_PIXEL_TYPE INTEL_PIXEL_TYPE +#define PUT_VALUE(_x, _y, v) INTEL_WRITE_VALUE(X_TILE(_x, _y), v) +#define GET_VALUE(_x, _y) INTEL_READ_VALUE(X_TILE(_x, _y)) +#define TAG(x) INTEL_TAG(intel_YTile_##x) +#define TAG2(x, y) INTEL_TAG(intel_YTile_##x)##y +#include "spantmp2.h" + +#undef INTEL_PIXEL_FMT +#undef INTEL_PIXEL_TYPE +#undef INTEL_WRITE_VALUE +#undef INTEL_READ_VALUE +#undef INTEL_TAG diff --git a/i915/intel_state.c b/shared/intel_state.c index 8d96e9b..4ee7423 100644 --- a/i915/intel_state.c +++ b/shared/intel_state.c @@ -38,30 +38,30 @@ #include "intel_regions.h" #include "swrast/swrast.h" -int -intel_translate_shadow_compare_func( GLenum func ) +int +intel_translate_shadow_compare_func(GLenum func) { - switch(func) { + switch (func) { case GL_NEVER: - return COMPAREFUNC_ALWAYS; + return COMPAREFUNC_ALWAYS; case GL_LESS: - return COMPAREFUNC_LEQUAL; + return COMPAREFUNC_LEQUAL; case GL_LEQUAL: return COMPAREFUNC_LESS; case GL_GREATER: - return COMPAREFUNC_GEQUAL; + return COMPAREFUNC_GEQUAL; case GL_GEQUAL: - return COMPAREFUNC_GREATER; + return COMPAREFUNC_GREATER; case GL_NOTEQUAL: - return COMPAREFUNC_EQUAL; + return COMPAREFUNC_EQUAL; case GL_EQUAL: - return COMPAREFUNC_NOTEQUAL; + return COMPAREFUNC_NOTEQUAL; case GL_ALWAYS: - return COMPAREFUNC_NEVER; + return COMPAREFUNC_NEVER; } fprintf(stderr, "Unknown value in %s: %x\n", __FUNCTION__, func); - return COMPAREFUNC_NEVER; + return COMPAREFUNC_NEVER; } int @@ -198,7 +198,7 @@ intel_translate_logic_op(GLenum opcode) static void -intelClearColor(GLcontext * ctx, const GLfloat color[4]) +intelClearColor(GLcontext *ctx, const GLfloat color[4]) { struct intel_context *intel = intel_context(ctx); GLubyte clear[4]; @@ -218,7 +218,7 @@ intelClearColor(GLcontext * ctx, const GLfloat color[4]) /* Fallback to swrast for select and feedback. */ static void -intelRenderMode(GLcontext * ctx, GLenum mode) +intelRenderMode(GLcontext *ctx, GLenum mode) { struct intel_context *intel = intel_context(ctx); FALLBACK(intel, INTEL_FALLBACK_RENDERMODE, (mode != GL_RENDER)); diff --git a/shared/intel_swapbuffers.c b/shared/intel_swapbuffers.c index c135166..7d035b9 100644 --- a/shared/intel_swapbuffers.c +++ b/shared/intel_swapbuffers.c @@ -43,7 +43,6 @@ GLuint intelFixupVblank(struct intel_context *intel, __DRIdrawablePrivate *dPriv) { - if (!intel->intelScreen->driScrnPriv->dri2.enabled && intel->intelScreen->driScrnPriv->ddx_version.minor >= 7) { volatile drm_i915_sarea_t *sarea = intel->sarea; @@ -77,11 +76,14 @@ intelFixupVblank(struct intel_context *intel, __DRIdrawablePrivate *dPriv) return flags; } else { - return dPriv->vblFlags & ~VBLANK_FLAG_SECONDARY; + return dPriv->vblFlags & ~VBLANK_FLAG_SECONDARY; } } +/** + * Called from driSwapBuffers() + */ void intelSwapBuffers(__DRIdrawablePrivate * dPriv) { @@ -130,7 +132,6 @@ intelSwapBuffers(__DRIdrawablePrivate * dPriv) intel_fb->swap_ust = ust; } drmCommandNone(intel->driFd, DRM_I915_GEM_THROTTLE); - } else { /* XXX this shouldn't be an error but we can't handle it for now */ @@ -138,6 +139,10 @@ intelSwapBuffers(__DRIdrawablePrivate * dPriv) } } + +/** + * Called from driCopySubBuffer() + */ void intelCopySubBuffer(__DRIdrawablePrivate * dPriv, int x, int y, int w, int h) { diff --git a/shared/intel_tex.c b/shared/intel_tex.c index e64d8a1..ae0994b 100644 --- a/shared/intel_tex.c +++ b/shared/intel_tex.c @@ -93,7 +93,7 @@ intelFreeTextureImageData(GLcontext * ctx, struct gl_texture_image *texImage) static void * do_memcpy(void *dest, const void *src, size_t n) { - if ((((unsigned) src) & 63) || (((unsigned) dest) & 63)) { + if ((((unsigned long) src) & 63) || (((unsigned long) dest) & 63)) { return __memcpy(dest, src, n); } else diff --git a/shared/intel_tex.h b/shared/intel_tex.h index 742ccc0..f5372d8 100644 --- a/shared/intel_tex.h +++ b/shared/intel_tex.h @@ -149,6 +149,8 @@ void intelSetTexOffset(__DRIcontext *pDRICtx, GLint texname, unsigned long long offset, GLint depth, GLuint pitch); void intelSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *pDraw); +void intelSetTexBuffer2(__DRIcontext *pDRICtx, + GLint target, GLint format, __DRIdrawable *pDraw); GLuint intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit); diff --git a/shared/intel_tex_copy.c b/shared/intel_tex_copy.c index a7143b8..90bbb8c 100644 --- a/shared/intel_tex_copy.c +++ b/shared/intel_tex_copy.c @@ -104,7 +104,7 @@ do_copy_texsubimage(struct intel_context *intel, return GL_FALSE; } - intel_glFlush(ctx); + intelFlush(ctx); LOCK_HARDWARE(intel); { GLuint image_offset = intel_miptree_image_offset(intelImage->mt, @@ -155,7 +155,6 @@ do_copy_texsubimage(struct intel_context *intel, } UNLOCK_HARDWARE(intel); - intel_glFlush(ctx); /* GL_SGIS_generate_mipmap */ if (intelImage->level == texObj->BaseLevel && texObj->GenerateMipmap) { @@ -232,6 +231,14 @@ intelCopyTexImage2D(GLcontext * ctx, GLenum target, GLint level, if (border) goto fail; + /* Setup or redefine the texture object, mipmap tree and texture + * image. Don't populate yet. + */ + ctx->Driver.TexImage2D(ctx, target, level, internalFormat, + width, height, border, + GL_RGBA, CHAN_TYPE, NULL, + &ctx->DefaultPacking, texObj, texImage); + srcx = x; srcy = y; dstx = 0; @@ -242,15 +249,6 @@ intelCopyTexImage2D(GLcontext * ctx, GLenum target, GLint level, &width, &height)) return; - /* Setup or redefine the texture object, mipmap tree and texture - * image. Don't populate yet. - */ - ctx->Driver.TexImage2D(ctx, target, level, internalFormat, - width, height, border, - GL_RGBA, CHAN_TYPE, NULL, - &ctx->DefaultPacking, texObj, texImage); - - if (!do_copy_texsubimage(intel_context(ctx), target, intel_texture_image(texImage), internalFormat, 0, 0, x, y, width, height)) diff --git a/shared/intel_tex_format.c b/shared/intel_tex_format.c index 5e418ac..3322a71 100644 --- a/shared/intel_tex_format.c +++ b/shared/intel_tex_format.c @@ -1,13 +1,18 @@ #include "intel_context.h" #include "intel_tex.h" +#include "intel_chipset.h" #include "main/texformat.h" #include "main/enums.h" -/* It works out that this function is fine for all the supported + +/** + * Choose hardware texture format given the user's glTexImage parameters. + * + * It works out that this function is fine for all the supported * hardware. However, there is still a need to map the formats onto * hardware descriptors. - */ -/* Note that the i915 can actually support many more formats than + * + * Note that the i915 can actually support many more formats than * these if we take the step of simply swizzling the colors * immediately after sampling... */ @@ -16,7 +21,12 @@ intelChooseTextureFormat(GLcontext * ctx, GLint internalFormat, GLenum format, GLenum type) { struct intel_context *intel = intel_context(ctx); - const GLboolean do32bpt = (intel->ctx.Visual.rgbBits == 32); + const GLboolean do32bpt = (intel->ctx.Visual.rgbBits >= 24); + +#if 0 + printf("%s intFmt=0x%x format=0x%x type=0x%x\n", + __FUNCTION__, internalFormat, format, type); +#endif switch (internalFormat) { case 4: @@ -151,20 +161,36 @@ intelChooseTextureFormat(GLcontext * ctx, GLint internalFormat, case GL_SRGB8_EXT: case GL_SRGB_ALPHA_EXT: case GL_SRGB8_ALPHA8_EXT: - case GL_SLUMINANCE_EXT: - case GL_SLUMINANCE8_EXT: - case GL_SLUMINANCE_ALPHA_EXT: - case GL_SLUMINANCE8_ALPHA8_EXT: case GL_COMPRESSED_SRGB_EXT: case GL_COMPRESSED_SRGB_ALPHA_EXT: case GL_COMPRESSED_SLUMINANCE_EXT: case GL_COMPRESSED_SLUMINANCE_ALPHA_EXT: - return &_mesa_texformat_srgba8; + return &_mesa_texformat_sargb8; + case GL_SLUMINANCE_EXT: + case GL_SLUMINANCE8_EXT: + if (IS_G4X(intel->intelScreen->deviceID)) + return &_mesa_texformat_sl8; + else + return &_mesa_texformat_sargb8; + case GL_SLUMINANCE_ALPHA_EXT: + case GL_SLUMINANCE8_ALPHA8_EXT: + if (IS_G4X(intel->intelScreen->deviceID)) + return &_mesa_texformat_sla8; + else + return &_mesa_texformat_sargb8; case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT: case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT: case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT: case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT: return &_mesa_texformat_srgb_dxt1; + + /* i915 could also do this */ + case GL_DUDV_ATI: + case GL_DU8DV8_ATI: + return &_mesa_texformat_dudv8; + case GL_RGBA_SNORM: + case GL_RGBA8_SNORM: + return &_mesa_texformat_signed_rgba8888_rev; #endif default: diff --git a/shared/intel_tex_image.c b/shared/intel_tex_image.c index 90894f9..5e61e9e 100644 --- a/shared/intel_tex_image.c +++ b/shared/intel_tex_image.c @@ -12,6 +12,7 @@ #include "main/simple_list.h" #include "main/texcompress.h" #include "main/texformat.h" +#include "main/texgetimage.h" #include "main/texobj.h" #include "main/texstore.h" #include "main/teximage.h" @@ -207,11 +208,12 @@ try_pbo_upload(struct intel_context *intel, if (!pbo || intel->ctx._ImageTransferState || unpack->SkipPixels || unpack->SkipRows) { - _mesa_printf("%s: failure 1\n", __FUNCTION__); + DBG("%s: failure 1\n", __FUNCTION__); return GL_FALSE; } - src_offset = (GLuint) pixels; + /* note: potential 64-bit ptr to 32-bit int cast */ + src_offset = (GLuint) (unsigned long) pixels; if (unpack->RowLength > 0) src_stride = unpack->RowLength; @@ -262,11 +264,12 @@ try_pbo_zcopy(struct intel_context *intel, if (!pbo || intel->ctx._ImageTransferState || unpack->SkipPixels || unpack->SkipRows) { - _mesa_printf("%s: failure 1\n", __FUNCTION__); + DBG("%s: failure 1\n", __FUNCTION__); return GL_FALSE; } - src_offset = (GLuint) pixels; + /* note: potential 64-bit ptr to 32-bit int cast */ + src_offset = (GLuint) (unsigned long) pixels; if (unpack->RowLength > 0) src_stride = unpack->RowLength; @@ -280,7 +283,7 @@ try_pbo_zcopy(struct intel_context *intel, dst_stride = intelImage->mt->pitch; if (src_stride != dst_stride || dst_offset != 0 || src_offset != 0) { - _mesa_printf("%s: failure 2\n", __FUNCTION__); + DBG("%s: failure 2\n", __FUNCTION__); return GL_FALSE; } @@ -312,8 +315,8 @@ intelTexImage(GLcontext * ctx, GLint postConvWidth = width; GLint postConvHeight = height; GLint texelBytes, sizeInBytes; - GLuint dstRowStride, srcRowStride = texImage->RowStride; - + GLuint dstRowStride = 0, srcRowStride = texImage->RowStride; + GLboolean needs_map; DBG("%s target %s level %d %dx%dx%d border %d\n", __FUNCTION__, _mesa_lookup_enum_by_nr(target), level, width, height, depth, border); @@ -479,13 +482,21 @@ intelTexImage(GLcontext * ctx, LOCK_HARDWARE(intel); + /* Two cases where we need a mapping of the miptree: when the user supplied + * data is mapped as well (non-PBO, memcpy upload) or when we're going to do + * (software) mipmap generation. + */ + needs_map = (pixels != NULL) || (level == texObj->BaseLevel && + texObj->GenerateMipmap); + if (intelImage->mt) { - texImage->Data = intel_miptree_image_map(intel, - intelImage->mt, - intelImage->face, - intelImage->level, - &dstRowStride, - intelImage->base.ImageOffsets); + if (needs_map) + texImage->Data = intel_miptree_image_map(intel, + intelImage->mt, + intelImage->face, + intelImage->level, + &dstRowStride, + intelImage->base.ImageOffsets); texImage->RowStride = dstRowStride / intelImage->mt->cpp; } else { @@ -505,8 +516,9 @@ intelTexImage(GLcontext * ctx, } DBG("Upload image %dx%dx%d row_len %d " - "pitch %d\n", - width, height, depth, width * texelBytes, dstRowStride); + "pitch %d pixels %d compressed %d\n", + width, height, depth, width * texelBytes, dstRowStride, + pixels ? 1 : 0, compressed); /* Copy data. Would like to know when it's ok for us to eg. use * the blitter to copy. Or, use the hardware to do the format @@ -519,7 +531,7 @@ intelTexImage(GLcontext * ctx, _mesa_copy_rect(texImage->Data, dst->cpp, dst->pitch, 0, 0, intelImage->mt->level[level].width, - intelImage->mt->level[level].height/4, + (intelImage->mt->level[level].height+3)/4, pixels, srcRowStride, 0, 0); @@ -535,17 +547,18 @@ intelTexImage(GLcontext * ctx, format, type, pixels, unpack)) { _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage"); } - } - /* GL_SGIS_generate_mipmap */ - if (level == texObj->BaseLevel && texObj->GenerateMipmap) { - intel_generate_mipmap(ctx, target, texObj); + /* GL_SGIS_generate_mipmap */ + if (level == texObj->BaseLevel && texObj->GenerateMipmap) { + intel_generate_mipmap(ctx, target, texObj); + } } _mesa_unmap_teximage_pbo(ctx, unpack); if (intelImage->mt) { - intel_miptree_image_unmap(intel, intelImage->mt); + if (needs_map) + intel_miptree_image_unmap(intel, intelImage->mt); texImage->Data = NULL; } @@ -624,6 +637,12 @@ intel_get_tex_image(GLcontext * ctx, GLenum target, GLint level, struct intel_context *intel = intel_context(ctx); struct intel_texture_image *intelImage = intel_texture_image(texImage); + /* If we're reading from a texture that has been rendered to, need to + * make sure rendering is complete. + * We could probably predicate this on texObj->_RenderToTexture + */ + intelFlush(ctx); + /* Map */ if (intelImage->mt) { /* Image is stored in hardware format in a buffer managed by the @@ -712,7 +731,9 @@ intelSetTexOffset(__DRIcontext *pDRICtx, GLint texname, } void -intelSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv) +intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, + GLint glx_texture_format, + __DRIdrawable *dPriv) { struct intel_framebuffer *intel_fb = dPriv->driverPrivate; struct intel_context *intel = pDRICtx->driverPrivate; @@ -743,7 +764,10 @@ intelSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv) type = GL_BGRA; format = GL_UNSIGNED_BYTE; - internalFormat = (rb->region->cpp == 3 ? 3 : 4); + if (glx_texture_format == GLX_TEXTURE_FORMAT_RGB_EXT) + internalFormat = GL_RGB; + else + internalFormat = GL_RGBA; mt = intel_miptree_create_for_region(intel, target, internalFormat, @@ -783,3 +807,12 @@ intelSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv) _mesa_unlock_texture(&intel->ctx, texObj); } + +void +intelSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv) +{ + /* The old interface didn't have the format argument, so copy our + * implementation's behavior at the time. + */ + intelSetTexBuffer2(pDRICtx, target, GLX_TEXTURE_FORMAT_RGBA_EXT, dPriv); +} |