diff options
author | Luc Verhaegen <libv@skynet.be> | 2010-03-14 08:00:12 +0100 |
---|---|---|
committer | Luc Verhaegen <libv@skynet.be> | 2010-03-14 08:00:12 +0100 |
commit | c40fdff4f4a1c7f53c0c3cdff1ff8bce4f5e168f (patch) | |
tree | 5c1367ec9bfe97c076c11f01c02c21f60c0d39a3 | |
parent | 28d0497909299b17d9aff12a869cf8f8ae819097 (diff) |
Import radeon, r200 and r300 dri drivers from mesa 7.5.0.7.5.0
33 files changed, 776 insertions, 719 deletions
diff --git a/configure.ac b/configure.ac index a1e1b6d..4cadb87 100644 --- a/configure.ac +++ b/configure.ac @@ -1,7 +1,7 @@ # Process this file with autoconf to produce a configure script AC_PREREQ(2.57) -AC_INIT([mesa-dri-radeon], 7.4.0, [], mesa-dri-radeon) +AC_INIT([mesa-dri-radeon], 7.5.0, [], mesa-dri-radeon) AM_INIT_AUTOMAKE([dist-bzip2]) @@ -16,8 +16,8 @@ AC_PROG_CC AC_HEADER_STDC PKG_CHECK_MODULES([DRM], [libdrm >= 2.3.0]) -PKG_CHECK_MODULES([DRI], [libmesadri >= 7.4.0 libmesadri < 7.5.0 - libmesadricommon >= 7.4.0 libmesadricommon < 7.5.0]) +PKG_CHECK_MODULES([DRI], [libmesadri >= 7.5.0 libmesadri < 7.7.0 + libmesadricommon >= 7.5.0 libmesadricommon < 7.7.0]) AC_OUTPUT([ Makefile diff --git a/r200/r200_context.c b/r200/r200_context.c index 5531e0a..058296e 100644 --- a/r200/r200_context.c +++ b/r200/r200_context.c @@ -62,14 +62,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r200_maos.h" #include "r200_vertprog.h" -#define need_GL_ARB_multisample -#define need_GL_ARB_texture_compression -#define need_GL_ARB_vertex_buffer_object #define need_GL_ARB_vertex_program #define need_GL_ATI_fragment_shader #define need_GL_EXT_blend_minmax #define need_GL_EXT_fog_coord -#define need_GL_EXT_multi_draw_arrays #define need_GL_EXT_secondary_color #define need_GL_EXT_blend_equation_separate #define need_GL_EXT_blend_func_separate @@ -120,20 +116,16 @@ static const GLubyte *r200GetString( GLcontext *ctx, GLenum name ) */ const struct dri_extension card_extensions[] = { - { "GL_ARB_multisample", GL_ARB_multisample_functions }, { "GL_ARB_multitexture", NULL }, { "GL_ARB_texture_border_clamp", NULL }, - { "GL_ARB_texture_compression", GL_ARB_texture_compression_functions }, { "GL_ARB_texture_env_add", NULL }, { "GL_ARB_texture_env_combine", NULL }, { "GL_ARB_texture_env_dot3", NULL }, { "GL_ARB_texture_env_crossbar", NULL }, { "GL_ARB_texture_mirrored_repeat", NULL }, - { "GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions }, { "GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions }, { "GL_EXT_blend_subtract", NULL }, { "GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, - { "GL_EXT_multi_draw_arrays", GL_EXT_multi_draw_arrays_functions }, { "GL_EXT_secondary_color", GL_EXT_secondary_color_functions }, { "GL_EXT_stencil_wrap", NULL }, { "GL_EXT_texture_edge_clamp", NULL }, @@ -411,6 +403,10 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual, ctx->Const.VertexProgram.MaxNativeParameters = R200_VSF_MAX_PARAM; ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; + ctx->Const.MaxDrawBuffers = 1; + + _mesa_set_mvp_with_dp4( ctx, GL_TRUE ); + /* Initialize the software rasterizer and helper modules. */ _swrast_CreateContext( ctx ); diff --git a/r200/r200_pixel.c b/r200/r200_pixel.c index be68821..2797cbb 100644 --- a/r200/r200_pixel.c +++ b/r200/r200_pixel.c @@ -87,7 +87,7 @@ check_color_per_fragment_ops( const GLcontext *ctx ) ctx->Depth.Test || ctx->Fog.Enabled || ctx->Scissor.Enabled || - ctx->Stencil.Enabled || + ctx->Stencil._Enabled || !ctx->Color.ColorMask[0] || !ctx->Color.ColorMask[1] || !ctx->Color.ColorMask[2] || diff --git a/r200/r200_state.c b/r200/r200_state.c index 0eaaaf6..2fcc87c 100644 --- a/r200/r200_state.c +++ b/r200/r200_state.c @@ -1210,7 +1210,7 @@ void r200UpdateMaterial( GLcontext *ctx ) * _VP_inf_norm * _h_inf_norm * _Position - * _NormDirection + * _NormSpotDirection * _ModelViewInvScale * _NeedEyeCoords * _EyeZDir @@ -1267,9 +1267,9 @@ static void update_light( GLcontext *ctx ) fcmd[LIT_DIRECTION_W] = 0; } else { COPY_4V( &fcmd[LIT_POSITION_X], l->_Position ); - fcmd[LIT_DIRECTION_X] = -l->_NormDirection[0]; - fcmd[LIT_DIRECTION_Y] = -l->_NormDirection[1]; - fcmd[LIT_DIRECTION_Z] = -l->_NormDirection[2]; + fcmd[LIT_DIRECTION_X] = -l->_NormSpotDirection[0]; + fcmd[LIT_DIRECTION_Y] = -l->_NormSpotDirection[1]; + fcmd[LIT_DIRECTION_Z] = -l->_NormSpotDirection[2]; fcmd[LIT_DIRECTION_W] = 0; } diff --git a/r200/r200_tex.c b/r200/r200_tex.c index 5a4db33..259f35a 100644 --- a/r200/r200_tex.c +++ b/r200/r200_tex.c @@ -267,8 +267,13 @@ static void r200SetTexFilter( r200TexObjPtr t, GLenum minf, GLenum magf ) } } -static void r200SetTexBorderColor( r200TexObjPtr t, GLubyte c[4] ) +static void r200SetTexBorderColor( r200TexObjPtr t, const GLfloat color[4] ) { + GLubyte c[4]; + CLAMPED_FLOAT_TO_UBYTE(c[0], color[0]); + CLAMPED_FLOAT_TO_UBYTE(c[1], color[1]); + CLAMPED_FLOAT_TO_UBYTE(c[2], color[2]); + CLAMPED_FLOAT_TO_UBYTE(c[3], color[3]); t->pp_border_color = r200PackColor( 4, c[0], c[1], c[2], c[3] ); } @@ -301,7 +306,7 @@ static r200TexObjPtr r200AllocTexObj( struct gl_texture_object *texObj ) r200SetTexWrap( t, texObj->WrapS, texObj->WrapT, texObj->WrapR ); r200SetTexMaxAnisotropy( t, texObj->MaxAnisotropy ); r200SetTexFilter( t, texObj->MinFilter, texObj->MagFilter ); - r200SetTexBorderColor( t, texObj->_BorderChan ); + r200SetTexBorderColor( t, texObj->BorderColor ); } return t; @@ -1056,7 +1061,7 @@ static void r200TexParameter( GLcontext *ctx, GLenum target, break; case GL_TEXTURE_BORDER_COLOR: - r200SetTexBorderColor( t, texObj->_BorderChan ); + r200SetTexBorderColor( t, texObj->BorderColor ); break; case GL_TEXTURE_BASE_LEVEL: diff --git a/r200/r200_texstate.c b/r200/r200_texstate.c index 3f9a2f4..0ad5651 100644 --- a/r200/r200_texstate.c +++ b/r200/r200_texstate.c @@ -1366,27 +1366,27 @@ static GLboolean r200_validate_texgen( GLcontext *ctx, GLuint unit ) fprintf(stderr, "%s unit %d\n", __FUNCTION__, unit); if (texUnit->TexGenEnabled & S_BIT) { - mode = texUnit->GenModeS; + mode = texUnit->GenS.Mode; } else { tgcm |= R200_TEXGEN_COMP_S << (unit * 4); } if (texUnit->TexGenEnabled & T_BIT) { - if (texUnit->GenModeT != mode) + if (texUnit->GenT.Mode != mode) mixed_fallback = GL_TRUE; } else { tgcm |= R200_TEXGEN_COMP_T << (unit * 4); } if (texUnit->TexGenEnabled & R_BIT) { - if (texUnit->GenModeR != mode) + if (texUnit->GenR.Mode != mode) mixed_fallback = GL_TRUE; } else { tgcm |= R200_TEXGEN_COMP_R << (unit * 4); } if (texUnit->TexGenEnabled & Q_BIT) { - if (texUnit->GenModeQ != mode) + if (texUnit->GenQ.Mode != mode) mixed_fallback = GL_TRUE; } else { tgcm |= R200_TEXGEN_COMP_Q << (unit * 4); @@ -1395,8 +1395,8 @@ static GLboolean r200_validate_texgen( GLcontext *ctx, GLuint unit ) if (mixed_fallback) { if (R200_DEBUG & DEBUG_FALLBACKS) fprintf(stderr, "fallback mixed texgen, 0x%x (0x%x 0x%x 0x%x 0x%x)\n", - texUnit->TexGenEnabled, texUnit->GenModeS, texUnit->GenModeT, - texUnit->GenModeR, texUnit->GenModeQ); + texUnit->TexGenEnabled, texUnit->GenS.Mode, texUnit->GenT.Mode, + texUnit->GenR.Mode, texUnit->GenQ.Mode); return GL_FALSE; } @@ -1414,8 +1414,10 @@ static GLboolean r200_validate_texgen( GLcontext *ctx, GLuint unit ) switch (mode) { case GL_OBJECT_LINEAR: { GLuint needtgenable = r200_need_dis_texgen( texUnit->TexGenEnabled, - texUnit->ObjectPlaneS, texUnit->ObjectPlaneT, - texUnit->ObjectPlaneR, texUnit->ObjectPlaneQ ); + texUnit->GenS.ObjectPlane, + texUnit->GenT.ObjectPlane, + texUnit->GenR.ObjectPlane, + texUnit->GenQ.ObjectPlane ); if (needtgenable & (S_BIT | T_BIT)) { if (R200_DEBUG & DEBUG_FALLBACKS) fprintf(stderr, "fallback mixed texgen / obj plane, 0x%x\n", @@ -1431,17 +1433,19 @@ static GLboolean r200_validate_texgen( GLcontext *ctx, GLuint unit ) tgi |= R200_TEXGEN_INPUT_OBJ << inputshift; set_texgen_matrix( rmesa, unit, - (texUnit->TexGenEnabled & S_BIT) ? texUnit->ObjectPlaneS : I, - (texUnit->TexGenEnabled & T_BIT) ? texUnit->ObjectPlaneT : I + 4, - (texUnit->TexGenEnabled & R_BIT) ? texUnit->ObjectPlaneR : I + 8, - (texUnit->TexGenEnabled & Q_BIT) ? texUnit->ObjectPlaneQ : I + 12); + (texUnit->TexGenEnabled & S_BIT) ? texUnit->GenS.ObjectPlane : I, + (texUnit->TexGenEnabled & T_BIT) ? texUnit->GenT.ObjectPlane : I + 4, + (texUnit->TexGenEnabled & R_BIT) ? texUnit->GenR.ObjectPlane : I + 8, + (texUnit->TexGenEnabled & Q_BIT) ? texUnit->GenQ.ObjectPlane : I + 12); } break; case GL_EYE_LINEAR: { GLuint needtgenable = r200_need_dis_texgen( texUnit->TexGenEnabled, - texUnit->EyePlaneS, texUnit->EyePlaneT, - texUnit->EyePlaneR, texUnit->EyePlaneQ ); + texUnit->GenS.EyePlane, + texUnit->GenT.EyePlane, + texUnit->GenR.EyePlane, + texUnit->GenQ.EyePlane ); if (needtgenable & (S_BIT | T_BIT)) { if (R200_DEBUG & DEBUG_FALLBACKS) fprintf(stderr, "fallback mixed texgen / eye plane, 0x%x\n", @@ -1456,10 +1460,10 @@ static GLboolean r200_validate_texgen( GLcontext *ctx, GLuint unit ) } tgi |= R200_TEXGEN_INPUT_EYE << inputshift; set_texgen_matrix( rmesa, unit, - (texUnit->TexGenEnabled & S_BIT) ? texUnit->EyePlaneS : I, - (texUnit->TexGenEnabled & T_BIT) ? texUnit->EyePlaneT : I + 4, - (texUnit->TexGenEnabled & R_BIT) ? texUnit->EyePlaneR : I + 8, - (texUnit->TexGenEnabled & Q_BIT) ? texUnit->EyePlaneQ : I + 12); + (texUnit->TexGenEnabled & S_BIT) ? texUnit->GenS.EyePlane : I, + (texUnit->TexGenEnabled & T_BIT) ? texUnit->GenT.EyePlane : I + 4, + (texUnit->TexGenEnabled & R_BIT) ? texUnit->GenR.EyePlane : I + 8, + (texUnit->TexGenEnabled & Q_BIT) ? texUnit->GenQ.EyePlane : I + 12); } break; @@ -1495,7 +1499,7 @@ static GLboolean r200_validate_texgen( GLcontext *ctx, GLuint unit ) */ if (R200_DEBUG & DEBUG_FALLBACKS) fprintf(stderr, "fallback unsupported texgen, %d\n", - texUnit->GenModeS); + texUnit->GenS.Mode); return GL_FALSE; } diff --git a/r200/r200_vertprog.c b/r200/r200_vertprog.c index 562992f..4ce93b5 100644 --- a/r200/r200_vertprog.c +++ b/r200/r200_vertprog.c @@ -202,7 +202,7 @@ static unsigned long t_dst(struct prog_dst_register *dst) } } -static unsigned long t_src_class(enum register_file file) +static unsigned long t_src_class(gl_register_file file) { switch(file){ @@ -290,7 +290,7 @@ static unsigned long t_src(struct r200_vertex_program *vp, struct prog_src_regis t_swizzle(GET_SWZ(src->Swizzle, 2)), t_swizzle(GET_SWZ(src->Swizzle, 3)), t_src_class(src->File), - src->NegateBase) | (src->RelAddr << 4); + src->Negate) | (src->RelAddr << 4); } static unsigned long t_src_scalar(struct r200_vertex_program *vp, struct prog_src_register *src) @@ -302,7 +302,7 @@ static unsigned long t_src_scalar(struct r200_vertex_program *vp, struct prog_sr t_swizzle(GET_SWZ(src->Swizzle, 0)), t_swizzle(GET_SWZ(src->Swizzle, 0)), t_src_class(src->File), - src->NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src->RelAddr << 4); + src->Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src->RelAddr << 4); } static unsigned long t_opcode(enum prog_opcode opcode) @@ -700,7 +700,7 @@ static GLboolean r200_translate_vertex_program(GLcontext *ctx, struct r200_verte t_swizzle(GET_SWZ(src[1].Swizzle, 0)), SWIZZLE_ZERO, t_src_class(src[0].File), - src[0].NegateBase) | (src[0].RelAddr << 4); + src[0].Negate) | (src[0].RelAddr << 4); o_inst->src1 = UNUSED_SRC_0; o_inst->src2 = UNUSED_SRC_0; } @@ -712,12 +712,12 @@ static GLboolean r200_translate_vertex_program(GLcontext *ctx, struct r200_verte t_swizzle(GET_SWZ(src[0].Swizzle, 0)), SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, t_src_class(src[0].File), - src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); + src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), SWIZZLE_ZERO, SWIZZLE_ZERO, t_swizzle(GET_SWZ(src[1].Swizzle, 0)), SWIZZLE_ZERO, t_src_class(src[1].File), - src[1].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); + src[1].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); o_inst->src2 = UNUSED_SRC_1; o_inst++; @@ -766,11 +766,11 @@ if ((o_inst - vp->instr) == 31) { o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, t_src_class(src[1].File), - src[1].NegateBase) | (src[1].RelAddr << 4); + src[1].Negate) | (src[1].RelAddr << 4); o_inst->src2 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, t_src_class(src[1].File), - src[1].NegateBase) | (src[1].RelAddr << 4); + src[1].Negate) | (src[1].RelAddr << 4); } else { o_inst->src1 = t_src(vp, &src[1]); @@ -792,7 +792,7 @@ else { t_swizzle(GET_SWZ(src[0].Swizzle, 2)), SWIZZLE_ZERO, t_src_class(src[0].File), - src[0].NegateBase) | (src[0].RelAddr << 4); + src[0].Negate) | (src[0].RelAddr << 4); o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 0)), @@ -800,7 +800,7 @@ else { t_swizzle(GET_SWZ(src[1].Swizzle, 2)), SWIZZLE_ZERO, t_src_class(src[1].File), - src[1].NegateBase) | (src[1].RelAddr << 4); + src[1].Negate) | (src[1].RelAddr << 4); o_inst->src2 = UNUSED_SRC_1; goto next; @@ -815,7 +815,7 @@ else { t_swizzle(GET_SWZ(src[0].Swizzle, 2)), VSF_IN_COMPONENT_ONE, t_src_class(src[0].File), - src[0].NegateBase) | (src[0].RelAddr << 4); + src[0].Negate) | (src[0].RelAddr << 4); o_inst->src1 = t_src(vp, &src[1]); o_inst->src2 = UNUSED_SRC_1; goto next; @@ -831,7 +831,7 @@ else { t_swizzle(GET_SWZ(src[1].Swizzle, 2)), t_swizzle(GET_SWZ(src[1].Swizzle, 3)), t_src_class(src[1].File), - (!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); + (!src[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); o_inst->src2 = UNUSED_SRC_1; goto next; @@ -846,7 +846,7 @@ else { t_swizzle(GET_SWZ(src[0].Swizzle, 2)), t_swizzle(GET_SWZ(src[0].Swizzle, 3)), t_src_class(src[0].File), - (!src[0].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); + (!src[0].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); o_inst->src2 = UNUSED_SRC_1; goto next; @@ -874,7 +874,7 @@ else { VSF_IN_COMPONENT_W, VSF_IN_CLASS_TMP, /* Not 100% sure about this */ - (!src[0].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/); + (!src[0].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/); o_inst->src2 = UNUSED_SRC_0; u_temp_i--; @@ -899,7 +899,7 @@ else { t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w t_src_class(src[0].File), - src[0].NegateBase) | (src[0].RelAddr << 4); + src[0].Negate) | (src[0].RelAddr << 4); o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z @@ -907,7 +907,7 @@ else { t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w t_src_class(src[1].File), - src[1].NegateBase) | (src[1].RelAddr << 4); + src[1].Negate) | (src[1].RelAddr << 4); o_inst->src2 = UNUSED_SRC_1; o_inst++; @@ -922,7 +922,7 @@ else { t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w t_src_class(src[1].File), - (!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); + (!src[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z @@ -930,7 +930,7 @@ else { t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w t_src_class(src[0].File), - src[0].NegateBase) | (src[0].RelAddr << 4); + src[0].Negate) | (src[0].RelAddr << 4); o_inst->src2 = MAKE_VSF_SOURCE(u_temp_i+1, VSF_IN_COMPONENT_X, diff --git a/r300/r300_cmdbuf.c b/r300/r300_cmdbuf.c index c9e1dfe..f447275 100644 --- a/r300/r300_cmdbuf.c +++ b/r300/r300_cmdbuf.c @@ -130,16 +130,26 @@ int r300FlushCmdBuf(r300ContextPtr r300, const char *caller) static void r300PrintStateAtom(r300ContextPtr r300, struct r300_state_atom *state) { - int i; + int i, j, reg; int dwords = (*state->check) (r300, state); + drm_r300_cmd_header_t cmd; fprintf(stderr, " emit %s %d/%d\n", state->name, dwords, state->cmd_size); if (RADEON_DEBUG & DEBUG_VERBOSE) { - for (i = 0; i < dwords; i++) { - fprintf(stderr, " %s[%d]: %08x\n", - state->name, i, state->cmd[i]); + for (i = 0; i < dwords;) { + cmd = *((drm_r300_cmd_header_t *) &state->cmd[i]); + reg = (cmd.packet0.reghi << 8) | cmd.packet0.reglo; + fprintf(stderr, " %s[%d]: cmdpacket0 (first reg=0x%04x, count=%d)\n", + state->name, i, reg, cmd.packet0.count); + ++i; + for (j = 0; j < cmd.packet0.count; j++) { + fprintf(stderr, " %s[%d]: 0x%04x = %08x\n", + state->name, i, reg, state->cmd[i]); + reg += 4; + ++i; + } } } } diff --git a/r300/r300_context.c b/r300/r300_context.c index 3743627..7d67050 100644 --- a/r300/r300_context.c +++ b/r300/r300_context.c @@ -43,6 +43,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/matrix.h" #include "main/extensions.h" #include "main/state.h" +#include "main/texobj.h" #include "main/bufferobj.h" #include "swrast/swrast.h" @@ -77,19 +78,17 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. int future_hw_tcl_on = 1; int hw_tcl_on = 1; -#define need_GL_EXT_stencil_two_side -#define need_GL_ARB_multisample +#define need_GL_VERSION_2_0 #define need_GL_ARB_point_parameters -#define need_GL_ARB_texture_compression -#define need_GL_ARB_vertex_buffer_object #define need_GL_ARB_vertex_program -#define need_GL_EXT_blend_minmax -//#define need_GL_EXT_fog_coord -#define need_GL_EXT_multi_draw_arrays -#define need_GL_EXT_secondary_color #define need_GL_EXT_blend_equation_separate #define need_GL_EXT_blend_func_separate +#define need_GL_EXT_blend_minmax +#define need_GL_EXT_fog_coord #define need_GL_EXT_gpu_program_parameters +#define need_GL_EXT_secondary_color +#define need_GL_EXT_stencil_two_side +#define need_GL_ATI_separate_stencil #define need_GL_NV_vertex_program #include "extension_helper.h" @@ -97,27 +96,23 @@ const struct dri_extension card_extensions[] = { /* *INDENT-OFF* */ {"GL_ARB_depth_texture", NULL}, {"GL_ARB_fragment_program", NULL}, - {"GL_ARB_multisample", GL_ARB_multisample_functions}, {"GL_ARB_multitexture", NULL}, {"GL_ARB_point_parameters", GL_ARB_point_parameters_functions}, {"GL_ARB_shadow", NULL}, {"GL_ARB_shadow_ambient", NULL}, {"GL_ARB_texture_border_clamp", NULL}, - {"GL_ARB_texture_compression", GL_ARB_texture_compression_functions}, {"GL_ARB_texture_cube_map", NULL}, {"GL_ARB_texture_env_add", NULL}, {"GL_ARB_texture_env_combine", NULL}, {"GL_ARB_texture_env_crossbar", NULL}, {"GL_ARB_texture_env_dot3", NULL}, {"GL_ARB_texture_mirrored_repeat", NULL}, - {"GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions}, {"GL_ARB_vertex_program", GL_ARB_vertex_program_functions}, {"GL_EXT_blend_equation_separate", GL_EXT_blend_equation_separate_functions}, {"GL_EXT_blend_func_separate", GL_EXT_blend_func_separate_functions}, {"GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions}, {"GL_EXT_blend_subtract", NULL}, -// {"GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, - {"GL_EXT_multi_draw_arrays", GL_EXT_multi_draw_arrays_functions}, + {"GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, {"GL_EXT_gpu_program_parameters", GL_EXT_gpu_program_parameters_functions}, {"GL_EXT_secondary_color", GL_EXT_secondary_color_functions}, {"GL_EXT_shadow_funcs", NULL}, @@ -130,6 +125,7 @@ const struct dri_extension card_extensions[] = { {"GL_EXT_texture_lod_bias", NULL}, {"GL_EXT_texture_mirror_clamp", NULL}, {"GL_EXT_texture_rectangle", NULL}, + {"GL_ATI_separate_stencil", GL_ATI_separate_stencil_functions}, {"GL_ATI_texture_env_combine3", NULL}, {"GL_ATI_texture_mirror_once", NULL}, {"GL_MESA_pack_invert", NULL}, @@ -142,6 +138,16 @@ const struct dri_extension card_extensions[] = { /* *INDENT-ON* */ }; + +/** + * The GL 2.0 functions are needed to make display lists work with + * functions added by GL_ATI_separate_stencil. + */ +const struct dri_extension gl_20_extension[] = { + {"GL_VERSION_2_0", GL_VERSION_2_0_functions }, +}; + + extern struct tnl_pipeline_stage _r300_render_stage; extern const struct tnl_pipeline_stage _r300_tcl_stage; @@ -283,10 +289,23 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, ctx->Const.MaxTextureMaxAnisotropy = 16.0; ctx->Const.MaxTextureLodBias = 16.0; - if (screen->chip_family >= CHIP_FAMILY_RV515) { + if (screen->chip_family >= CHIP_FAMILY_RV515) ctx->Const.MaxTextureLevels = 13; - ctx->Const.MaxTextureRectSize = 4096; - } + else + ctx->Const.MaxTextureLevels = 12; + + driCalculateMaxTextureLevels( r300->texture_heaps, + r300->nr_heaps, + & ctx->Const, + 4, + ctx->Const.MaxTextureLevels - 1, + MIN2(ctx->Const.MaxTextureLevels, + MAX_3D_TEXTURE_LEVELS) - 1, + ctx->Const.MaxTextureLevels - 1, + ctx->Const.MaxTextureLevels - 1, + ctx->Const.MaxTextureLevels - 1, + GL_FALSE, + 2 ); ctx->Const.MinPointSize = 1.0; ctx->Const.MinPointSizeAA = 1.0; @@ -306,6 +325,10 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, #endif #endif + ctx->Const.MaxDrawBuffers = 1; + + _mesa_set_mvp_with_dp4( ctx, GL_TRUE ); + /* Initialize the software rasterizer and helper modules. */ _swrast_CreateContext(ctx); @@ -480,6 +503,7 @@ void r300DestroyContext(__DRIcontextPrivate * driContextPriv) r300ContextPtr r300 = (r300ContextPtr) driContextPriv->driverPrivate; radeonContextPtr radeon = (radeonContextPtr) r300; radeonContextPtr current = ctx ? RADEON_CONTEXT(ctx) : NULL; + int i; if (RADEON_DEBUG & DEBUG_DRI) { fprintf(stderr, "Destroying context !\n"); @@ -533,6 +557,11 @@ void r300DestroyContext(__DRIcontextPrivate * driContextPriv) assert(is_empty_list(&r300->swapped)); } + /* Drop texture object references from current hardware state */ + for (i = 0; i < 8; i++) { + _mesa_reference_texobj(&r300->state.texture.unit[i].texobj, NULL); + } + radeonCleanupContext(&r300->radeon); #ifdef USER_BUFFERS diff --git a/r300/r300_context.h b/r300/r300_context.h index c15e9fa..96a3205 100644 --- a/r300/r300_context.h +++ b/r300/r300_context.h @@ -170,6 +170,10 @@ struct r300_dma { typedef struct r300_tex_obj r300TexObj, *r300TexObjPtr; +/* Maximum number of mipmap levels supported by any supported GPU + */ +#define R300_MAX_TEXTURE_LEVELS 13 + /* Texture object in locally shared texture space. */ struct r300_tex_obj { @@ -178,7 +182,7 @@ struct r300_tex_obj { GLuint bufAddr; /* Offset to start of locally shared texture block */ - drm_radeon_tex_image_t image[6][RADEON_MAX_TEXTURE_LEVELS]; + drm_radeon_tex_image_t image[6][R300_MAX_TEXTURE_LEVELS]; /* Six, for the cube faces */ GLboolean image_override; /* Image overridden by GLX_EXT_tfp */ @@ -211,7 +215,7 @@ struct r300_tex_obj { }; struct r300_texture_env_state { - r300TexObjPtr texobj; + struct gl_texture_object *texobj; GLenum format; GLenum envMode; }; diff --git a/r300/r300_emit.c b/r300/r300_emit.c index 80bd338..28c3157 100644 --- a/r300/r300_emit.c +++ b/r300/r300_emit.c @@ -314,10 +314,6 @@ GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint OutputsWritten) R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT | R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT; -#if 0 - if (OutputsWritten & (1 << VERT_RESULT_FOGC)) ; -#endif - if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) ret |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT; @@ -326,12 +322,21 @@ GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint OutputsWritten) GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint OutputsWritten) { - GLuint i, ret = 0; + GLuint i, ret = 0, first_free_texcoord = 0; for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { if (OutputsWritten & (1 << (VERT_RESULT_TEX0 + i))) { ret |= (4 << (3 * i)); + ++first_free_texcoord; + } + } + + if (OutputsWritten & (1 << VERT_RESULT_FOGC)) { + if (first_free_texcoord > 8) { + fprintf(stderr, "\tout of free texcoords to write fog coord\n"); + _mesa_exit(-1); } + ret |= 4 << (3 * first_free_texcoord); } return ret; diff --git a/r300/r300_fragprog.c b/r300/r300_fragprog.c index 4ef7f2b..873cde4 100644 --- a/r300/r300_fragprog.c +++ b/r300/r300_fragprog.c @@ -163,6 +163,19 @@ static GLboolean transform_TEX( } } + if (inst.SrcReg[0].File != PROGRAM_TEMPORARY && inst.SrcReg[0].File != PROGRAM_INPUT) { + int tmpreg = radeonFindFreeTemporary(t); + tgt = radeonAppendInstructions(t->Program, 1); + tgt->Opcode = OPCODE_MOV; + tgt->DstReg.File = PROGRAM_TEMPORARY; + tgt->DstReg.Index = tmpreg; + tgt->SrcReg[0] = inst.SrcReg[0]; + + reset_srcreg(&inst.SrcReg[0]); + inst.SrcReg[0].File = PROGRAM_TEMPORARY; + inst.SrcReg[0].Index = tmpreg; + } + tgt = radeonAppendInstructions(t->Program, 1); _mesa_copy_instructions(tgt, &inst, 1); @@ -201,9 +214,9 @@ static GLboolean transform_TEX( * r < tex <=> -tex+r < 0 * r >= tex <=> not (-tex+r < 0 */ if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL) - tgt[1].SrcReg[2].NegateBase = tgt[0].SrcReg[2].NegateBase ^ NEGATE_XYZW; + tgt[1].SrcReg[2].Negate = tgt[0].SrcReg[2].Negate ^ NEGATE_XYZW; else - tgt[1].SrcReg[0].NegateBase = tgt[0].SrcReg[0].NegateBase ^ NEGATE_XYZW; + tgt[1].SrcReg[0].Negate = tgt[0].SrcReg[0].Negate ^ NEGATE_XYZW; tgt[2].Opcode = OPCODE_CMP; tgt[2].DstReg = orig_inst->DstReg; @@ -343,8 +356,8 @@ static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler) static void nqssadce_init(struct nqssadce_state* s) { - s->Outputs[FRAG_RESULT_COLR].Sourced = WRITEMASK_XYZW; - s->Outputs[FRAG_RESULT_DEPR].Sourced = WRITEMASK_W; + s->Outputs[FRAG_RESULT_COLOR].Sourced = WRITEMASK_XYZW; + s->Outputs[FRAG_RESULT_DEPTH].Sourced = WRITEMASK_W; } diff --git a/r300/r300_fragprog_swizzle.c b/r300/r300_fragprog_swizzle.c index a86d2bd..fc9d855 100644 --- a/r300/r300_fragprog_swizzle.c +++ b/r300/r300_fragprog_swizzle.c @@ -92,7 +92,7 @@ static const struct swizzle_data* lookup_native_swizzle(GLuint swizzle) GLboolean r300FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg) { if (reg.Abs) - reg.NegateBase = 0; + reg.Negate = NEGATE_NONE; if (opcode == OPCODE_KIL || opcode == OPCODE_TEX || @@ -100,7 +100,7 @@ GLboolean r300FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg) opcode == OPCODE_TXP) { int j; - if (reg.Abs || reg.NegateBase != (15*reg.NegateAbs)) + if (reg.Abs || reg.Negate) return GL_FALSE; for(j = 0; j < 4; ++j) { @@ -121,7 +121,7 @@ GLboolean r300FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg) if (GET_SWZ(reg.Swizzle, j) != SWIZZLE_NIL) relevant |= 1 << j; - if ((reg.NegateBase & relevant) && (reg.NegateBase & relevant) != relevant) + if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant)) return GL_FALSE; if (!lookup_native_swizzle(reg.Swizzle)) @@ -137,13 +137,12 @@ GLboolean r300FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg) void r300FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src) { if (src.Abs) - src.NegateBase = 0; + src.Negate = NEGATE_NONE; while(dst.WriteMask) { const struct swizzle_data *best_swizzle = 0; GLuint best_matchcount = 0; GLuint best_matchmask = 0; - GLboolean rgbnegate; int i, comp; for(i = 0; i < num_native_swizzles; ++i) { @@ -157,6 +156,11 @@ void r300FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, if (swz == SWIZZLE_NIL) continue; if (swz == GET_SWZ(sd->hash, comp)) { + /* check if the negate bit of current component + * is the same for already matched components */ + if (matchmask && (!!(src.Negate & matchmask) != !!(src.Negate & (1 << comp)))) + continue; + matchcount++; matchmask |= 1 << comp; } @@ -170,13 +174,6 @@ void r300FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, } } - if ((src.NegateBase & best_matchmask) != 0) { - best_matchmask &= src.NegateBase; - rgbnegate = !src.NegateAbs; - } else { - rgbnegate = src.NegateAbs; - } - struct prog_instruction *inst; _mesa_insert_instructions(s->Program, s->IP, 1); @@ -185,6 +182,7 @@ void r300FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, inst->DstReg = dst; inst->DstReg.WriteMask &= (best_matchmask | WRITEMASK_W); inst->SrcReg[0] = src; + inst->SrcReg[0].Negate = (best_matchmask & src.Negate) ? NEGATE_XYZW : NEGATE_NONE; /* Note: We rely on NqSSA/DCE to set unused swizzle components to NIL */ dst.WriteMask &= ~inst->DstReg.WriteMask; diff --git a/r300/r300_reg.h b/r300/r300_reg.h index 7c6485e..8f1a663 100644 --- a/r300/r300_reg.h +++ b/r300/r300_reg.h @@ -656,7 +656,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_GB_FOG_SELECT_C3A (3 << 0) # define R300_GB_FOG_SELECT_1_1_W (4 << 0) # define R300_GB_FOG_SELECT_Z (5 << 0) -# define R300_GB_DEPTH_SELECT_Z (0 << 3 +# define R300_GB_DEPTH_SELECT_Z (0 << 3) # define R300_GB_DEPTH_SELECT_1_1_W (1 << 3) # define R300_GB_W_SELECT_1_W (0 << 4) # define R300_GB_W_SELECT_1 (1 << 4) @@ -730,8 +730,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define R500_RS_IP_TEX_PTR_Q_SHIFT 18 #define R500_RS_IP_COL_PTR_SHIFT 24 #define R500_RS_IP_COL_FMT_SHIFT 27 -# define R500_RS_COL_PTR(x) (x << 24) -# define R500_RS_COL_FMT(x) (x << 27) +# define R500_RS_COL_PTR(x) ((x) << 24) +# define R500_RS_COL_FMT(x) ((x) << 27) /* gap */ #define R500_RS_IP_OFFSET_DIS (0 << 31) #define R500_RS_IP_OFFSET_EN (1 << 31) @@ -1172,9 +1172,9 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define R300_RS_IP_3 0x431C # define R300_RS_INTERP_SRC_SHIFT 2 /* TODO: check for removal */ # define R300_RS_INTERP_SRC_MASK (7 << 2) /* TODO: check for removal */ -# define R300_RS_TEX_PTR(x) (x << 0) -# define R300_RS_COL_PTR(x) (x << 6) -# define R300_RS_COL_FMT(x) (x << 9) +# define R300_RS_TEX_PTR(x) ((x) << 0) +# define R300_RS_COL_PTR(x) ((x) << 6) +# define R300_RS_COL_FMT(x) ((x) << 9) # define R300_RS_COL_FMT_RGBA 0 # define R300_RS_COL_FMT_RGB0 1 # define R300_RS_COL_FMT_RGB1 2 @@ -1184,10 +1184,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_RS_COL_FMT_111A 8 # define R300_RS_COL_FMT_1110 9 # define R300_RS_COL_FMT_1111 10 -# define R300_RS_SEL_S(x) (x << 13) -# define R300_RS_SEL_T(x) (x << 16) -# define R300_RS_SEL_R(x) (x << 19) -# define R300_RS_SEL_Q(x) (x << 22) +# define R300_RS_SEL_S(x) ((x) << 13) +# define R300_RS_SEL_T(x) ((x) << 16) +# define R300_RS_SEL_R(x) ((x) << 19) +# define R300_RS_SEL_Q(x) ((x) << 22) # define R300_RS_SEL_C0 0 # define R300_RS_SEL_C1 1 # define R300_RS_SEL_C2 2 @@ -1224,6 +1224,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define R500_RS_INST_COL_ADDR_SHIFT 18 #define R500_RS_INST_TEX_ADJ (1 << 25) #define R500_RS_INST_W_CN (1 << 26) +#define R500_RS_INST_TEX_ID(x) ((x) << R500_RS_INST_TEX_ID_SHIFT) +#define R500_RS_INST_TEX_ADDR(x) ((x) << R500_RS_INST_TEX_ADDR_SHIFT) +#define R500_RS_INST_COL_ID(x) ((x) << R500_RS_INST_COL_ID_SHIFT) +#define R500_RS_INST_COL_ADDR(x) ((x) << R500_RS_INST_COL_ADDR_SHIFT) /* These DWORDs control how vertex data is routed into fragment program * registers, after interpolators. @@ -1239,9 +1243,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_RS_INST_TEX_ID(x) ((x) << 0) # define R300_RS_INST_TEX_CN_WRITE (1 << 3) # define R300_RS_INST_TEX_ADDR_SHIFT 6 +# define R300_RS_INST_TEX_ADDR(x) ((x) << R300_RS_INST_TEX_ADDR_SHIFT) # define R300_RS_INST_COL_ID(x) ((x) << 11) # define R300_RS_INST_COL_CN_WRITE (1 << 14) # define R300_RS_INST_COL_ADDR_SHIFT 17 +# define R300_RS_INST_COL_ADDR(x) ((x) << R300_RS_INST_COL_ADDR_SHIFT) # define R300_RS_INST_TEX_ADJ (1 << 22) # define R300_RS_COL_BIAS_UNUSED_SHIFT 23 @@ -2705,7 +2711,7 @@ enum { # define R500_ALPHA_OP_COS 13 # define R500_ALPHA_OP_MDH 14 # define R500_ALPHA_OP_MDV 15 -# define R500_ALPHA_ADDRD(x) (x << 4) +# define R500_ALPHA_ADDRD(x) ((x) << 4) # define R500_ALPHA_ADDRD_REL (1 << 11) # define R500_ALPHA_SEL_A_SHIFT 12 # define R500_ALPHA_SEL_A_SRC0 (0 << 12) @@ -2749,16 +2755,16 @@ enum { # define R500_ALPHA_OMOD_DIV_4 (5 << 26) # define R500_ALPHA_OMOD_DIV_8 (6 << 26) # define R500_ALPHA_OMOD_DISABLE (7 << 26) -# define R500_ALPHA_TARGET(x) (x << 29) +# define R500_ALPHA_TARGET(x) ((x) << 29) # define R500_ALPHA_W_OMASK (1 << 31) #define R500_US_ALU_ALPHA_ADDR_0 0x9800 -# define R500_ALPHA_ADDR0(x) (x << 0) +# define R500_ALPHA_ADDR0(x) ((x) << 0) # define R500_ALPHA_ADDR0_CONST (1 << 8) # define R500_ALPHA_ADDR0_REL (1 << 9) -# define R500_ALPHA_ADDR1(x) (x << 10) +# define R500_ALPHA_ADDR1(x) ((x) << 10) # define R500_ALPHA_ADDR1_CONST (1 << 18) # define R500_ALPHA_ADDR1_REL (1 << 19) -# define R500_ALPHA_ADDR2(x) (x << 20) +# define R500_ALPHA_ADDR2(x) ((x) << 20) # define R500_ALPHA_ADDR2_CONST (1 << 28) # define R500_ALPHA_ADDR2_REL (1 << 29) # define R500_ALPHA_SRCP_OP_1_MINUS_2A0 (0 << 30) @@ -2779,7 +2785,7 @@ enum { # define R500_ALU_RGBA_OP_SOP (10 << 0) # define R500_ALU_RGBA_OP_MDH (11 << 0) # define R500_ALU_RGBA_OP_MDV (12 << 0) -# define R500_ALU_RGBA_ADDRD(x) (x << 4) +# define R500_ALU_RGBA_ADDRD(x) ((x) << 4) # define R500_ALU_RGBA_ADDRD_REL (1 << 11) # define R500_ALU_RGBA_SEL_C_SHIFT 12 # define R500_ALU_RGBA_SEL_C_SRC0 (0 << 12) @@ -2906,16 +2912,16 @@ enum { # define R500_ALU_RGB_OMOD_DIV_4 (5 << 26) # define R500_ALU_RGB_OMOD_DIV_8 (6 << 26) # define R500_ALU_RGB_OMOD_DISABLE (7 << 26) -# define R500_ALU_RGB_TARGET(x) (x << 29) +# define R500_ALU_RGB_TARGET(x) ((x) << 29) # define R500_ALU_RGB_WMASK (1 << 31) #define R500_US_ALU_RGB_ADDR_0 0x9000 -# define R500_RGB_ADDR0(x) (x << 0) +# define R500_RGB_ADDR0(x) ((x) << 0) # define R500_RGB_ADDR0_CONST (1 << 8) # define R500_RGB_ADDR0_REL (1 << 9) -# define R500_RGB_ADDR1(x) (x << 10) +# define R500_RGB_ADDR1(x) ((x) << 10) # define R500_RGB_ADDR1_CONST (1 << 18) # define R500_RGB_ADDR1_REL (1 << 19) -# define R500_RGB_ADDR2(x) (x << 20) +# define R500_RGB_ADDR2(x) ((x) << 20) # define R500_RGB_ADDR2_CONST (1 << 28) # define R500_RGB_ADDR2_REL (1 << 29) # define R500_RGB_SRCP_OP_1_MINUS_2RGB0 (0 << 30) @@ -2970,19 +2976,19 @@ enum { /* note that these are 8 bit lengths, despite the offsets, at least for R500 */ #define R500_US_CODE_ADDR 0x4630 -# define R500_US_CODE_START_ADDR(x) (x << 0) -# define R500_US_CODE_END_ADDR(x) (x << 16) +# define R500_US_CODE_START_ADDR(x) ((x) << 0) +# define R500_US_CODE_END_ADDR(x) ((x) << 16) #define R500_US_CODE_OFFSET 0x4638 -# define R500_US_CODE_OFFSET_ADDR(x) (x << 0) +# define R500_US_CODE_OFFSET_ADDR(x) ((x) << 0) #define R500_US_CODE_RANGE 0x4634 -# define R500_US_CODE_RANGE_ADDR(x) (x << 0) -# define R500_US_CODE_RANGE_SIZE(x) (x << 16) +# define R500_US_CODE_RANGE_ADDR(x) ((x) << 0) +# define R500_US_CODE_RANGE_SIZE(x) ((x) << 16) #define R500_US_CONFIG 0x4600 # define R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO (1 << 1) #define R500_US_FC_ADDR_0 0xa000 -# define R500_FC_BOOL_ADDR(x) (x << 0) -# define R500_FC_INT_ADDR(x) (x << 8) -# define R500_FC_JUMP_ADDR(x) (x << 16) +# define R500_FC_BOOL_ADDR(x) ((x) << 0) +# define R500_FC_INT_ADDR(x) ((x) << 8) +# define R500_FC_JUMP_ADDR(x) ((x) << 16) # define R500_FC_JUMP_GLOBAL (1 << 31) #define R500_US_FC_BOOL_CONST 0x4620 # define R500_FC_KBOOL(x) (x) @@ -3003,8 +3009,8 @@ enum { # define R500_FC_A_OP_NONE (0 << 6) # define R500_FC_A_OP_POP (1 << 6) # define R500_FC_A_OP_PUSH (2 << 6) -# define R500_FC_JUMP_FUNC(x) (x << 8) -# define R500_FC_B_POP_CNT(x) (x << 16) +# define R500_FC_JUMP_FUNC(x) ((x) << 8) +# define R500_FC_B_POP_CNT(x) ((x) << 16) # define R500_FC_B_OP0_NONE (0 << 24) # define R500_FC_B_OP0_DECR (1 << 24) # define R500_FC_B_OP0_INCR (2 << 24) @@ -3013,14 +3019,14 @@ enum { # define R500_FC_B_OP1_INCR (2 << 26) # define R500_FC_IGNORE_UNCOVERED (1 << 28) #define R500_US_FC_INT_CONST_0 0x4c00 -# define R500_FC_INT_CONST_KR(x) (x << 0) -# define R500_FC_INT_CONST_KG(x) (x << 8) -# define R500_FC_INT_CONST_KB(x) (x << 16) +# define R500_FC_INT_CONST_KR(x) ((x) << 0) +# define R500_FC_INT_CONST_KG(x) ((x) << 8) +# define R500_FC_INT_CONST_KB(x) ((x) << 16) /* _0 through _15 */ #define R500_US_FORMAT0_0 0x4640 -# define R500_FORMAT_TXWIDTH(x) (x << 0) -# define R500_FORMAT_TXHEIGHT(x) (x << 11) -# define R500_FORMAT_TXDEPTH(x) (x << 22) +# define R500_FORMAT_TXWIDTH(x) ((x) << 0) +# define R500_FORMAT_TXHEIGHT(x) ((x) << 11) +# define R500_FORMAT_TXDEPTH(x) ((x) << 22) /* _0 through _3 */ #define R500_US_OUT_FMT_0 0x46a4 # define R500_OUT_FMT_C4_8 (0 << 0) @@ -3061,12 +3067,12 @@ enum { # define R500_C3_SEL_R (1 << 14) # define R500_C3_SEL_G (2 << 14) # define R500_C3_SEL_B (3 << 14) -# define R500_OUT_SIGN(x) (x << 16) +# define R500_OUT_SIGN(x) ((x) << 16) # define R500_ROUND_ADJ (1 << 20) #define R500_US_PIXSIZE 0x4604 # define R500_PIX_SIZE(x) (x) #define R500_US_TEX_ADDR_0 0x9800 -# define R500_TEX_SRC_ADDR(x) (x << 0) +# define R500_TEX_SRC_ADDR(x) ((x) << 0) # define R500_TEX_SRC_ADDR_REL (1 << 7) # define R500_TEX_SRC_S_SWIZ_R (0 << 8) # define R500_TEX_SRC_S_SWIZ_G (1 << 8) @@ -3084,7 +3090,7 @@ enum { # define R500_TEX_SRC_Q_SWIZ_G (1 << 14) # define R500_TEX_SRC_Q_SWIZ_B (2 << 14) # define R500_TEX_SRC_Q_SWIZ_A (3 << 14) -# define R500_TEX_DST_ADDR(x) (x << 16) +# define R500_TEX_DST_ADDR(x) ((x) << 16) # define R500_TEX_DST_ADDR_REL (1 << 23) # define R500_TEX_DST_R_SWIZ_R (0 << 24) # define R500_TEX_DST_R_SWIZ_G (1 << 24) @@ -3103,7 +3109,7 @@ enum { # define R500_TEX_DST_A_SWIZ_B (2 << 30) # define R500_TEX_DST_A_SWIZ_A (3 << 30) #define R500_US_TEX_ADDR_DXDY_0 0xa000 -# define R500_DX_ADDR(x) (x << 0) +# define R500_DX_ADDR(x) ((x) << 0) # define R500_DX_ADDR_REL (1 << 7) # define R500_DX_S_SWIZ_R (0 << 8) # define R500_DX_S_SWIZ_G (1 << 8) @@ -3121,7 +3127,7 @@ enum { # define R500_DX_Q_SWIZ_G (1 << 14) # define R500_DX_Q_SWIZ_B (2 << 14) # define R500_DX_Q_SWIZ_A (3 << 14) -# define R500_DY_ADDR(x) (x << 16) +# define R500_DY_ADDR(x) ((x) << 16) # define R500_DY_ADDR_REL (1 << 17) # define R500_DY_S_SWIZ_R (0 << 24) # define R500_DY_S_SWIZ_G (1 << 24) @@ -3140,7 +3146,7 @@ enum { # define R500_DY_Q_SWIZ_B (2 << 30) # define R500_DY_Q_SWIZ_A (3 << 30) #define R500_US_TEX_INST_0 0x9000 -# define R500_TEX_ID(x) (x << 16) +# define R500_TEX_ID(x) ((x) << 16) # define R500_TEX_INST_NOP (0 << 22) # define R500_TEX_INST_LD (1 << 22) # define R500_TEX_INST_TEXKILL (2 << 22) diff --git a/r300/r300_state.c b/r300/r300_state.c index b756d1c..79f0b36 100644 --- a/r300/r300_state.c +++ b/r300/r300_state.c @@ -472,7 +472,9 @@ static void r300SetEarlyZState(GLcontext * ctx) if (ctx->Color.AlphaEnabled && ctx->Color.AlphaFunc != GL_ALWAYS) topZ = R300_ZTOP_DISABLE; - if (current_fragment_program_writes_depth(ctx)) + else if (current_fragment_program_writes_depth(ctx)) + topZ = R300_ZTOP_DISABLE; + else if (ctx->FragmentProgram._Current && ctx->FragmentProgram._Current->UsesKill) topZ = R300_ZTOP_DISABLE; if (topZ != r300->hw.zstencil_format.cmd[2]) { @@ -730,133 +732,6 @@ static void r300ColorMask(GLcontext * ctx, } /* ============================================================= - * Fog - */ -static void r300Fogfv(GLcontext * ctx, GLenum pname, const GLfloat * param) -{ - r300ContextPtr r300 = R300_CONTEXT(ctx); - union { - int i; - float f; - } fogScale, fogStart; - - (void)param; - - fogScale.i = r300->hw.fogp.cmd[R300_FOGP_SCALE]; - fogStart.i = r300->hw.fogp.cmd[R300_FOGP_START]; - - switch (pname) { - case GL_FOG_MODE: - switch (ctx->Fog.Mode) { - case GL_LINEAR: - R300_STATECHANGE(r300, fogs); - r300->hw.fogs.cmd[R300_FOGS_STATE] = - (r300->hw.fogs. - cmd[R300_FOGS_STATE] & ~R300_FG_FOG_BLEND_FN_MASK) | - R300_FG_FOG_BLEND_FN_LINEAR; - - if (ctx->Fog.Start == ctx->Fog.End) { - fogScale.f = -1.0; - fogStart.f = 1.0; - } else { - fogScale.f = - 1.0 / (ctx->Fog.End - ctx->Fog.Start); - fogStart.f = - -ctx->Fog.Start / (ctx->Fog.End - - ctx->Fog.Start); - } - break; - case GL_EXP: - R300_STATECHANGE(r300, fogs); - r300->hw.fogs.cmd[R300_FOGS_STATE] = - (r300->hw.fogs. - cmd[R300_FOGS_STATE] & ~R300_FG_FOG_BLEND_FN_MASK) | - R300_FG_FOG_BLEND_FN_EXP; - fogScale.f = 0.0933 * ctx->Fog.Density; - fogStart.f = 0.0; - break; - case GL_EXP2: - R300_STATECHANGE(r300, fogs); - r300->hw.fogs.cmd[R300_FOGS_STATE] = - (r300->hw.fogs. - cmd[R300_FOGS_STATE] & ~R300_FG_FOG_BLEND_FN_MASK) | - R300_FG_FOG_BLEND_FN_EXP2; - fogScale.f = 0.3 * ctx->Fog.Density; - fogStart.f = 0.0; - default: - return; - } - break; - case GL_FOG_DENSITY: - switch (ctx->Fog.Mode) { - case GL_EXP: - fogScale.f = 0.0933 * ctx->Fog.Density; - fogStart.f = 0.0; - break; - case GL_EXP2: - fogScale.f = 0.3 * ctx->Fog.Density; - fogStart.f = 0.0; - default: - break; - } - break; - case GL_FOG_START: - case GL_FOG_END: - if (ctx->Fog.Mode == GL_LINEAR) { - if (ctx->Fog.Start == ctx->Fog.End) { - fogScale.f = -1.0; - fogStart.f = 1.0; - } else { - fogScale.f = - 1.0 / (ctx->Fog.End - ctx->Fog.Start); - fogStart.f = - -ctx->Fog.Start / (ctx->Fog.End - - ctx->Fog.Start); - } - } - break; - case GL_FOG_COLOR: - R300_STATECHANGE(r300, fogc); - r300->hw.fogc.cmd[R300_FOGC_R] = - (GLuint) (ctx->Fog.Color[0] * 1023.0F) & 0x3FF; - r300->hw.fogc.cmd[R300_FOGC_G] = - (GLuint) (ctx->Fog.Color[1] * 1023.0F) & 0x3FF; - r300->hw.fogc.cmd[R300_FOGC_B] = - (GLuint) (ctx->Fog.Color[2] * 1023.0F) & 0x3FF; - break; - case GL_FOG_COORD_SRC: - break; - default: - return; - } - - if (fogScale.i != r300->hw.fogp.cmd[R300_FOGP_SCALE] || - fogStart.i != r300->hw.fogp.cmd[R300_FOGP_START]) { - R300_STATECHANGE(r300, fogp); - r300->hw.fogp.cmd[R300_FOGP_SCALE] = fogScale.i; - r300->hw.fogp.cmd[R300_FOGP_START] = fogStart.i; - } -} - -static void r300SetFogState(GLcontext * ctx, GLboolean state) -{ - r300ContextPtr r300 = R300_CONTEXT(ctx); - - R300_STATECHANGE(r300, fogs); - if (state) { - r300->hw.fogs.cmd[R300_FOGS_STATE] |= R300_FG_FOG_BLEND_ENABLE; - - r300Fogfv(ctx, GL_FOG_MODE, NULL); - r300Fogfv(ctx, GL_FOG_DENSITY, &ctx->Fog.Density); - r300Fogfv(ctx, GL_FOG_START, &ctx->Fog.Start); - r300Fogfv(ctx, GL_FOG_END, &ctx->Fog.End); - r300Fogfv(ctx, GL_FOG_COLOR, ctx->Fog.Color); - } else { - r300->hw.fogs.cmd[R300_FOGS_STATE] &= ~R300_FG_FOG_BLEND_ENABLE; - } -} - -/* ============================================================= * Point state */ static void r300PointSize(GLcontext * ctx, GLfloat size) @@ -1487,7 +1362,7 @@ static void r300SetupTextures(GLcontext * ctx) #endif tmu_mappings[i] = hw_tmu; - t = r300->state.texture.unit[i].texobj; + t = (r300TexObjPtr) r300->state.texture.unit[i].texobj->DriverData; /* XXX questionable fix for bug 9170: */ if (!t) continue; @@ -1594,18 +1469,14 @@ union r300_outputs_written { static void r300SetupRSUnit(GLcontext * ctx) { r300ContextPtr r300 = R300_CONTEXT(ctx); - /* I'm still unsure if these are needed */ - GLuint interp_col[8]; TNLcontext *tnl = TNL_CONTEXT(ctx); struct vertex_buffer *VB = &tnl->vb; union r300_outputs_written OutputsWritten; GLuint InputsRead; int fp_reg, high_rr; - int col_interp_nr; - int rs_tex_count = 0, rs_col_count = 0; - int i, count; - - memset(interp_col, 0, sizeof(interp_col)); + int col_ip, tex_ip; + int rs_tex_count = 0; + int i, count, col_fmt; if (hw_tcl_on) OutputsWritten.vp_outputs = CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten; @@ -1623,51 +1494,66 @@ static void r300SetupRSUnit(GLcontext * ctx) R300_STATECHANGE(r300, rc); R300_STATECHANGE(r300, rr); - fp_reg = col_interp_nr = high_rr = 0; - - r300->hw.rr.cmd[R300_RR_INST_1] = 0; + fp_reg = col_ip = tex_ip = col_fmt = 0; - if (InputsRead & FRAG_BIT_WPOS) { - for (i = 0; i < ctx->Const.MaxTextureUnits; i++) - if (!(InputsRead & (FRAG_BIT_TEX0 << i))) - break; + r300->hw.rc.cmd[1] = 0; + r300->hw.rc.cmd[2] = 0; + for (i=0; i<R300_RR_CMDSIZE-1; ++i) + r300->hw.rr.cmd[R300_RR_INST_0 + i] = 0; - if (i == ctx->Const.MaxTextureUnits) { - fprintf(stderr, "\tno free texcoord found...\n"); - _mesa_exit(-1); - } + for (i=0; i<R300_RI_CMDSIZE-1; ++i) + r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = 0; - InputsRead |= (FRAG_BIT_TEX0 << i); - InputsRead &= ~FRAG_BIT_WPOS; - } if (InputsRead & FRAG_BIT_COL0) { - count = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->size; - interp_col[0] |= R300_RS_COL_PTR(rs_col_count); - if (count == 3) - interp_col[0] |= R300_RS_COL_FMT(R300_RS_COL_FMT_RGB1); - rs_col_count += count; + if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) { + count = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->size; + if (count == 4) + col_fmt = R300_RS_COL_FMT_RGBA; + else if (count == 3) + col_fmt = R300_RS_COL_FMT_RGB1; + else + col_fmt = R300_RS_COL_FMT_0001; + + r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R300_RS_COL_PTR(col_ip) | R300_RS_COL_FMT(col_fmt); + r300->hw.rr.cmd[R300_RR_INST_0 + col_ip] = R300_RS_INST_COL_ID(col_ip) | R300_RS_INST_COL_CN_WRITE | R300_RS_INST_COL_ADDR(fp_reg); + InputsRead &= ~FRAG_BIT_COL0; + ++col_ip; + ++fp_reg; + } else { + WARN_ONCE("fragprog wants col0, vp doesn't provide it\n"); + } } - else - interp_col[0] = R300_RS_COL_FMT(R300_RS_COL_FMT_0001); if (InputsRead & FRAG_BIT_COL1) { - count = VB->AttribPtr[_TNL_ATTRIB_COLOR1]->size; - if (count == 3) - interp_col[1] |= R300_RS_COL_FMT(R300_RS_COL_FMT_RGB0); - interp_col[1] |= R300_RS_COL_PTR(1); - rs_col_count += count; - } - - if (InputsRead & FRAG_BIT_FOGC) { - /* XXX FIX THIS - * Just turn off the bit for now. - * Need to do something similar to the color/texcoord inputs. - */ - InputsRead &= ~FRAG_BIT_FOGC; + if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) { + count = VB->AttribPtr[_TNL_ATTRIB_COLOR1]->size; + if (count == 4) + col_fmt = R300_RS_COL_FMT_RGBA; + else if (count == 3) + col_fmt = R300_RS_COL_FMT_RGB1; + else + col_fmt = R300_RS_COL_FMT_0001; + + r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R300_RS_COL_PTR(col_ip) | R300_RS_COL_FMT(col_fmt); + r300->hw.rr.cmd[R300_RR_INST_0 + col_ip] = R300_RS_INST_COL_ID(col_ip) | R300_RS_INST_COL_CN_WRITE | R300_RS_INST_COL_ADDR(fp_reg); + InputsRead &= ~FRAG_BIT_COL1; + ++col_ip; + ++fp_reg; + } else { + WARN_ONCE("fragprog wants col1, vp doesn't provide it\n"); + } } for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { + if (! ( InputsRead & FRAG_BIT_TEX(i) ) ) + continue; + + if (!R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_TEX0 + i, _TNL_ATTRIB_TEX(i))) { + WARN_ONCE("fragprog wants coords for tex%d, vp doesn't provide them!\n", i); + continue; + } + int swiz; /* with TCL we always seem to route 4 components */ @@ -1676,7 +1562,6 @@ static void r300SetupRSUnit(GLcontext * ctx) else count = VB->AttribPtr[_TNL_ATTRIB_TEX(i)]->size; - r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = interp_col[i] | rs_tex_count; switch(count) { case 4: swiz = R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3); break; case 3: swiz = R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(R300_RS_SEL_K1); break; @@ -1685,63 +1570,48 @@ static void r300SetupRSUnit(GLcontext * ctx) case 2: swiz = R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(R300_RS_SEL_K0) | R300_RS_SEL_Q(R300_RS_SEL_K1); break; }; - r300->hw.ri.cmd[R300_RI_INTERP_0 + i] |= swiz; - - r300->hw.rr.cmd[R300_RR_INST_0 + fp_reg] = 0; - if (InputsRead & (FRAG_BIT_TEX0 << i)) { - - rs_tex_count += count; - - //assert(r300->state.texture.tc_count != 0); - r300->hw.rr.cmd[R300_RR_INST_0 + fp_reg] |= R300_RS_INST_TEX_CN_WRITE | i /* source INTERP */ - | (fp_reg << R300_RS_INST_TEX_ADDR_SHIFT); - high_rr = fp_reg; - - /* Passing invalid data here can lock the GPU. */ - if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_TEX0 + i, _TNL_ATTRIB_TEX(i))) { - InputsRead &= ~(FRAG_BIT_TEX0 << i); - fp_reg++; - } else { - WARN_ONCE("fragprog wants coords for tex%d, vp doesn't provide them!\n", i); - } - } + r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= swiz | R300_RS_TEX_PTR(rs_tex_count); + r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R300_RS_INST_TEX_ID(tex_ip) | R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_reg); + InputsRead &= ~(FRAG_BIT_TEX0 << i); + rs_tex_count += count; + ++tex_ip; + ++fp_reg; } - if (InputsRead & FRAG_BIT_COL0) { - if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) { - r300->hw.rr.cmd[R300_RR_INST_0] |= R300_RS_INST_COL_ID(0) | R300_RS_INST_COL_CN_WRITE | (fp_reg++ << R300_RS_INST_COL_ADDR_SHIFT); - InputsRead &= ~FRAG_BIT_COL0; - col_interp_nr++; + if (InputsRead & FRAG_BIT_FOGC) { + if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_FOGC, _TNL_ATTRIB_FOG)) { + r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) | R300_RS_TEX_PTR(rs_tex_count); + r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R300_RS_INST_TEX_ID(tex_ip) | R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_reg); + InputsRead &= ~FRAG_BIT_FOGC; + rs_tex_count += 4; + ++tex_ip; + ++fp_reg; } else { - WARN_ONCE("fragprog wants col0, vp doesn't provide it\n"); + WARN_ONCE("fragprog wants fogc, vp doesn't provide it\n"); } } - if (InputsRead & FRAG_BIT_COL1) { - if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) { - r300->hw.rr.cmd[R300_RR_INST_1] |= R300_RS_INST_COL_ID(1) | R300_RS_INST_COL_CN_WRITE | (fp_reg++ << R300_RS_INST_COL_ADDR_SHIFT); - InputsRead &= ~FRAG_BIT_COL1; - if (high_rr < 1) - high_rr = 1; - col_interp_nr++; - } else { - WARN_ONCE("fragprog wants col1, vp doesn't provide it\n"); - } + if (InputsRead & FRAG_BIT_WPOS) { + r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) | R300_RS_TEX_PTR(rs_tex_count); + r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R300_RS_INST_TEX_ID(tex_ip) | R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_reg); + InputsRead &= ~FRAG_BIT_WPOS; + rs_tex_count += 4; + ++tex_ip; + ++fp_reg; } + InputsRead &= ~FRAG_BIT_WPOS; - /* Need at least one. This might still lock as the values are undefined... */ - if (rs_tex_count == 0 && col_interp_nr == 0) { - r300->hw.rr.cmd[R300_RR_INST_0] |= R300_RS_INST_COL_ID(0) | R300_RS_INST_COL_CN_WRITE | (fp_reg++ << R300_RS_INST_COL_ADDR_SHIFT); - col_interp_nr++; + /* Setup default color if no color or tex was set */ + if (rs_tex_count == 0 && col_ip == 0) { + r300->hw.rr.cmd[R300_RR_INST_0] = R300_RS_INST_COL_ID(0) | R300_RS_INST_COL_CN_WRITE | R300_RS_INST_COL_ADDR(0) | R300_RS_COL_FMT(R300_RS_COL_FMT_0001); + ++col_ip; } - r300->hw.rc.cmd[1] = 0 | (rs_tex_count << R300_IT_COUNT_SHIFT) - | (col_interp_nr << R300_IC_COUNT_SHIFT) - | R300_HIRES_EN; + high_rr = (col_ip > tex_ip) ? col_ip : tex_ip; + r300->hw.rc.cmd[1] |= (rs_tex_count << R300_IT_COUNT_SHIFT) | (col_ip << R300_IC_COUNT_SHIFT) | R300_HIRES_EN; + r300->hw.rc.cmd[2] |= high_rr - 1; - assert(high_rr >= 0); - r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_INST_0, high_rr + 1); - r300->hw.rc.cmd[2] = high_rr; + r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_INST_0, high_rr); if (InputsRead) WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n", InputsRead); @@ -1750,18 +1620,15 @@ static void r300SetupRSUnit(GLcontext * ctx) static void r500SetupRSUnit(GLcontext * ctx) { r300ContextPtr r300 = R300_CONTEXT(ctx); - /* I'm still unsure if these are needed */ - GLuint interp_col[8]; - union r300_outputs_written OutputsWritten; TNLcontext *tnl = TNL_CONTEXT(ctx); struct vertex_buffer *VB = &tnl->vb; + union r300_outputs_written OutputsWritten; GLuint InputsRead; int fp_reg, high_rr; - int rs_col_count = 0; - int in_texcoords, col_interp_nr; - int i, count; + int col_ip, tex_ip; + int rs_tex_count = 0; + int i, count, col_fmt; - memset(interp_col, 0, sizeof(interp_col)); if (hw_tcl_on) OutputsWritten.vp_outputs = CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten; else @@ -1778,130 +1645,151 @@ static void r500SetupRSUnit(GLcontext * ctx) R300_STATECHANGE(r300, rc); R300_STATECHANGE(r300, rr); - fp_reg = col_interp_nr = high_rr = in_texcoords = 0; + fp_reg = col_ip = tex_ip = col_fmt = 0; - r300->hw.rr.cmd[R300_RR_INST_1] = 0; + r300->hw.rc.cmd[1] = 0; + r300->hw.rc.cmd[2] = 0; + for (i=0; i<R300_RR_CMDSIZE-1; ++i) + r300->hw.rr.cmd[R300_RR_INST_0 + i] = 0; - if (InputsRead & FRAG_BIT_WPOS) { - for (i = 0; i < ctx->Const.MaxTextureUnits; i++) - if (!(InputsRead & (FRAG_BIT_TEX0 << i))) - break; + for (i=0; i<R500_RI_CMDSIZE-1; ++i) + r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = 0; - if (i == ctx->Const.MaxTextureUnits) { - fprintf(stderr, "\tno free texcoord found...\n"); - _mesa_exit(-1); - } - - InputsRead |= (FRAG_BIT_TEX0 << i); - InputsRead &= ~FRAG_BIT_WPOS; - } if (InputsRead & FRAG_BIT_COL0) { - count = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->size; - interp_col[0] |= R500_RS_COL_PTR(rs_col_count); - if (count == 3) - interp_col[0] |= R500_RS_COL_FMT(R300_RS_COL_FMT_RGB1); - rs_col_count += count; + if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) { + count = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->size; + if (count == 4) + col_fmt = R300_RS_COL_FMT_RGBA; + else if (count == 3) + col_fmt = R300_RS_COL_FMT_RGB1; + else + col_fmt = R300_RS_COL_FMT_0001; + + r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R500_RS_COL_PTR(col_ip) | R500_RS_COL_FMT(col_fmt); + r300->hw.rr.cmd[R300_RR_INST_0 + col_ip] = R500_RS_INST_COL_ID(col_ip) | R500_RS_INST_COL_CN_WRITE | R500_RS_INST_COL_ADDR(fp_reg); + InputsRead &= ~FRAG_BIT_COL0; + ++col_ip; + ++fp_reg; + } else { + WARN_ONCE("fragprog wants col0, vp doesn't provide it\n"); + } } - else - interp_col[0] = R500_RS_COL_FMT(R300_RS_COL_FMT_0001); if (InputsRead & FRAG_BIT_COL1) { - count = VB->AttribPtr[_TNL_ATTRIB_COLOR1]->size; - interp_col[1] |= R500_RS_COL_PTR(1); - if (count == 3) - interp_col[1] |= R500_RS_COL_FMT(R300_RS_COL_FMT_RGB0); - rs_col_count += count; + if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) { + count = VB->AttribPtr[_TNL_ATTRIB_COLOR1]->size; + if (count == 4) + col_fmt = R300_RS_COL_FMT_RGBA; + else if (count == 3) + col_fmt = R300_RS_COL_FMT_RGB1; + else + col_fmt = R300_RS_COL_FMT_0001; + + r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R500_RS_COL_PTR(col_ip) | R500_RS_COL_FMT(col_fmt); + r300->hw.rr.cmd[R300_RR_INST_0 + col_ip] = R500_RS_INST_COL_ID(col_ip) | R500_RS_INST_COL_CN_WRITE | R500_RS_INST_COL_ADDR(fp_reg); + InputsRead &= ~FRAG_BIT_COL1; + ++col_ip; + ++fp_reg; + } else { + WARN_ONCE("fragprog wants col1, vp doesn't provide it\n"); + } } + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { - GLuint swiz = 0; + if (! ( InputsRead & FRAG_BIT_TEX(i) ) ) + continue; - /* with TCL we always seem to route 4 components */ - if (InputsRead & (FRAG_BIT_TEX0 << i)) { - - if (hw_tcl_on) - count = 4; - else - count = VB->AttribPtr[_TNL_ATTRIB_TEX(i)]->size; - - /* always have on texcoord */ - swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_S_SHIFT; - if (count >= 2) - swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_T_SHIFT; - else - swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT; - - if (count >= 3) - swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_R_SHIFT; - else - swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT; - - if (count == 4) - swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_Q_SHIFT; - else - swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT; - - } else - swiz = (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) | - (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) | - (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) | - (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT); - - r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = interp_col[i] | swiz; - - r300->hw.rr.cmd[R300_RR_INST_0 + fp_reg] = 0; - if (InputsRead & (FRAG_BIT_TEX0 << i)) { - //assert(r300->state.texture.tc_count != 0); - r300->hw.rr.cmd[R300_RR_INST_0 + fp_reg] |= R500_RS_INST_TEX_CN_WRITE | i /* source INTERP */ - | (fp_reg << R500_RS_INST_TEX_ADDR_SHIFT); - high_rr = fp_reg; - - /* Passing invalid data here can lock the GPU. */ - if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_TEX0 + i, _TNL_ATTRIB_TEX(i))) { - InputsRead &= ~(FRAG_BIT_TEX0 << i); - fp_reg++; - } else { - WARN_ONCE("fragprog wants coords for tex%d, vp doesn't provide them!\n", i); - } + if (!R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_TEX0 + i, _TNL_ATTRIB_TEX(i))) { + WARN_ONCE("fragprog wants coords for tex%d, vp doesn't provide them!\n", i); + continue; } - } - if (InputsRead & FRAG_BIT_COL0) { - if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) { - r300->hw.rr.cmd[R300_RR_INST_0] |= R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_ADDR_SHIFT); - InputsRead &= ~FRAG_BIT_COL0; - col_interp_nr++; + int swiz = 0; + + /* with TCL we always seem to route 4 components */ + if (hw_tcl_on) + count = 4; + else + count = VB->AttribPtr[_TNL_ATTRIB_TEX(i)]->size; + + if (count == 4) { + swiz |= (rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT; + swiz |= (rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT; + swiz |= (rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT; + swiz |= (rs_tex_count + 3) << R500_RS_IP_TEX_PTR_Q_SHIFT; + } else if (count == 3) { + swiz |= (rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT; + swiz |= (rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT; + swiz |= (rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT; + swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT; + } else if (count == 2) { + swiz |= (rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT; + swiz |= (rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT; + swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT; + swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT; + } else if (count == 1) { + swiz |= (rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT; + swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT; + swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT; + swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT; } else { - WARN_ONCE("fragprog wants col0, vp doesn't provide it\n"); + swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT; + swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT; + swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT; + swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT; } + + r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= swiz; + r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R500_RS_INST_TEX_ID(tex_ip) | R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_reg); + InputsRead &= ~(FRAG_BIT_TEX0 << i); + rs_tex_count += count; + ++tex_ip; + ++fp_reg; } - if (InputsRead & FRAG_BIT_COL1) { - if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) { - r300->hw.rr.cmd[R300_RR_INST_1] |= (1 << 12) | R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_ADDR_SHIFT); - InputsRead &= ~FRAG_BIT_COL1; - if (high_rr < 1) - high_rr = 1; - col_interp_nr++; + if (InputsRead & FRAG_BIT_FOGC) { + if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_FOGC, _TNL_ATTRIB_FOG)) { + r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= ((rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT) | + ((rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT) | + ((rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT) | + ((rs_tex_count + 3) << R500_RS_IP_TEX_PTR_Q_SHIFT); + + r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R500_RS_INST_TEX_ID(tex_ip) | R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_reg); + InputsRead &= ~FRAG_BIT_FOGC; + rs_tex_count += 4; + ++tex_ip; + ++fp_reg; } else { - WARN_ONCE("fragprog wants col1, vp doesn't provide it\n"); + WARN_ONCE("fragprog wants fogc, vp doesn't provide it\n"); } } - /* Need at least one. This might still lock as the values are undefined... */ - if (in_texcoords == 0 && col_interp_nr == 0) { - r300->hw.rr.cmd[R300_RR_INST_0] |= 0 | R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_ADDR_SHIFT); - col_interp_nr++; + if (InputsRead & FRAG_BIT_WPOS) { + r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= ((rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT) | + ((rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT) | + ((rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT) | + ((rs_tex_count + 3) << R500_RS_IP_TEX_PTR_Q_SHIFT); + + r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R500_RS_INST_TEX_ID(tex_ip) | R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_reg); + InputsRead &= ~FRAG_BIT_WPOS; + rs_tex_count += 4; + ++tex_ip; + ++fp_reg; + } + + /* Setup default color if no color or tex was set */ + if (rs_tex_count == 0 && col_ip == 0) { + r300->hw.rr.cmd[R300_RR_INST_0] |= R500_RS_INST_COL_ID(0) | R500_RS_INST_COL_CN_WRITE | R500_RS_INST_COL_ADDR(0) | R500_RS_COL_FMT(R300_RS_COL_FMT_0001); + ++col_ip; } - r300->hw.rc.cmd[1] = 0 | (in_texcoords << R300_IT_COUNT_SHIFT) - | (col_interp_nr << R300_IC_COUNT_SHIFT) - | R300_HIRES_EN; + high_rr = (col_ip > tex_ip) ? col_ip : tex_ip; + r300->hw.rc.cmd[1] |= (rs_tex_count << R300_IT_COUNT_SHIFT) | (col_ip << R300_IC_COUNT_SHIFT) | R300_HIRES_EN; + r300->hw.rc.cmd[2] |= 0xC0 | (high_rr - 1); - assert(high_rr >= 0); - r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R500_RS_INST_0, high_rr + 1); - r300->hw.rc.cmd[2] = 0xC0 | high_rr; + r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R500_RS_INST_0, high_rr); if (InputsRead) WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n", InputsRead); @@ -2139,7 +2027,7 @@ static void r300Enable(GLcontext * ctx, GLenum cap, GLboolean state) /* empty */ break; case GL_FOG: - r300SetFogState(ctx, state); + /* empty */ break; case GL_ALPHA_TEST: r300SetAlphaState(ctx); @@ -2204,7 +2092,7 @@ static void r300ResetHwState(r300ContextPtr r300) r300DepthFunc(ctx, ctx->Depth.Func); /* stencil */ - r300Enable(ctx, GL_STENCIL_TEST, ctx->Stencil.Enabled); + r300Enable(ctx, GL_STENCIL_TEST, ctx->Stencil._Enabled); r300StencilMaskSeparate(ctx, 0, ctx->Stencil.WriteMask[0]); r300StencilFuncSeparate(ctx, 0, ctx->Stencil.Function[0], ctx->Stencil.Ref[0], ctx->Stencil.ValueMask[0]); @@ -2293,11 +2181,9 @@ static void r300ResetHwState(r300ContextPtr r300) break; } - /* XXX: set to 0 when fog is disabled? */ - r300->hw.gb_misc.cmd[R300_GB_MISC_SELECT] = R300_GB_FOG_SELECT_1_1_W; - /* XXX: Enable anti-aliasing? */ r300->hw.gb_misc.cmd[R300_GB_MISC_AA_CONFIG] = GB_AA_CONFIG_AA_DISABLE; + r300->hw.gb_misc.cmd[R300_GB_MISC_SELECT] = 0; r300->hw.ga_point_s0.cmd[1] = r300PackFloat32(0.0); r300->hw.ga_point_s0.cmd[2] = r300PackFloat32(0.0); @@ -2346,17 +2232,11 @@ static void r300ResetHwState(r300ContextPtr r300) R500_C0_SEL_B | R500_C1_SEL_G | R500_C2_SEL_R | R500_C3_SEL_A; r300->hw.us_out_fmt.cmd[4] = R500_OUT_FMT_UNUSED | R500_C0_SEL_B | R500_C1_SEL_G | R500_C2_SEL_R | R500_C3_SEL_A; - r300->hw.us_out_fmt.cmd[5] = R300_W_FMT_W24; - - r300Enable(ctx, GL_FOG, ctx->Fog.Enabled); - r300Fogfv(ctx, GL_FOG_MODE, NULL); - r300Fogfv(ctx, GL_FOG_DENSITY, &ctx->Fog.Density); - r300Fogfv(ctx, GL_FOG_START, &ctx->Fog.Start); - r300Fogfv(ctx, GL_FOG_END, &ctx->Fog.End); - r300Fogfv(ctx, GL_FOG_COLOR, ctx->Fog.Color); - r300Fogfv(ctx, GL_FOG_COORDINATE_SOURCE_EXT, NULL); + r300->hw.us_out_fmt.cmd[5] = R300_W_FMT_W0 | R300_W_SRC_US; - r300->hw.fg_depth_src.cmd[1] = 0; + /* disable fog unit */ + r300->hw.fogs.cmd[R300_FOGS_STATE] = 0; + r300->hw.fg_depth_src.cmd[1] = R300_FG_DEPTH_SRC_SCAN; r300->hw.rb3d_cctl.cmd[1] = 0; @@ -2611,16 +2491,6 @@ static void r500SetupPixelShader(r300ContextPtr rmesa) } code = &fp->code; - if (fp->mesa_program.FogOption != GL_NONE) { - /* Enable HW fog. Try not to squish GL context. - * (Anybody sane remembered to set glFog() opts first!) */ - r300SetFogState(ctx, GL_TRUE); - ctx->Fog.Mode = fp->mesa_program.FogOption; - r300Fogfv(ctx, GL_FOG_MODE, NULL); - } else - /* Make sure HW is matching GL context. */ - r300SetFogState(ctx, ctx->Fog.Enabled); - r300SetupTextures(ctx); R300_STATECHANGE(rmesa, fp); @@ -2669,9 +2539,22 @@ void r300UpdateShaderStates(r300ContextPtr rmesa) r300UpdateTextureState(ctx); r300SetEarlyZState(ctx); - GLuint fgdepthsrc = R300_FG_DEPTH_SRC_SCAN; - if (current_fragment_program_writes_depth(ctx)) + /* w_fmt value is set to get best performance + * see p.130 R5xx 3D acceleration guide v1.3 */ + GLuint w_fmt, fgdepthsrc; + if (current_fragment_program_writes_depth(ctx)) { fgdepthsrc = R300_FG_DEPTH_SRC_SHADER; + w_fmt = R300_W_FMT_W24 | R300_W_SRC_US; + } else { + fgdepthsrc = R300_FG_DEPTH_SRC_SCAN; + w_fmt = R300_W_FMT_W0 | R300_W_SRC_US; + } + + if (w_fmt != rmesa->hw.us_out_fmt.cmd[5]) { + R300_STATECHANGE(rmesa, us_out_fmt); + rmesa->hw.us_out_fmt.cmd[5] = w_fmt; + } + if (fgdepthsrc != rmesa->hw.fg_depth_src.cmd[1]) { R300_STATECHANGE(rmesa, fg_depth_src); rmesa->hw.fg_depth_src.cmd[1] = fgdepthsrc; @@ -2792,7 +2675,6 @@ void r300InitStateFuncs(struct dd_function_table *functions) functions->DepthFunc = r300DepthFunc; functions->DepthMask = r300DepthMask; functions->CullFace = r300CullFace; - functions->Fogfv = r300Fogfv; functions->FrontFace = r300FrontFace; functions->ShadeModel = r300ShadeModel; functions->LogicOpcode = r300LogicOpcode; diff --git a/r300/r300_swtcl.c b/r300/r300_swtcl.c index f021e12..ba3621b 100644 --- a/r300/r300_swtcl.c +++ b/r300/r300_swtcl.c @@ -85,15 +85,15 @@ static void r300SetVertexFormat( GLcontext *ctx ) struct vertex_buffer *VB = &tnl->vb; DECLARE_RENDERINPUTS(index_bitset); GLuint InputsRead = 0, OutputsWritten = 0; - int vap_fmt_0 = 0; - int vap_vte_cntl = 0; + int vap_fmt_1 = 0; int offset = 0; int vte = 0; + int fog_id; GLint inputs[VERT_ATTRIB_MAX]; GLint tab[VERT_ATTRIB_MAX]; int swizzle[VERT_ATTRIB_MAX][4]; GLuint i, nr; - GLuint sz, vap_fmt_1 = 0; + GLuint sz; DECLARE_RENDERINPUTS(render_inputs_bitset); RENDERINPUTS_COPY(render_inputs_bitset, tnl->render_inputs_bitset); @@ -132,7 +132,6 @@ static void r300SetVertexFormat( GLcontext *ctx ) /* if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_POINTSIZE )) { EMIT_ATTR( _TNL_ATTRIB_POINTSIZE, EMIT_1F ); - vap_fmt_0 |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT; offset += 1; } */ @@ -154,6 +153,33 @@ static void r300SetVertexFormat( GLcontext *ctx ) OutputsWritten |= 1 << VERT_RESULT_COL1; } + fog_id = -1; + if (RENDERINPUTS_TEST(index_bitset, _TNL_ATTRIB_FOG)) { + /* find first free tex coord slot */ + if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) { + int i; + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { + if (!RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX(i) )) { + fog_id = i; + break; + } + } + } else { + fog_id = 0; + } + + if (fog_id == -1) { + fprintf(stderr, "\tout of free texcoords to do fog\n"); + _mesa_exit(-1); + } + + sz = VB->AttribPtr[VERT_ATTRIB_FOG]->size; + EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1F + sz - 1); + InputsRead |= 1 << VERT_ATTRIB_FOG; + OutputsWritten |= 1 << VERT_RESULT_FOGC; + vap_fmt_1 |= sz << (3 * fog_id); + } + if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) { int i; @@ -168,6 +194,37 @@ static void r300SetVertexFormat( GLcontext *ctx ) } } + /* RS can't put fragment position on the pixel stack, so stuff it in texcoord if needed */ + if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_POS) && (ctx->FragmentProgram._Current->Base.InputsRead & FRAG_BIT_WPOS)) { + int first_free_tex = -1; + if (fog_id >= 0) { + first_free_tex = fog_id+1; + } else { + if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) { + int i; + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { + if (!RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX(i) )) { + first_free_tex = i; + break; + } + } + } else { + first_free_tex = 0; + } + } + + if (first_free_tex == -1) { + fprintf(stderr, "\tout of free texcoords to write w pos\n"); + _mesa_exit(-1); + } + + sz = VB->AttribPtr[VERT_ATTRIB_POS]->size; + InputsRead |= 1 << (VERT_ATTRIB_TEX0 + first_free_tex); + OutputsWritten |= 1 << (VERT_RESULT_TEX0 + first_free_tex); + EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_1F + sz - 1 ); + vap_fmt_1 |= sz << (3 * first_free_tex); + } + for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) { if (InputsRead & (1 << i)) { inputs[i] = nr++; @@ -175,7 +232,7 @@ static void r300SetVertexFormat( GLcontext *ctx ) inputs[i] = -1; } } - + /* Fixed, apply to vir0 only */ if (InputsRead & (1 << VERT_ATTRIB_POS)) inputs[VERT_ATTRIB_POS] = 0; @@ -183,19 +240,21 @@ static void r300SetVertexFormat( GLcontext *ctx ) inputs[VERT_ATTRIB_COLOR0] = 2; if (InputsRead & (1 << VERT_ATTRIB_COLOR1)) inputs[VERT_ATTRIB_COLOR1] = 3; + if (InputsRead & (1 << VERT_ATTRIB_FOG)) + inputs[VERT_ATTRIB_FOG] = 6 + fog_id; for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++) if (InputsRead & (1 << i)) inputs[i] = 6 + (i - VERT_ATTRIB_TEX0); - + for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) { if (InputsRead & (1 << i)) { tab[nr++] = i; } } - + for (i = 0; i < nr; i++) { int ci; - + swizzle[i][0] = SWIZZLE_ZERO; swizzle[i][1] = SWIZZLE_ZERO; swizzle[i][2] = SWIZZLE_ZERO; @@ -215,21 +274,21 @@ static void r300SetVertexFormat( GLcontext *ctx ) ((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count = r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle, nr); - + R300_STATECHANGE(rmesa, vic); rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead); rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead); - + R300_STATECHANGE(rmesa, vof); rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten); rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = vap_fmt_1; - + rmesa->swtcl.vertex_size = _tnl_install_attrs( ctx, - rmesa->swtcl.vertex_attrs, + rmesa->swtcl.vertex_attrs, rmesa->swtcl.vertex_attr_count, NULL, 0 ); - + rmesa->swtcl.vertex_size /= 4; RENDERINPUTS_COPY( rmesa->tnl_index_bitset, index_bitset ); @@ -247,34 +306,34 @@ static void flush_last_swtcl_prim( r300ContextPtr rmesa ) { if (RADEON_DEBUG & DEBUG_IOCTL) fprintf(stderr, "%s\n", __FUNCTION__); - + rmesa->dma.flush = NULL; if (rmesa->dma.current.buf) { struct r300_dma_region *current = &rmesa->dma.current; GLuint current_offset = GET_START(current); - assert (current->start + + assert (current->start + rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 == current->ptr); if (rmesa->dma.current.start != rmesa->dma.current.ptr) { r300EnsureCmdBufSpace( rmesa, rmesa->hw.max_state_size + (12*sizeof(int)), __FUNCTION__); - + r300EmitState(rmesa); - + r300EmitVertexAOS( rmesa, rmesa->swtcl.vertex_size, current_offset); - + r300EmitVbufPrim( rmesa, rmesa->swtcl.hw_primitive, rmesa->swtcl.numverts); - + r300EmitCacheFlush(rmesa); } - + rmesa->swtcl.numverts = 0; current->start = current->ptr; } @@ -287,7 +346,7 @@ r300AllocDmaLowVerts( r300ContextPtr rmesa, int nverts, int vsize ) { GLuint bytes = vsize * nverts; - if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) + if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) r300RefillCurrentDmaRegion( rmesa, bytes); if (!rmesa->dma.flush) { @@ -297,7 +356,7 @@ r300AllocDmaLowVerts( r300ContextPtr rmesa, int nverts, int vsize ) ASSERT( vsize == rmesa->swtcl.vertex_size * 4 ); ASSERT( rmesa->dma.flush == flush_last_swtcl_prim ); - ASSERT( rmesa->dma.current.start + + ASSERT( rmesa->dma.current.start + rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 == rmesa->dma.current.ptr ); @@ -352,7 +411,7 @@ static void r300RenderPrimitive( GLcontext *ctx, GLenum prim ); r300ContextPtr rmesa = R300_CONTEXT(ctx); \ const char *r300verts = (char *)rmesa->swtcl.verts; #define VERT(x) (r300Vertex *)(r300verts + ((x) * vertsize * sizeof(int))) -#define VERTEX r300Vertex +#define VERTEX r300Vertex #define DO_DEBUG_VERTS (1 && (RADEON_DEBUG & DEBUG_VERTS)) #define PRINT_VERTEX(x) #undef TAG @@ -445,7 +504,7 @@ do { \ #define LOCAL_VARS(n) \ r300ContextPtr rmesa = R300_CONTEXT(ctx); \ - GLuint color[n], spec[n]; \ + GLuint color[n] = { 0, }, spec[n] = { 0, }; \ GLuint coloroffset = rmesa->swtcl.coloroffset; \ GLuint specoffset = rmesa->swtcl.specoffset; \ (void) color; (void) spec; (void) coloroffset; (void) specoffset; @@ -570,17 +629,16 @@ static void r300ChooseRenderState( GLcontext *ctx ) static void r300RenderStart(GLcontext *ctx) { r300ContextPtr rmesa = R300_CONTEXT( ctx ); - // fprintf(stderr, "%s\n", __FUNCTION__); - r300ChooseRenderState(ctx); + r300ChooseRenderState(ctx); r300SetVertexFormat(ctx); r300UpdateShaders(rmesa); r300UpdateShaderStates(rmesa); r300EmitCacheFlush(rmesa); - - if (rmesa->dma.flush != 0 && + + if (rmesa->dma.flush != 0 && rmesa->dma.flush != flush_last_swtcl_prim) rmesa->dma.flush( rmesa ); @@ -593,7 +651,7 @@ static void r300RenderFinish(GLcontext *ctx) static void r300RasterPrimitive( GLcontext *ctx, GLuint hwprim ) { r300ContextPtr rmesa = R300_CONTEXT(ctx); - + if (rmesa->swtcl.hw_primitive != hwprim) { R300_NEWPRIM( rmesa ); rmesa->swtcl.hw_primitive = hwprim; @@ -610,8 +668,6 @@ static void r300RenderPrimitive(GLcontext *ctx, GLenum prim) return; r300RasterPrimitive( ctx, reduced_prim[prim] ); - // fprintf(stderr, "%s\n", __FUNCTION__); - } static void r300ResetLineStipple(GLcontext *ctx) @@ -625,12 +681,12 @@ void r300InitSwtcl(GLcontext *ctx) TNLcontext *tnl = TNL_CONTEXT(ctx); r300ContextPtr rmesa = R300_CONTEXT(ctx); static int firsttime = 1; - + if (firsttime) { init_rast_tab(); firsttime = 0; } - + tnl->Driver.Render.Start = r300RenderStart; tnl->Driver.Render.Finish = r300RenderFinish; tnl->Driver.Render.PrimitiveNotify = r300RenderPrimitive; @@ -638,15 +694,15 @@ void r300InitSwtcl(GLcontext *ctx) tnl->Driver.Render.BuildVertices = _tnl_build_vertices; tnl->Driver.Render.CopyPV = _tnl_copy_pv; tnl->Driver.Render.Interp = _tnl_interp; - + /* FIXME: what are these numbers? */ - _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12, + _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12, 48 * sizeof(GLfloat) ); - + rmesa->swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf; rmesa->swtcl.RenderIndex = ~0; rmesa->swtcl.render_primitive = GL_TRIANGLES; - rmesa->swtcl.hw_primitive = 0; + rmesa->swtcl.hw_primitive = 0; _tnl_invalidate_vertex_state( ctx, ~0 ); _tnl_invalidate_vertices( ctx, ~0 ); @@ -654,11 +710,6 @@ void r300InitSwtcl(GLcontext *ctx) _tnl_need_projected_coords( ctx, GL_FALSE ); r300ChooseRenderState(ctx); - - _mesa_validate_all_lighting_tables( ctx ); - - tnl->Driver.NotifyMaterialChange = - _mesa_validate_all_lighting_tables; } void r300DestroySwtcl(GLcontext *ctx) @@ -691,7 +742,7 @@ void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr) type = r300PrimitiveType(rmesa, primitive); num_verts = r300NumVerts(rmesa, vertex_nr, primitive); - + start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0), 0); e32(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (num_verts << 16) | type); } diff --git a/r300/r300_tex.c b/r300/r300_tex.c index 8ab382c..7c699ec 100644 --- a/r300/r300_tex.c +++ b/r300/r300_tex.c @@ -171,8 +171,13 @@ static void r300SetTexFilter(r300TexObjPtr t, GLenum minf, GLenum magf, GLfloat } } -static void r300SetTexBorderColor(r300TexObjPtr t, GLubyte c[4]) +static void r300SetTexBorderColor(r300TexObjPtr t, const GLfloat color[4]) { + GLubyte c[4]; + CLAMPED_FLOAT_TO_UBYTE(c[0], color[0]); + CLAMPED_FLOAT_TO_UBYTE(c[1], color[1]); + CLAMPED_FLOAT_TO_UBYTE(c[2], color[2]); + CLAMPED_FLOAT_TO_UBYTE(c[3], color[3]); t->pp_border_color = PACK_COLOR_8888(c[3], c[0], c[1], c[2]); } @@ -203,7 +208,7 @@ static r300TexObjPtr r300AllocTexObj(struct gl_texture_object *texObj) r300UpdateTexWrap(t); r300SetTexFilter(t, texObj->MinFilter, texObj->MagFilter, texObj->MaxAnisotropy); - r300SetTexBorderColor(t, texObj->_BorderChan); + r300SetTexBorderColor(t, texObj->BorderColor); } return t; @@ -929,7 +934,7 @@ static void r300TexParameter(GLcontext * ctx, GLenum target, break; case GL_TEXTURE_BORDER_COLOR: - r300SetTexBorderColor(t, texObj->_BorderChan); + r300SetTexBorderColor(t, texObj->BorderColor); break; case GL_TEXTURE_BASE_LEVEL: diff --git a/r300/r300_texmem.c b/r300/r300_texmem.c index b03eefa..a89ab83 100644 --- a/r300/r300_texmem.c +++ b/r300/r300_texmem.c @@ -44,6 +44,7 @@ SOFTWARE. #include "main/colormac.h" #include "main/macros.h" #include "main/simple_list.h" +#include "main/texobj.h" #include "radeon_reg.h" /* gets definition for usleep */ #include "r300_context.h" #include "r300_state.h" @@ -71,8 +72,8 @@ void r300DestroyTexObj(r300ContextPtr rmesa, r300TexObjPtr t) } for (i = 0; i < rmesa->radeon.glCtx->Const.MaxTextureUnits; i++) { - if (rmesa->state.texture.unit[i].texobj == t) { - rmesa->state.texture.unit[i].texobj = NULL; + if (rmesa->state.texture.unit[i].texobj == t->base.tObj) { + _mesa_reference_texobj(&rmesa->state.texture.unit[i].texobj, NULL); } } } @@ -306,7 +307,7 @@ static void r300UploadSubImage(r300ContextPtr rmesa, r300TexObjPtr t, ASSERT(face < 6); /* Ensure we have a valid texture to upload */ - if ((hwlevel < 0) || (hwlevel >= RADEON_MAX_TEXTURE_LEVELS)) { + if ((hwlevel < 0) || (hwlevel >= R300_MAX_TEXTURE_LEVELS)) { _mesa_problem(NULL, "bad texture level in %s", __FUNCTION__); return; } diff --git a/r300/r300_texstate.c b/r300/r300_texstate.c index e2329f0..f6ae4b6 100644 --- a/r300/r300_texstate.c +++ b/r300/r300_texstate.c @@ -345,7 +345,7 @@ static void r300SetTexImages(r300ContextPtr rmesa, numLevels = t->base.lastLevel - t->base.firstLevel + 1; - assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS); + assert(numLevels <= R300_MAX_TEXTURE_LEVELS); /* Calculate mipmap offsets and dimensions for blitting (uploading) * The idea is that we lay out the mipmap levels within a block of @@ -557,32 +557,39 @@ static GLboolean r300UpdateTexture(GLcontext * ctx, int unit) { r300ContextPtr rmesa = R300_CONTEXT(ctx); struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; - struct gl_texture_object *tObj = texUnit->_Current; - r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData; + struct gl_texture_object *tObj = texUnit->_ReallyEnabled ? + texUnit->_Current : NULL; + r300TexObjPtr t = tObj ? (r300TexObjPtr) tObj->DriverData : NULL; /* Fallback if there's a texture border */ - if (tObj->Image[0][tObj->BaseLevel]->Border > 0) - return GL_FALSE; + if (tObj && tObj->Image[0][tObj->BaseLevel]->Border > 0) { + tObj = NULL; + t = NULL; + } /* Update state if this is a different texture object to last * time. */ - if (rmesa->state.texture.unit[unit].texobj != t) { + if (rmesa->state.texture.unit[unit].texobj != tObj) { if (rmesa->state.texture.unit[unit].texobj != NULL) { + r300TexObjPtr t_old = (r300TexObjPtr) rmesa->state.texture.unit[unit].texobj->DriverData; + /* The old texture is no longer bound to this texture unit. * Mark it as such. */ - rmesa->state.texture.unit[unit].texobj->base.bound &= - ~(1 << unit); + t_old->base.bound &= ~(1 << unit); } - rmesa->state.texture.unit[unit].texobj = t; - t->base.bound |= (1 << unit); - driUpdateTextureLRU((driTextureObject *) t); /* XXX: should be locked! */ + _mesa_reference_texobj(&rmesa->state.texture.unit[unit].texobj, tObj); + + if (t) { + t->base.bound |= (1 << unit); + driUpdateTextureLRU(&t->base); /* XXX: should be locked! */ + } } - return !t->border_fallback; + return !t || !t->border_fallback; } void r300SetTexOffset(__DRIcontext * pDRICtx, GLint texname, @@ -650,7 +657,7 @@ static GLboolean r300UpdateTextureUnit(GLcontext * ctx, int unit) } else if (texUnit->_ReallyEnabled) { return GL_FALSE; } else { - return GL_TRUE; + return r300UpdateTexture(ctx, unit); } } diff --git a/r300/r300_vertprog.c b/r300/r300_vertprog.c index c4e325e..146daa3 100644 --- a/r300/r300_vertprog.c +++ b/r300/r300_vertprog.c @@ -126,7 +126,7 @@ static unsigned long t_dst_mask(GLuint mask) return mask & VSF_FLAG_ALL; } -static unsigned long t_dst_class(enum register_file file) +static unsigned long t_dst_class(gl_register_file file) { switch (file) { @@ -161,7 +161,7 @@ static unsigned long t_dst_index(struct r300_vertex_program *vp, return dst->Index; } -static unsigned long t_src_class(enum register_file file) +static unsigned long t_src_class(gl_register_file file) { switch (file) { case PROGRAM_TEMPORARY: @@ -245,7 +245,7 @@ static unsigned long t_src_index(struct r300_vertex_program *vp, static unsigned long t_src(struct r300_vertex_program *vp, struct prog_src_register *src) { - /* src->NegateBase uses the NEGATE_ flags from program_instruction.h, + /* src->Negate uses the NEGATE_ flags from program_instruction.h, * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. */ return PVS_SRC_OPERAND(t_src_index(vp, src), @@ -254,13 +254,13 @@ static unsigned long t_src(struct r300_vertex_program *vp, t_swizzle(GET_SWZ(src->Swizzle, 2)), t_swizzle(GET_SWZ(src->Swizzle, 3)), t_src_class(src->File), - src->NegateBase) | (src->RelAddr << 4); + src->Negate) | (src->RelAddr << 4); } static unsigned long t_src_scalar(struct r300_vertex_program *vp, struct prog_src_register *src) { - /* src->NegateBase uses the NEGATE_ flags from program_instruction.h, + /* src->Negate uses the NEGATE_ flags from program_instruction.h, * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. */ return PVS_SRC_OPERAND(t_src_index(vp, src), @@ -269,8 +269,7 @@ static unsigned long t_src_scalar(struct r300_vertex_program *vp, t_swizzle(GET_SWZ(src->Swizzle, 0)), t_swizzle(GET_SWZ(src->Swizzle, 0)), t_src_class(src->File), - src-> - NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + src->Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src->RelAddr << 4); } @@ -307,7 +306,7 @@ static GLuint *r300TranslateOpcodeABS(struct r300_vertex_program *vp, t_swizzle(GET_SWZ(src[0].Swizzle, 3)), t_src_class(src[0].File), (!src[0]. - NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); inst[3] = 0; @@ -369,8 +368,7 @@ static GLuint *r300TranslateOpcodeDP3(struct r300_vertex_program *vp, t_swizzle(GET_SWZ(src[0].Swizzle, 2)), SWIZZLE_ZERO, t_src_class(src[0].File), - src[0]. - NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | + src[0].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | (src[0].RelAddr << 4); inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), @@ -378,8 +376,7 @@ static GLuint *r300TranslateOpcodeDP3(struct r300_vertex_program *vp, t_swizzle(GET_SWZ(src[1].Swizzle, 1)), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), SWIZZLE_ZERO, t_src_class(src[1].File), - src[1]. - NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | + src[1].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | (src[1].RelAddr << 4); inst[3] = __CONST(1, SWIZZLE_ZERO); @@ -422,8 +419,7 @@ static GLuint *r300TranslateOpcodeDPH(struct r300_vertex_program *vp, t_swizzle(GET_SWZ(src[0].Swizzle, 2)), PVS_SRC_SELECT_FORCE_1, t_src_class(src[0].File), - src[0]. - NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | + src[0].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | (src[0].RelAddr << 4); inst[2] = t_src(vp, &src[1]); inst[3] = __CONST(1, SWIZZLE_ZERO); @@ -519,7 +515,7 @@ static GLuint *r300TranslateOpcodeFLR(struct r300_vertex_program *vp, PVS_SRC_SELECT_W, PVS_SRC_REG_TEMPORARY, /* Not 100% sure about this */ (!src[0]. - NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE + Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE /*VSF_FLAG_ALL */ ); inst[3] = __CONST(0, SWIZZLE_ZERO); (*u_temp_i)--; @@ -564,8 +560,7 @@ static GLuint *r300TranslateOpcodeLG2(struct r300_vertex_program *vp, t_swizzle(GET_SWZ(src[0].Swizzle, 0)), t_swizzle(GET_SWZ(src[0].Swizzle, 0)), t_src_class(src[0].File), - src[0]. - NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); inst[2] = __CONST(0, SWIZZLE_ZERO); inst[3] = __CONST(0, SWIZZLE_ZERO); @@ -592,24 +587,21 @@ static GLuint *r300TranslateOpcodeLIT(struct r300_vertex_program *vp, PVS_SRC_SELECT_FORCE_0, // Z t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y t_src_class(src[0].File), - src[0]. - NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W PVS_SRC_SELECT_FORCE_0, // Z t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X t_src_class(src[0].File), - src[0]. - NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X PVS_SRC_SELECT_FORCE_0, // Z t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W t_src_class(src[0].File), - src[0]. - NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); return inst; @@ -837,7 +829,7 @@ static GLuint *r300TranslateOpcodeSUB(struct r300_vertex_program *vp, t_swizzle(GET_SWZ(src[1].Swizzle, 3)), t_src_class(src[1].File), (!src[1]. - NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); inst[3] = 0; #else @@ -857,7 +849,7 @@ static GLuint *r300TranslateOpcodeSUB(struct r300_vertex_program *vp, t_swizzle(GET_SWZ(src[1].Swizzle, 3)), t_src_class(src[1].File), (!src[1]. - NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); #endif @@ -905,16 +897,14 @@ static GLuint *r300TranslateOpcodeXPD(struct r300_vertex_program *vp, t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W t_src_class(src[0].File), - src[0]. - NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W t_src_class(src[1].File), - src[1]. - NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + src[1].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); inst[3] = __CONST(1, SWIZZLE_ZERO); inst += 4; @@ -931,15 +921,14 @@ static GLuint *r300TranslateOpcodeXPD(struct r300_vertex_program *vp, t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W t_src_class(src[1].File), (!src[1]. - NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W t_src_class(src[0].File), - src[0]. - NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); inst[3] = PVS_SRC_OPERAND(*u_temp_i, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, @@ -993,17 +982,16 @@ static void t_inputs_outputs(struct r300_vertex_program *vp) vp->outputs[VERT_RESULT_COL0] + 3; cur_reg = vp->outputs[VERT_RESULT_BFC1] + 1; } -#if 0 - if (vp->key.OutputsWritten & (1 << VERT_RESULT_FOGC)) { - vp->outputs[VERT_RESULT_FOGC] = cur_reg++; - } -#endif for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) { if (vp->key.OutputsWritten & (1 << i)) { vp->outputs[i] = cur_reg++; } } + + if (vp->key.OutputsWritten & (1 << VERT_RESULT_FOGC)) { + vp->outputs[VERT_RESULT_FOGC] = cur_reg++; + } } static void r300TranslateVertexShader(struct r300_vertex_program *vp, @@ -1271,7 +1259,6 @@ static void position_invariant(struct gl_program *prog) else vpi[i].Opcode = OPCODE_MAD; - vpi[i].StringPos = 0; vpi[i].Data = 0; if (i == 3) diff --git a/r300/r500_fragprog.c b/r300/r500_fragprog.c index 75dae86..292573d 100644 --- a/r300/r500_fragprog.c +++ b/r300/r500_fragprog.c @@ -31,6 +31,12 @@ #include "radeon_program_alu.h" +static void reset_srcreg(struct prog_src_register* reg) +{ + _mesa_bzero(reg, sizeof(*reg)); + reg->Swizzle = SWIZZLE_NOOP; +} + static struct prog_src_register shadow_ambient(struct gl_program *program, int tmu) { gl_state_index fail_value_tokens[STATE_LENGTH] = { @@ -99,6 +105,19 @@ static GLboolean transform_TEX( destredirect = GL_TRUE; } + if (inst.SrcReg[0].File != PROGRAM_TEMPORARY && inst.SrcReg[0].File != PROGRAM_INPUT) { + int tmpreg = radeonFindFreeTemporary(t); + tgt = radeonAppendInstructions(t->Program, 1); + tgt->Opcode = OPCODE_MOV; + tgt->DstReg.File = PROGRAM_TEMPORARY; + tgt->DstReg.Index = tmpreg; + tgt->SrcReg[0] = inst.SrcReg[0]; + + reset_srcreg(&inst.SrcReg[0]); + inst.SrcReg[0].File = PROGRAM_TEMPORARY; + inst.SrcReg[0].Index = tmpreg; + } + tgt = radeonAppendInstructions(t->Program, 1); _mesa_copy_instructions(tgt, &inst, 1); @@ -137,9 +156,9 @@ static GLboolean transform_TEX( * r < tex <=> -tex+r < 0 * r >= tex <=> not (-tex+r < 0 */ if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL) - tgt[1].SrcReg[2].NegateBase = tgt[0].SrcReg[2].NegateBase ^ NEGATE_XYZW; + tgt[1].SrcReg[2].Negate = tgt[0].SrcReg[2].Negate ^ NEGATE_XYZW; else - tgt[1].SrcReg[0].NegateBase = tgt[0].SrcReg[0].NegateBase ^ NEGATE_XYZW; + tgt[1].SrcReg[0].Negate = tgt[0].SrcReg[0].Negate ^ NEGATE_XYZW; tgt[2].Opcode = OPCODE_CMP; tgt[2].DstReg = orig_inst->DstReg; @@ -279,8 +298,8 @@ static void insert_WPOS_trailer(struct r500_fragment_program_compiler *compiler) static void nqssadce_init(struct nqssadce_state* s) { - s->Outputs[FRAG_RESULT_COLR].Sourced = WRITEMASK_XYZW; - s->Outputs[FRAG_RESULT_DEPR].Sourced = WRITEMASK_W; + s->Outputs[FRAG_RESULT_COLOR].Sourced = WRITEMASK_XYZW; + s->Outputs[FRAG_RESULT_DEPTH].Sourced = WRITEMASK_W; } static GLboolean is_native_swizzle(GLuint opcode, struct prog_src_register reg) @@ -295,8 +314,8 @@ static GLboolean is_native_swizzle(GLuint opcode, struct prog_src_register reg) if (reg.Abs) return GL_FALSE; - if (reg.NegateAbs) - reg.NegateBase ^= 15; + if (reg.Negate) + reg.Negate ^= NEGATE_XYZW; if (opcode == OPCODE_KIL) { if (reg.Swizzle != SWIZZLE_NOOP) @@ -305,7 +324,7 @@ static GLboolean is_native_swizzle(GLuint opcode, struct prog_src_register reg) for(i = 0; i < 4; ++i) { GLuint swz = GET_SWZ(reg.Swizzle, i); if (swz == SWIZZLE_NIL) { - reg.NegateBase &= ~(1 << i); + reg.Negate &= ~(1 << i); continue; } if (swz >= 4) @@ -313,15 +332,14 @@ static GLboolean is_native_swizzle(GLuint opcode, struct prog_src_register reg) } } - if (reg.NegateBase) + if (reg.Negate) return GL_FALSE; return GL_TRUE; } else if (opcode == OPCODE_DDX || opcode == OPCODE_DDY) { /* DDX/MDH and DDY/MDV explicitly ignore incoming swizzles; * if it doesn't fit perfectly into a .xyzw case... */ - if (reg.Swizzle == SWIZZLE_NOOP && !reg.Abs - && !reg.NegateBase && !reg.NegateAbs) + if (reg.Swizzle == SWIZZLE_NOOP && !reg.Abs && !reg.Negate) return GL_TRUE; return GL_FALSE; @@ -336,7 +354,7 @@ static GLboolean is_native_swizzle(GLuint opcode, struct prog_src_register reg) if (swz != SWIZZLE_NIL && swz != SWIZZLE_ZERO) relevant |= 1 << i; } - if ((reg.NegateBase & relevant) && ((reg.NegateBase & relevant) != relevant)) + if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant)) return GL_FALSE; return GL_TRUE; @@ -360,7 +378,7 @@ static void nqssadce_build_swizzle(struct nqssadce_state *s, GLuint swz = GET_SWZ(src.Swizzle, i); if (swz == SWIZZLE_NIL) continue; - negatebase[GET_BIT(src.NegateBase, i)] |= 1 << i; + negatebase[GET_BIT(src.Negate, i)] |= 1 << i; } _mesa_insert_instructions(s->Program, s->IP, (negatebase[0] ? 1 : 0) + (negatebase[1] ? 1 : 0)); diff --git a/r300/radeon_ioctl.c b/r300/radeon_ioctl.c index 36502eb..f042a7b 100644 --- a/r300/radeon_ioctl.c +++ b/r300/radeon_ioctl.c @@ -59,7 +59,7 @@ static uint32_t radeonGetLastFrame(radeonContextPtr radeon) { drm_radeon_getparam_t gp; int ret; - uint32_t frame; + uint32_t frame = 0; gp.param = RADEON_PARAM_LAST_FRAME; gp.value = (int *)&frame; @@ -78,7 +78,7 @@ uint32_t radeonGetAge(radeonContextPtr radeon) { drm_radeon_getparam_t gp; int ret; - uint32_t age; + uint32_t age = 0; gp.param = RADEON_PARAM_LAST_CLEAR; gp.value = (int *)&age; diff --git a/r300/radeon_nqssadce.c b/r300/radeon_nqssadce.c index 97ce016..4a2e1cb 100644 --- a/r300/radeon_nqssadce.c +++ b/r300/radeon_nqssadce.c @@ -61,12 +61,12 @@ static struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_reg struct prog_src_register tmp = srcreg; int i; tmp.Swizzle = 0; - tmp.NegateBase = 0; + tmp.Negate = NEGATE_NONE; for(i = 0; i < 4; ++i) { GLuint swz = GET_SWZ(swizzle, i); if (swz < 4) { tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3); - tmp.NegateBase |= GET_BIT(srcreg.NegateBase, swz) << i; + tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i; } else { tmp.Swizzle |= swz << (i*3); } @@ -103,9 +103,8 @@ static struct prog_instruction* track_used_srcreg(struct nqssadce_state* s, inst->SrcReg[src].File = PROGRAM_TEMPORARY; inst->SrcReg[src].Index = dstreg.Index; inst->SrcReg[src].Swizzle = 0; - inst->SrcReg[src].NegateBase = 0; + inst->SrcReg[src].Negate = NEGATE_NONE; inst->SrcReg[src].Abs = 0; - inst->SrcReg[src].NegateAbs = 0; for(i = 0; i < 4; ++i) { if (GET_BIT(sourced, i)) inst->SrcReg[src].Swizzle |= i << (3*i); @@ -191,7 +190,7 @@ static void process_instruction(struct nqssadce_state* s) if (inst->Opcode != OPCODE_KIL) { if (s->Descr->RewriteDepthOut) { - if (inst->DstReg.File == PROGRAM_OUTPUT && inst->DstReg.Index == FRAG_RESULT_DEPR) + if (inst->DstReg.File == PROGRAM_OUTPUT && inst->DstReg.Index == FRAG_RESULT_DEPTH) rewrite_depth_out(inst); } diff --git a/r300/radeon_program_alu.c b/r300/radeon_program_alu.c index 1ef71e7..8283723 100644 --- a/r300/radeon_program_alu.c +++ b/r300/radeon_program_alu.c @@ -81,18 +81,6 @@ static struct prog_instruction *emit3(struct gl_program* p, return fpi; } -static void set_swizzle(struct prog_src_register *SrcReg, int coordinate, int swz) -{ - SrcReg->Swizzle &= ~(7 << (3*coordinate)); - SrcReg->Swizzle |= swz << (3*coordinate); -} - -static void set_negate_base(struct prog_src_register *SrcReg, int coordinate, int negate) -{ - SrcReg->NegateBase &= ~(1 << coordinate); - SrcReg->NegateBase |= (negate << coordinate); -} - static struct prog_dst_register dstreg(int file, int index) { struct prog_dst_register dst; @@ -156,15 +144,14 @@ static struct prog_src_register absolute(struct prog_src_register reg) { struct prog_src_register newreg = reg; newreg.Abs = 1; - newreg.NegateBase = 0; - newreg.NegateAbs = 0; + newreg.Negate = NEGATE_NONE; return newreg; } static struct prog_src_register negate(struct prog_src_register reg) { struct prog_src_register newreg = reg; - newreg.NegateAbs = !newreg.NegateAbs; + newreg.Negate = newreg.Negate ^ NEGATE_XYZW; return newreg; } @@ -189,8 +176,7 @@ static void transform_ABS(struct radeon_transform_context* t, { struct prog_src_register src = inst->SrcReg[0]; src.Abs = 1; - src.NegateBase = 0; - src.NegateAbs = 0; + src.Negate = NEGATE_NONE; emit1(t->Program, OPCODE_MOV, inst->SaturateMode, inst->DstReg, src); } @@ -198,18 +184,9 @@ static void transform_DPH(struct radeon_transform_context* t, struct prog_instruction* inst) { struct prog_src_register src0 = inst->SrcReg[0]; - if (src0.NegateAbs) { - if (src0.Abs) { - int tempreg = radeonFindFreeTemporary(t); - emit1(t->Program, OPCODE_MOV, 0, dstreg(PROGRAM_TEMPORARY, tempreg), src0); - src0 = srcreg(src0.File, src0.Index); - } else { - src0.NegateAbs = 0; - src0.NegateBase ^= NEGATE_XYZW; - } - } - set_swizzle(&src0, 3, SWIZZLE_ONE); - set_negate_base(&src0, 3, 0); + src0.Negate &= ~NEGATE_W; + src0.Swizzle &= ~(7 << (3 * 3)); + src0.Swizzle |= SWIZZLE_ONE << (3 * 3); emit2(t->Program, OPCODE_DP4, inst->SaturateMode, inst->DstReg, src0, inst->SrcReg[1]); } @@ -649,7 +626,7 @@ GLboolean radeonTransformDeriv(struct radeon_transform_context* t, B.Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE); - B.NegateBase = NEGATE_XYZW; + B.Negate = NEGATE_XYZW; emit2(t->Program, inst->Opcode, inst->SaturateMode, inst->DstReg, inst->SrcReg[0], B); diff --git a/r300/radeon_program_pair.c b/r300/radeon_program_pair.c index 58bc0d5..2e21f7b 100644 --- a/r300/radeon_program_pair.c +++ b/r300/radeon_program_pair.c @@ -47,6 +47,7 @@ struct pair_state_instruction { GLuint IsTex:1; /**< Is a texture instruction */ + GLuint IsOutput:1; /**< Is output instruction */ GLuint NeedRGB:1; /**< Needs the RGB ALU */ GLuint NeedAlpha:1; /**< Needs the Alpha ALU */ GLuint IsTranscendent:1; /**< Is a special transcendent instruction */ @@ -123,6 +124,7 @@ struct pair_state { GLboolean Debug; GLboolean Verbose; void *UserData; + GLubyte NumKillInsts; /** * Translate Mesa registers to hardware registers @@ -149,6 +151,11 @@ struct pair_state { struct pair_state_instruction *ReadyTEX; /** + * Linked list of deferred instructions + */ + struct pair_state_instruction *DeferredInsts; + + /** * Pool of @ref reg_value structures for fast allocation. */ struct reg_value *ValuePool; @@ -231,7 +238,9 @@ static void instruction_ready(struct pair_state *s, int ip) if (s->Verbose) _mesa_printf("instruction_ready(%i)\n", ip); - if (pairinst->IsTex) + if (s->NumKillInsts > 0 && pairinst->IsOutput) + add_pairinst_to_list(&s->DeferredInsts, pairinst); + else if (pairinst->IsTex) add_pairinst_to_list(&s->ReadyTEX, pairinst); else if (!pairinst->NeedAlpha) add_pairinst_to_list(&s->ReadyRGB, pairinst); @@ -255,8 +264,7 @@ static void final_rewrite(struct pair_state *s, struct prog_instruction *inst) inst->SrcReg[2] = inst->SrcReg[1]; inst->SrcReg[1].File = PROGRAM_BUILTIN; inst->SrcReg[1].Swizzle = SWIZZLE_1111; - inst->SrcReg[1].NegateBase = 0; - inst->SrcReg[1].NegateAbs = 0; + inst->SrcReg[1].Negate = NEGATE_NONE; inst->Opcode = OPCODE_MAD; break; case OPCODE_CMP: @@ -340,6 +348,8 @@ static void classify_instruction(struct pair_state *s, error("Unknown opcode %d\n", inst->Opcode); break; } + + pairinst->IsOutput = (inst->DstReg.File == PROGRAM_OUTPUT); } @@ -451,19 +461,7 @@ static void allocate_input_registers(struct pair_state *s) int i; GLuint hwindex = 0; - /* Texcoords come first */ - for (i = 0; i < s->Ctx->Const.MaxTextureUnits; i++) { - if (InputsRead & (FRAG_BIT_TEX0 << i)) - alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_TEX0+i, hwindex++); - } - InputsRead &= ~FRAG_BITS_TEX_ANY; - - /* fragment position treated as a texcoord */ - if (InputsRead & FRAG_BIT_WPOS) - alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_WPOS, hwindex++); - InputsRead &= ~FRAG_BIT_WPOS; - - /* Then primary colour */ + /* Primary colour */ if (InputsRead & FRAG_BIT_COL0) alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_COL0, hwindex++); InputsRead &= ~FRAG_BIT_COL0; @@ -473,11 +471,23 @@ static void allocate_input_registers(struct pair_state *s) alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_COL1, hwindex++); InputsRead &= ~FRAG_BIT_COL1; - /* Fog coordinate */ + /* Texcoords */ + for (i = 0; i < s->Ctx->Const.MaxTextureUnits; i++) { + if (InputsRead & (FRAG_BIT_TEX0 << i)) + alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_TEX0+i, hwindex++); + } + InputsRead &= ~FRAG_BITS_TEX_ANY; + + /* Fogcoords treated as a texcoord */ if (InputsRead & FRAG_BIT_FOGC) alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_FOGC, hwindex++); InputsRead &= ~FRAG_BIT_FOGC; + /* fragment position treated as a texcoord */ + if (InputsRead & FRAG_BIT_WPOS) + alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_WPOS, hwindex++); + InputsRead &= ~FRAG_BIT_WPOS; + /* Anything else */ if (InputsRead) error("Don't know how to handle inputs 0x%x\n", InputsRead); @@ -603,8 +613,11 @@ static void emit_all_tex(struct pair_state *s) struct prog_instruction *inst = s->Program->Instructions + ip; commit_instruction(s, ip); - if (inst->Opcode != OPCODE_KIL) + if (inst->Opcode == OPCODE_KIL) + --s->NumKillInsts; + else inst->DstReg.Index = get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index); + inst->SrcReg[0].Index = get_hw_reg(s, inst->SrcReg[0].File, inst->SrcReg[0].Index); if (s->Debug) { @@ -722,7 +735,6 @@ static GLboolean fill_instruction_into_pair(struct pair_state *s, struct radeon_ if (pairinst->NeedRGB && !pairinst->IsTranscendent) { GLboolean srcrgb = GL_FALSE; GLboolean srcalpha = GL_FALSE; - GLuint negatebase = 0; int j; for(j = 0; j < 3; ++j) { GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, j); @@ -730,8 +742,6 @@ static GLboolean fill_instruction_into_pair(struct pair_state *s, struct radeon_ srcrgb = GL_TRUE; else if (swz < 4) srcalpha = GL_TRUE; - if (swz != SWIZZLE_NIL && GET_BIT(inst->SrcReg[i].NegateBase, j)) - negatebase = 1; } source = alloc_pair_source(s, pair, inst->SrcReg[i], srcrgb, srcalpha); if (source < 0) @@ -739,12 +749,11 @@ static GLboolean fill_instruction_into_pair(struct pair_state *s, struct radeon_ pair->RGB.Arg[i].Source = source; pair->RGB.Arg[i].Swizzle = inst->SrcReg[i].Swizzle & 0x1ff; pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs; - pair->RGB.Arg[i].Negate = (negatebase & ~pair->RGB.Arg[i].Abs) ^ inst->SrcReg[i].NegateAbs; + pair->RGB.Arg[i].Negate = !!(inst->SrcReg[i].Negate & (NEGATE_X | NEGATE_Y | NEGATE_Z)); } if (pairinst->NeedAlpha) { GLboolean srcrgb = GL_FALSE; GLboolean srcalpha = GL_FALSE; - GLuint negatebase = GET_BIT(inst->SrcReg[i].NegateBase, pairinst->IsTranscendent ? 0 : 3); GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, pairinst->IsTranscendent ? 0 : 3); if (swz < 3) srcrgb = GL_TRUE; @@ -756,7 +765,7 @@ static GLboolean fill_instruction_into_pair(struct pair_state *s, struct radeon_ pair->Alpha.Arg[i].Source = source; pair->Alpha.Arg[i].Swizzle = swz; pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs; - pair->Alpha.Arg[i].Negate = (negatebase & ~pair->RGB.Arg[i].Abs) ^ inst->SrcReg[i].NegateAbs; + pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & NEGATE_W); } } @@ -778,10 +787,10 @@ static void fill_dest_into_pair(struct pair_state *s, struct radeon_pair_instruc struct prog_instruction *inst = s->Program->Instructions + ip; if (inst->DstReg.File == PROGRAM_OUTPUT) { - if (inst->DstReg.Index == FRAG_RESULT_COLR) { + if (inst->DstReg.Index == FRAG_RESULT_COLOR) { pair->RGB.OutputWriteMask |= inst->DstReg.WriteMask & WRITEMASK_XYZ; pair->Alpha.OutputWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); - } else if (inst->DstReg.Index == FRAG_RESULT_DEPR) { + } else if (inst->DstReg.Index == FRAG_RESULT_DEPTH) { pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); } } else { @@ -866,6 +875,17 @@ static void emit_alu(struct pair_state *s) s->Error = s->Error || !s->Handler->EmitPaired(s->UserData, &pair); } +static GLubyte countKillInsts(struct gl_program *prog) +{ + GLubyte i, count = 0; + + for (i = 0; i < prog->NumInstructions; ++i) { + if (prog->Instructions[i].Opcode == OPCODE_KIL) + ++count; + } + + return count; +} GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program, const struct radeon_pair_handler* handler, void *userdata) @@ -879,6 +899,7 @@ GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program, s.UserData = userdata; s.Debug = (RADEON_DEBUG & DEBUG_PIXEL) ? GL_TRUE : GL_FALSE; s.Verbose = GL_FALSE && s.Debug; + s.NumKillInsts = countKillInsts(program); s.Instructions = (struct pair_state_instruction*)_mesa_calloc( sizeof(struct pair_state_instruction)*s.Program->NumInstructions); @@ -897,6 +918,21 @@ GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program, if (s.ReadyTEX) emit_all_tex(&s); + if (!s.NumKillInsts) { + struct pair_state_instruction *pairinst = s.DeferredInsts; + while (pairinst) { + if (!pairinst->NeedAlpha) + add_pairinst_to_list(&s.ReadyRGB, pairinst); + else if (!pairinst->NeedRGB) + add_pairinst_to_list(&s.ReadyAlpha, pairinst); + else + add_pairinst_to_list(&s.ReadyFullALU, pairinst); + + pairinst = pairinst->NextReady; + } + s.DeferredInsts = NULL; + } + while(s.ReadyFullALU || s.ReadyRGB || s.ReadyAlpha) emit_alu(&s); } diff --git a/radeon/radeon_context.c b/radeon/radeon_context.c index 1e992c0..b67bda7 100644 --- a/radeon/radeon_context.c +++ b/radeon/radeon_context.c @@ -62,9 +62,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "radeon_tcl.h" #include "radeon_maos.h" -#define need_GL_ARB_multisample -#define need_GL_ARB_texture_compression -#define need_GL_ARB_vertex_buffer_object #define need_GL_EXT_blend_minmax #define need_GL_EXT_fog_coord #define need_GL_EXT_secondary_color @@ -114,16 +111,13 @@ static const GLubyte *radeonGetString( GLcontext *ctx, GLenum name ) */ const struct dri_extension card_extensions[] = { - { "GL_ARB_multisample", GL_ARB_multisample_functions }, { "GL_ARB_multitexture", NULL }, { "GL_ARB_texture_border_clamp", NULL }, - { "GL_ARB_texture_compression", GL_ARB_texture_compression_functions }, { "GL_ARB_texture_env_add", NULL }, { "GL_ARB_texture_env_combine", NULL }, { "GL_ARB_texture_env_crossbar", NULL }, { "GL_ARB_texture_env_dot3", NULL }, { "GL_ARB_texture_mirrored_repeat", NULL }, - { "GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions }, { "GL_EXT_blend_logic_op", NULL }, { "GL_EXT_blend_subtract", GL_EXT_blend_minmax_functions }, { "GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, @@ -359,6 +353,10 @@ radeonCreateContext( const __GLcontextModes *glVisual, rmesa->boxes = 0; + ctx->Const.MaxDrawBuffers = 1; + + _mesa_set_mvp_with_dp4( ctx, GL_TRUE ); + /* Initialize the software rasterizer and helper modules. */ _swrast_CreateContext( ctx ); diff --git a/radeon/radeon_lighting.c b/radeon/radeon_lighting.c index 6d9ccfa..ac3b94e 100644 --- a/radeon/radeon_lighting.c +++ b/radeon/radeon_lighting.c @@ -246,7 +246,7 @@ void radeonUpdateMaterial( GLcontext *ctx ) * _VP_inf_norm * _h_inf_norm * _Position - * _NormDirection + * _NormSpotDirection * _ModelViewInvScale * _NeedEyeCoords * _EyeZDir @@ -308,9 +308,9 @@ void radeonUpdateLighting( GLcontext *ctx ) fcmd[LIT_DIRECTION_W] = 0; } else { COPY_4V( &fcmd[LIT_POSITION_X], l->_Position ); - fcmd[LIT_DIRECTION_X] = -l->_NormDirection[0]; - fcmd[LIT_DIRECTION_Y] = -l->_NormDirection[1]; - fcmd[LIT_DIRECTION_Z] = -l->_NormDirection[2]; + fcmd[LIT_DIRECTION_X] = -l->_NormSpotDirection[0]; + fcmd[LIT_DIRECTION_Y] = -l->_NormSpotDirection[1]; + fcmd[LIT_DIRECTION_Z] = -l->_NormSpotDirection[2]; fcmd[LIT_DIRECTION_W] = 0; } diff --git a/radeon/radeon_maos_arrays.c b/radeon/radeon_maos_arrays.c index de3c3a1..31eea13 100644 --- a/radeon/radeon_maos_arrays.c +++ b/radeon/radeon_maos_arrays.c @@ -40,7 +40,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "swrast_setup/swrast_setup.h" #include "math/m_translate.h" #include "tnl/tnl.h" -#include "tnl/tcontext.h" #include "radeon_context.h" #include "radeon_ioctl.h" diff --git a/radeon/radeon_screen.c b/radeon/radeon_screen.c index 81337da..791f598 100644 --- a/radeon/radeon_screen.c +++ b/radeon/radeon_screen.c @@ -206,6 +206,8 @@ DRI_CONF_BEGIN DRI_CONF_END; static const GLuint __driNConfigOptions = 17; +extern const struct dri_extension gl_20_extension[]; + #ifndef RADEON_DEBUG int RADEON_DEBUG = 0; @@ -259,8 +261,6 @@ radeonFillInModes( __DRIscreenPrivate *psp, __GLcontextModes *m; unsigned depth_buffer_factor; unsigned back_buffer_factor; - GLenum fb_format; - GLenum fb_type; int i; /* Right now GLX_SWAP_COPY_OML isn't supported, but it would be easy @@ -274,7 +274,7 @@ radeonFillInModes( __DRIscreenPrivate *psp, uint8_t depth_bits_array[2]; uint8_t stencil_bits_array[2]; - + uint8_t msaa_samples_array[1]; depth_bits_array[0] = depth_bits; depth_bits_array[1] = depth_bits; @@ -286,22 +286,32 @@ radeonFillInModes( __DRIscreenPrivate *psp, stencil_bits_array[0] = 0; stencil_bits_array[1] = (stencil_bits == 0) ? 8 : stencil_bits; + msaa_samples_array[0] = 0; + depth_buffer_factor = ((depth_bits != 0) || (stencil_bits != 0)) ? 2 : 1; back_buffer_factor = (have_back_buffer) ? 2 : 1; - if ( pixel_bits == 16 ) { - fb_format = GL_RGB; - fb_type = GL_UNSIGNED_SHORT_5_6_5; - } - else { - fb_format = GL_BGRA; - fb_type = GL_UNSIGNED_INT_8_8_8_8_REV; - } + if (pixel_bits == 16) { + __DRIconfig **configs_a8r8g8b8; + __DRIconfig **configs_r5g6b5; + + configs_r5g6b5 = driCreateConfigs(GL_RGB, GL_UNSIGNED_SHORT_5_6_5, + depth_bits_array, stencil_bits_array, + depth_buffer_factor, back_buffer_modes, + back_buffer_factor, msaa_samples_array, + 1); + configs_a8r8g8b8 = driCreateConfigs(GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, + depth_bits_array, stencil_bits_array, + 1, back_buffer_modes, 1, + msaa_samples_array, 1); + configs = driConcatConfigs(configs_r5g6b5, configs_a8r8g8b8); + } else + configs = driCreateConfigs(GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, + depth_bits_array, stencil_bits_array, + depth_buffer_factor, + back_buffer_modes, back_buffer_factor, + msaa_samples_array, 1); - configs = driCreateConfigs(fb_format, fb_type, - depth_bits_array, stencil_bits_array, - depth_buffer_factor, - back_buffer_modes, back_buffer_factor); if (configs == NULL) { fprintf( stderr, "[%s:%u] Error creating FBConfig!\n", __func__, __LINE__ ); @@ -358,7 +368,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) unsigned char *RADEONMMIO; int i; int ret; - uint32_t temp; + uint32_t temp = 0; if (sPriv->devPrivSize != sizeof(RADEONDRIRec)) { fprintf(stderr,"\nERROR! sizeof(RADEONDRIRec) does not match passed size from device driver\n"); @@ -551,11 +561,8 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) screen->chip_family = CHIP_FAMILY_RS300; break; - /* 9500 with 1 pipe verified by: Reid Linnemann <lreid@cs.okstate.edu> */ + case PCI_CHIP_R300_AD: - screen->chip_family = CHIP_FAMILY_RV350; - screen->chip_flags = RADEON_CHIPSET_TCL; - break; case PCI_CHIP_R300_AE: case PCI_CHIP_R300_AF: case PCI_CHIP_R300_AG: @@ -883,6 +890,18 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) } else { screen->num_gb_pipes = temp; } + + /* pipe overrides */ + switch (dri_priv->deviceID) { + case PCI_CHIP_R300_AD: /* 9500 with 1 quadpipe verified by: Reid Linnemann <lreid@cs.okstate.edu> */ + case PCI_CHIP_RV410_5E4C: /* RV410 SE only have 1 quadpipe */ + case PCI_CHIP_RV410_5E4F: /* RV410 SE only have 1 quadpipe */ + screen->num_gb_pipes = 1; + break; + default: + break; + } + } if ( sPriv->drm_version.minor >= 10 ) { @@ -1112,7 +1131,7 @@ radeonCreateBuffer( __DRIscreenPrivate *driScrnPriv, static void radeonDestroyBuffer(__DRIdrawablePrivate *driDrawPriv) { - _mesa_unreference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate))); + _mesa_reference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate)), NULL); } #if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) @@ -1146,6 +1165,7 @@ static void radeonDestroyContext(__DRIcontextPrivate * driContextPriv) #endif + /** * This is the driver specific part of the createNewScreen entry point. * @@ -1198,6 +1218,8 @@ radeonInitScreen(__DRIscreenPrivate *psp) driInitSingleExtension( NULL, NV_vp_extension ); driInitSingleExtension( NULL, ATI_fs_extension ); driInitExtensions( NULL, point_extensions, GL_FALSE ); +#elif defined(RADEON_COMMON_FOR_R300) + driInitSingleExtension( NULL, gl_20_extension ); #endif if (!radeonInitDriver(psp)) diff --git a/radeon/radeon_state.c b/radeon/radeon_state.c index 32bcff3..b656100 100644 --- a/radeon/radeon_state.c +++ b/radeon/radeon_state.c @@ -967,7 +967,7 @@ void radeonUpdateMaterial( GLcontext *ctx ) * _VP_inf_norm * _h_inf_norm * _Position - * _NormDirection + * _NormSpotDirection * _ModelViewInvScale * _NeedEyeCoords * _EyeZDir @@ -1028,9 +1028,9 @@ static void update_light( GLcontext *ctx ) fcmd[LIT_DIRECTION_W] = 0; } else { COPY_4V( &fcmd[LIT_POSITION_X], l->_Position ); - fcmd[LIT_DIRECTION_X] = -l->_NormDirection[0]; - fcmd[LIT_DIRECTION_Y] = -l->_NormDirection[1]; - fcmd[LIT_DIRECTION_Z] = -l->_NormDirection[2]; + fcmd[LIT_DIRECTION_X] = -l->_NormSpotDirection[0]; + fcmd[LIT_DIRECTION_Y] = -l->_NormSpotDirection[1]; + fcmd[LIT_DIRECTION_Z] = -l->_NormSpotDirection[2]; fcmd[LIT_DIRECTION_W] = 0; } diff --git a/radeon/radeon_tex.c b/radeon/radeon_tex.c index b0aec21..f2b6deb 100644 --- a/radeon/radeon_tex.c +++ b/radeon/radeon_tex.c @@ -239,8 +239,13 @@ static void radeonSetTexFilter( radeonTexObjPtr t, GLenum minf, GLenum magf ) } } -static void radeonSetTexBorderColor( radeonTexObjPtr t, GLubyte c[4] ) +static void radeonSetTexBorderColor( radeonTexObjPtr t, const GLfloat color[4] ) { + GLubyte c[4]; + CLAMPED_FLOAT_TO_UBYTE(c[0], color[0]); + CLAMPED_FLOAT_TO_UBYTE(c[1], color[1]); + CLAMPED_FLOAT_TO_UBYTE(c[2], color[2]); + CLAMPED_FLOAT_TO_UBYTE(c[3], color[3]); t->pp_border_color = radeonPackColor( 4, c[0], c[1], c[2], c[3] ); } @@ -276,7 +281,7 @@ static radeonTexObjPtr radeonAllocTexObj( struct gl_texture_object *texObj ) radeonSetTexWrap( t, texObj->WrapS, texObj->WrapT ); radeonSetTexMaxAnisotropy( t, texObj->MaxAnisotropy ); radeonSetTexFilter( t, texObj->MinFilter, texObj->MagFilter ); - radeonSetTexBorderColor( t, texObj->_BorderChan ); + radeonSetTexBorderColor( t, texObj->BorderColor ); } return t; @@ -755,7 +760,7 @@ static void radeonTexParameter( GLcontext *ctx, GLenum target, break; case GL_TEXTURE_BORDER_COLOR: - radeonSetTexBorderColor( t, texObj->_BorderChan ); + radeonSetTexBorderColor( t, texObj->BorderColor ); break; case GL_TEXTURE_BASE_LEVEL: diff --git a/radeon/radeon_texstate.c b/radeon/radeon_texstate.c index 1e2f654..b165205 100644 --- a/radeon/radeon_texstate.c +++ b/radeon/radeon_texstate.c @@ -1023,11 +1023,11 @@ static GLboolean radeon_validate_texgen( GLcontext *ctx, GLuint unit ) */ else if ( (texUnit->TexGenEnabled & S_BIT) && (texUnit->TexGenEnabled & T_BIT) && - (texUnit->GenModeS == texUnit->GenModeT) ) { + (texUnit->GenS.Mode == texUnit->GenT.Mode) ) { if ( ((texUnit->TexGenEnabled & R_BIT) && - (texUnit->GenModeS != texUnit->GenModeR)) || + (texUnit->GenS.Mode != texUnit->GenR.Mode)) || ((texUnit->TexGenEnabled & Q_BIT) && - (texUnit->GenModeS != texUnit->GenModeQ)) ) { + (texUnit->GenS.Mode != texUnit->GenQ.Mode)) ) { /* Mixed modes, fallback: */ if (RADEON_DEBUG & DEBUG_FALLBACKS) @@ -1051,23 +1051,23 @@ static GLboolean radeon_validate_texgen( GLcontext *ctx, GLuint unit ) rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_Q_BIT(unit); } - switch (texUnit->GenModeS) { + switch (texUnit->GenS.Mode) { case GL_OBJECT_LINEAR: rmesa->TexGenEnabled |= RADEON_TEXGEN_INPUT_OBJ << inputshift; set_texgen_matrix( rmesa, unit, - texUnit->ObjectPlaneS, - texUnit->ObjectPlaneT, - texUnit->ObjectPlaneR, - texUnit->ObjectPlaneQ); + texUnit->GenS.ObjectPlane, + texUnit->GenT.ObjectPlane, + texUnit->GenR.ObjectPlane, + texUnit->GenQ.ObjectPlane); break; case GL_EYE_LINEAR: rmesa->TexGenEnabled |= RADEON_TEXGEN_INPUT_EYE << inputshift; set_texgen_matrix( rmesa, unit, - texUnit->EyePlaneS, - texUnit->EyePlaneT, - texUnit->EyePlaneR, - texUnit->EyePlaneQ); + texUnit->GenS.EyePlane, + texUnit->GenT.EyePlane, + texUnit->GenR.EyePlane, + texUnit->GenQ.EyePlane); break; case GL_REFLECTION_MAP_NV: diff --git a/radeon/server/radeon_reg.h b/radeon/server/radeon_reg.h index 596a8aa..ae2ccdf 100644 --- a/radeon/server/radeon_reg.h +++ b/radeon/server/radeon_reg.h @@ -1500,7 +1500,7 @@ # define RADEON_ALPHA_ARG_C_T1_ALPHA (6 << 8) # define RADEON_ALPHA_ARG_C_T2_ALPHA (7 << 8) # define RADEON_ALPHA_ARG_C_T3_ALPHA (8 << 8) -# define RADEON_DOT_ALPHA_DONT_REPLICATE (1 << 9) +# define RADEON_DOT_ALPHA_DONT_REPLICATE (1 << 12) # define RADEON_ALPHA_ARG_MASK 0xf #define RADEON_PP_TFACTOR_0 0x1c68 |