diff options
author | Luc Verhaegen <libv@skynet.be> | 2010-03-16 20:46:25 +0100 |
---|---|---|
committer | Luc Verhaegen <libv@skynet.be> | 2010-03-16 20:46:25 +0100 |
commit | fd026ff56899498375b748a36cc2eaed1158484e (patch) | |
tree | 02055174656133dafb303a31b2b208ba21ef4a60 | |
parent | 534eb0f6eea95ff5851d3cb74663679fcd375572 (diff) |
Import radeon, r200, r300 and r600 dri drivers from mesa 7.8-rc1. [7.8-rc1]
129 files changed, 8105 insertions, 4175 deletions
diff --git a/configure.ac b/configure.ac index 5cd936f..78601e8 100644 --- a/configure.ac +++ b/configure.ac @@ -1,7 +1,7 @@ # Process this file with autoconf to produce a configure script AC_PREREQ(2.57) -AC_INIT([mesa-dri-radeon], 7.7.0, [], mesa-dri-radeon) +AC_INIT([mesa-dri-radeon], 7.8.0, [], mesa-dri-radeon) AM_INIT_AUTOMAKE([dist-bzip2]) @@ -10,14 +10,16 @@ AM_MAINTAINER_MODE # Checks for programs. AC_DISABLE_STATIC AC_PROG_LIBTOOL -AC_PROG_CC + +# thanks to some unwarranted use of scattered variable definition +AC_PROG_CC_C99 # Checks for header files. AC_HEADER_STDC PKG_CHECK_MODULES([DRM], [libdrm >= 2.3.0]) -PKG_CHECK_MODULES([DRI], [libmesadri >= 7.7.0 libmesadri < 7.8.0 - libmesadricommon >= 7.7.0 libmesadricommon < 7.8.0]) +PKG_CHECK_MODULES([DRI], [libmesadri >= 7.8.0 libmesadri < 7.9.0 + libmesadricommon >= 7.8.0 libmesadricommon < 7.9.0]) # libdrm 2.4.17 changed the api significantly. PKG_CHECK_MODULES([LIBDRM_RADEON], [libdrm_radeon libdrm >= 2.4.17], diff --git a/r200/Makefile.am b/r200/Makefile.am index 7cdc634..7c5b249 100644 --- a/r200/Makefile.am +++ b/r200/Makefile.am @@ -20,6 +20,7 @@ r200_dri_la_SOURCES = \ ../radeon/radeon_queryobj.c \ ../radeon/radeon_span.c \ ../radeon/radeon_texture.c \ + ../radeon/radeon_tex_copy.c \ r200_context.c \ r200_ioctl.c \ r200_state.c \ @@ -34,6 +35,7 @@ r200_dri_la_SOURCES = \ r200_sanity.c \ r200_fragshader.c \ r200_vertprog.c \ + r200_blit.c \ ../radeon/radeon_screen.c if HAVE_LIBDRM_RADEON diff --git a/r200/r200_blit.c b/r200/r200_blit.c new file mode 100644 index 0000000..3075760 --- /dev/null +++ b/r200/r200_blit.c @@ -0,0 +1,408 @@ +/* + * Copyright (C) 2009 Maciej Cencora <m.cencora@gmail.com> + * + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_common.h" +#include "r200_context.h" +#include "r200_blit.h" + +static inline uint32_t cmdpacket0(struct radeon_screen *rscrn, + int reg, int count) +{ + if (count) + return CP_PACKET0(reg, count - 1); + return CP_PACKET2; +} + +/* common formats supported as both textures and render targets */ +unsigned r200_check_blit(gl_format mesa_format) +{ + /* XXX others? BE/LE? */ + switch (mesa_format) { + case MESA_FORMAT_ARGB8888: + case MESA_FORMAT_XRGB8888: + case MESA_FORMAT_RGB565: + case MESA_FORMAT_ARGB4444: + case MESA_FORMAT_ARGB1555: + case MESA_FORMAT_A8: + break; + default: + return 0; + } + + /* ??? 
*/ + if (_mesa_get_format_bits(mesa_format, GL_DEPTH_BITS) > 0) + return 0; + + return 1; +} + +static inline void emit_vtx_state(struct r200_context *r200) +{ + BATCH_LOCALS(&r200->radeon); + + BEGIN_BATCH(14); + if (r200->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) { + OUT_BATCH_REGVAL(R200_SE_VAP_CNTL_STATUS, 0); + } else { + OUT_BATCH_REGVAL(R200_SE_VAP_CNTL_STATUS, RADEON_TCL_BYPASS); + } + OUT_BATCH_REGVAL(R200_SE_VAP_CNTL, (R200_VAP_FORCE_W_TO_ONE | + (9 << R200_VAP_VF_MAX_VTX_NUM__SHIFT))); + OUT_BATCH_REGVAL(R200_SE_VTX_STATE_CNTL, 0); + OUT_BATCH_REGVAL(R200_SE_VTE_CNTL, 0); + OUT_BATCH_REGVAL(R200_SE_VTX_FMT_0, R200_VTX_XY); + OUT_BATCH_REGVAL(R200_SE_VTX_FMT_1, (2 << R200_VTX_TEX0_COMP_CNT_SHIFT)); + OUT_BATCH_REGVAL(RADEON_SE_CNTL, (RADEON_DIFFUSE_SHADE_GOURAUD | + RADEON_BFACE_SOLID | + RADEON_FFACE_SOLID | + RADEON_VTX_PIX_CENTER_OGL | + RADEON_ROUND_MODE_ROUND | + RADEON_ROUND_PREC_4TH_PIX)); + END_BATCH(); +} + +static void inline emit_tx_setup(struct r200_context *r200, + gl_format mesa_format, + struct radeon_bo *bo, + intptr_t offset, + unsigned width, + unsigned height, + unsigned pitch) +{ + uint32_t txformat = R200_TXFORMAT_NON_POWER2; + BATCH_LOCALS(&r200->radeon); + + assert(width <= 2047); + assert(height <= 2047); + assert(offset % 32 == 0); + + /* XXX others? BE/LE? 
*/ + switch (mesa_format) { + case MESA_FORMAT_ARGB8888: + txformat |= R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP; + break; + case MESA_FORMAT_XRGB8888: + txformat |= R200_TXFORMAT_ARGB8888; + break; + case MESA_FORMAT_RGB565: + txformat |= R200_TXFORMAT_RGB565; + break; + case MESA_FORMAT_ARGB4444: + txformat |= R200_TXFORMAT_ARGB4444 | R200_TXFORMAT_ALPHA_IN_MAP; + break; + case MESA_FORMAT_ARGB1555: + txformat |= R200_TXFORMAT_ARGB1555 | R200_TXFORMAT_ALPHA_IN_MAP; + break; + case MESA_FORMAT_A8: + txformat |= R200_TXFORMAT_I8 | R200_TXFORMAT_ALPHA_IN_MAP; + break; + default: + break; + } + + BEGIN_BATCH(28); + OUT_BATCH_REGVAL(RADEON_PP_CNTL, RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE); + OUT_BATCH_REGVAL(R200_PP_CNTL_X, 0); + OUT_BATCH_REGVAL(R200_PP_TXMULTI_CTL_0, 0); + OUT_BATCH_REGVAL(R200_PP_TXCBLEND_0, (R200_TXC_ARG_A_ZERO | + R200_TXC_ARG_B_ZERO | + R200_TXC_ARG_C_R0_COLOR | + R200_TXC_OP_MADD)); + OUT_BATCH_REGVAL(R200_PP_TXCBLEND2_0, R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0); + OUT_BATCH_REGVAL(R200_PP_TXABLEND_0, (R200_TXA_ARG_A_ZERO | + R200_TXA_ARG_B_ZERO | + R200_TXA_ARG_C_R0_ALPHA | + R200_TXA_OP_MADD)); + OUT_BATCH_REGVAL(R200_PP_TXABLEND2_0, R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0); + OUT_BATCH_REGVAL(R200_PP_TXFILTER_0, (R200_CLAMP_S_CLAMP_LAST | + R200_CLAMP_T_CLAMP_LAST | + R200_MAG_FILTER_NEAREST | + R200_MIN_FILTER_NEAREST)); + OUT_BATCH_REGVAL(R200_PP_TXFORMAT_0, txformat); + OUT_BATCH_REGVAL(R200_PP_TXFORMAT_X_0, 0); + OUT_BATCH_REGVAL(R200_PP_TXSIZE_0, ((width - 1) | + ((height - 1) << RADEON_TEX_VSIZE_SHIFT))); + OUT_BATCH_REGVAL(R200_PP_TXPITCH_0, pitch * _mesa_get_format_bytes(mesa_format) - 32); + + OUT_BATCH_REGSEQ(R200_PP_TXOFFSET_0, 1); + OUT_BATCH_RELOC(0, bo, 0, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); + + END_BATCH(); +} + +static inline void emit_cb_setup(struct r200_context *r200, + struct radeon_bo *bo, + intptr_t offset, + gl_format mesa_format, + unsigned pitch, + unsigned width, + unsigned 
height) +{ + uint32_t dst_pitch = pitch; + uint32_t dst_format = 0; + BATCH_LOCALS(&r200->radeon); + + /* XXX others? BE/LE? */ + switch (mesa_format) { + case MESA_FORMAT_ARGB8888: + case MESA_FORMAT_XRGB8888: + dst_format = RADEON_COLOR_FORMAT_ARGB8888; + break; + case MESA_FORMAT_RGB565: + dst_format = RADEON_COLOR_FORMAT_RGB565; + break; + case MESA_FORMAT_ARGB4444: + dst_format = RADEON_COLOR_FORMAT_ARGB4444; + break; + case MESA_FORMAT_ARGB1555: + dst_format = RADEON_COLOR_FORMAT_ARGB1555; + break; + case MESA_FORMAT_A8: + dst_format = RADEON_COLOR_FORMAT_RGB8; + break; + default: + break; + } + + BEGIN_BATCH_NO_AUTOSTATE(22); + OUT_BATCH_REGVAL(R200_RE_AUX_SCISSOR_CNTL, 0); + OUT_BATCH_REGVAL(R200_RE_CNTL, 0); + OUT_BATCH_REGVAL(RADEON_RE_TOP_LEFT, 0); + OUT_BATCH_REGVAL(RADEON_RE_WIDTH_HEIGHT, ((width << RADEON_RE_WIDTH_SHIFT) | + (height << RADEON_RE_HEIGHT_SHIFT))); + OUT_BATCH_REGVAL(RADEON_RB3D_PLANEMASK, 0xffffffff); + OUT_BATCH_REGVAL(RADEON_RB3D_BLENDCNTL, RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO); + OUT_BATCH_REGVAL(RADEON_RB3D_CNTL, dst_format); + + OUT_BATCH_REGSEQ(RADEON_RB3D_COLOROFFSET, 1); + OUT_BATCH_RELOC(0, bo, 0, 0, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0); + OUT_BATCH_REGSEQ(RADEON_RB3D_COLORPITCH, 1); + OUT_BATCH_RELOC(dst_pitch, bo, dst_pitch, 0, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0); + + END_BATCH(); +} + +static GLboolean validate_buffers(struct r200_context *r200, + struct radeon_bo *src_bo, + struct radeon_bo *dst_bo) +{ + int ret; + + radeon_cs_space_reset_bos(r200->radeon.cmdbuf.cs); + + ret = radeon_cs_space_check_with_bo(r200->radeon.cmdbuf.cs, + src_bo, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT, 0); + if (ret) + return GL_FALSE; + + ret = radeon_cs_space_check_with_bo(r200->radeon.cmdbuf.cs, + dst_bo, 0, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT); + if (ret) + return GL_FALSE; + + return GL_TRUE; +} + +/** + * Calculate texcoords for given image region. 
+ * Output values are [minx, maxx, miny, maxy] + */ +static inline void calc_tex_coords(float img_width, float img_height, + float x, float y, + float reg_width, float reg_height, + unsigned flip_y, float *buf) +{ + buf[0] = x / img_width; + buf[1] = buf[0] + reg_width / img_width; + buf[2] = y / img_height; + buf[3] = buf[2] + reg_height / img_height; + if (flip_y) + { + buf[2] = 1.0 - buf[2]; + buf[3] = 1.0 - buf[3]; + } +} + +static inline void emit_draw_packet(struct r200_context *r200, + unsigned src_width, unsigned src_height, + unsigned src_x_offset, unsigned src_y_offset, + unsigned dst_x_offset, unsigned dst_y_offset, + unsigned reg_width, unsigned reg_height, + unsigned flip_y) +{ + float texcoords[4]; + float verts[12]; + BATCH_LOCALS(&r200->radeon); + + calc_tex_coords(src_width, src_height, + src_x_offset, src_y_offset, + reg_width, reg_height, + flip_y, texcoords); + + verts[0] = dst_x_offset; + verts[1] = dst_y_offset + reg_height; + verts[2] = texcoords[0]; + verts[3] = texcoords[3]; + + verts[4] = dst_x_offset + reg_width; + verts[5] = dst_y_offset + reg_height; + verts[6] = texcoords[1]; + verts[7] = texcoords[3]; + + verts[8] = dst_x_offset + reg_width; + verts[9] = dst_y_offset; + verts[10] = texcoords[1]; + verts[11] = texcoords[2]; + + BEGIN_BATCH(14); + OUT_BATCH(R200_CP_CMD_3D_DRAW_IMMD_2 | (12 << 16)); + OUT_BATCH(RADEON_CP_VC_CNTL_PRIM_WALK_RING | + RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST | + (3 << 16)); + OUT_BATCH_TABLE(verts, 12); + END_BATCH(); +} + +/** + * Copy a region of [@a width x @a height] pixels from source buffer + * to destination buffer. 
+ * @param[in] r200 r200 context + * @param[in] src_bo source radeon buffer object + * @param[in] src_offset offset of the source image in the @a src_bo + * @param[in] src_mesaformat source image format + * @param[in] src_pitch aligned source image width + * @param[in] src_width source image width + * @param[in] src_height source image height + * @param[in] src_x_offset x offset in the source image + * @param[in] src_y_offset y offset in the source image + * @param[in] dst_bo destination radeon buffer object + * @param[in] dst_offset offset of the destination image in the @a dst_bo + * @param[in] dst_mesaformat destination image format + * @param[in] dst_pitch aligned destination image width + * @param[in] dst_width destination image width + * @param[in] dst_height destination image height + * @param[in] dst_x_offset x offset in the destination image + * @param[in] dst_y_offset y offset in the destination image + * @param[in] width region width + * @param[in] height region height + * @param[in] flip_y set if y coords of the source image need to be flipped + */ +unsigned r200_blit(GLcontext *ctx, + struct radeon_bo *src_bo, + intptr_t src_offset, + gl_format src_mesaformat, + unsigned src_pitch, + unsigned src_width, + unsigned src_height, + unsigned src_x_offset, + unsigned src_y_offset, + struct radeon_bo *dst_bo, + intptr_t dst_offset, + gl_format dst_mesaformat, + unsigned dst_pitch, + unsigned dst_width, + unsigned dst_height, + unsigned dst_x_offset, + unsigned dst_y_offset, + unsigned reg_width, + unsigned reg_height, + unsigned flip_y) +{ + struct r200_context *r200 = R200_CONTEXT(ctx); + + if (!r200_check_blit(dst_mesaformat)) + return GL_FALSE; + + /* Make sure that colorbuffer has even width - hw limitation */ + if (dst_pitch % 2 > 0) + ++dst_pitch; + + /* Rendering to small buffer doesn't work. + * Looks like a hw limitation. 
+ */ + if (dst_pitch < 32) + return GL_FALSE; + + /* Need to clamp the region size to make sure + * we don't read outside of the source buffer + * or write outside of the destination buffer. + */ + if (reg_width + src_x_offset > src_width) + reg_width = src_width - src_x_offset; + if (reg_height + src_y_offset > src_height) + reg_height = src_height - src_y_offset; + if (reg_width + dst_x_offset > dst_width) + reg_width = dst_width - dst_x_offset; + if (reg_height + dst_y_offset > dst_height) + reg_height = dst_height - dst_y_offset; + + if (src_bo == dst_bo) { + return GL_FALSE; + } + + if (src_offset % 32 || dst_offset % 32) { + return GL_FALSE; + } + + if (0) { + fprintf(stderr, "src: size [%d x %d], pitch %d, " + "offset [%d x %d], format %s, bo %p\n", + src_width, src_height, src_pitch, + src_x_offset, src_y_offset, + _mesa_get_format_name(src_mesaformat), + src_bo); + fprintf(stderr, "dst: pitch %d, offset[%d x %d], format %s, bo %p\n", + dst_pitch, dst_x_offset, dst_y_offset, + _mesa_get_format_name(dst_mesaformat), dst_bo); + fprintf(stderr, "region: %d x %d\n", reg_width, reg_height); + } + + /* Flush is needed to make sure that source buffer has correct data */ + radeonFlush(r200->radeon.glCtx); + + rcommonEnsureCmdBufSpace(&r200->radeon, 78, __FUNCTION__); + + if (!validate_buffers(r200, src_bo, dst_bo)) + return GL_FALSE; + + /* 14 */ + emit_vtx_state(r200); + /* 28 */ + emit_tx_setup(r200, src_mesaformat, src_bo, src_offset, src_width, src_height, src_pitch); + /* 22 */ + emit_cb_setup(r200, dst_bo, dst_offset, dst_mesaformat, dst_pitch, dst_width, dst_height); + /* 14 */ + emit_draw_packet(r200, src_width, src_height, + src_x_offset, src_y_offset, + dst_x_offset, dst_y_offset, + reg_width, reg_height, + flip_y); + + radeonFlush(ctx); + + return GL_TRUE; +} diff --git a/r200/r200_blit.h b/r200/r200_blit.h new file mode 100644 index 0000000..53206f0 --- /dev/null +++ b/r200/r200_blit.h @@ -0,0 +1,56 @@ +/* + * Copyright (C) 2009 Maciej Cencora 
<m.cencora@gmail.com> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef R200_BLIT_H +#define R200_BLIT_H + +void r200_blit_init(struct r200_context *r200); + +unsigned r200_check_blit(gl_format mesa_format); + +unsigned r200_blit(GLcontext *ctx, + struct radeon_bo *src_bo, + intptr_t src_offset, + gl_format src_mesaformat, + unsigned src_pitch, + unsigned src_width, + unsigned src_height, + unsigned src_x_offset, + unsigned src_y_offset, + struct radeon_bo *dst_bo, + intptr_t dst_offset, + gl_format dst_mesaformat, + unsigned dst_pitch, + unsigned dst_width, + unsigned dst_height, + unsigned dst_x_offset, + unsigned dst_y_offset, + unsigned width, + unsigned height, + unsigned flip_y); + +#endif // R200_BLIT_H diff --git a/r200/r200_cmdbuf.c b/r200/r200_cmdbuf.c index 1d1bea6..2f2b8d9 100644 --- a/r200/r200_cmdbuf.c +++ b/r200/r200_cmdbuf.c @@ -35,15 +35,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/imports.h" #include "main/macros.h" #include "main/context.h" -#include "swrast/swrast.h" #include "main/simple_list.h" #include "radeon_common.h" #include "r200_context.h" -#include "r200_state.h" #include "r200_ioctl.h" -#include "r200_tcl.h" -#include "r200_sanity.h" #include "radeon_reg.h" /* The state atoms will be emitted in the order they appear in the atom list, @@ -92,6 +88,7 @@ void r200SetUpAtomList( r200ContextPtr rmesa ) insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.pix[i] ); insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.afs[0] ); insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.afs[1] ); + insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.stp ); for (i = 0; i < 8; ++i) insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.lit[i] ); for (i = 0; i < 3 + mtu; ++i) diff --git a/r200/r200_context.c b/r200/r200_context.c index 5f985d6..dad2580 100644 --- a/r200/r200_context.c +++ b/r200/r200_context.c @@ -37,10 +37,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "main/context.h" #include "main/simple_list.h" #include "main/imports.h" -#include "main/matrix.h" #include "main/extensions.h" -#include "main/framebuffer.h" -#include "main/state.h" #include "swrast/swrast.h" #include "swrast_setup/swrast_setup.h" @@ -58,9 +55,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r200_tex.h" #include "r200_swtcl.h" #include "r200_tcl.h" -#include "r200_maos.h" #include "r200_vertprog.h" #include "radeon_queryobj.h" +#include "r200_blit.h" #include "radeon_span.h" @@ -79,7 +76,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define DRIVER_DATE "20060602" -#include "vblank.h" #include "utils.h" #include "xmlpool.h" /* for symbolic values of enum-type options */ @@ -268,16 +264,18 @@ static void r200_init_vtbl(radeonContextPtr radeon) radeon->vtbl.fallback = r200Fallback; radeon->vtbl.update_scissor = r200_vtbl_update_scissor; radeon->vtbl.emit_query_finish = r200_emit_query_finish; + radeon->vtbl.check_blit = r200_check_blit; + radeon->vtbl.blit = r200_blit; } /* Create the device specific rendering context. 
*/ GLboolean r200CreateContext( const __GLcontextModes *glVisual, - __DRIcontextPrivate *driContextPriv, + __DRIcontext *driContextPriv, void *sharedContextPrivate) { - __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv; + __DRIscreen *sPriv = driContextPriv->driScreenPriv; radeonScreenPtr screen = (radeonScreenPtr)(sPriv->private); struct dd_function_table functions; r200ContextPtr rmesa; @@ -294,6 +292,7 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual, if ( !rmesa ) return GL_FALSE; + rmesa->radeon.radeonScreen = screen; r200_init_vtbl(&rmesa->radeon); /* init exp fog table data */ r200InitStaticFogData(); @@ -326,7 +325,7 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual, r200InitDriverFuncs(&functions); r200InitIoctlFuncs(&functions); r200InitStateFuncs(&functions); - r200InitTextureFuncs(&functions); + r200InitTextureFuncs(&rmesa->radeon, &functions); r200InitShaderFuncs(&functions); radeonInitQueryObjFunctions(&functions); @@ -352,6 +351,8 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual, ctx->Const.MaxTextureImageUnits = ctx->Const.MaxTextureUnits; ctx->Const.MaxTextureCoordUnits = ctx->Const.MaxTextureUnits; + ctx->Const.MaxCombinedTextureImageUnits = ctx->Const.MaxTextureUnits; + i = driQueryOptioni( &rmesa->radeon.optionCache, "allow_large_textures"); /* FIXME: When no memory manager is available we should set this @@ -360,6 +361,7 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual, ctx->Const.Max3DTextureLevels = 9; ctx->Const.MaxCubeTextureLevels = 12; ctx->Const.MaxTextureRectSize = 2048; + ctx->Const.MaxRenderbufferSize = 2048; ctx->Const.MaxTextureMaxAnisotropy = 16.0; @@ -390,6 +392,7 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual, ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; ctx->Const.MaxDrawBuffers = 1; + ctx->Const.MaxColorAttachments = 1; _mesa_set_mvp_with_dp4( ctx, GL_TRUE ); @@ -496,7 +499,7 @@ GLboolean r200CreateContext( const __GLcontextModes 
*glVisual, } -void r200DestroyContext( __DRIcontextPrivate *driContextPriv ) +void r200DestroyContext( __DRIcontext *driContextPriv ) { int i; r200ContextPtr rmesa = (r200ContextPtr)driContextPriv->driverPrivate; diff --git a/r200/r200_context.h b/r200/r200_context.h index 246f98c..a9dce31 100644 --- a/r200/r200_context.h +++ b/r200/r200_context.h @@ -636,14 +636,16 @@ struct r200_context { #define R200_CONTEXT(ctx) ((r200ContextPtr)(ctx->DriverCtx)) -extern void r200DestroyContext( __DRIcontextPrivate *driContextPriv ); +extern void r200DestroyContext( __DRIcontext *driContextPriv ); extern GLboolean r200CreateContext( const __GLcontextModes *glVisual, - __DRIcontextPrivate *driContextPriv, + __DRIcontext *driContextPriv, void *sharedContextPrivate); -extern GLboolean r200MakeCurrent( __DRIcontextPrivate *driContextPriv, - __DRIdrawablePrivate *driDrawPriv, - __DRIdrawablePrivate *driReadPriv ); -extern GLboolean r200UnbindContext( __DRIcontextPrivate *driContextPriv ); +extern GLboolean r200MakeCurrent( __DRIcontext *driContextPriv, + __DRIdrawable *driDrawPriv, + __DRIdrawable *driReadPriv ); +extern GLboolean r200UnbindContext( __DRIcontext *driContextPriv ); + +extern void r200_init_texcopy_functions(struct dd_function_table *table); /* ================================================================ * Debugging: diff --git a/r200/r200_ioctl.c b/r200/r200_ioctl.c index b238adb..b72f69b 100644 --- a/r200/r200_ioctl.c +++ b/r200/r200_ioctl.c @@ -46,13 +46,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "radeon_common.h" #include "radeon_lock.h" #include "r200_context.h" -#include "r200_state.h" #include "r200_ioctl.h" -#include "r200_tcl.h" -#include "r200_sanity.h" #include "radeon_reg.h" -#include "drirenderbuffer.h" #include "vblank.h" #define R200_TIMEOUT 512 @@ -61,10 +57,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
static void r200KernelClear(GLcontext *ctx, GLuint flags) { r200ContextPtr rmesa = R200_CONTEXT(ctx); - __DRIdrawablePrivate *dPriv = radeon_get_drawable(&rmesa->radeon); + __DRIdrawable *dPriv = radeon_get_drawable(&rmesa->radeon); GLint cx, cy, cw, ch, ret; GLuint i; + radeonEmitState(&rmesa->radeon); + LOCK_HARDWARE( &rmesa->radeon ); /* Throttle the number of clear ioctls we do. @@ -185,7 +183,7 @@ static void r200KernelClear(GLcontext *ctx, GLuint flags) static void r200Clear( GLcontext *ctx, GLbitfield mask ) { r200ContextPtr rmesa = R200_CONTEXT(ctx); - __DRIdrawablePrivate *dPriv = radeon_get_drawable(&rmesa->radeon); + __DRIdrawable *dPriv = radeon_get_drawable(&rmesa->radeon); GLuint flags = 0; GLuint color_mask = 0; GLuint orig_mask = mask; diff --git a/r200/r200_maos_arrays.c b/r200/r200_maos_arrays.c index 383a0c4..aecba7f 100644 --- a/r200/r200_maos_arrays.c +++ b/r200/r200_maos_arrays.c @@ -74,7 +74,7 @@ static void r200_emit_vecfog(GLcontext *ctx, struct radeon_aos *aos, GLvoid *data, int stride, int count) { radeonContextPtr rmesa = RADEON_CONTEXT(ctx); - uint32_t *out; + GLfloat *out; int i; int size = 1; @@ -90,12 +90,14 @@ static void r200_emit_vecfog(GLcontext *ctx, struct radeon_aos *aos, aos->components = size; aos->count = count; - out = (uint32_t*)((char*)aos->bo->ptr + aos->offset); + radeon_bo_map(aos->bo, 1); + out = (GLfloat*)((char*)aos->bo->ptr + aos->offset); for (i = 0; i < count; i++) { out[0] = r200ComputeFogBlendFactor( ctx, *(GLfloat *)data ); out++; data += stride; } + radeon_bo_unmap(aos->bo); } /* Emit any changed arrays to new GART memory, re-emit a packet to diff --git a/r200/r200_pixel.c b/r200/r200_pixel.c index 9577387..bfb7e2a 100644 --- a/r200/r200_pixel.c +++ b/r200/r200_pixel.c @@ -88,10 +88,10 @@ check_color_per_fragment_ops( const GLcontext *ctx ) ctx->Fog.Enabled || ctx->Scissor.Enabled || ctx->Stencil._Enabled || - !ctx->Color.ColorMask[0] || - !ctx->Color.ColorMask[1] || - !ctx->Color.ColorMask[2] || - 
!ctx->Color.ColorMask[3] || + !ctx->Color.ColorMask[0][0] || + !ctx->Color.ColorMask[0][1] || + !ctx->Color.ColorMask[0][2] || + !ctx->Color.ColorMask[0][3] || ctx->Color.ColorLogicOpEnabled || ctx->Texture._EnabledUnits ) && @@ -214,7 +214,7 @@ r200TryReadPixels( GLcontext *ctx, } { - __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable; + __DRIdrawable *dPriv = rmesa->radeon.dri.drawable; driRenderbuffer *drb = (driRenderbuffer *) ctx->ReadBuffer->_ColorReadBuffer; int nbox = dPriv->numClipRects; int src_offset = drb->offset @@ -298,7 +298,7 @@ static void do_draw_pix( GLcontext *ctx, #if 0 r200ContextPtr rmesa = R200_CONTEXT(ctx); - __DRIdrawablePrivate *dPriv = radeon_get_drawable(&rmesa->radeon); + __DRIdrawable *dPriv = radeon_get_drawable(&rmesa->radeon); drm_clip_rect_t *box = dPriv->pClipRects; struct gl_renderbuffer *rb = ctx->ReadBuffer->_ColorDrawBuffers[0]; driRenderbuffer *drb = (driRenderbuffer *) rb; @@ -400,10 +400,10 @@ r200TryDrawPixels( GLcontext *ctx, case GL_RGBA: case GL_BGRA: planemask = radeonPackColor(cpp, - ctx->Color.ColorMask[RCOMP], - ctx->Color.ColorMask[GCOMP], - ctx->Color.ColorMask[BCOMP], - ctx->Color.ColorMask[ACOMP]); + ctx->Color.ColorMask[0][RCOMP], + ctx->Color.ColorMask[0][GCOMP], + ctx->Color.ColorMask[0][BCOMP], + ctx->Color.ColorMask[0][ACOMP]); if (cpp == 2) planemask |= planemask << 16; diff --git a/r200/r200_reg.h b/r200/r200_reg.h index 526a624..b3a4940 100644 --- a/r200/r200_reg.h +++ b/r200/r200_reg.h @@ -690,7 +690,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. # define R200_PVS_CNTL_1_PROGRAM_START_SHIFT 0 # define R200_PVS_CNTL_1_POS_END_SHIFT 10 # define R200_PVS_CNTL_1_PROGRAM_END_SHIFT 20 -/* Addresses are relative the the vertex program parameters area. */ +/* Addresses are relative to the vertex program parameters area. 
*/ #define R200_VAP_PVS_CNTL_2 0x22d4 # define R200_PVS_CNTL_2_PARAM_OFFSET_SHIFT 0 # define R200_PVS_CNTL_2_PARAM_COUNT_SHIFT 16 @@ -938,7 +938,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define R200_CLAMP_Q_CLAMP_GL (6 << 9) #define R200_CLAMP_Q_MIRROR_CLAMP_GL (7 << 9) #define R200_CLAMP_Q_MASK (7 << 9) -#define R200_MIN_MIP_LEVEL_MASK (0xff << 12) +#define R200_MIN_MIP_LEVEL_MASK (0x0f << 12) #define R200_MIN_MIP_LEVEL_SHIFT 12 #define R200_TEXCOORD_NONPROJ (0 << 16) #define R200_TEXCOORD_CUBIC_ENV (1 << 16) @@ -950,6 +950,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define R200_TEXCOORD_ZERO (7 << 16) #define R200_TEXCOORD_MASK (7 << 16) #define R200_LOD_BIAS_MASK (0xfff80000) +#define R200_LOD_BIAS_FIXED_ONE (0x08000000) +#define R200_LOD_BIAS_CORRECTION (0x00600000) #define R200_LOD_BIAS_SHIFT 19 #define R200_PP_TXSIZE_0 0x2c0c /* NPOT only */ #define R200_PP_TX_WIDTHMASK_SHIFT 0 diff --git a/r200/r200_sanity.c b/r200/r200_sanity.c index 1241a92..a439fd8 100644 --- a/r200/r200_sanity.c +++ b/r200/r200_sanity.c @@ -38,7 +38,6 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/imports.h" #include "r200_context.h" -#include "r200_ioctl.h" #include "r200_sanity.h" #include "radeon_reg.h" #include "r200_reg.h" diff --git a/r200/r200_state.c b/r200/r200_state.c index 6d99c03..050e5aa 100644 --- a/r200/r200_state.c +++ b/r200/r200_state.c @@ -57,8 +57,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "r200_swtcl.h" #include "r200_vertprog.h" -#include "drirenderbuffer.h" - /* ============================================================= * Alpha blending @@ -597,6 +595,13 @@ static void r200PointSize( GLcontext *ctx, GLfloat size ) r200ContextPtr rmesa = R200_CONTEXT(ctx); GLfloat *fcmd = (GLfloat *)rmesa->hw.ptp.cmd; + radeon_print(RADEON_STATE, RADEON_TRACE, + "%s(%p) size: %f, fixed point result: %d.%d (%d/16)\n", + __func__, ctx, size, + ((GLuint)(ctx->Point.Size * 16.0))/16, + (((GLuint)(ctx->Point.Size * 16.0))&15)*100/16, + ((GLuint)(ctx->Point.Size * 16.0))&15); + R200_STATECHANGE( rmesa, cst ); R200_STATECHANGE( rmesa, ptp ); rmesa->hw.cst.cmd[CST_RE_POINTSIZE] &= ~0xffff; @@ -721,10 +726,10 @@ static void r200ColorMask( GLcontext *ctx, if (!rrb) return; mask = radeonPackColor( rrb->cpp, - ctx->Color.ColorMask[RCOMP], - ctx->Color.ColorMask[GCOMP], - ctx->Color.ColorMask[BCOMP], - ctx->Color.ColorMask[ACOMP] ); + ctx->Color.ColorMask[0][RCOMP], + ctx->Color.ColorMask[0][GCOMP], + ctx->Color.ColorMask[0][BCOMP], + ctx->Color.ColorMask[0][ACOMP] ); if (!(r && g && b && a)) @@ -1585,7 +1590,7 @@ static void r200ClearStencil( GLcontext *ctx, GLint s ) void r200UpdateWindow( GLcontext *ctx ) { r200ContextPtr rmesa = R200_CONTEXT(ctx); - __DRIdrawablePrivate *dPriv = radeon_get_drawable(&rmesa->radeon); + __DRIdrawable *dPriv = radeon_get_drawable(&rmesa->radeon); GLfloat xoffset = dPriv ? (GLfloat) dPriv->x : 0; GLfloat yoffset = dPriv ? 
(GLfloat) dPriv->y + dPriv->h : 0; const GLfloat *v = ctx->Viewport._WindowMap.m; @@ -1665,7 +1670,7 @@ static void r200DepthRange( GLcontext *ctx, GLclampd nearval, void r200UpdateViewportOffset( GLcontext *ctx ) { r200ContextPtr rmesa = R200_CONTEXT(ctx); - __DRIdrawablePrivate *dPriv = radeon_get_drawable(&rmesa->radeon); + __DRIdrawable *dPriv = radeon_get_drawable(&rmesa->radeon); GLfloat xoffset = (GLfloat)dPriv->x; GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h; const GLfloat *v = ctx->Viewport._WindowMap.m; @@ -2466,6 +2471,12 @@ static void r200PolygonStipple( GLcontext *ctx, const GLubyte *mask ) radeon_firevertices(&r200->radeon); + radeon_print(RADEON_STATE, RADEON_TRACE, + "%s(%p) first 32 bits are %x.\n", + __func__, + ctx, + *(uint32_t*)mask); + R200_STATECHANGE(r200, stp); /* Must flip pattern upside down. @@ -2490,7 +2501,6 @@ void r200InitStateFuncs( struct dd_function_table *functions ) functions->BlendFuncSeparate = r200BlendFuncSeparate; functions->ClearColor = r200ClearColor; functions->ClearDepth = r200ClearDepth; - functions->ClearIndex = NULL; functions->ClearStencil = r200ClearStencil; functions->ClipPlane = r200ClipPlane; functions->ColorMask = r200ColorMask; @@ -2502,7 +2512,6 @@ void r200InitStateFuncs( struct dd_function_table *functions ) functions->Fogfv = r200Fogfv; functions->FrontFace = r200FrontFace; functions->Hint = NULL; - functions->IndexMask = NULL; functions->LightModelfv = r200LightModelfv; functions->Lightfv = r200Lightfv; functions->LineStipple = r200LineStipple; diff --git a/r200/r200_state_init.c b/r200/r200_state_init.c index 6c5a0b7..1606553 100644 --- a/r200/r200_state_init.c +++ b/r200/r200_state_init.c @@ -39,7 +39,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "swrast/swrast.h" #include "vbo/vbo.h" -#include "tnl/tnl.h" #include "tnl/t_pipeline.h" #include "swrast_setup/swrast_setup.h" @@ -48,9 +47,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "r200_context.h" #include "r200_ioctl.h" #include "r200_state.h" -#include "r200_tcl.h" -#include "r200_tex.h" -#include "r200_swtcl.h" #include "radeon_queryobj.h" #include "xmlpool.h" @@ -351,6 +347,15 @@ static int check_rrb(GLcontext *ctx, struct radeon_state_atom *atom) return atom->cmd_size; } +static int check_polygon_stipple(GLcontext *ctx, + struct radeon_state_atom *atom) +{ + r200ContextPtr r200 = R200_CONTEXT(ctx); + if (r200->hw.set.cmd[SET_RE_CNTL] & R200_STIPPLE_ENABLE) + return atom->cmd_size; + return 0; +} + static void mtl_emit(GLcontext *ctx, struct radeon_state_atom *atom) { r200ContextPtr r200 = R200_CONTEXT(ctx); @@ -698,7 +703,8 @@ static void tex_emit_mm(GLcontext *ctx, struct radeon_state_atom *atom) uint32_t dwords = atom->check(ctx, atom); int i = atom->idx; radeonTexObj *t = r200->state.texture.unit[i].texobj; - if (!r200->state.texture.unit[i].unitneeded) + + if (!r200->state.texture.unit[i].unitneeded && !(dwords <= atom->cmd_size)) dwords -= 4; BEGIN_BATCH_NO_AUTOSTATE(dwords); @@ -888,7 +894,10 @@ void r200InitState( r200ContextPtr rmesa ) } } - ALLOC_STATE( stp, always, STP_STATE_SIZE, "STP/stp", 0 ); + if (rmesa->radeon.radeonScreen->kernel_mm) + ALLOC_STATE( stp, polygon_stipple, STP_STATE_SIZE, "STP/stp", 0 ); + else + ALLOC_STATE( stp, never, STP_STATE_SIZE, "STP/stp", 0 ); for (i = 0; i < 6; i++) if (rmesa->radeon.radeonScreen->kernel_mm) @@ -1380,7 +1389,7 @@ void r200InitState( r200ContextPtr rmesa ) rmesa->hw.tex[i].cmd[TEX_PP_BORDER_COLOR] = 0; rmesa->hw.tex[i].cmd[TEX_PP_TXFORMAT_X] = (/* R200_TEXCOORD_PROJ | */ - 0x100000); /* Small default bias */ + R200_LOD_BIAS_CORRECTION); /* Small default bias */ if (rmesa->radeon.radeonScreen->drmSupportsFragShader) { rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET_NEWDRM] = rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]; diff --git a/r200/r200_swtcl.c b/r200/r200_swtcl.c index 240fb45..262fe3c 100644 --- a/r200/r200_swtcl.c +++ b/r200/r200_swtcl.c @@ -44,7 +44,6 @@ 
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "swrast/s_context.h" #include "swrast/s_fog.h" #include "swrast_setup/swrast_setup.h" -#include "math/m_translate.h" #include "tnl/tnl.h" #include "tnl/t_context.h" #include "tnl/t_pipeline.h" @@ -168,7 +167,7 @@ static void r200SetVertexFormat( GLcontext *ctx ) for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX(i) )) { - GLuint sz = VB->TexCoordPtr[i]->size; + GLuint sz = VB->AttribPtr[_TNL_ATTRIB_TEX0 + i]->size; fmt_1 |= sz << (3 * i); EMIT_ATTR( _TNL_ATTRIB_TEX0+i, EMIT_1F + sz - 1, 0 ); @@ -297,7 +296,7 @@ void r200_swtcl_flush(GLcontext *ctx, uint32_t current_offset) radeonEmitState(&rmesa->radeon); r200EmitVertexAOS( rmesa, rmesa->radeon.swtcl.vertex_size, - first_elem(&rmesa->radeon.dma.reserved)->bo, + rmesa->radeon.swtcl.bo, current_offset); @@ -421,7 +420,6 @@ static struct { #define DO_POINTS 1 #define DO_FULL_QUAD 1 -#define HAVE_RGBA 1 #define HAVE_SPEC 1 #define HAVE_BACK_COLORS 0 #define HAVE_HW_FLATSHADE 1 diff --git a/r200/r200_tcl.c b/r200/r200_tcl.c index e7d48a7..f3f558b 100644 --- a/r200/r200_tcl.c +++ b/r200/r200_tcl.c @@ -46,7 +46,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r200_context.h" #include "r200_state.h" #include "r200_ioctl.h" -#include "r200_tex.h" #include "r200_tcl.h" #include "r200_swtcl.h" #include "r200_maos.h" diff --git a/r200/r200_tex.c b/r200/r200_tex.c index a417721..6723b12 100644 --- a/r200/r200_tex.c +++ b/r200/r200_tex.c @@ -44,9 +44,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "radeon_mipmap_tree.h" #include "r200_context.h" -#include "r200_state.h" #include "r200_ioctl.h" -#include "r200_swtcl.h" #include "r200_tex.h" #include "xmlpool.h" @@ -67,6 +65,13 @@ static void r200SetTexWrap( radeonTexObjPtr t, GLenum swrap, GLenum twrap, GLenu GLboolean is_clamp_to_border = GL_FALSE; struct gl_texture_object *tObj = &t->base; + radeon_print(RADEON_TEXTURE, RADEON_TRACE, + "%s(tex %p) sw %s, tw %s, rw %s\n", + __func__, t, + _mesa_lookup_enum_by_nr(swrap), + _mesa_lookup_enum_by_nr(twrap), + _mesa_lookup_enum_by_nr(rwrap)); + t->pp_txfilter &= ~(R200_CLAMP_S_MASK | R200_CLAMP_T_MASK | R200_BORDER_MODE_D3D); switch ( swrap ) { @@ -182,6 +187,9 @@ static void r200SetTexWrap( radeonTexObjPtr t, GLenum swrap, GLenum twrap, GLenu static void r200SetTexMaxAnisotropy( radeonTexObjPtr t, GLfloat max ) { t->pp_txfilter &= ~R200_MAX_ANISO_MASK; + radeon_print(RADEON_TEXTURE, RADEON_TRACE, + "%s(tex %p) max %f.\n", + __func__, t, max); if ( max <= 1.0 ) { t->pp_txfilter |= R200_MAX_ANISO_1_TO_1; @@ -214,6 +222,13 @@ static void r200SetTexFilter( radeonTexObjPtr t, GLenum minf, GLenum magf ) t->pp_txfilter &= ~(R200_MIN_FILTER_MASK | R200_MAG_FILTER_MASK); t->pp_txformat_x &= ~R200_VOLUME_FILTER_MASK; + radeon_print(RADEON_TEXTURE, RADEON_TRACE, + "%s(tex %p) minf %s, maxf %s, anisotropy %d.\n", + __func__, t, + _mesa_lookup_enum_by_nr(minf), + _mesa_lookup_enum_by_nr(magf), + anisotropy); + if ( anisotropy == R200_MAX_ANISO_1_TO_1 ) { switch ( minf ) { case GL_NEAREST: @@ -286,10 +301,8 @@ static void r200TexEnv( GLcontext *ctx, GLenum target, GLuint unit = ctx->Texture.CurrentUnit; struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; - if ( R200_DEBUG & RADEON_STATE ) { - fprintf( stderr, "%s( %s )\n", + radeon_print(RADEON_TEXTURE | RADEON_STATE, RADEON_VERBOSE, "%s( %s )\n", __FUNCTION__, _mesa_lookup_enum_by_nr( pname ) ); - } /* This is incorrect: Need to maintain this data for each of * GL_TEXTURE_{123}D, GL_TEXTURE_RECTANGLE_NV, 
etc, and switch @@ -311,18 +324,19 @@ static void r200TexEnv( GLcontext *ctx, GLenum target, case GL_TEXTURE_LOD_BIAS_EXT: { GLfloat bias, min; GLuint b; - const int fixed_one = 0x8000000; + const int fixed_one = R200_LOD_BIAS_FIXED_ONE; /* The R200's LOD bias is a signed 2's complement value with a * range of -16.0 <= bias < 16.0. * * NOTE: Add a small bias to the bias for conform mipsel.c test. */ - bias = *param + .01; + bias = *param; min = driQueryOptionb (&rmesa->radeon.optionCache, "no_neg_lod_bias") ? 0.0 : -16.0; bias = CLAMP( bias, min, 16.0 ); - b = (int)(bias * fixed_one) & R200_LOD_BIAS_MASK; + b = ((int)(bias * fixed_one) + + R200_LOD_BIAS_CORRECTION) & R200_LOD_BIAS_MASK; if ( (rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT_X] & R200_LOD_BIAS_MASK) != b ) { R200_STATECHANGE( rmesa, tex[unit] ); @@ -358,10 +372,11 @@ static void r200TexParameter( GLcontext *ctx, GLenum target, { radeonTexObj* t = radeon_tex_obj(texObj); - if ( R200_DEBUG & (RADEON_STATE|RADEON_TEXTURE) ) { - fprintf( stderr, "%s( %s )\n", __FUNCTION__, + radeon_print(RADEON_TEXTURE | RADEON_STATE, RADEON_VERBOSE, + "%s(%p, tex %p) target %s, pname %s\n", + __FUNCTION__, ctx, texObj, + _mesa_lookup_enum_by_nr( target ), _mesa_lookup_enum_by_nr( pname ) ); - } switch ( pname ) { case GL_TEXTURE_MIN_FILTER: @@ -378,7 +393,7 @@ static void r200TexParameter( GLcontext *ctx, GLenum target, break; case GL_TEXTURE_BORDER_COLOR: - r200SetTexBorderColor( t, texObj->BorderColor ); + r200SetTexBorderColor( t, texObj->BorderColor.f ); break; case GL_TEXTURE_BASE_LEVEL: @@ -399,11 +414,10 @@ static void r200DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj) r200ContextPtr rmesa = R200_CONTEXT(ctx); radeonTexObj* t = radeon_tex_obj(texObj); - if (RADEON_DEBUG & (RADEON_STATE | RADEON_TEXTURE)) { - fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__, - (void *)texObj, - _mesa_lookup_enum_by_nr(texObj->Target)); - } + radeon_print(RADEON_TEXTURE | RADEON_STATE, RADEON_NORMAL, + "%s( %p 
(target = %s) )\n", __FUNCTION__, + (void *)texObj, + _mesa_lookup_enum_by_nr(texObj->Target)); if (rmesa) { int i; @@ -458,10 +472,10 @@ static struct gl_texture_object *r200NewTextureObject(GLcontext * ctx, radeonTexObj* t = CALLOC_STRUCT(radeon_tex_obj); - if (RADEON_DEBUG & (RADEON_STATE | RADEON_TEXTURE)) { - fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__, - t, _mesa_lookup_enum_by_nr(target)); - } + radeon_print(RADEON_STATE | RADEON_TEXTURE, RADEON_NORMAL, + "%s(%p) target %s, new texture %p.\n", + __FUNCTION__, ctx, + _mesa_lookup_enum_by_nr(target), t); _mesa_initialize_texture_object(&t->base, name, target); t->base.MaxAnisotropy = rmesa->radeon.initialMaxAnisotropy; @@ -470,14 +484,14 @@ static struct gl_texture_object *r200NewTextureObject(GLcontext * ctx, r200SetTexWrap( t, t->base.WrapS, t->base.WrapT, t->base.WrapR ); r200SetTexMaxAnisotropy( t, t->base.MaxAnisotropy ); r200SetTexFilter(t, t->base.MinFilter, t->base.MagFilter); - r200SetTexBorderColor(t, t->base.BorderColor); + r200SetTexBorderColor(t, t->base.BorderColor.f); return &t->base; } -void r200InitTextureFuncs( struct dd_function_table *functions ) +void r200InitTextureFuncs( radeonContextPtr radeon, struct dd_function_table *functions ) { /* Note: we only plug in the functions we implement in the driver * since _mesa_init_driver_functions() was already called. 
@@ -511,6 +525,11 @@ void r200InitTextureFuncs( struct dd_function_table *functions ) functions->CompressedTexImage2D = radeonCompressedTexImage2D; functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; + if (radeon->radeonScreen->kernel_mm) { + functions->CopyTexImage2D = radeonCopyTexImage2D; + functions->CopyTexSubImage2D = radeonCopyTexSubImage2D; + } + functions->GenerateMipmap = radeonGenerateMipmap; functions->NewTextureImage = radeonNewTextureImage; diff --git a/r200/r200_tex.h b/r200/r200_tex.h index e122de6..1a1e703 100644 --- a/r200/r200_tex.h +++ b/r200/r200_tex.h @@ -48,7 +48,7 @@ extern int r200UploadTexImages( r200ContextPtr rmesa, radeonTexObjPtr t, GLuint extern void r200DestroyTexObj( r200ContextPtr rmesa, radeonTexObjPtr t ); -extern void r200InitTextureFuncs( struct dd_function_table *functions ); +extern void r200InitTextureFuncs( radeonContextPtr radeon, struct dd_function_table *functions ); extern void r200UpdateFragmentShader( GLcontext *ctx ); diff --git a/r200/r200_texstate.c b/r200/r200_texstate.c index 7782404..9ccf30c 100644 --- a/r200/r200_texstate.c +++ b/r200/r200_texstate.c @@ -764,7 +764,7 @@ void r200SetTexOffset(__DRIcontext * pDRICtx, GLint texname, } } -void r200SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_format, +void r200SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_format, __DRIdrawable *dPriv) { struct gl_texture_unit *texUnit; @@ -781,7 +781,7 @@ void r200SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo type = GL_BGRA; format = GL_UNSIGNED_BYTE; - internalFormat = (glx_texture_format == GLX_TEXTURE_FORMAT_RGB_EXT ? 3 : 4); + internalFormat = (texture_format == __DRI_TEXTURE_FORMAT_RGB ? 
3 : 4); radeon = pDRICtx->driverPrivate; rmesa = pDRICtx->driverPrivate; @@ -797,24 +797,13 @@ void r200SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo return; } - radeon_update_renderbuffers(pDRICtx, dPriv); - /* back & depth buffer are useless free them right away */ - rb = (void*)rfb->base.Attachment[BUFFER_DEPTH].Renderbuffer; - if (rb && rb->bo) { - radeon_bo_unref(rb->bo); - rb->bo = NULL; - } - rb = (void*)rfb->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer; - if (rb && rb->bo) { - radeon_bo_unref(rb->bo); - rb->bo = NULL; - } + radeon_update_renderbuffers(pDRICtx, dPriv, GL_TRUE); rb = rfb->color_rb[0]; if (rb->bo == NULL) { /* Failed to BO for the buffer */ return; } - + _mesa_lock_texture(radeon->glCtx, texObj); if (t->bo) { radeon_bo_unref(t->bo); @@ -843,7 +832,7 @@ void r200SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo pitch_val = rb->pitch; switch (rb->cpp) { case 4: - if (glx_texture_format == GLX_TEXTURE_FORMAT_RGB_EXT) + if (texture_format == __DRI_TEXTURE_FORMAT_RGB) t->pp_txformat = tx_table_le[MESA_FORMAT_RGB888].format; else t->pp_txformat = tx_table_le[MESA_FORMAT_ARGB8888].format; @@ -873,7 +862,7 @@ void r200SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo void r200SetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv) { - r200SetTexBuffer2(pDRICtx, target, GLX_TEXTURE_FORMAT_RGBA_EXT, dPriv); + r200SetTexBuffer2(pDRICtx, target, __DRI_TEXTURE_FORMAT_RGBA, dPriv); } @@ -1066,6 +1055,7 @@ static GLboolean r200UpdateAllTexEnv( GLcontext *ctx ) #define TEXOBJ_TXFORMAT_X_MASK (R200_DEPTH_LOG2_MASK | \ R200_TEXCOORD_MASK | \ + R200_MIN_MIP_LEVEL_MASK | \ R200_CLAMP_Q_MASK | \ R200_VOLUME_FILTER_MASK) @@ -1421,6 +1411,7 @@ static void setup_hardware_state(r200ContextPtr rmesa, radeonTexObj *t) { const struct gl_texture_image *firstImage = t->base.Image[0][t->minLod]; GLint log2Width, log2Height, log2Depth, texelBytes; + uint extra_size = 0; if ( t->bo ) { 
return; @@ -1431,6 +1422,10 @@ static void setup_hardware_state(r200ContextPtr rmesa, radeonTexObj *t) log2Depth = firstImage->DepthLog2; texelBytes = _mesa_get_format_bytes(firstImage->TexFormat); + radeon_print(RADEON_TEXTURE, RADEON_TRACE, + "%s(%p, tex %p) log2(w %d, h %d, d %d), texelBytes %d. format %d\n", + __func__, rmesa, t, log2Width, log2Height, + log2Depth, texelBytes, firstImage->TexFormat); if (!t->image_override) { if (VALID_FORMAT(firstImage->TexFormat)) { @@ -1443,6 +1438,8 @@ static void setup_hardware_state(r200ContextPtr rmesa, radeonTexObj *t) t->pp_txformat |= table[ firstImage->TexFormat ].format; t->pp_txfilter |= table[ firstImage->TexFormat ].filter; + + } else { _mesa_problem(NULL, "unexpected texture format in %s", __FUNCTION__); @@ -1451,19 +1448,34 @@ static void setup_hardware_state(r200ContextPtr rmesa, radeonTexObj *t) } t->pp_txfilter &= ~R200_MAX_MIP_LEVEL_MASK; - t->pp_txfilter |= (t->maxLod - t->minLod) << R200_MAX_MIP_LEVEL_SHIFT; - + t->pp_txfilter |= ((t->maxLod) << R200_MAX_MIP_LEVEL_SHIFT) + & R200_MAX_MIP_LEVEL_MASK; + + if ( t->pp_txfilter & + (R200_MIN_FILTER_NEAREST_MIP_NEAREST + | R200_MIN_FILTER_NEAREST_MIP_LINEAR + | R200_MIN_FILTER_LINEAR_MIP_NEAREST + | R200_MIN_FILTER_LINEAR_MIP_LINEAR + | R200_MIN_FILTER_ANISO_NEAREST_MIP_NEAREST + | R200_MIN_FILTER_ANISO_NEAREST_MIP_LINEAR)) + extra_size = t->minLod; + t->pp_txformat &= ~(R200_TXFORMAT_WIDTH_MASK | R200_TXFORMAT_HEIGHT_MASK | R200_TXFORMAT_CUBIC_MAP_ENABLE | R200_TXFORMAT_F5_WIDTH_MASK | R200_TXFORMAT_F5_HEIGHT_MASK); - t->pp_txformat |= ((log2Width << R200_TXFORMAT_WIDTH_SHIFT) | - (log2Height << R200_TXFORMAT_HEIGHT_SHIFT)); + t->pp_txformat |= (((log2Width + extra_size) << R200_TXFORMAT_WIDTH_SHIFT) | + ((log2Height + extra_size)<< R200_TXFORMAT_HEIGHT_SHIFT)); t->tile_bits = 0; - t->pp_txformat_x &= ~(R200_DEPTH_LOG2_MASK | R200_TEXCOORD_MASK); + t->pp_txformat_x &= ~(R200_DEPTH_LOG2_MASK | R200_TEXCOORD_MASK + | R200_MIN_MIP_LEVEL_MASK); + + 
t->pp_txformat_x |= (t->minLod << R200_MIN_MIP_LEVEL_SHIFT) + & R200_MIN_MIP_LEVEL_MASK; + if (t->base.Target == GL_TEXTURE_3D) { t->pp_txformat_x |= (log2Depth << R200_DEPTH_LOG2_SHIFT); t->pp_txformat_x |= R200_TEXCOORD_VOLUME; @@ -1491,7 +1503,7 @@ static void setup_hardware_state(r200ContextPtr rmesa, radeonTexObj *t) */ t->pp_txformat_x |= R200_TEXCOORD_PROJ; } - + /* FIXME: NPOT sizes, Is it correct realy? */ t->pp_txsize = (((firstImage->Width - 1) << R200_PP_TX_WIDTHMASK_SHIFT) | ((firstImage->Height - 1) << R200_PP_TX_HEIGHTMASK_SHIFT)); diff --git a/r200/r200_vertprog.c b/r200/r200_vertprog.c index 11405d7..12f869d 100644 --- a/r200/r200_vertprog.c +++ b/r200/r200_vertprog.c @@ -437,7 +437,7 @@ static GLboolean r200_translate_vertex_program(GLcontext *ctx, struct r200_verte (1 << VERT_RESULT_TEX2) | (1 << VERT_RESULT_TEX3) | (1 << VERT_RESULT_TEX4) | (1 << VERT_RESULT_TEX5) | (1 << VERT_RESULT_PSIZ))) != 0) { if (R200_DEBUG & RADEON_FALLBACKS) { - fprintf(stderr, "can't handle vert prog outputs 0x%x\n", + fprintf(stderr, "can't handle vert prog outputs 0x%llx\n", mesa_vp->Base.OutputsWritten); } return GL_FALSE; @@ -1218,7 +1218,7 @@ r200DeleteProgram(GLcontext *ctx, struct gl_program *prog) _mesa_delete_program(ctx, prog); } -static void +static GLboolean r200ProgramStringNotify(GLcontext *ctx, GLenum target, struct gl_program *prog) { struct r200_vertex_program *vp = (void *)prog; @@ -1237,7 +1237,10 @@ r200ProgramStringNotify(GLcontext *ctx, GLenum target, struct gl_program *prog) break; } /* need this for tcl fallbacks */ - _tnl_program_string(ctx, target, prog); + (void) _tnl_program_string(ctx, target, prog); + + /* XXX check if program is legal, within limits */ + return GL_TRUE; } static GLboolean diff --git a/r300/Makefile.am b/r300/Makefile.am index 236710a..c4cf40a 100644 --- a/r300/Makefile.am +++ b/r300/Makefile.am @@ -24,10 +24,11 @@ r300_dri_la_SOURCES = \ ../radeon/radeon_span.c \ ../radeon/radeon_queryobj.c \ ../radeon/radeon_texture.c \ 
+ ../radeon/radeon_tex_copy.c \ ../radeon/radeon_screen.c \ + r300_blit.c \ r300_context.c \ r300_draw.c \ - r300_ioctl.c \ r300_cmdbuf.c \ r300_state.c \ r300_render.c \ diff --git a/r300/compiler/SConscript b/r300/compiler/SConscript index 46075a8..46075a8 100644..100755 --- a/r300/compiler/SConscript +++ b/r300/compiler/SConscript diff --git a/r300/compiler/memory_pool.c b/r300/compiler/memory_pool.c index 37aa2b6..76c7c60 100644 --- a/r300/compiler/memory_pool.c +++ b/r300/compiler/memory_pool.c @@ -71,12 +71,14 @@ static void refill_pool(struct memory_pool * pool) void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes) { if (bytes < POOL_LARGE_ALLOC) { + void * ptr; + if (pool->head + bytes > pool->end) refill_pool(pool); assert(pool->head + bytes <= pool->end); - void * ptr = pool->head; + ptr = pool->head; pool->head += bytes; pool->head = (unsigned char*)(((unsigned long)pool->head + POOL_ALIGN - 1) & ~(POOL_ALIGN - 1)); diff --git a/r300/compiler/r300_fragprog.c b/r300/compiler/r300_fragprog.c index aa69b0f..928c15e 100644 --- a/r300/compiler/r300_fragprog.c +++ b/r300/compiler/r300_fragprog.c @@ -297,7 +297,7 @@ void r300FragmentProgramDump(struct rX00_fragment_program_code *c) if (flags[0] != 0) { sprintf(tmp, "o%i.%s", (code->alu.inst[i]. - rgb_addr >> R300_ALU_DSTC_SHIFT) & 31, + rgb_addr >> 29) & 3, flags); strcat(dstc, tmp); } @@ -311,7 +311,7 @@ void r300FragmentProgramDump(struct rX00_fragment_program_code *c) if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_OUTPUT) { sprintf(tmp, "o%i.w ", (code->alu.inst[i]. 
- alpha_addr >> R300_ALU_DSTA_SHIFT) & 31); + alpha_addr >> 25) & 3); strcat(dsta, tmp); } if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_DEPTH) { diff --git a/r300/compiler/r300_fragprog_emit.c b/r300/compiler/r300_fragprog_emit.c index bbc0003..cc552ae 100644 --- a/r300/compiler/r300_fragprog_emit.c +++ b/r300/compiler/r300_fragprog_emit.c @@ -176,7 +176,9 @@ static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* i (inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT); } if (inst->RGB.OutputWriteMask) { - code->alu.inst[ip].rgb_addr |= (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT); + code->alu.inst[ip].rgb_addr |= + (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT) | + R300_RGB_TARGET(inst->RGB.Target); emit->node_flags |= R300_RGBA_OUT; } @@ -187,7 +189,8 @@ static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* i R300_ALU_DSTA_REG; } if (inst->Alpha.OutputWriteMask) { - code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_OUTPUT; + code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_OUTPUT | + R300_ALPHA_TARGET(inst->Alpha.Target); emit->node_flags |= R300_RGBA_OUT; } if (inst->Alpha.DepthWriteMask) { @@ -329,14 +332,13 @@ void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compi { struct r300_emit_state emit; struct r300_fragment_program_code *code = &compiler->code->code.r300; - struct rc_instruction * inst; memset(&emit, 0, sizeof(emit)); emit.compiler = compiler; memset(code, 0, sizeof(struct r300_fragment_program_code)); - for(inst = compiler->Base.Program.Instructions.Next; + for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions && !compiler->Base.Error; inst = inst->Next) { if (inst->Type == RC_INSTRUCTION_NORMAL) { diff --git a/r300/compiler/r3xx_fragprog.c b/r300/compiler/r3xx_fragprog.c index 5581f25..c2d5dc2 100644 --- a/r300/compiler/r3xx_fragprog.c +++ b/r300/compiler/r3xx_fragprog.c @@ 
-35,7 +35,10 @@ static void dataflow_outputs_mark_use(void * userdata, void * data, void (*callback)(void *, unsigned int, unsigned int)) { struct r300_fragment_program_compiler * c = userdata; - callback(data, c->OutputColor, RC_MASK_XYZW); + callback(data, c->OutputColor[0], RC_MASK_XYZW); + callback(data, c->OutputColor[1], RC_MASK_XYZW); + callback(data, c->OutputColor[2], RC_MASK_XYZW); + callback(data, c->OutputColor[3], RC_MASK_XYZW); callback(data, c->OutputDepth, RC_MASK_W); } diff --git a/r300/compiler/r500_fragprog.c b/r300/compiler/r500_fragprog.c index d87acec..b0fb8e9 100644 --- a/r300/compiler/r500_fragprog.c +++ b/r300/compiler/r500_fragprog.c @@ -295,7 +295,7 @@ static char *toswiz(int swiz_val) { case 2: return "B"; case 3: return "A"; case 4: return "0"; - case 5: return "1/2"; + case 5: return "H"; case 6: return "1"; case 7: return "U"; } diff --git a/r300/compiler/r500_fragprog_emit.c b/r300/compiler/r500_fragprog_emit.c index 2942267..710cae7 100644 --- a/r300/compiler/r500_fragprog_emit.c +++ b/r300/compiler/r500_fragprog_emit.c @@ -121,8 +121,19 @@ static unsigned int translate_alpha_op(struct r300_fragment_program_compiler *c, static unsigned int fix_hw_swizzle(unsigned int swz) { - if (swz == 5) swz = 6; - if (swz == RC_SWIZZLE_UNUSED) swz = 4; + switch (swz) { + case RC_SWIZZLE_ZERO: + case RC_SWIZZLE_UNUSED: + swz = 4; + break; + case RC_SWIZZLE_HALF: + swz = 5; + break; + case RC_SWIZZLE_ONE: + swz = 6; + break; + } + return swz; } @@ -241,6 +252,9 @@ static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT; code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT; + code->inst[ip].inst3 |= R500_ALU_RGB_TARGET(inst->RGB.Target); + code->inst[ip].inst4 |= R500_ALPHA_TARGET(inst->Alpha.Target); + if (inst->WriteALUResult) { code->inst[ip].inst3 |= R500_ALU_RGB_WMASK; @@ -422,7 +436,6 @@ void 
r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compi { struct emit_state s; struct r500_fragment_program_code *code = &compiler->code->code.r500; - struct rc_instruction * inst; memset(&s, 0, sizeof(s)); s.C = &compiler->Base; @@ -432,7 +445,7 @@ void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compi code->max_temp_idx = 1; code->inst_end = -1; - for (inst = compiler->Base.Program.Instructions.Next; + for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions && !compiler->Base.Error; inst = inst->Next) { if (inst->Type == RC_INSTRUCTION_NORMAL) { @@ -456,6 +469,8 @@ void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compi if (compiler->Base.Error) return; + assert(code->inst_end >= 0); + if ((code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) { /* This may happen when dead-code elimination is disabled or * when most of the fragment program logic is leading to a KIL */ diff --git a/r300/compiler/radeon_code.h b/r300/compiler/radeon_code.h index 902b7cf..6d979bb 100644 --- a/r300/compiler/radeon_code.h +++ b/r300/compiler/radeon_code.h @@ -59,7 +59,9 @@ enum { RC_STATE_SHADOW_AMBIENT = 0, RC_STATE_R300_WINDOW_DIMENSION, - RC_STATE_R300_TEXRECT_FACTOR + RC_STATE_R300_TEXRECT_FACTOR, + RC_STATE_R300_VIEWPORT_SCALE, + RC_STATE_R300_VIEWPORT_OFFSET }; struct rc_constant { diff --git a/r300/compiler/radeon_compiler.c b/r300/compiler/radeon_compiler.c index c0e7a7f..272f907 100644 --- a/r300/compiler/radeon_compiler.c +++ b/r300/compiler/radeon_compiler.c @@ -229,15 +229,20 @@ void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_ou /** * Introduce standard code fragment to deal with fragment.position. 
*/ -void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input) +void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input, + int full_vtransform) { unsigned tempregi = rc_find_free_temporary(c); + struct rc_instruction * inst_rcp; + struct rc_instruction * inst_mul; + struct rc_instruction * inst_mad; + struct rc_instruction * inst; c->Program.InputsRead &= ~(1 << wpos); c->Program.InputsRead |= 1 << new_input; /* perspective divide */ - struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions); + inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions); inst_rcp->U.I.Opcode = RC_OPCODE_RCP; inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; @@ -248,7 +253,7 @@ void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsig inst_rcp->U.I.SrcReg[0].Index = new_input; inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW; - struct rc_instruction * inst_mul = rc_insert_new_instruction(c, inst_rcp); + inst_mul = rc_insert_new_instruction(c, inst_rcp); inst_mul->U.I.Opcode = RC_OPCODE_MUL; inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; @@ -263,7 +268,7 @@ void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsig inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; /* viewport transformation */ - struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_mul); + inst_mad = rc_insert_new_instruction(c, inst_mul); inst_mad->U.I.Opcode = RC_OPCODE_MAD; inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; @@ -275,14 +280,19 @@ void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsig inst_mad->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO); inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT; - inst_mad->U.I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0); inst_mad->U.I.SrcReg[1].Swizzle = 
RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO); inst_mad->U.I.SrcReg[2].File = RC_FILE_CONSTANT; - inst_mad->U.I.SrcReg[2].Index = inst_mad->U.I.SrcReg[1].Index; inst_mad->U.I.SrcReg[2].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO); - struct rc_instruction * inst; + if (full_vtransform) { + inst_mad->U.I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_SCALE, 0); + inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_OFFSET, 0); + } else { + inst_mad->U.I.SrcReg[1].Index = + inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0); + } + for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); unsigned i; diff --git a/r300/compiler/radeon_compiler.h b/r300/compiler/radeon_compiler.h index 87a732c..61333af 100644 --- a/r300/compiler/radeon_compiler.h +++ b/r300/compiler/radeon_compiler.h @@ -23,6 +23,8 @@ #ifndef RADEON_COMPILER_H #define RADEON_COMPILER_H +#include "main/compiler.h" + #include "memory_pool.h" #include "radeon_code.h" #include "radeon_program.h" @@ -73,15 +75,18 @@ void rc_calculate_inputs_outputs(struct radeon_compiler * c); void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input); void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask); void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output); -void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input); +void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input, + int full_vtransform); struct r300_fragment_program_compiler { struct radeon_compiler Base; struct rX00_fragment_program_code *code; struct 
r300_fragment_program_external_state state; unsigned is_r500; + /* Register corresponding to the depthbuffer. */ unsigned OutputDepth; - unsigned OutputColor; + /* Registers corresponding to the four colorbuffers. */ + unsigned OutputColor[4]; void * UserData; void (*AllocateHwInputs)( diff --git a/r300/compiler/radeon_dataflow.c b/r300/compiler/radeon_dataflow.c index a003e77..cce9166 100644 --- a/r300/compiler/radeon_dataflow.c +++ b/r300/compiler/radeon_dataflow.c @@ -34,20 +34,19 @@ static void reads_normal(struct rc_instruction * fullinst, rc_read_write_fn cb, { struct rc_sub_instruction * inst = &fullinst->U.I; const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); - unsigned int src; - for(src = 0; src < opcode->NumSrcRegs; ++src) { - unsigned int refmask = 0, chan; - + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { + unsigned int refmask = 0; + if (inst->SrcReg[src].File == RC_FILE_NONE) return; - for(chan = 0; chan < 4; ++chan) + for(unsigned int chan = 0; chan < 4; ++chan) refmask |= 1 << GET_SWZ(inst->SrcReg[src].Swizzle, chan); refmask &= RC_MASK_XYZW; - for(chan = 0; chan < 4; ++chan) { + for(unsigned int chan = 0; chan < 4; ++chan) { if (GET_BIT(refmask, chan)) { cb(userdata, fullinst, inst->SrcReg[src].File, inst->SrcReg[src].Index, chan); } @@ -62,15 +61,12 @@ static void reads_pair(struct rc_instruction * fullinst, rc_read_write_fn cb, v { struct rc_pair_instruction * inst = &fullinst->U.P; unsigned int refmasks[3] = { 0, 0, 0 }; - unsigned int arg, src; if (inst->RGB.Opcode != RC_OPCODE_NOP) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->RGB.Opcode); - for(arg = 0; arg < opcode->NumSrcRegs; ++arg) { - unsigned int chan; - - for(chan = 0; chan < 3; ++chan) { + for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) { + for(unsigned int chan = 0; chan < 3; ++chan) { unsigned int swz = GET_SWZ(inst->RGB.Arg[arg].Swizzle, chan); if (swz < 4) refmasks[inst->RGB.Arg[arg].Source] |= 1 << swz; @@ 
-81,16 +77,15 @@ static void reads_pair(struct rc_instruction * fullinst, rc_read_write_fn cb, v if (inst->Alpha.Opcode != RC_OPCODE_NOP) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Alpha.Opcode); - for(arg = 0; arg < opcode->NumSrcRegs; ++arg) { + for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) { if (inst->Alpha.Arg[arg].Swizzle < 4) refmasks[inst->Alpha.Arg[arg].Source] |= 1 << inst->Alpha.Arg[arg].Swizzle; } } - for(src = 0; src < 3; ++src) { + for(unsigned int src = 0; src < 3; ++src) { if (inst->RGB.Src[src].Used) { - unsigned int chan; - for(chan = 0; chan < 3; ++chan) { + for(unsigned int chan = 0; chan < 3; ++chan) { if (GET_BIT(refmasks[src], chan)) cb(userdata, fullinst, inst->RGB.Src[src].File, inst->RGB.Src[src].Index, chan); } @@ -126,9 +121,7 @@ static void writes_normal(struct rc_instruction * fullinst, rc_read_write_fn cb, const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); if (opcode->HasDstReg) { - unsigned int chan; - - for(chan = 0; chan < 4; ++chan) { + for(unsigned int chan = 0; chan < 4; ++chan) { if (GET_BIT(inst->DstReg.WriteMask, chan)) cb(userdata, fullinst, inst->DstReg.File, inst->DstReg.Index, chan); } @@ -141,9 +134,8 @@ static void writes_normal(struct rc_instruction * fullinst, rc_read_write_fn cb, static void writes_pair(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata) { struct rc_pair_instruction * inst = &fullinst->U.P; - unsigned int chan; - for(chan = 0; chan < 3; ++chan) { + for(unsigned int chan = 0; chan < 3; ++chan) { if (GET_BIT(inst->RGB.WriteMask, chan)) cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->RGB.DestIndex, chan); } diff --git a/r300/compiler/radeon_dataflow_deadcode.c b/r300/compiler/radeon_dataflow_deadcode.c index d78efa1..e0c66c4 100644 --- a/r300/compiler/radeon_dataflow_deadcode.c +++ b/r300/compiler/radeon_dataflow_deadcode.c @@ -67,14 +67,12 @@ static void or_updatemasks( struct updatemask_state * a, struct updatemask_state * b) 
{ - unsigned int i; - - for(i = 0; i < RC_REGISTER_MAX_INDEX; ++i) { + for(unsigned int i = 0; i < RC_REGISTER_MAX_INDEX; ++i) { dst->Output[i] = a->Output[i] | b->Output[i]; dst->Temporary[i] = a->Temporary[i] | b->Temporary[i]; } - for(i = 0; i < RC_NUM_SPECIAL_REGISTERS; ++i) + for(unsigned int i = 0; i < RC_NUM_SPECIAL_REGISTERS; ++i) dst->Special[i] = a->Special[i] | b->Special[i]; dst->Address = a->Address | b->Address; @@ -138,7 +136,7 @@ static void update_instruction(struct deadcode_state * s, struct rc_instruction { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); struct instruction_state * insts = &s->Instructions[inst->IP]; - unsigned int usedmask = 0, src; + unsigned int usedmask = 0; if (opcode->HasDstReg) { unsigned char * pused = get_used_ptr(s, inst->U.I.DstReg.File, inst->U.I.DstReg.Index); @@ -166,12 +164,12 @@ static void update_instruction(struct deadcode_state * s, struct rc_instruction unsigned int srcmasks[3]; rc_compute_sources_for_writemask(opcode, usedmask, srcmasks); - for(src = 0; src < opcode->NumSrcRegs; ++src) { - unsigned int refmask = 0, chan; + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { + unsigned int refmask = 0; unsigned int newsrcmask = srcmasks[src] & ~insts->SrcReg[src]; insts->SrcReg[src] |= newsrcmask; - for(chan = 0; chan < 4; ++chan) { + for(unsigned int chan = 0; chan < 4; ++chan) { if (GET_BIT(newsrcmask, chan)) refmask |= 1 << GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan); } @@ -200,7 +198,6 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_f { struct deadcode_state s; unsigned int nr_instructions; - struct rc_instruction * inst; memset(&s, 0, sizeof(s)); s.C = c; @@ -211,7 +208,7 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_f dce(userdata, &s, &mark_output_use); - for(inst = c->Program.Instructions.Prev; + for(struct rc_instruction * inst = c->Program.Instructions.Prev; inst != &c->Program.Instructions; 
inst = inst->Prev) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); @@ -250,12 +247,11 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_f } unsigned int ip = 0; - for(inst = c->Program.Instructions.Next; + for(struct rc_instruction * inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next, ++ip) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);\ int dead = 1; - unsigned int src, chan; if (!opcode->HasDstReg) { dead = 0; @@ -287,8 +283,8 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_f rc_compute_sources_for_writemask(opcode, usemask, srcmasks); - for(src = 0; src < 3; ++src) { - for(chan = 0; chan < 4; ++chan) { + for(unsigned int src = 0; src < 3; ++src) { + for(unsigned int chan = 0; chan < 4; ++chan) { if (!GET_BIT(srcmasks[src], chan)) SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED); } diff --git a/r300/compiler/radeon_dataflow_swizzles.c b/r300/compiler/radeon_dataflow_swizzles.c index d4ccd35..33acbd3 100644 --- a/r300/compiler/radeon_dataflow_swizzles.c +++ b/r300/compiler/radeon_dataflow_swizzles.c @@ -36,17 +36,17 @@ static void rewrite_source(struct radeon_compiler * c, { struct rc_swizzle_split split; unsigned int tempreg = rc_find_free_temporary(c); - unsigned int usemask, chan, phase; + unsigned int usemask; usemask = 0; - for(chan = 0; chan < 4; ++chan) { + for(unsigned int chan = 0; chan < 4; ++chan) { if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) != RC_SWIZZLE_UNUSED) usemask |= 1 << chan; } c->SwizzleCaps->Split(inst->U.I.SrcReg[src], usemask, &split); - for(phase = 0; phase < split.NumPhases; ++phase) { + for(unsigned int phase = 0; phase < split.NumPhases; ++phase) { struct rc_instruction * mov = rc_insert_new_instruction(c, inst->Prev); unsigned int phase_refmask; unsigned int masked_negate; @@ -58,7 +58,7 @@ static void rewrite_source(struct radeon_compiler * c, 
mov->U.I.SrcReg[0] = inst->U.I.SrcReg[src]; phase_refmask = 0; - for(chan = 0; chan < 4; ++chan) { + for(unsigned int chan = 0; chan < 4; ++chan) { if (!GET_BIT(split.Phase[phase], chan)) SET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan, RC_SWIZZLE_UNUSED); else @@ -80,7 +80,7 @@ static void rewrite_source(struct radeon_compiler * c, inst->U.I.SrcReg[src].Swizzle = 0; inst->U.I.SrcReg[src].Negate = RC_MASK_NONE; inst->U.I.SrcReg[src].Abs = 0; - for(chan = 0; chan < 4; ++chan) { + for(unsigned int chan = 0; chan < 4; ++chan) { SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, GET_BIT(usemask, chan) ? chan : RC_SWIZZLE_UNUSED); } diff --git a/r300/compiler/radeon_opcodes.c b/r300/compiler/radeon_opcodes.c index 9285748..c1c0181 100644 --- a/r300/compiler/radeon_opcodes.c +++ b/r300/compiler/radeon_opcodes.c @@ -375,8 +375,6 @@ void rc_compute_sources_for_writemask( unsigned int writemask, unsigned int *srcmasks) { - unsigned int src; - srcmasks[0] = 0; srcmasks[1] = 0; srcmasks[2] = 0; @@ -390,10 +388,10 @@ void rc_compute_sources_for_writemask( return; if (opcode->IsComponentwise) { - for(src = 0; src < opcode->NumSrcRegs; ++src) + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) srcmasks[src] |= writemask; } else if (opcode->IsStandardScalar) { - for(src = 0; src < opcode->NumSrcRegs; ++src) + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) srcmasks[src] |= RC_MASK_X; } else { switch(opcode->Opcode) { diff --git a/r300/compiler/radeon_pair_regalloc.c b/r300/compiler/radeon_pair_regalloc.c index 23c2a5e..b2fe7f7 100644 --- a/r300/compiler/radeon_pair_regalloc.c +++ b/r300/compiler/radeon_pair_regalloc.c @@ -49,7 +49,7 @@ struct register_info { unsigned int Used:1; unsigned int Allocated:1; - rc_register_file File:3; + unsigned int File:3; unsigned int Index:RC_REGISTER_INDEX_BITS; }; @@ -186,11 +186,9 @@ static void scan_callback(void * data, struct rc_instruction * inst, static void compute_live_intervals(struct regalloc_state * s) { - struct rc_instruction 
* inst; - rc_recompute_ips(s->C); - for(inst = s->C->Program.Instructions.Next; + for(struct rc_instruction * inst = s->C->Program.Instructions.Next; inst != &s->C->Program.Instructions; inst = inst->Next) { rc_for_all_reads(inst, scan_callback, s); @@ -219,7 +217,6 @@ static void rewrite_register(struct regalloc_state * s, static void rewrite_normal_instruction(struct regalloc_state * s, struct rc_sub_instruction * inst) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); - unsigned int src; if (opcode->HasDstReg) { rc_register_file file = inst->DstReg.File; @@ -231,7 +228,7 @@ static void rewrite_normal_instruction(struct regalloc_state * s, struct rc_sub_ inst->DstReg.Index = index; } - for(src = 0; src < opcode->NumSrcRegs; ++src) { + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { rc_register_file file = inst->SrcReg[src].File; unsigned int index = inst->SrcReg[src].Index; @@ -244,8 +241,6 @@ static void rewrite_normal_instruction(struct regalloc_state * s, struct rc_sub_ static void rewrite_pair_instruction(struct regalloc_state * s, struct rc_pair_instruction * inst) { - unsigned int src; - if (inst->RGB.WriteMask) { rc_register_file file = RC_FILE_TEMPORARY; unsigned int index = inst->RGB.DestIndex; @@ -264,7 +259,7 @@ static void rewrite_pair_instruction(struct regalloc_state * s, struct rc_pair_i inst->Alpha.DestIndex = index; } - for(src = 0; src < 3; ++src) { + for(unsigned int src = 0; src < 3; ++src) { if (inst->RGB.Src[src].Used) { rc_register_file file = inst->RGB.Src[src].File; unsigned int index = inst->RGB.Src[src].Index; @@ -289,18 +284,14 @@ static void rewrite_pair_instruction(struct regalloc_state * s, struct rc_pair_i static void do_regalloc(struct regalloc_state * s) { - struct rc_instruction * inst; - unsigned int index; - /* Simple and stupid greedy register allocation */ - for(index = 0; index < RC_REGISTER_MAX_INDEX; ++index) { + for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) { 
struct register_info * reg = &s->Temporary[index]; - unsigned int hwreg; if (!reg->Used) continue; - for(hwreg = 0; hwreg < s->NumHwTemporaries; ++hwreg) { + for(unsigned int hwreg = 0; hwreg < s->NumHwTemporaries; ++hwreg) { if (try_add_live_intervals(s, &s->HwTemporary[hwreg].Used, &reg->Live)) { reg->Allocated = 1; reg->File = RC_FILE_TEMPORARY; @@ -316,7 +307,7 @@ static void do_regalloc(struct regalloc_state * s) } /* Rewrite all instructions based on the translation table we built */ - for(inst = s->C->Program.Instructions.Next; + for(struct rc_instruction * inst = s->C->Program.Instructions.Next; inst != &s->C->Program.Instructions; inst = inst->Next) { if (inst->Type == RC_INSTRUCTION_NORMAL) diff --git a/r300/compiler/radeon_pair_schedule.c b/r300/compiler/radeon_pair_schedule.c index 2890c00..df67aaf 100644 --- a/r300/compiler/radeon_pair_schedule.c +++ b/r300/compiler/radeon_pair_schedule.c @@ -165,11 +165,9 @@ static void decrease_dependencies(struct schedule_state * s, struct schedule_ins static void commit_instruction(struct schedule_state * s, struct schedule_instruction * sinst) { - unsigned int i; - DBG("%i: commit\n", sinst->Instruction->IP); - for(i = 0; i < sinst->NumReadValues; ++i) { + for(unsigned int i = 0; i < sinst->NumReadValues; ++i) { struct reg_value * v = sinst->ReadValues[i]; assert(v->NumReaders > 0); v->NumReaders--; @@ -179,12 +177,10 @@ static void commit_instruction(struct schedule_state * s, struct schedule_instru } } - for(i = 0; i < sinst->NumWriteValues; ++i) { + for(unsigned int i = 0; i < sinst->NumWriteValues; ++i) { struct reg_value * v = sinst->WriteValues[i]; - struct reg_value_reader * r; - if (v->NumReaders) { - for(r = v->Readers; r; r = r->Next) { + for(struct reg_value_reader * r = v->Readers; r; r = r->Next) { decrease_dependencies(s, r->Reader); } } else { @@ -235,15 +231,13 @@ static int destructive_merge_instructions( struct rc_pair_instruction * rgb, struct rc_pair_instruction * alpha) { - unsigned int arg; - 
assert(rgb->Alpha.Opcode == RC_OPCODE_NOP); assert(alpha->RGB.Opcode == RC_OPCODE_NOP); /* Copy alpha args into rgb */ const struct rc_opcode_info * opcode = rc_get_opcode_info(alpha->Alpha.Opcode); - for(arg = 0; arg < opcode->NumSrcRegs; ++arg) { + for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) { unsigned int srcrgb = 0; unsigned int srcalpha = 0; unsigned int oldsrc = alpha->Alpha.Arg[arg].Source; @@ -435,14 +429,13 @@ static void schedule_block(struct r300_fragment_program_compiler * c, struct rc_instruction * begin, struct rc_instruction * end) { struct schedule_state s; - struct rc_instruction * inst; memset(&s, 0, sizeof(s)); s.C = &c->Base; /* Scan instructions for data dependencies */ unsigned int ip = 0; - for(inst = begin; inst != end; inst = inst->Next) { + for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) { s.Current = memory_pool_malloc(&c->Base.Pool, sizeof(*s.Current)); memset(s.Current, 0, sizeof(struct schedule_instruction)); diff --git a/r300/compiler/radeon_pair_translate.c b/r300/compiler/radeon_pair_translate.c index 933cf13..fff5b0c 100644 --- a/r300/compiler/radeon_pair_translate.c +++ b/r300/compiler/radeon_pair_translate.c @@ -203,12 +203,21 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c, /* Destination handling */ if (inst->DstReg.File == RC_FILE_OUTPUT) { - if (inst->DstReg.Index == c->OutputColor) { - pair->RGB.OutputWriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ; - pair->Alpha.OutputWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); - } else if (inst->DstReg.Index == c->OutputDepth) { - pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); - } + if (inst->DstReg.Index == c->OutputDepth) { + pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); + } else { + for (i = 0; i < 4; i++) { + if (inst->DstReg.Index == c->OutputColor[i]) { + pair->RGB.Target = i; + pair->Alpha.Target = i; + pair->RGB.OutputWriteMask |= + inst->DstReg.WriteMask & 
RC_MASK_XYZ; + pair->Alpha.OutputWriteMask |= + GET_BIT(inst->DstReg.WriteMask, 3); + break; + } + } + } } else { if (needrgb) { pair->RGB.DestIndex = inst->DstReg.Index; @@ -233,9 +242,7 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c, */ void rc_pair_translate(struct r300_fragment_program_compiler *c) { - struct rc_instruction *inst; - - for(inst = c->Base.Program.Instructions.Next; + for(struct rc_instruction * inst = c->Base.Program.Instructions.Next; inst != &c->Base.Program.Instructions; inst = inst->Next) { if (inst->Type != RC_INSTRUCTION_NORMAL) diff --git a/r300/compiler/radeon_program.c b/r300/compiler/radeon_program.c index fb4752f..a3c41d7 100644 --- a/r300/compiler/radeon_program.c +++ b/r300/compiler/radeon_program.c @@ -92,9 +92,9 @@ struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register unsigned int rc_find_free_temporary(struct radeon_compiler * c) { - struct rc_instruction * rcinst; char used[RC_REGISTER_MAX_INDEX]; unsigned int i; + struct rc_instruction * rcinst; memset(used, 0, sizeof(used)); diff --git a/r300/compiler/radeon_program.h b/r300/compiler/radeon_program.h index 0359288..e318867 100644 --- a/r300/compiler/radeon_program.h +++ b/r300/compiler/radeon_program.h @@ -39,7 +39,7 @@ struct radeon_compiler; struct rc_src_register { - rc_register_file File:3; + unsigned int File:3; /** Negative values may be used for relative addressing. */ signed int Index:(RC_REGISTER_INDEX_BITS+1); @@ -55,7 +55,7 @@ struct rc_src_register { }; struct rc_dst_register { - rc_register_file File:3; + unsigned int File:3; /** Negative values may be used for relative addressing. */ signed int Index:(RC_REGISTER_INDEX_BITS+1); @@ -79,20 +79,20 @@ struct rc_sub_instruction { /** * Opcode of this instruction, according to \ref rc_opcode enums. */ - rc_opcode Opcode:8; + unsigned int Opcode:8; /** * Saturate each value of the result to the range [0,1] or [-1,1], * according to \ref rc_saturate_mode enums. 
*/ - rc_saturate_mode SaturateMode:2; + unsigned int SaturateMode:2; /** * Writing to the special register RC_SPECIAL_ALU_RESULT */ /*@{*/ - rc_write_aluresult WriteALUResult:2; - rc_compare_func ALUResultCompare:3; + unsigned int WriteALUResult:2; + unsigned int ALUResultCompare:3; /*@}*/ /** @@ -103,7 +103,7 @@ struct rc_sub_instruction { unsigned int TexSrcUnit:5; /** Source texture target, one of the \ref rc_texture_target enums */ - rc_texture_target TexSrcTarget:3; + unsigned int TexSrcTarget:3; /** True if tex instruction should do shadow comparison */ unsigned int TexShadow:1; diff --git a/r300/compiler/radeon_program_alu.c b/r300/compiler/radeon_program_alu.c index ced66af..b5c08ae 100644 --- a/r300/compiler/radeon_program_alu.c +++ b/r300/compiler/radeon_program_alu.c @@ -267,9 +267,9 @@ static void transform_LIT(struct radeon_compiler* c, temp = inst->U.I.DstReg.Index; srctemp = srcreg(RC_FILE_TEMPORARY, temp); - // tmp.x = max(0.0, Src.x); - // tmp.y = max(0.0, Src.y); - // tmp.w = clamp(Src.z, -128+eps, 128-eps); + /* tmp.x = max(0.0, Src.x); */ + /* tmp.y = max(0.0, Src.y); */ + /* tmp.w = clamp(Src.z, -128+eps, 128-eps); */ emit2(c, inst->Prev, RC_OPCODE_MAX, 0, dstregtmpmask(temp, RC_MASK_XYW), inst->U.I.SrcReg[0], @@ -280,7 +280,7 @@ static void transform_LIT(struct radeon_compiler* c, swizzle(srctemp, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W), negate(srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle))); - // tmp.w = Pow(tmp.y, tmp.w) + /* tmp.w = Pow(tmp.y, tmp.w) */ emit1(c, inst->Prev, RC_OPCODE_LG2, 0, dstregtmpmask(temp, RC_MASK_W), swizzle(srctemp, RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y)); @@ -292,14 +292,14 @@ static void transform_LIT(struct radeon_compiler* c, dstregtmpmask(temp, RC_MASK_W), swizzle(srctemp, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W)); - // tmp.z = (tmp.x > 0) ? tmp.w : 0.0 + /* tmp.z = (tmp.x > 0) ? 
tmp.w : 0.0 */ emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, dstregtmpmask(temp, RC_MASK_Z), negate(swizzle(srctemp, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X)), swizzle(srctemp, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W), builtin_zero); - // tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0 + /* tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0 */ emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, dstregtmpmask(temp, RC_MASK_XYW), swizzle(srctemp, RC_SWIZZLE_ONE, RC_SWIZZLE_X, RC_SWIZZLE_ONE, RC_SWIZZLE_ONE)); @@ -533,16 +533,16 @@ static void sincos_constants(struct radeon_compiler* c, unsigned int *constants) { static const float SinCosConsts[2][4] = { { - 1.273239545, // 4/PI - -0.405284735, // -4/(PI*PI) - 3.141592654, // PI - 0.2225 // weight + 1.273239545, /* 4/PI */ + -0.405284735, /* -4/(PI*PI) */ + 3.141592654, /* PI */ + 0.2225 /* weight */ }, { 0.75, 0.5, - 0.159154943, // 1/(2*PI) - 6.283185307 // 2*PI + 0.159154943, /* 1/(2*PI) */ + 6.283185307 /* 2*PI */ } }; int i; @@ -602,9 +602,9 @@ int radeonTransformTrigSimple(struct radeon_compiler* c, sincos_constants(c, constants); if (inst->U.I.Opcode == RC_OPCODE_COS) { - // MAD tmp.x, src, 1/(2*PI), 0.75 - // FRC tmp.x, tmp.x - // MAD tmp.z, tmp.x, 2*PI, -PI + /* MAD tmp.x, src, 1/(2*PI), 0.75 */ + /* FRC tmp.x, tmp.x */ + /* MAD tmp.z, tmp.x, 2*PI, -PI */ emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X), swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z), diff --git a/r300/compiler/radeon_program_pair.h b/r300/compiler/radeon_program_pair.h index 1600598..511cc70 100644 --- a/r300/compiler/radeon_program_pair.h +++ b/r300/compiler/radeon_program_pair.h @@ -52,14 +52,15 @@ struct r300_fragment_program_compiler; struct radeon_pair_instruction_source { unsigned int Used:1; - rc_register_file File:3; + unsigned int File:3; 
unsigned int Index:RC_REGISTER_INDEX_BITS; }; struct radeon_pair_instruction_rgb { - rc_opcode Opcode:8; + unsigned int Opcode:8; unsigned int DestIndex:RC_REGISTER_INDEX_BITS; unsigned int WriteMask:3; + unsigned int Target:2; unsigned int OutputWriteMask:3; unsigned int Saturate:1; @@ -74,9 +75,10 @@ struct radeon_pair_instruction_rgb { }; struct radeon_pair_instruction_alpha { - rc_opcode Opcode:8; + unsigned int Opcode:8; unsigned int DestIndex:RC_REGISTER_INDEX_BITS; unsigned int WriteMask:1; + unsigned int Target:2; unsigned int OutputWriteMask:1; unsigned int DepthWriteMask:1; unsigned int Saturate:1; @@ -95,8 +97,8 @@ struct rc_pair_instruction { struct radeon_pair_instruction_rgb RGB; struct radeon_pair_instruction_alpha Alpha; - rc_write_aluresult WriteALUResult:2; - rc_compare_func ALUResultCompare:3; + unsigned int WriteALUResult:2; + unsigned int ALUResultCompare:3; }; diff --git a/r300/compiler/radeon_program_print.c b/r300/compiler/radeon_program_print.c index c980f5c..28fb9ea 100644 --- a/r300/compiler/radeon_program_print.c +++ b/r300/compiler/radeon_program_print.c @@ -200,9 +200,8 @@ static void rc_print_pair_instruction(FILE * f, struct rc_instruction * fullinst { struct rc_pair_instruction * inst = &fullinst->U.P; int printedsrc = 0; - unsigned int src, arg; - for(src = 0; src < 3; ++src) { + for(unsigned int src = 0; src < 3; ++src) { if (inst->RGB.Src[src].Used) { if (printedsrc) fprintf(f, ", "); @@ -230,14 +229,14 @@ static void rc_print_pair_instruction(FILE * f, struct rc_instruction * fullinst (inst->RGB.WriteMask & 2) ? "y" : "", (inst->RGB.WriteMask & 4) ? "z" : ""); if (inst->RGB.OutputWriteMask) - fprintf(f, " color.%s%s%s", + fprintf(f, " color[%i].%s%s%s", inst->RGB.Target, (inst->RGB.OutputWriteMask & 1) ? "x" : "", (inst->RGB.OutputWriteMask & 2) ? "y" : "", (inst->RGB.OutputWriteMask & 4) ? 
"z" : ""); if (inst->WriteALUResult == RC_ALURESULT_X) fprintf(f, " aluresult"); - for (arg = 0; arg < opcode->NumSrcRegs; ++arg) { + for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) { const char* abs = inst->RGB.Arg[arg].Abs ? "|" : ""; const char* neg = inst->RGB.Arg[arg].Negate ? "-" : ""; fprintf(f, ", %s%ssrc%i.%c%c%c%s", neg, abs, inst->RGB.Arg[arg].Source, @@ -256,13 +255,13 @@ static void rc_print_pair_instruction(FILE * f, struct rc_instruction * fullinst if (inst->Alpha.WriteMask) fprintf(f, " temp[%i].w", inst->Alpha.DestIndex); if (inst->Alpha.OutputWriteMask) - fprintf(f, " color.w"); + fprintf(f, " color[%i].w", inst->Alpha.Target); if (inst->Alpha.DepthWriteMask) fprintf(f, " depth.w"); if (inst->WriteALUResult == RC_ALURESULT_W) fprintf(f, " aluresult"); - for(arg = 0; arg < opcode->NumSrcRegs; ++arg) { + for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) { const char* abs = inst->Alpha.Arg[arg].Abs ? "|" : ""; const char* neg = inst->Alpha.Arg[arg].Negate ? "-" : ""; fprintf(f, ", %s%ssrc%i.%c%s", neg, abs, inst->Alpha.Arg[arg].Source, diff --git a/r300/r300_blit.c b/r300/r300_blit.c new file mode 100644 index 0000000..d870c7f --- /dev/null +++ b/r300/r300_blit.c @@ -0,0 +1,663 @@ +/* + * Copyright (C) 2009 Maciej Cencora <m.cencora@gmail.com> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_common.h" +#include "r300_context.h" + +#include "r300_blit.h" +#include "r300_cmdbuf.h" +#include "r300_emit.h" +#include "r300_tex.h" +#include "compiler/radeon_compiler.h" +#include "compiler/radeon_opcodes.h" + +static void vp_ins_outs(struct r300_vertex_program_compiler *c) +{ + c->code->inputs[VERT_ATTRIB_POS] = 0; + c->code->inputs[VERT_ATTRIB_TEX0] = 1; + c->code->outputs[VERT_RESULT_HPOS] = 0; + c->code->outputs[VERT_RESULT_TEX0] = 1; +} + +static void fp_allocate_hw_inputs( + struct r300_fragment_program_compiler * c, + void (*allocate)(void * data, unsigned input, unsigned hwreg), + void * mydata) +{ + allocate(mydata, FRAG_ATTRIB_TEX0, 0); +} + +static void create_vertex_program(struct r300_context *r300) +{ + struct r300_vertex_program_compiler compiler; + struct rc_instruction *inst; + + rc_init(&compiler.Base); + + inst = rc_insert_new_instruction(&compiler.Base, compiler.Base.Program.Instructions.Prev); + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.DstReg.File = RC_FILE_OUTPUT; + inst->U.I.DstReg.Index = VERT_RESULT_HPOS; + inst->U.I.DstReg.RelAddr = 0; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + inst->U.I.SrcReg[0].Abs = 0; + inst->U.I.SrcReg[0].File = RC_FILE_INPUT; + inst->U.I.SrcReg[0].Index = VERT_ATTRIB_POS; + inst->U.I.SrcReg[0].Negate = 0; + inst->U.I.SrcReg[0].RelAddr = 0; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; + + inst = rc_insert_new_instruction(&compiler.Base, 
compiler.Base.Program.Instructions.Prev); + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.DstReg.File = RC_FILE_OUTPUT; + inst->U.I.DstReg.Index = VERT_RESULT_TEX0; + inst->U.I.DstReg.RelAddr = 0; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + inst->U.I.SrcReg[0].Abs = 0; + inst->U.I.SrcReg[0].File = RC_FILE_INPUT; + inst->U.I.SrcReg[0].Index = VERT_ATTRIB_TEX0; + inst->U.I.SrcReg[0].Negate = 0; + inst->U.I.SrcReg[0].RelAddr = 0; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; + + compiler.Base.Program.InputsRead = (1 << VERT_ATTRIB_POS) | (1 << VERT_ATTRIB_TEX0); + compiler.RequiredOutputs = compiler.Base.Program.OutputsWritten = (1 << VERT_RESULT_HPOS) | (1 << VERT_RESULT_TEX0); + compiler.SetHwInputOutput = vp_ins_outs; + compiler.code = &r300->blit.vp_code; + + r3xx_compile_vertex_program(&compiler); +} + +static void create_fragment_program(struct r300_context *r300) +{ + struct r300_fragment_program_compiler compiler; + struct rc_instruction *inst; + + memset(&compiler, 0, sizeof(struct r300_fragment_program_compiler)); + rc_init(&compiler.Base); + + inst = rc_insert_new_instruction(&compiler.Base, compiler.Base.Program.Instructions.Prev); + inst->U.I.Opcode = RC_OPCODE_TEX; + inst->U.I.TexSrcTarget = RC_TEXTURE_2D; + inst->U.I.TexSrcUnit = 0; + inst->U.I.DstReg.File = RC_FILE_OUTPUT; + inst->U.I.DstReg.Index = FRAG_RESULT_COLOR; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + inst->U.I.SrcReg[0].Abs = 0; + inst->U.I.SrcReg[0].File = RC_FILE_INPUT; + inst->U.I.SrcReg[0].Index = FRAG_ATTRIB_TEX0; + inst->U.I.SrcReg[0].Negate = 0; + inst->U.I.SrcReg[0].RelAddr = 0; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; + + compiler.Base.Program.InputsRead = (1 << FRAG_ATTRIB_TEX0); + compiler.OutputColor[0] = FRAG_RESULT_COLOR; + compiler.OutputDepth = FRAG_RESULT_DEPTH; + compiler.is_r500 = (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515); + compiler.code = &r300->blit.fp_code; + compiler.AllocateHwInputs = fp_allocate_hw_inputs; + + 
r3xx_compile_fragment_program(&compiler); +} + +void r300_blit_init(struct r300_context *r300) +{ + if (r300->options.hw_tcl_enabled) + create_vertex_program(r300); + create_fragment_program(r300); +} + +static void r300_emit_tx_setup(struct r300_context *r300, + gl_format mesa_format, + struct radeon_bo *bo, + intptr_t offset, + unsigned width, + unsigned height, + unsigned pitch) +{ + BATCH_LOCALS(&r300->radeon); + + assert(width <= 2048); + assert(height <= 2048); + assert(r300TranslateTexFormat(mesa_format) >= 0); + assert(offset % 32 == 0); + + BEGIN_BATCH(17); + OUT_BATCH_REGVAL(R300_TX_FILTER0_0, + (R300_TX_CLAMP_TO_EDGE << R300_TX_WRAP_S_SHIFT) | + (R300_TX_CLAMP_TO_EDGE << R300_TX_WRAP_T_SHIFT) | + (R300_TX_CLAMP_TO_EDGE << R300_TX_WRAP_R_SHIFT) | + R300_TX_MIN_FILTER_MIP_NONE | + R300_TX_MIN_FILTER_NEAREST | + R300_TX_MAG_FILTER_NEAREST | + (0 << 28)); + OUT_BATCH_REGVAL(R300_TX_FILTER1_0, 0); + OUT_BATCH_REGVAL(R300_TX_SIZE_0, + ((width-1) << R300_TX_WIDTHMASK_SHIFT) | + ((height-1) << R300_TX_HEIGHTMASK_SHIFT) | + (0 << R300_TX_DEPTHMASK_SHIFT) | + (0 << R300_TX_MAX_MIP_LEVEL_SHIFT) | + R300_TX_SIZE_TXPITCH_EN); + + OUT_BATCH_REGVAL(R300_TX_FORMAT_0, r300TranslateTexFormat(mesa_format)); + OUT_BATCH_REGVAL(R300_TX_FORMAT2_0, pitch - 1); + OUT_BATCH_REGSEQ(R300_TX_OFFSET_0, 1); + OUT_BATCH_RELOC(0, bo, offset, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); + + OUT_BATCH_REGSEQ(R300_TX_INVALTAGS, 2); + OUT_BATCH(0); + OUT_BATCH(1); + + END_BATCH(); +} + +#define EASY_US_FORMAT(FMT, C0, C1, C2, C3, SIGN) \ + (FMT | R500_C0_SEL_##C0 | R500_C1_SEL_##C1 | \ + R500_C2_SEL_##C2 | R500_C3_SEL_##C3 | R500_OUT_SIGN(SIGN)) + +static uint32_t mesa_format_to_us_format(gl_format mesa_format) +{ + switch(mesa_format) + { + case MESA_FORMAT_RGBA8888: // x + return EASY_US_FORMAT(R500_OUT_FMT_C4_8, A, B, G, R, 0); + case MESA_FORMAT_RGB565: // x + case MESA_FORMAT_ARGB1555: // x + case MESA_FORMAT_RGBA8888_REV: // x + return EASY_US_FORMAT(R500_OUT_FMT_C4_8, R, G, 
B, A, 0); + case MESA_FORMAT_ARGB8888: // x + return EASY_US_FORMAT(R500_OUT_FMT_C4_8, B, G, R, A, 0); + case MESA_FORMAT_ARGB8888_REV: + return EASY_US_FORMAT(R500_OUT_FMT_C4_8, A, R, G, B, 0); + case MESA_FORMAT_XRGB8888: + return EASY_US_FORMAT(R500_OUT_FMT_C4_8, A, R, G, B, 0); + + case MESA_FORMAT_RGB332: + return EASY_US_FORMAT(R500_OUT_FMT_C_3_3_2, A, R, G, B, 0); + + case MESA_FORMAT_RGBA_FLOAT32: + return EASY_US_FORMAT(R500_OUT_FMT_C4_32_FP, R, G, B, A, 0); + case MESA_FORMAT_RGBA_FLOAT16: + return EASY_US_FORMAT(R500_OUT_FMT_C4_16_FP, R, G, B, A, 0); + case MESA_FORMAT_ALPHA_FLOAT32: + return EASY_US_FORMAT(R500_OUT_FMT_C_32_FP, A, A, A, A, 0); + case MESA_FORMAT_ALPHA_FLOAT16: + return EASY_US_FORMAT(R500_OUT_FMT_C_16_FP, A, A, A, A, 0); + + case MESA_FORMAT_SIGNED_RGBA8888: + return EASY_US_FORMAT(R500_OUT_FMT_C4_8, R, G, B, A, 0xf); + case MESA_FORMAT_SIGNED_RGBA8888_REV: + return EASY_US_FORMAT(R500_OUT_FMT_C4_8, A, B, G, R, 0xf); + case MESA_FORMAT_SIGNED_RGBA_16: + return EASY_US_FORMAT(R500_OUT_FMT_C4_16, R, G, B, A, 0xf); + + default: + fprintf(stderr, "Unsupported format %s for US output\n", _mesa_get_format_name(mesa_format)); + assert(0); + return 0; + } +} +#undef EASY_US_FORMAT + +static void r500_emit_fp_setup(struct r300_context *r300, + struct r500_fragment_program_code *fp, + gl_format dst_format) +{ + r500_emit_fp(r300, (uint32_t *)fp->inst, (fp->inst_end + 1) * 6, 0, 0, 0); + BATCH_LOCALS(&r300->radeon); + + BEGIN_BATCH(10); + OUT_BATCH_REGSEQ(R500_US_CODE_ADDR, 3); + OUT_BATCH(R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(fp->inst_end)); + OUT_BATCH(R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(fp->inst_end)); + OUT_BATCH(0); + OUT_BATCH_REGVAL(R500_US_CONFIG, 0); + OUT_BATCH_REGVAL(R500_US_OUT_FMT_0, mesa_format_to_us_format(dst_format)); + OUT_BATCH_REGVAL(R500_US_PIXSIZE, fp->max_temp_idx); + END_BATCH(); +} + +static void r500_emit_rs_setup(struct r300_context *r300) +{ + BATCH_LOCALS(&r300->radeon); + + 
BEGIN_BATCH(7); + OUT_BATCH_REGSEQ(R300_RS_COUNT, 2); + OUT_BATCH((4 << R300_IT_COUNT_SHIFT) | R300_HIRES_EN); + OUT_BATCH(0); + OUT_BATCH_REGVAL(R500_RS_INST_0, + (0 << R500_RS_INST_TEX_ID_SHIFT) | + (0 << R500_RS_INST_TEX_ADDR_SHIFT) | + R500_RS_INST_TEX_CN_WRITE | + R500_RS_INST_COL_CN_NO_WRITE); + OUT_BATCH_REGVAL(R500_RS_IP_0, + (0 << R500_RS_IP_TEX_PTR_S_SHIFT) | + (1 << R500_RS_IP_TEX_PTR_T_SHIFT) | + (2 << R500_RS_IP_TEX_PTR_R_SHIFT) | + (3 << R500_RS_IP_TEX_PTR_Q_SHIFT)); + END_BATCH(); +} + +static void r300_emit_fp_setup(struct r300_context *r300, + struct r300_fragment_program_code *code, + gl_format dst_format) +{ + unsigned i; + BATCH_LOCALS(&r300->radeon); + + BEGIN_BATCH((code->alu.length + 1) * 4 + code->tex.length + 1 + 11); + + OUT_BATCH_REGSEQ(R300_US_ALU_RGB_INST_0, code->alu.length); + for (i = 0; i < code->alu.length; i++) { + OUT_BATCH(code->alu.inst[i].rgb_inst); + } + OUT_BATCH_REGSEQ(R300_US_ALU_RGB_ADDR_0, code->alu.length); + for (i = 0; i < code->alu.length; i++) { + OUT_BATCH(code->alu.inst[i].rgb_addr); + } + OUT_BATCH_REGSEQ(R300_US_ALU_ALPHA_INST_0, code->alu.length); + for (i = 0; i < code->alu.length; i++) { + OUT_BATCH(code->alu.inst[i].alpha_inst); + } + OUT_BATCH_REGSEQ(R300_US_ALU_ALPHA_ADDR_0, code->alu.length); + for (i = 0; i < code->alu.length; i++) { + OUT_BATCH(code->alu.inst[i].alpha_addr); + } + + OUT_BATCH_REGSEQ(R300_US_TEX_INST_0, code->tex.length); + OUT_BATCH_TABLE(code->tex.inst, code->tex.length); + + OUT_BATCH_REGSEQ(R300_US_CONFIG, 3); + OUT_BATCH(R300_PFS_CNTL_FIRST_NODE_HAS_TEX); + OUT_BATCH(code->pixsize); + OUT_BATCH(code->code_offset); + OUT_BATCH_REGSEQ(R300_US_CODE_ADDR_0, 4); + OUT_BATCH_TABLE(code->code_addr, 4); + OUT_BATCH_REGVAL(R500_US_OUT_FMT_0, mesa_format_to_us_format(dst_format)); + END_BATCH(); +} + +static void r300_emit_rs_setup(struct r300_context *r300) +{ + BATCH_LOCALS(&r300->radeon); + + BEGIN_BATCH(7); + OUT_BATCH_REGSEQ(R300_RS_COUNT, 2); + OUT_BATCH((4 << R300_IT_COUNT_SHIFT) | 
R300_HIRES_EN); + OUT_BATCH(0); + OUT_BATCH_REGVAL(R300_RS_INST_0, + R300_RS_INST_TEX_ID(0) | + R300_RS_INST_TEX_ADDR(0) | + R300_RS_INST_TEX_CN_WRITE); + OUT_BATCH_REGVAL(R300_RS_IP_0, + R300_RS_TEX_PTR(0) | + R300_RS_SEL_S(R300_RS_SEL_C0) | + R300_RS_SEL_T(R300_RS_SEL_C1) | + R300_RS_SEL_R(R300_RS_SEL_K0) | + R300_RS_SEL_Q(R300_RS_SEL_K1)); + END_BATCH(); +} + +static void emit_pvs_setup(struct r300_context *r300, + uint32_t *vp_code, + unsigned vp_len) +{ + BATCH_LOCALS(&r300->radeon); + + r300_emit_vpu(r300, vp_code, vp_len * 4, R300_PVS_CODE_START); + + BEGIN_BATCH(4); + OUT_BATCH_REGSEQ(R300_VAP_PVS_CODE_CNTL_0, 3); + OUT_BATCH((0 << R300_PVS_FIRST_INST_SHIFT) | + ((vp_len - 1) << R300_PVS_XYZW_VALID_INST_SHIFT) | + ((vp_len - 1)<< R300_PVS_LAST_INST_SHIFT)); + OUT_BATCH(0); + OUT_BATCH((vp_len - 1) << R300_PVS_LAST_VTX_SRC_INST_SHIFT); + END_BATCH(); +} + +static void emit_vap_setup(struct r300_context *r300) +{ + int tex_offset; + BATCH_LOCALS(&r300->radeon); + + if (r300->options.hw_tcl_enabled) + tex_offset = 1; + else + tex_offset = 6; + + BEGIN_BATCH(12); + OUT_BATCH_REGSEQ(R300_SE_VTE_CNTL, 2); + OUT_BATCH(R300_VTX_XY_FMT | R300_VTX_Z_FMT); + OUT_BATCH(4); + + OUT_BATCH_REGVAL(R300_VAP_PSC_SGN_NORM_CNTL, 0xaaaaaaaa); + OUT_BATCH_REGVAL(R300_VAP_PROG_STREAM_CNTL_0, + ((R300_DATA_TYPE_FLOAT_2 | (0 << R300_DST_VEC_LOC_SHIFT)) << 0) | + (((tex_offset << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_2 | R300_LAST_VEC) << 16)); + OUT_BATCH_REGVAL(R300_VAP_PROG_STREAM_CNTL_EXT_0, + ((((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | + (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) | + (R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Z_SHIFT) | + (R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_SHIFT) | + (0xf << R300_WRITE_ENA_SHIFT) ) << 0) | + (((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | + (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) | + (R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Z_SHIFT) | + 
(R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_SHIFT) | + (0xf << R300_WRITE_ENA_SHIFT) ) << 16) ) ); + OUT_BATCH_REGSEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2); + OUT_BATCH(R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT); + OUT_BATCH(R300_VAP_OUTPUT_VTX_FMT_1__4_COMPONENTS); + END_BATCH(); +} + +static GLboolean validate_buffers(struct r300_context *r300, + struct radeon_bo *src_bo, + struct radeon_bo *dst_bo) +{ + int ret; + + radeon_cs_space_reset_bos(r300->radeon.cmdbuf.cs); + + ret = radeon_cs_space_check_with_bo(r300->radeon.cmdbuf.cs, + src_bo, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT, 0); + if (ret) + return GL_FALSE; + + ret = radeon_cs_space_check_with_bo(r300->radeon.cmdbuf.cs, + dst_bo, 0, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT); + if (ret) + return GL_FALSE; + + return GL_TRUE; +} + +/** + * Calculate texcoords for given image region. + * Output values are [minx, maxx, miny, maxy] + */ +static void calc_tex_coords(float img_width, float img_height, + float x, float y, + float reg_width, float reg_height, + unsigned flip_y, float *buf) +{ + buf[0] = x / img_width; + buf[1] = buf[0] + reg_width / img_width; + buf[2] = y / img_height; + buf[3] = buf[2] + reg_height / img_height; + if (flip_y) + { + buf[2] = 1.0 - buf[2]; + buf[3] = 1.0 - buf[3]; + } +} + +static void emit_draw_packet(struct r300_context *r300, + unsigned src_width, unsigned src_height, + unsigned src_x_offset, unsigned src_y_offset, + unsigned dst_x_offset, unsigned dst_y_offset, + unsigned reg_width, unsigned reg_height, + unsigned flip_y) +{ + float texcoords[4]; + + calc_tex_coords(src_width, src_height, + src_x_offset, src_y_offset, + reg_width, reg_height, + flip_y, texcoords); + + float verts[] = { dst_x_offset, dst_y_offset, + texcoords[0], texcoords[2], + dst_x_offset, dst_y_offset + reg_height, + texcoords[0], texcoords[3], + dst_x_offset + reg_width, dst_y_offset + reg_height, + texcoords[1], texcoords[3], + dst_x_offset + reg_width, dst_y_offset, + texcoords[1], texcoords[2] 
}; + + BATCH_LOCALS(&r300->radeon); + + BEGIN_BATCH(19); + OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_IMMD_2, 16); + OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | + (4 << 16) | R300_VAP_VF_CNTL__PRIM_QUADS); + OUT_BATCH_TABLE(verts, 16); + END_BATCH(); +} + +static void other_stuff(struct r300_context *r300) +{ + BATCH_LOCALS(&r300->radeon); + + BEGIN_BATCH(13); + OUT_BATCH_REGVAL(R300_GA_POLY_MODE, + R300_GA_POLY_MODE_FRONT_PTYPE_TRI | R300_GA_POLY_MODE_BACK_PTYPE_TRI); + OUT_BATCH_REGVAL(R300_SU_CULL_MODE, R300_FRONT_FACE_CCW); + OUT_BATCH_REGVAL(R300_FG_FOG_BLEND, 0); + OUT_BATCH_REGVAL(R300_FG_ALPHA_FUNC, 0); + OUT_BATCH_REGSEQ(R300_RB3D_CBLEND, 2); + OUT_BATCH(0x0); + OUT_BATCH(0x0); + OUT_BATCH_REGVAL(R300_ZB_CNTL, 0); + END_BATCH(); + if (r300->options.hw_tcl_enabled) { + BEGIN_BATCH(2); + OUT_BATCH_REGVAL(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE); + END_BATCH(); + } +} + +static void emit_cb_setup(struct r300_context *r300, + struct radeon_bo *bo, + intptr_t offset, + gl_format mesa_format, + unsigned pitch, + unsigned width, + unsigned height) +{ + BATCH_LOCALS(&r300->radeon); + + unsigned x1, y1, x2, y2; + x1 = 0; + y1 = 0; + x2 = width - 1; + y2 = height - 1; + + if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) { + x1 += R300_SCISSORS_OFFSET; + y1 += R300_SCISSORS_OFFSET; + x2 += R300_SCISSORS_OFFSET; + y2 += R300_SCISSORS_OFFSET; + } + + r300_emit_cb_setup(r300, bo, offset, mesa_format, + _mesa_get_format_bytes(mesa_format), + _mesa_format_row_stride(mesa_format, pitch)); + + BEGIN_BATCH_NO_AUTOSTATE(5); + OUT_BATCH_REGSEQ(R300_SC_SCISSORS_TL, 2); + OUT_BATCH((x1 << R300_SCISSORS_X_SHIFT)|(y1 << R300_SCISSORS_Y_SHIFT)); + OUT_BATCH((x2 << R300_SCISSORS_X_SHIFT)|(y2 << R300_SCISSORS_Y_SHIFT)); + OUT_BATCH_REGVAL(R300_RB3D_CCTL, 0); + END_BATCH(); +} + +unsigned r300_check_blit(gl_format dst_format) +{ + switch (dst_format) { + case MESA_FORMAT_RGB565: + case MESA_FORMAT_ARGB1555: + case MESA_FORMAT_RGBA8888: + case 
MESA_FORMAT_RGBA8888_REV: + case MESA_FORMAT_ARGB8888: + case MESA_FORMAT_ARGB8888_REV: + case MESA_FORMAT_XRGB8888: + break; + default: + return 0; + } + + if (_mesa_get_format_bits(dst_format, GL_DEPTH_BITS) > 0) + return 0; + + return 1; +} + +/** + * Copy a region of [@a width x @a height] pixels from source buffer + * to destination buffer. + * @param[in] r300 r300 context + * @param[in] src_bo source radeon buffer object + * @param[in] src_offset offset of the source image in the @a src_bo + * @param[in] src_mesaformat source image format + * @param[in] src_pitch aligned source image width + * @param[in] src_width source image width + * @param[in] src_height source image height + * @param[in] src_x_offset x offset in the source image + * @param[in] src_y_offset y offset in the source image + * @param[in] dst_bo destination radeon buffer object + * @param[in] dst_offset offset of the destination image in the @a dst_bo + * @param[in] dst_mesaformat destination image format + * @param[in] dst_pitch aligned destination image width + * @param[in] dst_width destination image width + * @param[in] dst_height destination image height + * @param[in] dst_x_offset x offset in the destination image + * @param[in] dst_y_offset y offset in the destination image + * @param[in] width region width + * @param[in] height region height + * @param[in] flip_y set if y coords of the source image need to be flipped + */ +unsigned r300_blit(GLcontext *ctx, + struct radeon_bo *src_bo, + intptr_t src_offset, + gl_format src_mesaformat, + unsigned src_pitch, + unsigned src_width, + unsigned src_height, + unsigned src_x_offset, + unsigned src_y_offset, + struct radeon_bo *dst_bo, + intptr_t dst_offset, + gl_format dst_mesaformat, + unsigned dst_pitch, + unsigned dst_width, + unsigned dst_height, + unsigned dst_x_offset, + unsigned dst_y_offset, + unsigned reg_width, + unsigned reg_height, + unsigned flip_y) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + + if 
(!r300_check_blit(dst_mesaformat)) + return 0; + + /* Make sure that colorbuffer has even width - hw limitation */ + if (dst_pitch % 2 > 0) + ++dst_pitch; + + /* Rendering to small buffer doesn't work. + * Looks like a hw limitation. + */ + if (dst_pitch < 32) + return 0; + + /* Need to clamp the region size to make sure + * we don't read outside of the source buffer + * or write outside of the destination buffer. + */ + if (reg_width + src_x_offset > src_width) + reg_width = src_width - src_x_offset; + if (reg_height + src_y_offset > src_height) + reg_height = src_height - src_y_offset; + if (reg_width + dst_x_offset > dst_width) + reg_width = dst_width - dst_x_offset; + if (reg_height + dst_y_offset > dst_height) + reg_height = dst_height - dst_y_offset; + + if (src_bo == dst_bo) { + return 0; + } + + if (src_offset % 32 || dst_offset % 32) { + return GL_FALSE; + } + + if (0) { + fprintf(stderr, "src: size [%d x %d], pitch %d, " + "offset [%d x %d], format %s, bo %p\n", + src_width, src_height, src_pitch, + src_x_offset, src_y_offset, + _mesa_get_format_name(src_mesaformat), + src_bo); + fprintf(stderr, "dst: pitch %d, offset[%d x %d], format %s, bo %p\n", + dst_pitch, dst_x_offset, dst_y_offset, + _mesa_get_format_name(dst_mesaformat), dst_bo); + fprintf(stderr, "region: %d x %d\n", reg_width, reg_height); + } + + /* Flush is needed to make sure that source buffer has correct data */ + radeonFlush(r300->radeon.glCtx); + + if (!validate_buffers(r300, src_bo, dst_bo)) + return 0; + + rcommonEnsureCmdBufSpace(&r300->radeon, 200, __FUNCTION__); + + other_stuff(r300); + + r300_emit_tx_setup(r300, src_mesaformat, src_bo, src_offset, src_width, src_height, src_pitch); + + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + r500_emit_fp_setup(r300, &r300->blit.fp_code.code.r500, dst_mesaformat); + r500_emit_rs_setup(r300); + } else { + r300_emit_fp_setup(r300, &r300->blit.fp_code.code.r300, dst_mesaformat); + r300_emit_rs_setup(r300); + } + + if 
(r300->options.hw_tcl_enabled) + emit_pvs_setup(r300, r300->blit.vp_code.body.d, 2); + + emit_vap_setup(r300); + + emit_cb_setup(r300, dst_bo, dst_offset, dst_mesaformat, dst_pitch, dst_width, dst_height); + + emit_draw_packet(r300, src_width, src_height, + src_x_offset, src_y_offset, + dst_x_offset, dst_y_offset, + reg_width, reg_height, + flip_y); + + r300EmitCacheFlush(r300); + + radeonFlush(r300->radeon.glCtx); + + return 1; +} diff --git a/r300/r300_blit.h b/r300/r300_blit.h new file mode 100644 index 0000000..39b157a --- /dev/null +++ b/r300/r300_blit.h @@ -0,0 +1,56 @@ +/* + * Copyright (C) 2009 Maciej Cencora <m.cencora@gmail.com> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef R300_BLIT_H +#define R300_BLIT_H + +void r300_blit_init(struct r300_context *r300); + +unsigned r300_check_blit(gl_format mesa_format); + +unsigned r300_blit(GLcontext *ctx, + struct radeon_bo *src_bo, + intptr_t src_offset, + gl_format src_mesaformat, + unsigned src_pitch, + unsigned src_width, + unsigned src_height, + unsigned src_x_offset, + unsigned src_y_offset, + struct radeon_bo *dst_bo, + intptr_t dst_offset, + gl_format dst_mesaformat, + unsigned dst_pitch, + unsigned dst_width, + unsigned dst_height, + unsigned dst_x_offset, + unsigned dst_y_offset, + unsigned reg_width, + unsigned reg_height, + unsigned flip_y); + +#endif // R300_BLIT_H diff --git a/r300/r300_cmdbuf.c b/r300/r300_cmdbuf.c index ad8db6e..6cfa568 100644 --- a/r300/r300_cmdbuf.c +++ b/r300/r300_cmdbuf.c @@ -39,19 +39,16 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/macros.h" #include "main/context.h" #include "main/simple_list.h" -#include "swrast/swrast.h" #include "drm.h" #include "radeon_drm.h" #include "r300_context.h" -#include "r300_ioctl.h" #include "r300_reg.h" #include "r300_cmdbuf.h" #include "r300_emit.h" #include "radeon_bocs_wrapper.h" #include "radeon_mipmap_tree.h" -#include "r300_state.h" #include "radeon_queryobj.h" /** # of dwords reserved for additional instructions that may need to be written @@ -72,7 +69,7 @@ static unsigned packet0_count(r300ContextPtr r300, uint32_t *pkt) #define vpu_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count) #define r500fp_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->r500fp.count) -int check_vpu(GLcontext *ctx, struct radeon_state_atom *atom) +static int check_vpu(GLcontext *ctx, struct radeon_state_atom *atom) { r300ContextPtr r300 = R300_CONTEXT(ctx); int cnt; @@ -86,54 +83,72 @@ int check_vpu(GLcontext *ctx, struct radeon_state_atom *atom) return cnt ? 
(cnt * 4) + extra : 0; } +void r300_emit_vpu(struct r300_context *r300, + uint32_t *data, + unsigned len, + uint32_t addr) +{ + BATCH_LOCALS(&r300->radeon); + + BEGIN_BATCH_NO_AUTOSTATE(3 + len); + OUT_BATCH_REGVAL(R300_VAP_PVS_VECTOR_INDX_REG, addr); + OUT_BATCH(CP_PACKET0(R300_VAP_PVS_UPLOAD_DATA, len-1) | RADEON_ONE_REG_WR); + OUT_BATCH_TABLE(data, len); + END_BATCH(); +} -void emit_vpu(GLcontext *ctx, struct radeon_state_atom * atom) +static void emit_vpu_state(GLcontext *ctx, struct radeon_state_atom * atom) { - r300ContextPtr r300 = R300_CONTEXT(ctx); - BATCH_LOCALS(&r300->radeon); - drm_r300_cmd_header_t cmd; - uint32_t addr, ndw; + r300ContextPtr r300 = R300_CONTEXT(ctx); + drm_r300_cmd_header_t cmd; + uint32_t addr, ndw; - cmd.u = atom->cmd[0]; - addr = (cmd.vpu.adrhi << 8) | cmd.vpu.adrlo; - ndw = atom->check(ctx, atom); + cmd.u = atom->cmd[0]; + addr = (cmd.vpu.adrhi << 8) | cmd.vpu.adrlo; + ndw = atom->check(ctx, atom); - BEGIN_BATCH_NO_AUTOSTATE(ndw); + r300_emit_vpu(r300, &atom->cmd[1], vpu_count(atom->cmd) * 4, addr); +} - ndw -= 5; - OUT_BATCH_REGVAL(R300_VAP_PVS_VECTOR_INDX_REG, addr); - OUT_BATCH(CP_PACKET0(R300_VAP_PVS_UPLOAD_DATA, ndw-1) | RADEON_ONE_REG_WR); - OUT_BATCH_TABLE(&atom->cmd[1], ndw); - OUT_BATCH_REGVAL(R300_VAP_PVS_STATE_FLUSH_REG, 0); - END_BATCH(); +void r500_emit_fp(struct r300_context *r300, + uint32_t *data, + unsigned len, + uint32_t addr, + unsigned type, + unsigned clamp) +{ + BATCH_LOCALS(&r300->radeon); + + addr |= (type << 16); + addr |= (clamp << 17); + + BEGIN_BATCH_NO_AUTOSTATE(len + 3); + OUT_BATCH(CP_PACKET0(R500_GA_US_VECTOR_INDEX, 0)); + OUT_BATCH(addr); + OUT_BATCH(CP_PACKET0(R500_GA_US_VECTOR_DATA, len-1) | RADEON_ONE_REG_WR); + OUT_BATCH_TABLE(data, len); + END_BATCH(); } -void emit_r500fp(GLcontext *ctx, struct radeon_state_atom * atom) +static void emit_r500fp_atom(GLcontext *ctx, struct radeon_state_atom * atom) { - r300ContextPtr r300 = R300_CONTEXT(ctx); - BATCH_LOCALS(&r300->radeon); - 
drm_r300_cmd_header_t cmd; - uint32_t addr, ndw, sz; - int type, clamp; - - ndw = atom->check(ctx, atom); - - cmd.u = atom->cmd[0]; - sz = cmd.r500fp.count; - addr = ((cmd.r500fp.adrhi_flags & 1) << 8) | cmd.r500fp.adrlo; - type = !!(cmd.r500fp.adrhi_flags & R500FP_CONSTANT_TYPE); - clamp = !!(cmd.r500fp.adrhi_flags & R500FP_CONSTANT_CLAMP); - - addr |= (type << 16); - addr |= (clamp << 17); - - BEGIN_BATCH_NO_AUTOSTATE(ndw); - OUT_BATCH(CP_PACKET0(R500_GA_US_VECTOR_INDEX, 0)); - OUT_BATCH(addr); - ndw-=3; - OUT_BATCH(CP_PACKET0(R500_GA_US_VECTOR_DATA, ndw-1) | RADEON_ONE_REG_WR); - OUT_BATCH_TABLE(&atom->cmd[1], ndw); - END_BATCH(); + r300ContextPtr r300 = R300_CONTEXT(ctx); + drm_r300_cmd_header_t cmd; + uint32_t addr, count; + int type, clamp; + + cmd.u = atom->cmd[0]; + addr = ((cmd.r500fp.adrhi_flags & 1) << 8) | cmd.r500fp.adrlo; + type = !!(cmd.r500fp.adrhi_flags & R500FP_CONSTANT_TYPE); + clamp = !!(cmd.r500fp.adrhi_flags & R500FP_CONSTANT_CLAMP); + + if (type) { + count = r500fp_count(atom->cmd) * 4; + } else { + count = r500fp_count(atom->cmd) * 6; + } + + r500_emit_fp(r300, &atom->cmd[1], count, addr, type, clamp); } static int check_tex_offsets(GLcontext *ctx, struct radeon_state_atom * atom) @@ -256,110 +271,136 @@ static int check_cb_offset(GLcontext *ctx, struct radeon_state_atom * atom) return dw; } -static void emit_cb_offset(GLcontext *ctx, struct radeon_state_atom * atom) +static void emit_scissor(struct r300_context *r300, + unsigned width, + unsigned height) { - r300ContextPtr r300 = R300_CONTEXT(ctx); - BATCH_LOCALS(&r300->radeon); - struct radeon_renderbuffer *rrb; - uint32_t cbpitch; - uint32_t offset = r300->radeon.state.color.draw_offset; - uint32_t dw = 6; - int i; + int i; + BATCH_LOCALS(&r300->radeon); + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + BEGIN_BATCH_NO_AUTOSTATE(3); + OUT_BATCH_REGSEQ(R300_SC_SCISSORS_TL, 2); + OUT_BATCH(0); + OUT_BATCH(((width - 1) << R300_SCISSORS_X_SHIFT) | + ((height - 1) << 
R300_SCISSORS_Y_SHIFT)); + END_BATCH(); + BEGIN_BATCH_NO_AUTOSTATE(16); + for (i = 0; i < 4; i++) { + OUT_BATCH_REGSEQ(R300_SC_CLIPRECT_TL_0 + (i * 8), 2); + OUT_BATCH((0 << R300_CLIPRECT_X_SHIFT) | (0 << R300_CLIPRECT_Y_SHIFT)); + OUT_BATCH(((width - 1) << R300_CLIPRECT_X_SHIFT) | ((height - 1) << R300_CLIPRECT_Y_SHIFT)); + } + OUT_BATCH_REGSEQ(R300_SC_CLIP_RULE, 1); + OUT_BATCH(0xAAAA); + OUT_BATCH_REGSEQ(R300_SC_SCREENDOOR, 1); + OUT_BATCH(0xffffff); + END_BATCH(); + } else { + BEGIN_BATCH_NO_AUTOSTATE(3); + OUT_BATCH_REGSEQ(R300_SC_SCISSORS_TL, 2); + OUT_BATCH((R300_SCISSORS_OFFSET << R300_SCISSORS_X_SHIFT) | + (R300_SCISSORS_OFFSET << R300_SCISSORS_Y_SHIFT)); + OUT_BATCH(((width + R300_SCISSORS_OFFSET - 1) << R300_SCISSORS_X_SHIFT) | + ((height + R300_SCISSORS_OFFSET - 1) << R300_SCISSORS_Y_SHIFT)); + END_BATCH(); + BEGIN_BATCH_NO_AUTOSTATE(16); + for (i = 0; i < 4; i++) { + OUT_BATCH_REGSEQ(R300_SC_CLIPRECT_TL_0 + (i * 8), 2); + OUT_BATCH((R300_SCISSORS_OFFSET << R300_CLIPRECT_X_SHIFT) | (R300_SCISSORS_OFFSET << R300_CLIPRECT_Y_SHIFT)); + OUT_BATCH(((R300_SCISSORS_OFFSET + width - 1) << R300_CLIPRECT_X_SHIFT) | + ((R300_SCISSORS_OFFSET + height - 1) << R300_CLIPRECT_Y_SHIFT)); + } + OUT_BATCH_REGSEQ(R300_SC_CLIP_RULE, 1); + OUT_BATCH(0xAAAA); + OUT_BATCH_REGSEQ(R300_SC_SCREENDOOR, 1); + OUT_BATCH(0xffffff); + END_BATCH(); + } +} - rrb = radeon_get_colorbuffer(&r300->radeon); - if (!rrb || !rrb->bo) { - fprintf(stderr, "no rrb\n"); - return; - } +void r300_emit_cb_setup(struct r300_context *r300, + struct radeon_bo *bo, + uint32_t offset, + GLuint format, + unsigned cpp, + unsigned pitch) +{ + BATCH_LOCALS(&r300->radeon); + uint32_t cbpitch = pitch / cpp; + uint32_t dw = 6; - if (RADEON_DEBUG & RADEON_STATE) - fprintf(stderr,"rrb is %p %d %dx%d\n", rrb, offset, rrb->base.Width, rrb->base.Height); - cbpitch = (rrb->pitch / rrb->cpp); - if (rrb->cpp == 4) - cbpitch |= R300_COLOR_FORMAT_ARGB8888; - else switch (rrb->base.Format) { + assert(offset % 32 == 0); + + 
switch (format) { case MESA_FORMAT_RGB565: - assert(_mesa_little_endian()); - cbpitch |= R300_COLOR_FORMAT_RGB565; - break; + assert(_mesa_little_endian()); + cbpitch |= R300_COLOR_FORMAT_RGB565; + break; case MESA_FORMAT_RGB565_REV: - assert(!_mesa_little_endian()); - cbpitch |= R300_COLOR_FORMAT_RGB565; - break; + assert(!_mesa_little_endian()); + cbpitch |= R300_COLOR_FORMAT_RGB565; + break; case MESA_FORMAT_ARGB4444: - assert(_mesa_little_endian()); - cbpitch |= R300_COLOR_FORMAT_ARGB4444; - break; + assert(_mesa_little_endian()); + cbpitch |= R300_COLOR_FORMAT_ARGB4444; + break; case MESA_FORMAT_ARGB4444_REV: - assert(!_mesa_little_endian()); - cbpitch |= R300_COLOR_FORMAT_ARGB4444; - break; - case MESA_FORMAT_ARGB1555: - assert(_mesa_little_endian()); - cbpitch |= R300_COLOR_FORMAT_ARGB1555; - break; - case MESA_FORMAT_ARGB1555_REV: - assert(!_mesa_little_endian()); - cbpitch |= R300_COLOR_FORMAT_ARGB1555; - break; - default: - _mesa_problem(ctx, "unexpected format in emit_cb_offset()"); - } + assert(!_mesa_little_endian()); + cbpitch |= R300_COLOR_FORMAT_ARGB4444; + break; + case MESA_FORMAT_ARGB1555: + assert(_mesa_little_endian()); + cbpitch |= R300_COLOR_FORMAT_ARGB1555; + break; + case MESA_FORMAT_ARGB1555_REV: + assert(!_mesa_little_endian()); + cbpitch |= R300_COLOR_FORMAT_ARGB1555; + break; + default: + if (cpp == 4) { + cbpitch |= R300_COLOR_FORMAT_ARGB8888; + } else { + _mesa_problem(r300->radeon.glCtx, "unexpected format in emit_cb_offset()");; + } + break; + } - if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) - cbpitch |= R300_COLOR_TILE_ENABLE; + if (bo->flags & RADEON_BO_FLAGS_MACRO_TILE) + cbpitch |= R300_COLOR_TILE_ENABLE; + + if (r300->radeon.radeonScreen->kernel_mm) + dw += 2; + + BEGIN_BATCH_NO_AUTOSTATE(dw); + OUT_BATCH_REGSEQ(R300_RB3D_COLOROFFSET0, 1); + OUT_BATCH_RELOC(offset, bo, offset, 0, RADEON_GEM_DOMAIN_VRAM, 0); + OUT_BATCH_REGSEQ(R300_RB3D_COLORPITCH0, 1); + if (!r300->radeon.radeonScreen->kernel_mm) + OUT_BATCH(cbpitch); + 
else + OUT_BATCH_RELOC(cbpitch, bo, cbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0); + END_BATCH(); +} + +static void emit_cb_offset_atom(GLcontext *ctx, struct radeon_state_atom * atom) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + struct radeon_renderbuffer *rrb; + uint32_t offset = r300->radeon.state.color.draw_offset; + + rrb = radeon_get_colorbuffer(&r300->radeon); + if (!rrb || !rrb->bo) { + fprintf(stderr, "no rrb\n"); + return; + } + + if (RADEON_DEBUG & RADEON_STATE) + fprintf(stderr,"rrb is %p %d %dx%d\n", rrb, offset, rrb->base.Width, rrb->base.Height); + + r300_emit_cb_setup(r300, rrb->bo, offset, rrb->base.Format, rrb->cpp, rrb->pitch); - if (r300->radeon.radeonScreen->kernel_mm) - dw += 2; - BEGIN_BATCH_NO_AUTOSTATE(dw); - OUT_BATCH_REGSEQ(R300_RB3D_COLOROFFSET0, 1); - OUT_BATCH_RELOC(offset, rrb->bo, offset, 0, RADEON_GEM_DOMAIN_VRAM, 0); - OUT_BATCH_REGSEQ(R300_RB3D_COLORPITCH0, 1); - if (!r300->radeon.radeonScreen->kernel_mm) - OUT_BATCH(cbpitch); - else - OUT_BATCH_RELOC(cbpitch, rrb->bo, cbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0); - END_BATCH(); if (r300->radeon.radeonScreen->driScreen->dri2.enabled) { - if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { - BEGIN_BATCH_NO_AUTOSTATE(3); - OUT_BATCH_REGSEQ(R300_SC_SCISSORS_TL, 2); - OUT_BATCH(0); - OUT_BATCH(((rrb->base.Width - 1) << R300_SCISSORS_X_SHIFT) | - ((rrb->base.Height - 1) << R300_SCISSORS_Y_SHIFT)); - END_BATCH(); - BEGIN_BATCH_NO_AUTOSTATE(16); - for (i = 0; i < 4; i++) { - OUT_BATCH_REGSEQ(R300_SC_CLIPRECT_TL_0 + (i * 8), 2); - OUT_BATCH((0 << R300_CLIPRECT_X_SHIFT) | (0 << R300_CLIPRECT_Y_SHIFT)); - OUT_BATCH(((rrb->base.Width - 1) << R300_CLIPRECT_X_SHIFT) | ((rrb->base.Height - 1) << R300_CLIPRECT_Y_SHIFT)); - } - OUT_BATCH_REGSEQ(R300_SC_CLIP_RULE, 1); - OUT_BATCH(0xAAAA); - OUT_BATCH_REGSEQ(R300_SC_SCREENDOOR, 1); - OUT_BATCH(0xffffff); - END_BATCH(); - } else { - BEGIN_BATCH_NO_AUTOSTATE(3); - OUT_BATCH_REGSEQ(R300_SC_SCISSORS_TL, 2); - OUT_BATCH((R300_SCISSORS_OFFSET << 
R300_SCISSORS_X_SHIFT) | - (R300_SCISSORS_OFFSET << R300_SCISSORS_Y_SHIFT)); - OUT_BATCH(((rrb->base.Width + R300_SCISSORS_OFFSET - 1) << R300_SCISSORS_X_SHIFT) | - ((rrb->base.Height + R300_SCISSORS_OFFSET - 1) << R300_SCISSORS_Y_SHIFT)); - END_BATCH(); - BEGIN_BATCH_NO_AUTOSTATE(16); - for (i = 0; i < 4; i++) { - OUT_BATCH_REGSEQ(R300_SC_CLIPRECT_TL_0 + (i * 8), 2); - OUT_BATCH((R300_SCISSORS_OFFSET << R300_CLIPRECT_X_SHIFT) | (R300_SCISSORS_OFFSET << R300_CLIPRECT_Y_SHIFT)); - OUT_BATCH(((R300_SCISSORS_OFFSET + rrb->base.Width - 1) << R300_CLIPRECT_X_SHIFT) | - ((R300_SCISSORS_OFFSET + rrb->base.Height - 1) << R300_CLIPRECT_Y_SHIFT)); - } - OUT_BATCH_REGSEQ(R300_SC_CLIP_RULE, 1); - OUT_BATCH(0xAAAA); - OUT_BATCH_REGSEQ(R300_SC_SCREENDOOR, 1); - OUT_BATCH(0xffffff); - END_BATCH(); - } + emit_scissor(r300, rrb->base.Width, rrb->base.Height); } } @@ -455,7 +496,7 @@ static int check_variable(GLcontext *ctx, struct radeon_state_atom *atom) return cnt ? cnt + 1 : 0; } -int check_r500fp(GLcontext *ctx, struct radeon_state_atom *atom) +static int check_r500fp(GLcontext *ctx, struct radeon_state_atom *atom) { int cnt; r300ContextPtr r300 = R300_CONTEXT(ctx); @@ -467,7 +508,7 @@ int check_r500fp(GLcontext *ctx, struct radeon_state_atom *atom) return cnt ? 
(cnt * 6) + extra : 0; } -int check_r500fp_const(GLcontext *ctx, struct radeon_state_atom *atom) +static int check_r500fp_const(GLcontext *ctx, struct radeon_state_atom *atom) { int cnt; r300ContextPtr r300 = R300_CONTEXT(ctx); @@ -644,13 +685,13 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.r500fp.cmd[R300_FPI_CMD_0] = cmdr500fp(r300->radeon.radeonScreen, 0, 0, 0, 0); if (r300->radeon.radeonScreen->kernel_mm) - r300->hw.r500fp.emit = emit_r500fp; + r300->hw.r500fp.emit = emit_r500fp_atom; ALLOC_STATE(r500fp_const, r500fp_const, R500_FPP_CMDSIZE, 0); r300->hw.r500fp_const.cmd[R300_FPI_CMD_0] = cmdr500fp(r300->radeon.radeonScreen, 0, 0, 1, 0); if (r300->radeon.radeonScreen->kernel_mm) - r300->hw.r500fp_const.emit = emit_r500fp; + r300->hw.r500fp_const.emit = emit_r500fp_atom; } else { ALLOC_STATE(fp, always, R300_FP_CMDSIZE, 0); r300->hw.fp.cmd[R300_FP_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_CONFIG, 3); @@ -694,7 +735,7 @@ void r300InitCmdBuf(r300ContextPtr r300) ALLOC_STATE(rop, always, 2, 0); r300->hw.rop.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_ROPCNTL, 1); ALLOC_STATE(cb, cb_offset, R300_CB_CMDSIZE, 0); - r300->hw.cb.emit = &emit_cb_offset; + r300->hw.cb.emit = &emit_cb_offset_atom; ALLOC_STATE(rb3d_dither_ctl, always, 10, 0); r300->hw.rb3d_dither_ctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_DITHER_CTL, 9); ALLOC_STATE(rb3d_aaresolve_ctl, always, 2, 0); @@ -736,42 +777,24 @@ void r300InitCmdBuf(r300ContextPtr r300) /* VPU only on TCL */ if (has_tcl) { int i; - if (r300->radeon.radeonScreen->kernel_mm) { - ALLOC_STATE(vap_flush, always, 10, 0); - /* flush processing vertices */ - r300->hw.vap_flush.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SC_SCREENDOOR, 1); - r300->hw.vap_flush.cmd[1] = 0; - r300->hw.vap_flush.cmd[2] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_DSTCACHE_CTLSTAT, 1); - r300->hw.vap_flush.cmd[3] = R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D; - r300->hw.vap_flush.cmd[4] = 
cmdpacket0(r300->radeon.radeonScreen, RADEON_WAIT_UNTIL, 1); - r300->hw.vap_flush.cmd[5] = RADEON_WAIT_3D_IDLECLEAN; - r300->hw.vap_flush.cmd[6] = cmdpacket0(r300->radeon.radeonScreen, R300_SC_SCREENDOOR, 1); - r300->hw.vap_flush.cmd[7] = 0xffffff; - r300->hw.vap_flush.cmd[8] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PVS_STATE_FLUSH_REG, 1); - r300->hw.vap_flush.cmd[9] = 0; - } else { - ALLOC_STATE(vap_flush, never, 10, 0); - } - - ALLOC_STATE(vpi, vpu, R300_VPI_CMDSIZE, 0); r300->hw.vpi.cmd[0] = cmdvpu(r300->radeon.radeonScreen, R300_PVS_CODE_START, 0); if (r300->radeon.radeonScreen->kernel_mm) - r300->hw.vpi.emit = emit_vpu; + r300->hw.vpi.emit = emit_vpu_state; if (is_r500) { ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0); r300->hw.vpp.cmd[0] = cmdvpu(r300->radeon.radeonScreen, R500_PVS_CONST_START, 0); if (r300->radeon.radeonScreen->kernel_mm) - r300->hw.vpp.emit = emit_vpu; + r300->hw.vpp.emit = emit_vpu_state; ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0); r300->hw.vps.cmd[0] = cmdvpu(r300->radeon.radeonScreen, R500_POINT_VPORT_SCALE_OFFSET, 1); if (r300->radeon.radeonScreen->kernel_mm) - r300->hw.vps.emit = emit_vpu; + r300->hw.vps.emit = emit_vpu_state; for (i = 0; i < 6; i++) { ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0); @@ -779,20 +802,20 @@ void r300InitCmdBuf(r300ContextPtr r300) cmdvpu(r300->radeon.radeonScreen, R500_PVS_UCP_START + i, 1); if (r300->radeon.radeonScreen->kernel_mm) - r300->hw.vpucp[i].emit = emit_vpu; + r300->hw.vpucp[i].emit = emit_vpu_state; } } else { ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0); r300->hw.vpp.cmd[0] = cmdvpu(r300->radeon.radeonScreen, R300_PVS_CONST_START, 0); if (r300->radeon.radeonScreen->kernel_mm) - r300->hw.vpp.emit = emit_vpu; + r300->hw.vpp.emit = emit_vpu_state; ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0); r300->hw.vps.cmd[0] = cmdvpu(r300->radeon.radeonScreen, R300_POINT_VPORT_SCALE_OFFSET, 1); if (r300->radeon.radeonScreen->kernel_mm) - r300->hw.vps.emit = emit_vpu; + r300->hw.vps.emit = emit_vpu_state; 
for (i = 0; i < 6; i++) { ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0); @@ -800,7 +823,7 @@ void r300InitCmdBuf(r300ContextPtr r300) cmdvpu(r300->radeon.radeonScreen, R300_PVS_UCP_START + i, 1); if (r300->radeon.radeonScreen->kernel_mm) - r300->hw.vpucp[i].emit = emit_vpu; + r300->hw.vpucp[i].emit = emit_vpu_state; } } } diff --git a/r300/r300_cmdbuf.h b/r300/r300_cmdbuf.h index 1b703e5..0e68da9 100644 --- a/r300/r300_cmdbuf.h +++ b/r300/r300_cmdbuf.h @@ -44,14 +44,26 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define FIREAOS_BUFSZ (3) #define SCISSORS_BUFSZ (3) -extern void r300InitCmdBuf(r300ContextPtr r300); +void r300InitCmdBuf(r300ContextPtr r300); void r300_emit_scissor(GLcontext *ctx); -void emit_vpu(GLcontext *ctx, struct radeon_state_atom * atom); -int check_vpu(GLcontext *ctx, struct radeon_state_atom *atom); +void r300_emit_vpu(struct r300_context *ctx, + uint32_t *data, + unsigned len, + uint32_t addr); -void emit_r500fp(GLcontext *ctx, struct radeon_state_atom * atom); -int check_r500fp(GLcontext *ctx, struct radeon_state_atom *atom); -int check_r500fp_const(GLcontext *ctx, struct radeon_state_atom *atom); +void r500_emit_fp(struct r300_context *r300, + uint32_t *data, + unsigned len, + uint32_t addr, + unsigned type, + unsigned clamp); -#endif /* __R300_CMDBUF_H__ */ +void r300_emit_cb_setup(struct r300_context *r300, + struct radeon_bo *bo, + uint32_t offset, + GLuint format, + unsigned cpp, + unsigned pitch); + +#endif /* __R300_CMDBUF_H__ */ diff --git a/r300/r300_context.c b/r300/r300_context.c index 5f07b95..ff35cd5 100644 --- a/r300/r300_context.c +++ b/r300/r300_context.c @@ -40,9 +40,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "main/context.h" #include "main/simple_list.h" #include "main/imports.h" -#include "main/matrix.h" #include "main/extensions.h" -#include "main/state.h" #include "main/bufferobj.h" #include "main/texobj.h" @@ -52,16 +50,15 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "tnl/tnl.h" #include "tnl/t_pipeline.h" -#include "tnl/t_vp_build.h" #include "drivers/common/driverfuncs.h" +#include "drivers/common/meta.h" #include "r300_context.h" -#include "radeon_context.h" #include "radeon_span.h" +#include "r300_blit.h" #include "r300_cmdbuf.h" #include "r300_state.h" -#include "r300_ioctl.h" #include "r300_tex.h" #include "r300_emit.h" #include "r300_swtcl.h" @@ -69,7 +66,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "radeon_buffer_objects.h" #include "radeon_queryobj.h" -#include "vblank.h" #include "utils.h" #include "xmlpool.h" /* for symbolic values of enum-type options */ @@ -92,7 +88,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "main/remap_helper.h" - static const struct dri_extension card_extensions[] = { /* *INDENT-OFF* */ {"GL_ARB_depth_texture", NULL}, @@ -114,7 +109,6 @@ static const struct dri_extension card_extensions[] = { {"GL_EXT_blend_func_separate", GL_EXT_blend_func_separate_functions}, {"GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions}, {"GL_EXT_blend_subtract", NULL}, - {"GL_EXT_packed_depth_stencil", NULL}, {"GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, {"GL_EXT_gpu_program_parameters", GL_EXT_gpu_program_parameters_functions}, {"GL_EXT_provoking_vertex", GL_EXT_provoking_vertex_functions }, @@ -324,6 +318,9 @@ static void r300_init_vtbl(radeonContextPtr radeon) radeon->vtbl.emit_query_finish = rv530_emit_query_finish_single_z; } else radeon->vtbl.emit_query_finish = r300_emit_query_finish; + + radeon->vtbl.check_blit = r300_check_blit; + radeon->vtbl.blit = r300_blit; } static void r300InitConstValues(GLcontext *ctx, radeonScreenPtr screen) @@ -336,6 +333,10 @@ static void r300InitConstValues(GLcontext *ctx, radeonScreenPtr screen) driQueryOptioni(&r300->radeon.optionCache, "texture_coord_units"); ctx->Const.MaxTextureUnits = MIN2(ctx->Const.MaxTextureImageUnits, ctx->Const.MaxTextureCoordUnits); + ctx->Const.MaxCombinedTextureImageUnits = + ctx->Const.MaxVertexTextureImageUnits + + ctx->Const.MaxTextureImageUnits; + ctx->Const.MaxTextureMaxAnisotropy = 16.0; ctx->Const.MaxTextureLodBias = 16.0; @@ -344,11 +345,13 @@ static void r300InitConstValues(GLcontext *ctx, radeonScreenPtr screen) ctx->Const.MaxTextureLevels = 13; ctx->Const.MaxCubeTextureLevels = 13; ctx->Const.MaxTextureRectSize = 4096; + ctx->Const.MaxRenderbufferSize = 4096; } else { ctx->Const.MaxTextureLevels = 12; ctx->Const.MaxCubeTextureLevels = 12; ctx->Const.MaxTextureRectSize = 2048; + ctx->Const.MaxRenderbufferSize = 2048; } ctx->Const.MinPointSize = 1.0; @@ -362,6 +365,7 @@ static void r300InitConstValues(GLcontext *ctx, radeonScreenPtr screen) ctx->Const.MaxLineWidthAA = 
R300_LINESIZE_MAX; ctx->Const.MaxDrawBuffers = 1; + ctx->Const.MaxColorAttachments = 1; /* currently bogus data */ if (r300->options.hw_tcl_enabled) { @@ -449,15 +453,27 @@ static void r300InitGLExtensions(GLcontext *ctx) if (!r300->radeon.radeonScreen->drmSupportsOcclusionQueries) { _mesa_disable_extension(ctx, "GL_ARB_occlusion_query"); } + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV350) + _mesa_enable_extension(ctx, "GL_ARB_half_float_vertex"); + + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + _mesa_enable_extension(ctx, "GL_EXT_packed_depth_stencil"); +} + +static void r300InitIoctlFuncs(struct dd_function_table *functions) +{ + functions->Clear = _mesa_meta_Clear; + functions->Finish = radeonFinish; + functions->Flush = radeonFlush; } /* Create the device specific rendering context. */ GLboolean r300CreateContext(const __GLcontextModes * glVisual, - __DRIcontextPrivate * driContextPriv, + __DRIcontext * driContextPriv, void *sharedContextPrivate) { - __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv; + __DRIscreen *sPriv = driContextPriv->driScreenPriv; radeonScreenPtr screen = (radeonScreenPtr) (sPriv->private); struct dd_function_table functions; r300ContextPtr r300; @@ -479,7 +495,7 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, _mesa_init_driver_functions(&functions); r300InitIoctlFuncs(&functions); r300InitStateFuncs(&functions); - r300InitTextureFuncs(&functions); + r300InitTextureFuncs(&r300->radeon, &functions); r300InitShaderFuncs(&functions); radeonInitQueryObjFunctions(&functions); radeonInitBufferObjectFuncs(&functions); @@ -530,6 +546,7 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, r300InitSwtcl(ctx); } + r300_blit_init(r300); radeon_fbo_init(&r300->radeon); radeonInitSpanFuncs( ctx ); r300InitCmdBuf(r300); diff --git a/r300/r300_context.h b/r300/r300_context.h index 518d5cd..df7115e 100644 --- a/r300/r300_context.h +++ b/r300/r300_context.h @@ -355,7 +355,6 @@ 
struct r300_hw_state { struct radeon_state_atom zb_hiz_offset; /* (4F44) */ struct radeon_state_atom zb_hiz_pitch; /* (4F54) */ - struct radeon_state_atom vap_flush; struct radeon_state_atom vpi; /* vp instructions */ struct radeon_state_atom vpp; /* vp parameters */ struct radeon_state_atom vps; /* vertex point size (?) */ @@ -533,14 +532,19 @@ struct r300_context { uint32_t fallback; + struct { + struct r300_vertex_program_code vp_code; + struct rX00_fragment_program_code fp_code; + } blit; + DECLARE_RENDERINPUTS(render_inputs_bitset); }; #define R300_CONTEXT(ctx) ((r300ContextPtr)(ctx->DriverCtx)) -extern void r300DestroyContext(__DRIcontextPrivate * driContextPriv); +extern void r300DestroyContext(__DRIcontext * driContextPriv); extern GLboolean r300CreateContext(const __GLcontextModes * glVisual, - __DRIcontextPrivate * driContextPriv, + __DRIcontext * driContextPriv, void *sharedContextPrivate); extern void r300InitShaderFuncs(struct dd_function_table *functions); diff --git a/r300/r300_draw.c b/r300/r300_draw.c index 06a0490..282c0e1 100644 --- a/r300/r300_draw.c +++ b/r300/r300_draw.c @@ -29,7 +29,6 @@ #include "main/glheader.h" #include "main/context.h" #include "main/state.h" -/* #include "main/api_validate.h" */ #include "main/enums.h" #include "main/simple_list.h" @@ -47,8 +46,6 @@ #include "tnl/tnl.h" #include "tnl/t_vp_build.h" #include "vbo/vbo_context.h" -#include "swrast/swrast.h" -#include "swrast_setup/swrast_setup.h" static int getTypeSize(GLenum type) @@ -56,6 +53,8 @@ static int getTypeSize(GLenum type) switch (type) { case GL_DOUBLE: return sizeof(GLdouble); + case GL_HALF_FLOAT: + return sizeof(GLhalfARB); case GL_FLOAT: return sizeof(GLfloat); case GL_INT: @@ -100,7 +99,7 @@ static void r300FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer GLubyte *in = (GLubyte *)src_ptr; radeonAllocDmaRegion(&r300->radeon, &r300->ind_buf.bo, &r300->ind_buf.bo_offset, size, 4); - + radeon_bo_map(r300->ind_buf.bo, 1); 
assert(r300->ind_buf.bo->ptr != NULL); out = (GLuint *)ADD_POINTERS(r300->ind_buf.bo->ptr, r300->ind_buf.bo_offset); @@ -111,7 +110,7 @@ static void r300FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer if (i < mesa_ind_buf->count) { *out++ = in[i]; } - + radeon_bo_unmap(r300->ind_buf.bo); #if MESA_BIG_ENDIAN } else { /* if (mesa_ind_buf->type == GL_UNSIGNED_SHORT) */ GLushort *in = (GLushort *)src_ptr; @@ -120,6 +119,7 @@ static void r300FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer radeonAllocDmaRegion(&r300->radeon, &r300->ind_buf.bo, &r300->ind_buf.bo_offset, size, 4); + radeon_bo_map(r300->ind_buf.bo, 1); assert(r300->ind_buf.bo->ptr != NULL); out = (GLuint *)ADD_POINTERS(r300->ind_buf.bo->ptr, r300->ind_buf.bo_offset); @@ -130,6 +130,7 @@ static void r300FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer if (i < mesa_ind_buf->count) { *out++ = in[i]; } + radeon_bo_unmap(r300->ind_buf.bo); #endif } @@ -173,10 +174,12 @@ static void r300SetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer radeonAllocDmaRegion(&r300->radeon, &r300->ind_buf.bo, &r300->ind_buf.bo_offset, size, 4); + radeon_bo_map(r300->ind_buf.bo, 1); assert(r300->ind_buf.bo->ptr != NULL); dst_ptr = ADD_POINTERS(r300->ind_buf.bo->ptr, r300->ind_buf.bo_offset); - _mesa_memcpy(dst_ptr, src_ptr, size); + memcpy(dst_ptr, src_ptr, size); + radeon_bo_unmap(r300->ind_buf.bo); r300->ind_buf.is_32bit = (mesa_ind_buf->type == GL_UNSIGNED_INT); r300->ind_buf.count = mesa_ind_buf->count; @@ -242,6 +245,7 @@ static void r300ConvertAttrib(GLcontext *ctx, int count, const struct gl_client_ } radeonAllocDmaRegion(&r300->radeon, &attr->bo, &attr->bo_offset, sizeof(GLfloat) * input->Size * count, 32); + radeon_bo_map(attr->bo, 1); dst_ptr = (GLfloat *)ADD_POINTERS(attr->bo->ptr, attr->bo_offset); radeon_print(RADEON_FALLBACKS, RADEON_IMPORTANT, @@ -280,6 +284,7 @@ static void r300ConvertAttrib(GLcontext *ctx, int count, const struct gl_client_ break; } + 
radeon_bo_unmap(attr->bo); if (mapped_named_bo) { ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj); } @@ -294,6 +299,8 @@ static void r300AlignDataToDword(GLcontext *ctx, const struct gl_client_array *i radeonAllocDmaRegion(&r300->radeon, &attr->bo, &attr->bo_offset, size, 32); + radeon_bo_map(attr->bo, 1); + if (!input->BufferObj->Pointer) { ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj); mapped_named_bo = GL_TRUE; @@ -307,7 +314,7 @@ static void r300AlignDataToDword(GLcontext *ctx, const struct gl_client_array *i int i; for (i = 0; i < count; ++i) { - _mesa_memcpy(dst_ptr, src_ptr, input->StrideB); + memcpy(dst_ptr, src_ptr, input->StrideB); src_ptr += input->StrideB; dst_ptr += dst_stride; } @@ -317,6 +324,7 @@ static void r300AlignDataToDword(GLcontext *ctx, const struct gl_client_array *i ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj); } + radeon_bo_unmap(attr->bo); attr->stride = dst_stride; } @@ -324,7 +332,7 @@ static void r300TranslateAttrib(GLcontext *ctx, GLuint attr, int count, const st { r300ContextPtr r300 = R300_CONTEXT(ctx); struct r300_vertex_buffer *vbuf = &r300->vbuf; - struct vertex_attribute r300_attr; + struct vertex_attribute r300_attr = { 0 }; GLenum type; GLuint stride; @@ -376,6 +384,18 @@ static void r300TranslateAttrib(GLcontext *ctx, GLuint attr, int count, const st r300_attr._signed = 0; r300_attr.normalize = 0; break; + case GL_HALF_FLOAT: + switch (input->Size) { + case 1: + case 2: + r300_attr.data_type = R300_DATA_TYPE_FLT16_2; + break; + case 3: + case 4: + r300_attr.data_type = R300_DATA_TYPE_FLT16_4; + break; + } + break; case GL_SHORT: r300_attr._signed = 1; r300_attr.normalize = input->Normalized; @@ -527,6 +547,7 @@ static void r300AllocDmaRegions(GLcontext *ctx, const struct gl_client_array *in } radeonAllocDmaRegion(&r300->radeon, &vbuf->attribs[index].bo, &vbuf->attribs[index].bo_offset, size, 32); + radeon_bo_map(vbuf->attribs[index].bo, 1); 
assert(vbuf->attribs[index].bo->ptr != NULL); dst = (uint32_t *)ADD_POINTERS(vbuf->attribs[index].bo->ptr, vbuf->attribs[index].bo_offset); switch (vbuf->attribs[index].dwords) { @@ -536,6 +557,7 @@ static void r300AllocDmaRegions(GLcontext *ctx, const struct gl_client_array *in case 4: radeonEmitVec16(dst, input[i]->Ptr, input[i]->StrideB, local_count); break; default: assert(0); break; } + radeon_bo_unmap(vbuf->attribs[index].bo); } } @@ -583,13 +605,23 @@ static void r300FreeData(GLcontext *ctx) } } -static GLuint r300PredictTryDrawPrimsSize(GLcontext *ctx, GLuint nr_prims) +static GLuint r300PredictTryDrawPrimsSize(GLcontext *ctx, + GLuint nr_prims, const struct _mesa_prim *prim) { struct r300_context *r300 = R300_CONTEXT(ctx); struct r300_vertex_buffer *vbuf = &r300->vbuf; GLboolean flushed; GLuint dwords; GLuint state_size; + int i; + GLuint extra_prims = 0; + + /* Check for primitive splitting. */ + for (i = 0; i < nr_prims; ++i) { + const GLuint num_verts = r300NumVerts(r300, prim[i].count, prim[i].mode); + extra_prims += num_verts/(65535 - 32); + } + nr_prims += extra_prims; dwords = 2*CACHE_FLUSH_BUFSZ; dwords += PRE_EMIT_STATE_BUFSZ; @@ -645,7 +677,7 @@ static GLboolean r300TryDrawPrims(GLcontext *ctx, /* ensure we have the cmd buf space in advance to cover * the state + DMA AOS pointers */ - GLuint emit_end = r300PredictTryDrawPrimsSize(ctx, nr_prims) + GLuint emit_end = r300PredictTryDrawPrimsSize(ctx, nr_prims, prim) + r300->radeon.cmdbuf.cs->cdw; r300SetupIndexBuffer(ctx, ib); diff --git a/r300/r300_emit.c b/r300/r300_emit.c index 07e6223..a24d431 100644 --- a/r300/r300_emit.c +++ b/r300/r300_emit.c @@ -39,19 +39,14 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "main/colormac.h" #include "main/imports.h" #include "main/macros.h" -#include "main/image.h" #include "swrast_setup/swrast_setup.h" -#include "math/m_translate.h" #include "tnl/tnl.h" #include "tnl/t_context.h" #include "r300_context.h" -#include "r300_state.h" #include "r300_emit.h" -#include "r300_ioctl.h" -#include "r300_render.h" -#include "r300_swtcl.h" + GLuint r300VAPInputCntl0(GLcontext * ctx, GLuint InputsRead) { @@ -118,7 +113,7 @@ GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint vp_writes) if (first_free_texcoord > 8) { fprintf(stderr, "\tout of free texcoords\n"); - _mesa_exit(-1); + exit(-1); } return ret; diff --git a/r300/r300_fragprog_common.c b/r300/r300_fragprog_common.c index 267ee81..61ea5e4 100644 --- a/r300/r300_fragprog_common.c +++ b/r300/r300_fragprog_common.c @@ -38,14 +38,12 @@ #include "r300_fragprog_common.h" -#include "shader/program.h" #include "shader/prog_parameter.h" #include "shader/prog_print.h" #include "compiler/radeon_compiler.h" #include "radeon_mesa_to_rc.h" -#include "r300_state.h" static GLuint build_dtm(GLuint depthmode) @@ -74,7 +72,7 @@ static void build_state( { int unit; - _mesa_bzero(state, sizeof(*state)); + memset(state, 0, sizeof(*state)); for(unit = 0; unit < 16; ++unit) { if (fp->Base.ShadowSamplers & (1 << unit)) { @@ -120,7 +118,7 @@ static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler, return; } - rc_transform_fragment_wpos(&compiler->Base, FRAG_ATTRIB_WPOS, fp->wpos_attr); + rc_transform_fragment_wpos(&compiler->Base, FRAG_ATTRIB_WPOS, fp->wpos_attr, GL_FALSE); } /** @@ -223,12 +221,13 @@ static void translate_fragment_program(GLcontext *ctx, struct r300_fragment_prog compiler.state = fp->state; compiler.is_r500 = (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) ? 
GL_TRUE : GL_FALSE; compiler.OutputDepth = FRAG_RESULT_DEPTH; - compiler.OutputColor = FRAG_RESULT_COLOR; + memset(compiler.OutputColor, 0, 4 * sizeof(unsigned)); + compiler.OutputColor[0] = FRAG_RESULT_COLOR; compiler.AllocateHwInputs = &allocate_hw_inputs; if (compiler.Base.Debug) { fflush(stderr); - _mesa_printf("Fragment Program: Initial program:\n"); + printf("Fragment Program: Initial program:\n"); _mesa_print_program(&cont->Base.Base); fflush(stderr); } @@ -272,13 +271,13 @@ struct r300_fragment_program *r300SelectAndTranslateFragmentShader(GLcontext *ct fp = fp_list->progs; while (fp) { - if (_mesa_memcmp(&fp->state, &state, sizeof(state)) == 0) { + if (memcmp(&fp->state, &state, sizeof(state)) == 0) { return r300->selected_fp = fp; } fp = fp->next; } - fp = _mesa_calloc(sizeof(struct r300_fragment_program)); + fp = calloc(1, sizeof(struct r300_fragment_program)); fp->state = state; diff --git a/r300/r300_ioctl.c b/r300/r300_ioctl.c deleted file mode 100644 index 5cb04e2..0000000 --- a/r300/r300_ioctl.c +++ /dev/null @@ -1,782 +0,0 @@ -/* -Copyright (C) The Weather Channel, Inc. 2002. -Copyright (C) 2004 Nicolai Haehnle. -All Rights Reserved. - -The Weather Channel (TM) funded Tungsten Graphics to develop the -initial release of the Radeon 8500 driver under the XFree86 license. -This notice must be preserved. - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice (including the -next paragraph) shall be included in all copies or substantial -portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -**************************************************************************/ - -/** - * \file - * - * \author Keith Whitwell <keith@tungstengraphics.com> - * - * \author Nicolai Haehnle <prefect_@gmx.net> - */ - -#include <sched.h> -#include <errno.h> - -#include "main/glheader.h" -#include "main/imports.h" -#include "main/macros.h" -#include "main/context.h" -#include "main/simple_list.h" -#include "swrast/swrast.h" - -#include "radeon_common.h" -#include "radeon_lock.h" -#include "r300_context.h" -#include "r300_ioctl.h" -#include "r300_cmdbuf.h" -#include "r300_state.h" -#include "r300_vertprog.h" -#include "radeon_reg.h" -#include "r300_emit.h" -#include "r300_context.h" - -#include "vblank.h" - -#define R200_3D_DRAW_IMMD_2 0xC0003500 - -#define CLEARBUFFER_COLOR 0x1 -#define CLEARBUFFER_DEPTH 0x2 -#define CLEARBUFFER_STENCIL 0x4 - -#if 1 - -/** - * Fragment program helper macros - */ - -/* Produce unshifted source selectors */ -#define FP_TMP(idx) (idx) -#define FP_CONST(idx) ((idx) | (1 << 5)) - -/* Produce source/dest selector dword */ -#define FP_SELC_MASK_NO 0 -#define FP_SELC_MASK_X 1 -#define FP_SELC_MASK_Y 2 -#define FP_SELC_MASK_XY 3 -#define FP_SELC_MASK_Z 4 -#define FP_SELC_MASK_XZ 5 -#define FP_SELC_MASK_YZ 6 -#define FP_SELC_MASK_XYZ 7 - -#define FP_SELC(destidx,regmask,outmask,src0,src1,src2) \ - (((destidx) << R300_ALU_DSTC_SHIFT) | \ - (FP_SELC_MASK_##regmask << 23) | \ - (FP_SELC_MASK_##outmask << 26) | \ - ((src0) << R300_ALU_SRC0C_SHIFT) | \ - ((src1) << 
R300_ALU_SRC1C_SHIFT) | \ - ((src2) << R300_ALU_SRC2C_SHIFT)) - -#define FP_SELA_MASK_NO 0 -#define FP_SELA_MASK_W 1 - -#define FP_SELA(destidx,regmask,outmask,src0,src1,src2) \ - (((destidx) << R300_ALU_DSTA_SHIFT) | \ - (FP_SELA_MASK_##regmask << 23) | \ - (FP_SELA_MASK_##outmask << 24) | \ - ((src0) << R300_ALU_SRC0A_SHIFT) | \ - ((src1) << R300_ALU_SRC1A_SHIFT) | \ - ((src2) << R300_ALU_SRC2A_SHIFT)) - -/* Produce unshifted argument selectors */ -#define FP_ARGC(source) R300_ALU_ARGC_##source -#define FP_ARGA(source) R300_ALU_ARGA_##source -#define FP_ABS(arg) ((arg) | (1 << 6)) -#define FP_NEG(arg) ((arg) ^ (1 << 5)) - -/* Produce instruction dword */ -#define FP_INSTRC(opcode,arg0,arg1,arg2) \ - (R300_ALU_OUTC_##opcode | \ - ((arg0) << R300_ALU_ARG0C_SHIFT) | \ - ((arg1) << R300_ALU_ARG1C_SHIFT) | \ - ((arg2) << R300_ALU_ARG2C_SHIFT)) - -#define FP_INSTRA(opcode,arg0,arg1,arg2) \ - (R300_ALU_OUTA_##opcode | \ - ((arg0) << R300_ALU_ARG0A_SHIFT) | \ - ((arg1) << R300_ALU_ARG1A_SHIFT) | \ - ((arg2) << R300_ALU_ARG2A_SHIFT)) - -#endif - -static void r300EmitClearState(GLcontext * ctx); - -static void r300ClearBuffer(r300ContextPtr r300, int flags, - struct radeon_renderbuffer *rrb, - struct radeon_renderbuffer *rrbd) -{ - BATCH_LOCALS(&r300->radeon); - GLcontext *ctx = r300->radeon.glCtx; - __DRIdrawablePrivate *dPriv = radeon_get_drawable(&r300->radeon); - GLuint cbpitch = 0; - r300ContextPtr rmesa = r300; - - if (RADEON_DEBUG & RADEON_IOCTL) - fprintf(stderr, "%s: buffer %p (%i,%i %ix%i)\n", - __FUNCTION__, rrb, dPriv->x, dPriv->y, - dPriv->w, dPriv->h); - - if (rrb) { - cbpitch = (rrb->pitch / rrb->cpp); - if (rrb->cpp == 4) - cbpitch |= R300_COLOR_FORMAT_ARGB8888; - else - cbpitch |= R300_COLOR_FORMAT_RGB565; - - if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE){ - cbpitch |= R300_COLOR_TILE_ENABLE; - } - } - - /* TODO in bufmgr */ - cp_wait(&r300->radeon, R300_WAIT_3D | R300_WAIT_3D_CLEAN); - end_3d(&rmesa->radeon); - - if (flags & CLEARBUFFER_COLOR) { - 
assert(rrb != 0); - BEGIN_BATCH_NO_AUTOSTATE(6); - OUT_BATCH_REGSEQ(R300_RB3D_COLOROFFSET0, 1); - OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); - OUT_BATCH_REGVAL(R300_RB3D_COLORPITCH0, cbpitch); - END_BATCH(); - } -#if 1 - if (flags & (CLEARBUFFER_DEPTH | CLEARBUFFER_STENCIL)) { - uint32_t zbpitch = (rrbd->pitch / rrbd->cpp); - if (rrbd->bo->flags & RADEON_BO_FLAGS_MACRO_TILE){ - zbpitch |= R300_DEPTHMACROTILE_ENABLE; - } - if (rrbd->bo->flags & RADEON_BO_FLAGS_MICRO_TILE){ - zbpitch |= R300_DEPTHMICROTILE_TILED; - } - BEGIN_BATCH_NO_AUTOSTATE(6); - OUT_BATCH_REGSEQ(R300_ZB_DEPTHOFFSET, 1); - OUT_BATCH_RELOC(0, rrbd->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); - OUT_BATCH_REGSEQ(R300_ZB_DEPTHPITCH, 1); - if (!r300->radeon.radeonScreen->kernel_mm) - OUT_BATCH(zbpitch); - else - OUT_BATCH_RELOC(zbpitch, rrbd->bo, zbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0); - END_BATCH(); - } -#endif - BEGIN_BATCH_NO_AUTOSTATE(6); - OUT_BATCH_REGSEQ(RB3D_COLOR_CHANNEL_MASK, 1); - if (flags & CLEARBUFFER_COLOR) { - OUT_BATCH((ctx->Color.ColorMask[BCOMP] ? RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0 : 0) | - (ctx->Color.ColorMask[GCOMP] ? RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0 : 0) | - (ctx->Color.ColorMask[RCOMP] ? RB3D_COLOR_CHANNEL_MASK_RED_MASK0 : 0) | - (ctx->Color.ColorMask[ACOMP] ? 
RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0 : 0)); - } else { - OUT_BATCH(0); - } - - - { - uint32_t t1, t2; - - t1 = 0x0; - t2 = 0x0; - - if (flags & CLEARBUFFER_DEPTH) { - t1 |= R300_Z_ENABLE | R300_Z_WRITE_ENABLE; - t2 |= - (R300_ZS_ALWAYS << R300_Z_FUNC_SHIFT); - } - - if (flags & CLEARBUFFER_STENCIL) { - t1 |= R300_STENCIL_ENABLE; - t2 |= - (R300_ZS_ALWAYS << - R300_S_FRONT_FUNC_SHIFT) | - (R300_ZS_REPLACE << - R300_S_FRONT_SFAIL_OP_SHIFT) | - (R300_ZS_REPLACE << - R300_S_FRONT_ZPASS_OP_SHIFT) | - (R300_ZS_REPLACE << - R300_S_FRONT_ZFAIL_OP_SHIFT); - } - - OUT_BATCH_REGSEQ(R300_ZB_CNTL, 3); - OUT_BATCH(t1); - OUT_BATCH(t2); - OUT_BATCH(((ctx->Stencil.WriteMask[0] & R300_STENCILREF_MASK) << - R300_STENCILWRITEMASK_SHIFT) | - (ctx->Stencil.Clear & R300_STENCILREF_MASK)); - END_BATCH(); - } - - if (!rmesa->radeon.radeonScreen->kernel_mm) { - BEGIN_BATCH_NO_AUTOSTATE(9); - OUT_BATCH(cmdpacket3(r300->radeon.radeonScreen, R300_CMD_PACKET3_CLEAR)); - OUT_BATCH_FLOAT32(dPriv->w / 2.0); - OUT_BATCH_FLOAT32(dPriv->h / 2.0); - OUT_BATCH_FLOAT32(ctx->Depth.Clear); - OUT_BATCH_FLOAT32(1.0); - OUT_BATCH_FLOAT32(ctx->Color.ClearColor[0]); - OUT_BATCH_FLOAT32(ctx->Color.ClearColor[1]); - OUT_BATCH_FLOAT32(ctx->Color.ClearColor[2]); - OUT_BATCH_FLOAT32(ctx->Color.ClearColor[3]); - END_BATCH(); - } else { - OUT_BATCH(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8)); - OUT_BATCH(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING | - (1 << R300_PRIM_NUM_VERTICES_SHIFT)); - OUT_BATCH_FLOAT32(dPriv->w / 2.0); - OUT_BATCH_FLOAT32(dPriv->h / 2.0); - OUT_BATCH_FLOAT32(ctx->Depth.Clear); - OUT_BATCH_FLOAT32(1.0); - OUT_BATCH_FLOAT32(ctx->Color.ClearColor[0]); - OUT_BATCH_FLOAT32(ctx->Color.ClearColor[1]); - OUT_BATCH_FLOAT32(ctx->Color.ClearColor[2]); - OUT_BATCH_FLOAT32(ctx->Color.ClearColor[3]); - } - - r300EmitCacheFlush(rmesa); - cp_wait(&r300->radeon, R300_WAIT_3D | R300_WAIT_3D_CLEAN); - - R300_STATECHANGE(r300, cb); - R300_STATECHANGE(r300, cmk); - R300_STATECHANGE(r300, zs); -} - -static void 
r300EmitClearState(GLcontext * ctx) -{ - r300ContextPtr r300 = R300_CONTEXT(ctx); - BATCH_LOCALS(&r300->radeon); - __DRIdrawablePrivate *dPriv = radeon_get_drawable(&r300->radeon); - int i; - int has_tcl; - int is_r500 = 0; - GLuint vap_cntl; - - has_tcl = r300->options.hw_tcl_enabled; - - if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) - is_r500 = 1; - - /* State atom dirty tracking is a little subtle here. - * - * On the one hand, we need to make sure base state is emitted - * here if we start with an empty batch buffer, otherwise clear - * works incorrectly with multiple processes. Therefore, the first - * BEGIN_BATCH cannot be a BEGIN_BATCH_NO_AUTOSTATE. - * - * On the other hand, implicit state emission clears the state atom - * dirty bits, so we have to call R300_STATECHANGE later than the - * first BEGIN_BATCH. - * - * The final trickiness is that, because we change state, we need - * to ensure that any stored swtcl primitives are flushed properly - * before we start changing state. See the R300_NEWPRIM in r300Clear - * for this. 
- */ - BEGIN_BATCH(31); - OUT_BATCH_REGSEQ(R300_VAP_PROG_STREAM_CNTL_0, 1); - if (!has_tcl) - OUT_BATCH(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) | - ((R300_LAST_VEC | (2 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT))); - else - OUT_BATCH(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) | - ((R300_LAST_VEC | (1 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT))); - - OUT_BATCH_REGVAL(R300_FG_FOG_BLEND, 0); - OUT_BATCH_REGVAL(R300_VAP_PROG_STREAM_CNTL_EXT_0, - ((((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | - (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) | - (R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_SHIFT) | - (R300_SWIZZLE_SELECT_W << R300_SWIZZLE_SELECT_W_SHIFT) | - ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W) << R300_WRITE_ENA_SHIFT)) - << R300_SWIZZLE0_SHIFT) | - (((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | - (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) | - (R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_SHIFT) | - (R300_SWIZZLE_SELECT_W << R300_SWIZZLE_SELECT_W_SHIFT) | - ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W) << R300_WRITE_ENA_SHIFT)) - << R300_SWIZZLE1_SHIFT))); - - /* R300_VAP_INPUT_CNTL_0, R300_VAP_INPUT_CNTL_1 */ - OUT_BATCH_REGSEQ(R300_VAP_VTX_STATE_CNTL, 2); - OUT_BATCH((R300_SEL_USER_COLOR_0 << R300_COLOR_0_ASSEMBLY_SHIFT)); - OUT_BATCH(R300_INPUT_CNTL_POS | R300_INPUT_CNTL_COLOR | R300_INPUT_CNTL_TC0); - - /* comes from fglrx startup of clear */ - OUT_BATCH_REGSEQ(R300_SE_VTE_CNTL, 2); - OUT_BATCH(R300_VTX_W0_FMT | R300_VPORT_X_SCALE_ENA | - R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA | - R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA | - R300_VPORT_Z_OFFSET_ENA); - OUT_BATCH(0x8); - - OUT_BATCH_REGVAL(R300_VAP_PSC_SGN_NORM_CNTL, 0xaaaaaaaa); - - 
OUT_BATCH_REGSEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2); - OUT_BATCH(R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT | - R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT); - OUT_BATCH(0); /* no textures */ - - OUT_BATCH_REGVAL(R300_TX_ENABLE, 0); - - OUT_BATCH_REGSEQ(R300_SE_VPORT_XSCALE, 6); - OUT_BATCH_FLOAT32(1.0); - OUT_BATCH_FLOAT32(dPriv->x); - OUT_BATCH_FLOAT32(1.0); - OUT_BATCH_FLOAT32(dPriv->y); - OUT_BATCH_FLOAT32(1.0); - OUT_BATCH_FLOAT32(0.0); - - OUT_BATCH_REGVAL(R300_FG_ALPHA_FUNC, 0); - - OUT_BATCH_REGSEQ(R300_RB3D_CBLEND, 2); - OUT_BATCH(0x0); - OUT_BATCH(0x0); - END_BATCH(); - - R300_STATECHANGE(r300, vir[0]); - R300_STATECHANGE(r300, fogs); - R300_STATECHANGE(r300, vir[1]); - R300_STATECHANGE(r300, vic); - R300_STATECHANGE(r300, vte); - R300_STATECHANGE(r300, vof); - R300_STATECHANGE(r300, txe); - R300_STATECHANGE(r300, vpt); - R300_STATECHANGE(r300, at); - R300_STATECHANGE(r300, bld); - R300_STATECHANGE(r300, ps); - - if (has_tcl) { - R300_STATECHANGE(r300, vap_clip_cntl); - - BEGIN_BATCH_NO_AUTOSTATE(2); - OUT_BATCH_REGVAL(R300_VAP_CLIP_CNTL, R300_PS_UCP_MODE_CLIP_AS_TRIFAN | R300_CLIP_DISABLE); - END_BATCH(); - } - - BEGIN_BATCH_NO_AUTOSTATE(2); - OUT_BATCH_REGVAL(R300_GA_POINT_SIZE, - ((dPriv->w * 6) << R300_POINTSIZE_X_SHIFT) | - ((dPriv->h * 6) << R300_POINTSIZE_Y_SHIFT)); - END_BATCH(); - - if (!is_r500) { - R300_STATECHANGE(r300, ri); - R300_STATECHANGE(r300, rc); - R300_STATECHANGE(r300, rr); - - BEGIN_BATCH(14); - OUT_BATCH_REGSEQ(R300_RS_IP_0, 8); - for (i = 0; i < 8; ++i) - OUT_BATCH(R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3)); - - OUT_BATCH_REGSEQ(R300_RS_COUNT, 2); - OUT_BATCH((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN); - OUT_BATCH(0x0); - - OUT_BATCH_REGVAL(R300_RS_INST_0, R300_RS_INST_COL_CN_WRITE); - END_BATCH(); - } else { - R300_STATECHANGE(r300, ri); - R300_STATECHANGE(r300, rc); - R300_STATECHANGE(r300, rr); - - BEGIN_BATCH(14); - OUT_BATCH_REGSEQ(R500_RS_IP_0, 8); - for (i = 0; i < 8; ++i) { - OUT_BATCH((R500_RS_IP_PTR_K0 << 
R500_RS_IP_TEX_PTR_S_SHIFT) | - (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) | - (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) | - (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT)); - } - - OUT_BATCH_REGSEQ(R300_RS_COUNT, 2); - OUT_BATCH((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN); - OUT_BATCH(0x0); - - OUT_BATCH_REGVAL(R500_RS_INST_0, R500_RS_INST_COL_CN_WRITE); - END_BATCH(); - } - - if (!is_r500) { - R300_STATECHANGE(r300, fp); - R300_STATECHANGE(r300, fpi[0]); - R300_STATECHANGE(r300, fpi[1]); - R300_STATECHANGE(r300, fpi[2]); - R300_STATECHANGE(r300, fpi[3]); - - BEGIN_BATCH(17); - OUT_BATCH_REGSEQ(R300_US_CONFIG, 3); - OUT_BATCH(0x0); - OUT_BATCH(0x0); - OUT_BATCH(0x0); - OUT_BATCH_REGSEQ(R300_US_CODE_ADDR_0, 4); - OUT_BATCH(0x0); - OUT_BATCH(0x0); - OUT_BATCH(0x0); - OUT_BATCH(R300_RGBA_OUT); - - OUT_BATCH_REGVAL(R300_US_ALU_RGB_INST_0, - FP_INSTRC(MAD, FP_ARGC(SRC0C_XYZ), FP_ARGC(ONE), FP_ARGC(ZERO))); - OUT_BATCH_REGVAL(R300_US_ALU_RGB_ADDR_0, - FP_SELC(0, NO, XYZ, FP_TMP(0), 0, 0)); - OUT_BATCH_REGVAL(R300_US_ALU_ALPHA_INST_0, - FP_INSTRA(MAD, FP_ARGA(SRC0A), FP_ARGA(ONE), FP_ARGA(ZERO))); - OUT_BATCH_REGVAL(R300_US_ALU_ALPHA_ADDR_0, - FP_SELA(0, NO, W, FP_TMP(0), 0, 0)); - END_BATCH(); - } else { - struct radeon_state_atom r500fp; - uint32_t _cmd[10]; - - R300_STATECHANGE(r300, fp); - R300_STATECHANGE(r300, r500fp); - - BEGIN_BATCH(7); - OUT_BATCH_REGSEQ(R500_US_CONFIG, 2); - OUT_BATCH(R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO); - OUT_BATCH(0x0); - OUT_BATCH_REGSEQ(R500_US_CODE_ADDR, 3); - OUT_BATCH(R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(1)); - OUT_BATCH(R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(1)); - OUT_BATCH(R500_US_CODE_OFFSET_ADDR(0)); - END_BATCH(); - - r500fp.check = check_r500fp; - r500fp.cmd = _cmd; - r500fp.cmd[0] = cmdr500fp(r300->radeon.radeonScreen, 0, 1, 0, 0); - r500fp.cmd[1] = R500_INST_TYPE_OUT | - R500_INST_TEX_SEM_WAIT | - R500_INST_LAST | - R500_INST_RGB_OMASK_R | - R500_INST_RGB_OMASK_G | - 
R500_INST_RGB_OMASK_B | - R500_INST_ALPHA_OMASK | - R500_INST_RGB_CLAMP | - R500_INST_ALPHA_CLAMP; - r500fp.cmd[2] = R500_RGB_ADDR0(0) | - R500_RGB_ADDR1(0) | - R500_RGB_ADDR1_CONST | - R500_RGB_ADDR2(0) | - R500_RGB_ADDR2_CONST; - r500fp.cmd[3] = R500_ALPHA_ADDR0(0) | - R500_ALPHA_ADDR1(0) | - R500_ALPHA_ADDR1_CONST | - R500_ALPHA_ADDR2(0) | - R500_ALPHA_ADDR2_CONST; - r500fp.cmd[4] = R500_ALU_RGB_SEL_A_SRC0 | - R500_ALU_RGB_R_SWIZ_A_R | - R500_ALU_RGB_G_SWIZ_A_G | - R500_ALU_RGB_B_SWIZ_A_B | - R500_ALU_RGB_SEL_B_SRC0 | - R500_ALU_RGB_R_SWIZ_B_R | - R500_ALU_RGB_B_SWIZ_B_G | - R500_ALU_RGB_G_SWIZ_B_B; - r500fp.cmd[5] = R500_ALPHA_OP_CMP | - R500_ALPHA_SWIZ_A_A | - R500_ALPHA_SWIZ_B_A; - r500fp.cmd[6] = R500_ALU_RGBA_OP_CMP | - R500_ALU_RGBA_R_SWIZ_0 | - R500_ALU_RGBA_G_SWIZ_0 | - R500_ALU_RGBA_B_SWIZ_0 | - R500_ALU_RGBA_A_SWIZ_0; - - r500fp.cmd[7] = 0; - if (r300->radeon.radeonScreen->kernel_mm) { - emit_r500fp(ctx, &r500fp); - } else { - int dwords = r500fp.check(ctx,&r500fp); - BEGIN_BATCH_NO_AUTOSTATE(dwords); - OUT_BATCH_TABLE(r500fp.cmd, dwords); - END_BATCH(); - } - - } - - BEGIN_BATCH(2); - OUT_BATCH_REGVAL(R300_VAP_PVS_STATE_FLUSH_REG, 0); - END_BATCH(); - - if (has_tcl) { - vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) | - (5 << R300_PVS_NUM_CNTLRS_SHIFT) | - (12 << R300_VF_MAX_VTX_NUM_SHIFT)); - if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) - vap_cntl |= R500_TCL_STATE_OPTIMIZATION; - } else { - vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) | - (5 << R300_PVS_NUM_CNTLRS_SHIFT) | - (5 << R300_VF_MAX_VTX_NUM_SHIFT)); - } - - if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV515) - vap_cntl |= (2 << R300_PVS_NUM_FPUS_SHIFT); - else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) || - (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV560) || - (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV570)) - vap_cntl |= (5 << R300_PVS_NUM_FPUS_SHIFT); - else if ((r300->radeon.radeonScreen->chip_family == 
CHIP_FAMILY_RV410) || - (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R420)) - vap_cntl |= (6 << R300_PVS_NUM_FPUS_SHIFT); - else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R520) || - (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R580)) - vap_cntl |= (8 << R300_PVS_NUM_FPUS_SHIFT); - else - vap_cntl |= (4 << R300_PVS_NUM_FPUS_SHIFT); - - R300_STATECHANGE(r300, vap_cntl); - - BEGIN_BATCH(2); - OUT_BATCH_REGVAL(R300_VAP_CNTL, vap_cntl); - END_BATCH(); - - if (has_tcl) { - struct radeon_state_atom vpu; - uint32_t _cmd[10]; - R300_STATECHANGE(r300, pvs); - R300_STATECHANGE(r300, vap_flush); - R300_STATECHANGE(r300, vpi); - - BEGIN_BATCH(4); - OUT_BATCH_REGSEQ(R300_VAP_PVS_CODE_CNTL_0, 3); - OUT_BATCH((0 << R300_PVS_FIRST_INST_SHIFT) | - (0 << R300_PVS_XYZW_VALID_INST_SHIFT) | - (1 << R300_PVS_LAST_INST_SHIFT)); - OUT_BATCH((0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | - (0 << R300_PVS_MAX_CONST_ADDR_SHIFT)); - OUT_BATCH(1 << R300_PVS_LAST_VTX_SRC_INST_SHIFT); - END_BATCH(); - - vpu.check = check_vpu; - vpu.cmd = _cmd; - vpu.cmd[0] = cmdvpu(r300->radeon.radeonScreen, 0, 2); - - vpu.cmd[1] = PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, - 0, 0xf, PVS_DST_REG_OUT); - vpu.cmd[2] = PVS_SRC_OPERAND(0, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, - PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, - PVS_SRC_REG_INPUT, NEGATE_NONE); - vpu.cmd[3] = PVS_SRC_OPERAND(0, PVS_SRC_SELECT_FORCE_0, - PVS_SRC_SELECT_FORCE_0, - PVS_SRC_SELECT_FORCE_0, - PVS_SRC_SELECT_FORCE_0, - PVS_SRC_REG_INPUT, NEGATE_NONE); - vpu.cmd[4] = 0x0; - - vpu.cmd[5] = PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 1, 0xf, - PVS_DST_REG_OUT); - vpu.cmd[6] = PVS_SRC_OPERAND(1, PVS_SRC_SELECT_X, - PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, - PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, - NEGATE_NONE); - vpu.cmd[7] = PVS_SRC_OPERAND(1, PVS_SRC_SELECT_FORCE_0, - PVS_SRC_SELECT_FORCE_0, - PVS_SRC_SELECT_FORCE_0, - PVS_SRC_SELECT_FORCE_0, - PVS_SRC_REG_INPUT, NEGATE_NONE); - vpu.cmd[8] = 0x0; - - if 
(r300->radeon.radeonScreen->kernel_mm) { - int dwords = r300->hw.vap_flush.check(ctx,&r300->hw.vap_flush); - BEGIN_BATCH_NO_AUTOSTATE(dwords); - OUT_BATCH_TABLE(r300->hw.vap_flush.cmd, dwords); - END_BATCH(); - emit_vpu(ctx, &vpu); - } else { - int dwords = vpu.check(ctx,&vpu); - BEGIN_BATCH_NO_AUTOSTATE(dwords); - OUT_BATCH_TABLE(vpu.cmd, dwords); - END_BATCH(); - } - - } -} - -static int r300KernelClear(GLcontext *ctx, GLuint flags) -{ - r300ContextPtr r300 = R300_CONTEXT(ctx); - __DRIdrawablePrivate *dPriv = radeon_get_drawable(&r300->radeon); - struct radeon_framebuffer *rfb = dPriv->driverPrivate; - struct radeon_renderbuffer *rrb; - struct radeon_renderbuffer *rrbd; - int bits = 0, ret; - - /* Make sure it fits there. */ - radeon_cs_space_reset_bos(r300->radeon.cmdbuf.cs); - - if (flags & BUFFER_BIT_COLOR0) { - rrb = radeon_get_renderbuffer(&rfb->base, BUFFER_COLOR0); - radeon_cs_space_add_persistent_bo(r300->radeon.cmdbuf.cs, - rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM); - } - - if (flags & BUFFER_BIT_FRONT_LEFT) { - rrb = radeon_get_renderbuffer(&rfb->base, BUFFER_FRONT_LEFT); - radeon_cs_space_add_persistent_bo(r300->radeon.cmdbuf.cs, - rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM); - } - - if (flags & BUFFER_BIT_BACK_LEFT) { - rrb = radeon_get_renderbuffer(&rfb->base, BUFFER_BACK_LEFT); - radeon_cs_space_add_persistent_bo(r300->radeon.cmdbuf.cs, - rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM); - } - - rrbd = radeon_get_renderbuffer(&rfb->base, BUFFER_DEPTH); - if (rrbd) { - radeon_cs_space_add_persistent_bo(r300->radeon.cmdbuf.cs, - rrbd->bo, 0, RADEON_GEM_DOMAIN_VRAM); - } - - ret = radeon_cs_space_check(r300->radeon.cmdbuf.cs); - if (ret) - return -1; - - rcommonEnsureCmdBufSpace(&r300->radeon, 421 * 3, __FUNCTION__); - if (flags || bits) - r300EmitClearState(ctx); - - rrbd = radeon_get_renderbuffer(&rfb->base, BUFFER_DEPTH); - if (rrbd && (flags & BUFFER_BIT_DEPTH)) - bits |= CLEARBUFFER_DEPTH; - - if (rrbd && (flags & BUFFER_BIT_STENCIL)) - bits |= CLEARBUFFER_STENCIL; - - if 
(flags & BUFFER_BIT_COLOR0) { - rrb = radeon_get_renderbuffer(&rfb->base, BUFFER_COLOR0); - r300ClearBuffer(r300, CLEARBUFFER_COLOR, rrb, NULL); - bits = 0; - } - - if (flags & BUFFER_BIT_FRONT_LEFT) { - rrb = radeon_get_renderbuffer(&rfb->base, BUFFER_FRONT_LEFT); - r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, rrb, rrbd); - bits = 0; - } - - if (flags & BUFFER_BIT_BACK_LEFT) { - rrb = radeon_get_renderbuffer(&rfb->base, BUFFER_BACK_LEFT); - r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, rrb, rrbd); - bits = 0; - } - - if (bits) - r300ClearBuffer(r300, bits, NULL, rrbd); - - COMMIT_BATCH(); - return 0; -} - -/** - * Buffer clear - */ -static void r300Clear(GLcontext * ctx, GLbitfield mask) -{ - r300ContextPtr r300 = R300_CONTEXT(ctx); - __DRIdrawablePrivate *dPriv = radeon_get_drawable(&r300->radeon); - const GLuint colorMask = *((GLuint *) & ctx->Color.ColorMask); - GLbitfield swrast_mask = 0, tri_mask = 0; - int i, ret; - struct gl_framebuffer *fb = ctx->DrawBuffer; - - if (RADEON_DEBUG & RADEON_IOCTL) - fprintf(stderr, "r300Clear\n"); - - if (!r300->radeon.radeonScreen->driScreen->dri2.enabled) { - LOCK_HARDWARE(&r300->radeon); - UNLOCK_HARDWARE(&r300->radeon); - if (dPriv->numClipRects == 0) - return; - } - - /* Flush swtcl vertices if necessary, because we will change hardware - * state during clear. See also the state-related comment in - * r300EmitClearState. - */ - R300_NEWPRIM(r300); - - if (colorMask == ~0) - tri_mask |= (mask & BUFFER_BITS_COLOR); - else - tri_mask |= (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_BACK_LEFT)); - - - /* HW stencil */ - if (mask & BUFFER_BIT_STENCIL) { - tri_mask |= BUFFER_BIT_STENCIL; - } - - /* HW depth */ - if (mask & BUFFER_BIT_DEPTH) { - tri_mask |= BUFFER_BIT_DEPTH; - } - - /* If we're doing a tri pass for depth/stencil, include a likely color - * buffer with it. 
- */ - - for (i = 0; i < BUFFER_COUNT; i++) { - GLuint bufBit = 1 << i; - if ((tri_mask) & bufBit) { - if (!fb->Attachment[i].Renderbuffer->ClassID) { - tri_mask &= ~bufBit; - swrast_mask |= bufBit; - } - } - } - - /* SW fallback clearing */ - swrast_mask = mask & ~tri_mask; - - ret = 0; - if (tri_mask) { - if (r300->radeon.radeonScreen->kernel_mm) - radeonUserClear(ctx, tri_mask); - else { - /* if kernel clear fails due to size restraints fallback */ - ret = r300KernelClear(ctx, tri_mask); - if (ret < 0) - swrast_mask |= tri_mask; - } - } - - if (swrast_mask) { - if (RADEON_DEBUG & RADEON_FALLBACKS) - fprintf(stderr, "%s: swrast clear, mask: %x\n", - __FUNCTION__, swrast_mask); - _swrast_Clear(ctx, swrast_mask); - } -} - -void r300InitIoctlFuncs(struct dd_function_table *functions) -{ - functions->Clear = r300Clear; - functions->Finish = radeonFinish; - functions->Flush = radeonFlush; -} diff --git a/r300/r300_ioctl.h b/r300/r300_ioctl.h deleted file mode 100644 index 3abfa71..0000000 --- a/r300/r300_ioctl.h +++ /dev/null @@ -1,44 +0,0 @@ -/* -Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. - -The Weather Channel (TM) funded Tungsten Graphics to develop the -initial release of the Radeon 8500 driver under the XFree86 license. -This notice must be preserved. - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice (including the -next paragraph) shall be included in all copies or substantial -portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -**************************************************************************/ - -/* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - * Nicolai Haehnle <prefect_@gmx.net> - */ - -#ifndef __R300_IOCTL_H__ -#define __R300_IOCTL_H__ - -#include "r300_context.h" -#include "radeon_drm.h" - -extern void r300InitIoctlFuncs(struct dd_function_table *functions); - -#endif /* __R300_IOCTL_H__ */ diff --git a/r300/r300_reg.h b/r300/r300_reg.h index ea684e7..ac93563 100644 --- a/r300/r300_reg.h +++ b/r300/r300_reg.h @@ -230,6 +230,9 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_DATA_TYPE_SHORT_4 7 # define R300_DATA_TYPE_VECTOR_3_TTT 8 # define R300_DATA_TYPE_VECTOR_3_EET 9 +# define R300_DATA_TYPE_FLT16_2 11 +# define R300_DATA_TYPE_FLT16_4 12 + # define R300_SKIP_DWORDS_SHIFT 4 # define R300_DST_VEC_LOC_SHIFT 8 # define R300_LAST_VEC (1 << 13) @@ -479,7 +482,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_PVS_FIRST_INST_SHIFT 0 # define R300_PVS_XYZW_VALID_INST_SHIFT 10 # define R300_PVS_LAST_INST_SHIFT 20 -/* Addresses are relative the the vertex program parameters area. */ +/* Addresses are relative to the vertex program parameters area. */ #define R300_VAP_PVS_CONST_CNTL 0x22D4 # define R300_PVS_CONST_BASE_OFFSET_SHIFT 0 # define R300_PVS_MAX_CONST_ADDR_SHIFT 16 @@ -1757,7 +1760,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * The destination register index is in FPI1 (color) and FPI3 (alpha) * together with enable bits. 
* There are separate enable bits for writing into temporary registers - * (DSTC_REG_* /DSTA_REG) and and program output registers (DSTC_OUTPUT_* + * (DSTC_REG_* /DSTA_REG) and program output registers (DSTC_OUTPUT_* * /DSTA_OUTPUT). You can write to both at once, or not write at all (the * same index must be used for both). * diff --git a/r300/r300_render.c b/r300/r300_render.c index 4ae593c..9596131 100644 --- a/r300/r300_render.c +++ b/r300/r300_render.c @@ -53,7 +53,6 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_render.h" #include "main/glheader.h" -#include "main/state.h" #include "main/imports.h" #include "main/enums.h" #include "main/macros.h" @@ -65,15 +64,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "swrast_setup/swrast_setup.h" #include "vbo/vbo.h" #include "vbo/vbo_split.h" -#include "tnl/tnl.h" -#include "tnl/t_vp_build.h" #include "r300_context.h" -#include "r300_ioctl.h" #include "r300_state.h" #include "r300_reg.h" -#include "r300_tex.h" #include "r300_emit.h" -#include "r300_fragprog_common.h" #include "r300_swtcl.h" /** @@ -446,7 +440,7 @@ void r300SwitchFallback(GLcontext *ctx, uint32_t bit, GLboolean mode) if (mode) { if ((fallback_warn & bit) == 0) { if (RADEON_DEBUG & RADEON_FALLBACKS) - _mesa_fprintf(stderr, "WARNING! Falling back to software for %s\n", getFallbackString(bit)); + fprintf(stderr, "WARNING! 
Falling back to software for %s\n", getFallbackString(bit)); fallback_warn |= bit; } rmesa->fallback |= bit; diff --git a/r300/r300_shader.c b/r300/r300_shader.c index a4f9db1..9c24166 100644 --- a/r300/r300_shader.c +++ b/r300/r300_shader.c @@ -39,7 +39,7 @@ static void freeFragProgCache(GLcontext *ctx, struct r300_fragment_program_cont while (fp) { tmp = fp->next; rc_constants_destroy(&fp->code.constants); - _mesa_free(fp); + free(fp); fp = tmp; } } @@ -52,7 +52,7 @@ static void freeVertProgCache(GLcontext *ctx, struct r300_vertex_program_cont *c tmp = vp->next; rc_constants_destroy(&vp->code.constants); _mesa_reference_vertprog(ctx, &vp->Base, NULL); - _mesa_free(vp); + free(vp); vp = tmp; } } @@ -98,7 +98,7 @@ static void r300DeleteProgram(GLcontext * ctx, struct gl_program *prog) _mesa_delete_program(ctx, prog); } -static void +static GLboolean r300ProgramStringNotify(GLcontext * ctx, GLenum target, struct gl_program *prog) { struct r300_vertex_program_cont *vp = (struct r300_vertex_program_cont *)prog; @@ -116,7 +116,10 @@ r300ProgramStringNotify(GLcontext * ctx, GLenum target, struct gl_program *prog) } /* need this for tcl fallbacks */ - _tnl_program_string(ctx, target, prog); + (void) _tnl_program_string(ctx, target, prog); + + /* XXX check if program is legal, within limits */ + return GL_TRUE; } static GLboolean diff --git a/r300/r300_state.c b/r300/r300_state.c index ac20c08..5979ded 100644 --- a/r300/r300_state.c +++ b/r300/r300_state.c @@ -55,17 +55,13 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "tnl/t_vp_build.h" #include "r300_context.h" -#include "r300_ioctl.h" #include "r300_state.h" #include "r300_reg.h" #include "r300_emit.h" -#include "r300_tex.h" #include "r300_fragprog_common.h" #include "r300_render.h" #include "r300_vertprog.h" -#include "drirenderbuffer.h" - static void r300BlendColor(GLcontext * ctx, const GLfloat cf[4]) { r300ContextPtr rmesa = R300_CONTEXT(ctx); @@ -370,7 +366,6 @@ static void r300ClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq ) p = (GLint) plane - (GLint) GL_CLIP_PLANE0; ip = (GLint *)ctx->Transform._ClipUserPlane[p]; - R300_STATECHANGE( rmesa, vap_flush ); R300_STATECHANGE( rmesa, vpucp[p] ); rmesa->hw.vpucp[p].cmd[R300_VPUCP_X] = ip[0]; rmesa->hw.vpucp[p].cmd[R300_VPUCP_Y] = ip[1]; @@ -798,12 +793,14 @@ static void r300PointParameter(GLcontext * ctx, GLenum pname, const GLfloat * pa R300_STATECHANGE(r300, ga_point_minmax); r300->hw.ga_point_minmax.cmd[1] &= ~R300_GA_POINT_MINMAX_MIN_MASK; r300->hw.ga_point_minmax.cmd[1] |= (GLuint)(ctx->Point.MinSize * 6.0); + r300PointSize(ctx, ctx->Point.Size); break; case GL_POINT_SIZE_MAX: R300_STATECHANGE(r300, ga_point_minmax); r300->hw.ga_point_minmax.cmd[1] &= ~R300_GA_POINT_MINMAX_MAX_MASK; r300->hw.ga_point_minmax.cmd[1] |= (GLuint)(ctx->Point.MaxSize * 6.0) << R300_GA_POINT_MINMAX_MAX_SHIFT; + r300PointSize(ctx, ctx->Point.Size); break; case GL_POINT_DISTANCE_ATTENUATION: break; @@ -998,7 +995,7 @@ static void r300StencilOpSeparate(GLcontext * ctx, GLenum face, static void r300UpdateWindow(GLcontext * ctx) { r300ContextPtr rmesa = R300_CONTEXT(ctx); - __DRIdrawablePrivate *dPriv = radeon_get_drawable(&rmesa->radeon); + __DRIdrawable *dPriv = radeon_get_drawable(&rmesa->radeon); GLfloat xoffset = dPriv ? (GLfloat) dPriv->x : 0; GLfloat yoffset = dPriv ? 
(GLfloat) dPriv->y + dPriv->h : 0; const GLfloat *v = ctx->Viewport._WindowMap.m; @@ -1051,7 +1048,7 @@ static void r300DepthRange(GLcontext * ctx, GLclampd nearval, GLclampd farval) void r300UpdateViewportOffset(GLcontext * ctx) { r300ContextPtr rmesa = R300_CONTEXT(ctx); - __DRIdrawablePrivate *dPriv = radeon_get_drawable(&rmesa->radeon); + __DRIdrawable *dPriv = radeon_get_drawable(&rmesa->radeon); GLfloat xoffset = (GLfloat) dPriv->x; GLfloat yoffset = (GLfloat) dPriv->y + dPriv->h; const GLfloat *v = ctx->Viewport._WindowMap.m; @@ -1312,7 +1309,7 @@ static void r300SetupTextures(GLcontext * ctx) fprintf(stderr, "Aiiee ! mtu=%d is greater than R300_MAX_TEXTURE_UNITS=%d\n", mtu, R300_MAX_TEXTURE_UNITS); - _mesa_exit(-1); + exit(-1); } /* We cannot let disabled tmu offsets pass DRM */ @@ -1766,12 +1763,11 @@ static void r300ResetHwState(r300ContextPtr r300) if (RADEON_DEBUG & RADEON_STATE) fprintf(stderr, "%s\n", __FUNCTION__); - radeon_firevertices(&r300->radeon); - r300ColorMask(ctx, - ctx->Color.ColorMask[RCOMP], - ctx->Color.ColorMask[GCOMP], - ctx->Color.ColorMask[BCOMP], ctx->Color.ColorMask[ACOMP]); + ctx->Color.ColorMask[0][RCOMP], + ctx->Color.ColorMask[0][GCOMP], + ctx->Color.ColorMask[0][BCOMP], + ctx->Color.ColorMask[0][ACOMP]); r300Enable(ctx, GL_DEPTH_TEST, ctx->Depth.Test); r300DepthMask(ctx, ctx->Depth.Mask); @@ -1973,7 +1969,7 @@ void r300UpdateShaders(r300ContextPtr rmesa) /* should only happenen once, just after context is created */ /* TODO: shouldn't we fallback to sw here? 
*/ if (!ctx->FragmentProgram._Current) { - _mesa_fprintf(stderr, "No ctx->FragmentProgram._Current!!\n"); + fprintf(stderr, "No ctx->FragmentProgram._Current!!\n"); return; } @@ -1988,23 +1984,6 @@ void r300UpdateShaders(r300ContextPtr rmesa) if (rmesa->options.hw_tcl_enabled) { struct r300_vertex_program *vp; - if (rmesa->radeon.NewGLState) { - int i; - for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) { - rmesa->temp_attrib[i] = - TNL_CONTEXT(ctx)->vb.AttribPtr[i]; - TNL_CONTEXT(ctx)->vb.AttribPtr[i] = - &rmesa->dummy_attrib[i]; - } - - _tnl_UpdateFixedFunctionProgram(ctx); - - for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) { - TNL_CONTEXT(ctx)->vb.AttribPtr[i] = - rmesa->temp_attrib[i]; - } - } - vp = r300SelectAndTranslateVertexShader(ctx); r300SwitchFallback(ctx, R300_FALLBACK_VERTEX_PROGRAM, vp->error); @@ -2040,7 +2019,7 @@ static const GLfloat *get_fragmentprogram_constant(GLcontext *ctx, GLuint index, } case RC_STATE_R300_WINDOW_DIMENSION: { - __DRIdrawablePrivate * drawable = radeon_get_drawable(&rmesa->radeon); + __DRIdrawable * drawable = radeon_get_drawable(&rmesa->radeon); buffer[0] = drawable->w * 0.5f; /* width*0.5 */ buffer[1] = drawable->h * 0.5f; /* height*0.5 */ buffer[2] = 0.5F; /* for moving range [-1 1] -> [0 1] */ diff --git a/r300/r300_swtcl.c b/r300/r300_swtcl.c index ee2c71e..4dcc7cb 100644 --- a/r300/r300_swtcl.c +++ b/r300/r300_swtcl.c @@ -124,7 +124,7 @@ void r300ChooseSwtclVertexFormat(GLcontext *ctx, GLuint *_InputsRead, GLuint *_ } if (ctx->Light.Enabled && ctx->Light.Model.TwoSide) { - VB->AttribPtr[VERT_ATTRIB_GENERIC0] = VB->ColorPtr[1]; + VB->AttribPtr[VERT_ATTRIB_GENERIC0] = VB->BackfaceColorPtr; OutputsWritten |= 1 << VERT_RESULT_BFC0; #if MESA_LITTLE_ENDIAN EMIT_ATTR( _TNL_ATTRIB_GENERIC0, EMIT_4UB_4F_RGBA ); @@ -134,7 +134,7 @@ void r300ChooseSwtclVertexFormat(GLcontext *ctx, GLuint *_InputsRead, GLuint *_ ADD_ATTR(VERT_ATTRIB_GENERIC0, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR2, SWIZZLE_XYZW, MASK_XYZW, 1); #endif if 
(fp_reads & FRAG_BIT_COL1) { - VB->AttribPtr[VERT_ATTRIB_GENERIC1] = VB->SecondaryColorPtr[1]; + VB->AttribPtr[VERT_ATTRIB_GENERIC1] = VB->BackfaceSecondaryColorPtr; GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE); OutputsWritten |= 1 << VERT_RESULT_BFC1; #if MESA_LITTLE_ENDIAN @@ -159,7 +159,7 @@ void r300ChooseSwtclVertexFormat(GLcontext *ctx, GLuint *_InputsRead, GLuint *_ int tex_id = rmesa->selected_fp->wpos_attr - FRAG_ATTRIB_TEX0; VB->AttribPtr[VERT_ATTRIB_TEX0 + tex_id] = VB->AttribPtr[VERT_ATTRIB_POS]; - VB->TexCoordPtr[tex_id] = VB->AttribPtr[VERT_ATTRIB_POS]; + VB->AttribPtr[_TNL_ATTRIB_TEX0 + tex_id] = VB->AttribPtr[VERT_ATTRIB_POS]; RENDERINPUTS_SET(tnl->render_inputs_bitset, _TNL_ATTRIB_TEX0 + tex_id); } @@ -167,7 +167,7 @@ void r300ChooseSwtclVertexFormat(GLcontext *ctx, GLuint *_InputsRead, GLuint *_ int tex_id = rmesa->selected_fp->fog_attr - FRAG_ATTRIB_TEX0; VB->AttribPtr[VERT_ATTRIB_TEX0 + tex_id] = VB->AttribPtr[VERT_ATTRIB_FOG]; - VB->TexCoordPtr[tex_id] = VB->AttribPtr[VERT_ATTRIB_FOG]; + VB->AttribPtr[_TNL_ATTRIB_TEX0 + tex_id] = VB->AttribPtr[VERT_ATTRIB_FOG]; RENDERINPUTS_SET(tnl->render_inputs_bitset, _TNL_ATTRIB_TEX0 + tex_id); } @@ -180,7 +180,7 @@ void r300ChooseSwtclVertexFormat(GLcontext *ctx, GLuint *_InputsRead, GLuint *_ GLuint swiz, format, hw_format; for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { if (fp_reads & FRAG_BIT_TEX(i)) { - switch (VB->TexCoordPtr[i]->size) { + switch (VB->AttribPtr[_TNL_ATTRIB_TEX0 + i]->size) { case 1: format = EMIT_1F; hw_format = R300_DATA_TYPE_FLOAT_1; @@ -215,7 +215,7 @@ void r300ChooseSwtclVertexFormat(GLcontext *ctx, GLuint *_InputsRead, GLuint *_ if (first_free_tex >= ctx->Const.MaxTextureUnits) { fprintf(stderr, "\tout of free texcoords to write fog coordinate\n"); - _mesa_exit(-1); + exit(-1); } R300_NEWPRIM(rmesa); @@ -364,7 +364,6 @@ static struct { #define DO_POINTS 1 #define DO_FULL_QUAD 1 -#define HAVE_RGBA 1 #define HAVE_SPEC 1 #define HAVE_BACK_COLORS 0 
#define HAVE_HW_FLATSHADE 1 @@ -665,11 +664,11 @@ void r300_swtcl_flush(GLcontext *ctx, uint32_t current_offset) r300EmitCacheFlush(rmesa); radeonEmitState(&rmesa->radeon); - r300_emit_scissor(ctx); + r300_emit_scissor(ctx); r300EmitVertexAOS(rmesa, - rmesa->radeon.swtcl.vertex_size, - first_elem(&rmesa->radeon.dma.reserved)->bo, - current_offset); + rmesa->radeon.swtcl.vertex_size, + rmesa->radeon.swtcl.bo, + current_offset); r300EmitVbufPrim(rmesa, rmesa->radeon.swtcl.hw_primitive, diff --git a/r300/r300_tex.c b/r300/r300_tex.c index 726b3ff..8dd8507 100644 --- a/r300/r300_tex.c +++ b/r300/r300_tex.c @@ -41,19 +41,14 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/mipmap.h" #include "main/simple_list.h" #include "main/texstore.h" -#include "main/teximage.h" #include "main/texobj.h" #include "texmem.h" #include "r300_context.h" -#include "r300_state.h" -#include "r300_ioctl.h" #include "radeon_mipmap_tree.h" #include "r300_tex.h" -#include "xmlpool.h" - static unsigned int translate_wrap_mode(GLenum wrapmode) { @@ -216,7 +211,7 @@ static void r300TexParameter(GLcontext * ctx, GLenum target, break; case GL_TEXTURE_BORDER_COLOR: - r300SetTexBorderColor(t, texObj->BorderColor); + r300SetTexBorderColor(t, texObj->BorderColor.f); break; case GL_TEXTURE_BASE_LEVEL: @@ -308,12 +303,12 @@ static struct gl_texture_object *r300NewTextureObject(GLcontext * ctx, /* Initialize hardware state */ r300UpdateTexWrap(t); r300SetTexFilter(t, t->base.MinFilter, t->base.MagFilter, t->base.MaxAnisotropy); - r300SetTexBorderColor(t, t->base.BorderColor); + r300SetTexBorderColor(t, t->base.BorderColor.f); return &t->base; } -void r300InitTextureFuncs(struct dd_function_table *functions) +void r300InitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *functions) { /* Note: we only plug in the functions we implement in the driver * since _mesa_init_driver_functions() was already called. 
@@ -341,6 +336,11 @@ void r300InitTextureFuncs(struct dd_function_table *functions) functions->CompressedTexImage2D = radeonCompressedTexImage2D; functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; + if (radeon->radeonScreen->kernel_mm) { + functions->CopyTexImage2D = radeonCopyTexImage2D; + functions->CopyTexSubImage2D = radeonCopyTexSubImage2D; + } + functions->GenerateMipmap = radeonGenerateMipmap; driInitTextureFormats(); diff --git a/r300/r300_tex.h b/r300/r300_tex.h index 8a653ea..9694e70 100644 --- a/r300/r300_tex.h +++ b/r300/r300_tex.h @@ -49,6 +49,8 @@ extern void r300SetTexOffset(__DRIcontext *pDRICtx, GLint texname, extern GLboolean r300ValidateBuffers(GLcontext * ctx); -extern void r300InitTextureFuncs(struct dd_function_table *functions); +extern void r300InitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *functions); + +int32_t r300TranslateTexFormat(gl_format mesaFormat); #endif /* __r300_TEX_H__ */ diff --git a/r300/r300_texstate.c b/r300/r300_texstate.c index bbe8b1e..4ba6740 100644 --- a/r300/r300_texstate.c +++ b/r300/r300_texstate.c @@ -45,20 +45,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/simple_list.h" #include "r300_context.h" -#include "r300_state.h" -#include "r300_ioctl.h" #include "radeon_mipmap_tree.h" #include "r300_tex.h" #include "r300_reg.h" -#define VALID_FORMAT(f) ( ((f) <= MESA_FORMAT_RGBA_DXT5 \ - || ((f) >= MESA_FORMAT_RGBA_FLOAT32 && \ - (f) <= MESA_FORMAT_INTENSITY_FLOAT16)) \ - && tx_table[f].flag ) - -#define _ASSIGN(entry, format) \ - [ MESA_FORMAT_ ## entry ] = { format, 0, 1} - /* * Note that the _REV formats are the same as the non-REV formats. This is * because the REV and non-REV formats are identical as a byte string, but @@ -68,67 +58,119 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * identically. 
-- paulus */ -static const struct tx_table { - GLuint format, filter, flag; -} tx_table[] = { - /* *INDENT-OFF* */ +int32_t r300TranslateTexFormat(gl_format mesaFormat) +{ + switch (mesaFormat) + { #ifdef MESA_LITTLE_ENDIAN - _ASSIGN(RGBA8888, R300_EASY_TX_FORMAT(Y, Z, W, X, W8Z8Y8X8)), - _ASSIGN(RGBA8888_REV, R300_EASY_TX_FORMAT(Z, Y, X, W, W8Z8Y8X8)), - _ASSIGN(ARGB8888, R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8)), - _ASSIGN(ARGB8888_REV, R300_EASY_TX_FORMAT(W, Z, Y, X, W8Z8Y8X8)), + case MESA_FORMAT_RGBA8888: + return R300_EASY_TX_FORMAT(Y, Z, W, X, W8Z8Y8X8); + case MESA_FORMAT_RGBA8888_REV: + return R300_EASY_TX_FORMAT(Z, Y, X, W, W8Z8Y8X8); + case MESA_FORMAT_ARGB8888: + return R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8); + case MESA_FORMAT_ARGB8888_REV: + return R300_EASY_TX_FORMAT(W, Z, Y, X, W8Z8Y8X8); #else - _ASSIGN(RGBA8888, R300_EASY_TX_FORMAT(Z, Y, X, W, W8Z8Y8X8)), - _ASSIGN(RGBA8888_REV, R300_EASY_TX_FORMAT(Y, Z, W, X, W8Z8Y8X8)), - _ASSIGN(ARGB8888, R300_EASY_TX_FORMAT(W, Z, Y, X, W8Z8Y8X8)), - _ASSIGN(ARGB8888_REV, R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8)), + case MESA_FORMAT_RGBA8888: + return R300_EASY_TX_FORMAT(Z, Y, X, W, W8Z8Y8X8); + case MESA_FORMAT_RGBA8888_REV: + return R300_EASY_TX_FORMAT(Y, Z, W, X, W8Z8Y8X8); + case MESA_FORMAT_ARGB8888: + return R300_EASY_TX_FORMAT(W, Z, Y, X, W8Z8Y8X8); + case MESA_FORMAT_ARGB8888_REV: + return R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8); #endif - _ASSIGN(XRGB8888, R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8)), - _ASSIGN(RGB888, R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8)), - _ASSIGN(RGB565, R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5)), - _ASSIGN(RGB565_REV, R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5)), - _ASSIGN(ARGB4444, R300_EASY_TX_FORMAT(X, Y, Z, W, W4Z4Y4X4)), - _ASSIGN(ARGB4444_REV, R300_EASY_TX_FORMAT(X, Y, Z, W, W4Z4Y4X4)), - _ASSIGN(ARGB1555, R300_EASY_TX_FORMAT(X, Y, Z, W, W1Z5Y5X5)), - _ASSIGN(ARGB1555_REV, R300_EASY_TX_FORMAT(X, Y, Z, W, W1Z5Y5X5)), - _ASSIGN(AL88, R300_EASY_TX_FORMAT(X, X, X, Y, 
Y8X8)), - _ASSIGN(AL88_REV, R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8)), - _ASSIGN(RGB332, R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z3Y3X2)), - _ASSIGN(A8, R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X8)), - _ASSIGN(L8, R300_EASY_TX_FORMAT(X, X, X, ONE, X8)), - _ASSIGN(I8, R300_EASY_TX_FORMAT(X, X, X, X, X8)), - _ASSIGN(CI8, R300_EASY_TX_FORMAT(X, X, X, X, X8)), - _ASSIGN(YCBCR, R300_EASY_TX_FORMAT(X, Y, Z, ONE, G8R8_G8B8) | R300_TX_FORMAT_YUV_MODE), - _ASSIGN(YCBCR_REV, R300_EASY_TX_FORMAT(X, Y, Z, ONE, G8R8_G8B8) | R300_TX_FORMAT_YUV_MODE), - _ASSIGN(RGB_DXT1, R300_EASY_TX_FORMAT(X, Y, Z, ONE, DXT1)), - _ASSIGN(RGBA_DXT1, R300_EASY_TX_FORMAT(X, Y, Z, W, DXT1)), - _ASSIGN(RGBA_DXT3, R300_EASY_TX_FORMAT(X, Y, Z, W, DXT3)), - _ASSIGN(RGBA_DXT5, R300_EASY_TX_FORMAT(Y, Z, W, X, DXT5)), - _ASSIGN(RGBA_FLOAT32, R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R32G32B32A32)), - _ASSIGN(RGBA_FLOAT16, R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R16G16B16A16)), - _ASSIGN(RGB_FLOAT32, 0xffffffff), - _ASSIGN(RGB_FLOAT16, 0xffffffff), - _ASSIGN(ALPHA_FLOAT32, R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, FL_I32)), - _ASSIGN(ALPHA_FLOAT16, R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, FL_I16)), - _ASSIGN(LUMINANCE_FLOAT32, R300_EASY_TX_FORMAT(X, X, X, ONE, FL_I32)), - _ASSIGN(LUMINANCE_FLOAT16, R300_EASY_TX_FORMAT(X, X, X, ONE, FL_I16)), - _ASSIGN(LUMINANCE_ALPHA_FLOAT32, R300_EASY_TX_FORMAT(X, X, X, Y, FL_I32A32)), - _ASSIGN(LUMINANCE_ALPHA_FLOAT16, R300_EASY_TX_FORMAT(X, X, X, Y, FL_I16A16)), - _ASSIGN(INTENSITY_FLOAT32, R300_EASY_TX_FORMAT(X, X, X, X, FL_I32)), - _ASSIGN(INTENSITY_FLOAT16, R300_EASY_TX_FORMAT(X, X, X, X, FL_I16)), - _ASSIGN(Z16, R300_EASY_TX_FORMAT(X, X, X, X, X16)), - _ASSIGN(Z24_S8, R300_EASY_TX_FORMAT(X, X, X, X, X24_Y8)), - _ASSIGN(S8_Z24, R300_EASY_TX_FORMAT(Y, Y, Y, Y, X24_Y8)), - _ASSIGN(Z32, R300_EASY_TX_FORMAT(X, X, X, X, X32)), - /* EXT_texture_sRGB */ - _ASSIGN(SRGBA8, R300_EASY_TX_FORMAT(Y, Z, W, X, W8Z8Y8X8) | R300_TX_FORMAT_GAMMA), - _ASSIGN(SLA8, R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8) | 
R300_TX_FORMAT_GAMMA), - _ASSIGN(SL8, R300_EASY_TX_FORMAT(X, X, X, ONE, X8) | R300_TX_FORMAT_GAMMA), - /* *INDENT-ON* */ + case MESA_FORMAT_XRGB8888: + return R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8); + case MESA_FORMAT_RGB888: + return R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8); + case MESA_FORMAT_RGB565: + return R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5); + case MESA_FORMAT_RGB565_REV: + return R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5); + case MESA_FORMAT_ARGB4444: + return R300_EASY_TX_FORMAT(X, Y, Z, W, W4Z4Y4X4); + case MESA_FORMAT_ARGB4444_REV: + return R300_EASY_TX_FORMAT(X, Y, Z, W, W4Z4Y4X4); + case MESA_FORMAT_ARGB1555: + return R300_EASY_TX_FORMAT(X, Y, Z, W, W1Z5Y5X5); + case MESA_FORMAT_ARGB1555_REV: + return R300_EASY_TX_FORMAT(X, Y, Z, W, W1Z5Y5X5); + case MESA_FORMAT_AL88: + return R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8); + case MESA_FORMAT_AL88_REV: + return R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8); + case MESA_FORMAT_RGB332: + return R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z3Y3X2); + case MESA_FORMAT_A8: + return R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X8); + case MESA_FORMAT_L8: + return R300_EASY_TX_FORMAT(X, X, X, ONE, X8); + case MESA_FORMAT_I8: + return R300_EASY_TX_FORMAT(X, X, X, X, X8); + case MESA_FORMAT_CI8: + return R300_EASY_TX_FORMAT(X, X, X, X, X8); + case MESA_FORMAT_YCBCR: + return R300_EASY_TX_FORMAT(X, Y, Z, ONE, G8R8_G8B8) | R300_TX_FORMAT_YUV_MODE; + case MESA_FORMAT_YCBCR_REV: + return R300_EASY_TX_FORMAT(X, Y, Z, ONE, G8R8_G8B8) | R300_TX_FORMAT_YUV_MODE; + case MESA_FORMAT_RGB_DXT1: + return R300_EASY_TX_FORMAT(X, Y, Z, ONE, DXT1); + case MESA_FORMAT_RGBA_DXT1: + return R300_EASY_TX_FORMAT(X, Y, Z, W, DXT1); + case MESA_FORMAT_RGBA_DXT3: + return R300_EASY_TX_FORMAT(X, Y, Z, W, DXT3); + case MESA_FORMAT_RGBA_DXT5: + return R300_EASY_TX_FORMAT(Y, Z, W, X, DXT5); + case MESA_FORMAT_RGBA_FLOAT32: + return R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R32G32B32A32); + case MESA_FORMAT_RGBA_FLOAT16: + return R300_EASY_TX_FORMAT(Z, Y, X, W, 
FL_R16G16B16A16); + case MESA_FORMAT_ALPHA_FLOAT32: + return R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, FL_I32); + case MESA_FORMAT_ALPHA_FLOAT16: + return R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, FL_I16); + case MESA_FORMAT_LUMINANCE_FLOAT32: + return R300_EASY_TX_FORMAT(X, X, X, ONE, FL_I32); + case MESA_FORMAT_LUMINANCE_FLOAT16: + return R300_EASY_TX_FORMAT(X, X, X, ONE, FL_I16); + case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT32: + return R300_EASY_TX_FORMAT(X, X, X, Y, FL_I32A32); + case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT16: + return R300_EASY_TX_FORMAT(X, X, X, Y, FL_I16A16); + case MESA_FORMAT_INTENSITY_FLOAT32: + return R300_EASY_TX_FORMAT(X, X, X, X, FL_I32); + case MESA_FORMAT_INTENSITY_FLOAT16: + return R300_EASY_TX_FORMAT(X, X, X, X, FL_I16); + case MESA_FORMAT_Z16: + return R300_EASY_TX_FORMAT(X, X, X, X, X16); + case MESA_FORMAT_Z24_S8: + return R300_EASY_TX_FORMAT(X, X, X, X, X24_Y8); + case MESA_FORMAT_S8_Z24: + return R300_EASY_TX_FORMAT(Y, Y, Y, Y, X24_Y8); + case MESA_FORMAT_Z32: + return R300_EASY_TX_FORMAT(X, X, X, X, X32); + /* EXT_texture_sRGB */ + case MESA_FORMAT_SRGBA8: + return R300_EASY_TX_FORMAT(Y, Z, W, X, W8Z8Y8X8) | R300_TX_FORMAT_GAMMA; + case MESA_FORMAT_SLA8: + return R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8) | R300_TX_FORMAT_GAMMA; + case MESA_FORMAT_SL8: + return R300_EASY_TX_FORMAT(X, X, X, ONE, X8) | R300_TX_FORMAT_GAMMA; + case MESA_FORMAT_SRGB_DXT1: + return R300_EASY_TX_FORMAT(X, Y, Z, ONE, DXT1) | R300_TX_FORMAT_GAMMA; + case MESA_FORMAT_SRGBA_DXT1: + return R300_EASY_TX_FORMAT(X, Y, Z, W, DXT1) | R300_TX_FORMAT_GAMMA; + case MESA_FORMAT_SRGBA_DXT3: + return R300_EASY_TX_FORMAT(X, Y, Z, W, DXT3) | R300_TX_FORMAT_GAMMA; + case MESA_FORMAT_SRGBA_DXT5: + return R300_EASY_TX_FORMAT(Y, Z, W, X, DXT5) | R300_TX_FORMAT_GAMMA; + default: + return -1; + } }; -#undef _ASSIGN - void r300SetDepthTexMode(struct gl_texture_object *tObj) { static const GLuint formats[3][3] = { @@ -205,19 +247,18 @@ static void setup_hardware_state(r300ContextPtr rmesa, 
radeonTexObj *t) const struct gl_texture_image *firstImage; firstImage = t->base.Image[0][t->minLod]; - if (!t->image_override - && VALID_FORMAT(firstImage->TexFormat)) { + if (!t->image_override) { if (firstImage->_BaseFormat == GL_DEPTH_COMPONENT) { r300SetDepthTexMode(&t->base); } else { - t->pp_txformat = tx_table[firstImage->TexFormat].format; + int32_t txformat = r300TranslateTexFormat(firstImage->TexFormat); + if (txformat < 0) { + _mesa_problem(rmesa->radeon.glCtx, "%s: Invalid format %s", + __FUNCTION__, _mesa_get_format_name(firstImage->TexFormat)); + exit(1); + } + t->pp_txformat = (uint32_t) txformat; } - - t->pp_txfilter |= tx_table[firstImage->TexFormat].filter; - } else if (!t->image_override) { - _mesa_problem(NULL, "unexpected texture format in %s", - __FUNCTION__); - return; } if (t->image_override && t->bo) @@ -357,18 +398,15 @@ void r300SetTexOffset(__DRIcontext * pDRICtx, GLint texname, switch (depth) { case 32: t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8); - t->pp_txfilter |= tx_table[2].filter; pitch_val /= 4; break; case 24: default: t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8); - t->pp_txfilter |= tx_table[4].filter; pitch_val /= 4; break; case 16: t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5); - t->pp_txfilter |= tx_table[5].filter; pitch_val /= 2; break; } @@ -377,7 +415,7 @@ void r300SetTexOffset(__DRIcontext * pDRICtx, GLint texname, t->pp_txpitch |= pitch_val; } -void r300SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_format, __DRIdrawable *dPriv) +void r300SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_format, __DRIdrawable *dPriv) { struct gl_texture_unit *texUnit; struct gl_texture_object *texObj; @@ -393,7 +431,7 @@ void r300SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo type = GL_BGRA; format = GL_UNSIGNED_BYTE; - internalFormat = (glx_texture_format == GLX_TEXTURE_FORMAT_RGB_EXT ? 
3 : 4); + internalFormat = (texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4); radeon = pDRICtx->driverPrivate; rmesa = pDRICtx->driverPrivate; @@ -409,18 +447,7 @@ void r300SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo return; } - radeon_update_renderbuffers(pDRICtx, dPriv); - /* back & depth buffer are useless free them right away */ - rb = (void*)rfb->base.Attachment[BUFFER_DEPTH].Renderbuffer; - if (rb && rb->bo) { - radeon_bo_unref(rb->bo); - rb->bo = NULL; - } - rb = (void*)rfb->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer; - if (rb && rb->bo) { - radeon_bo_unref(rb->bo); - rb->bo = NULL; - } + radeon_update_renderbuffers(pDRICtx, dPriv, GL_TRUE); rb = rfb->color_rb[0]; if (rb->bo == NULL) { /* Failed to BO for the buffer */ @@ -454,22 +481,19 @@ void r300SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo pitch_val = rb->pitch; switch (rb->cpp) { case 4: - if (glx_texture_format == GLX_TEXTURE_FORMAT_RGB_EXT) + if (texture_format == __DRI_TEXTURE_FORMAT_RGB) t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8); else t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8); - t->pp_txfilter |= tx_table[2].filter; pitch_val /= 4; break; case 3: default: t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8); - t->pp_txfilter |= tx_table[4].filter; pitch_val /= 4; break; case 2: t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5); - t->pp_txfilter |= tx_table[5].filter; pitch_val /= 2; break; } @@ -496,5 +520,5 @@ void r300SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo void r300SetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv) { - r300SetTexBuffer2(pDRICtx, target, GLX_TEXTURE_FORMAT_RGBA_EXT, dPriv); + r300SetTexBuffer2(pDRICtx, target, __DRI_TEXTURE_FORMAT_RGBA, dPriv); } diff --git a/r300/r300_vertprog.c b/r300/r300_vertprog.c index c2f96af..129004f 100644 --- a/r300/r300_vertprog.c +++ b/r300/r300_vertprog.c @@ -34,7 +34,6 @@ USE OR OTHER 
DEALINGS IN THE SOFTWARE. #include "shader/program.h" #include "shader/programopt.h" #include "shader/prog_instruction.h" -#include "shader/prog_optimize.h" #include "shader/prog_parameter.h" #include "shader/prog_print.h" #include "shader/prog_statevars.h" @@ -80,6 +79,7 @@ static int r300VertexProgUpdateParams(GLcontext * ctx, struct r300_vertex_progra break; } + assert(src); dst[4*i] = src[0]; dst[4*i + 1] = src[1]; dst[4*i + 2] = src[2]; @@ -234,9 +234,9 @@ static struct r300_vertex_program *build_program(GLcontext *ctx, struct r300_vertex_program *vp; struct r300_vertex_program_compiler compiler; - vp = _mesa_calloc(sizeof(*vp)); - vp->Base = (struct gl_vertex_program *) _mesa_clone_program(ctx, &mesa_vp->Base); - _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key)); + vp = calloc(1, sizeof(*vp)); + vp->Base = _mesa_clone_vertex_program(ctx, mesa_vp); + memcpy(&vp->key, wanted_key, sizeof(vp->key)); rc_init(&compiler.Base); compiler.Base.Debug = (RADEON_DEBUG & RADEON_VERTS) ? GL_TRUE : GL_FALSE; @@ -312,13 +312,13 @@ struct r300_vertex_program * r300SelectAndTranslateVertexShader(GLcontext *ctx) r300SelectAndTranslateFragmentShader(ctx); } + assert(r300->selected_fp); wanted_key.FpReads = r300->selected_fp->InputsRead; wanted_key.FogAttr = r300->selected_fp->fog_attr; wanted_key.WPosAttr = r300->selected_fp->wpos_attr; for (vp = vpc->progs; vp; vp = vp->next) { - if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key)) - == 0) { + if (memcmp(&vp->key, &wanted_key, sizeof(wanted_key)) == 0) { return r300->selected_vp = vp; } } @@ -342,8 +342,6 @@ static void r300EmitVertexProgram(r300ContextPtr r300, int dest, struct r300_ver assert((code->length > 0) && (code->length % 4 == 0)); - R300_STATECHANGE( r300, vap_flush ); - switch ((dest >> 8) & 0xf) { case 0: R300_STATECHANGE(r300, vpi); @@ -365,7 +363,7 @@ static void r300EmitVertexProgram(r300ContextPtr r300, int dest, struct r300_ver break; default: fprintf(stderr, "%s:%s don't know how to handle dest 
%04x\n", __FILE__, __FUNCTION__, dest); - _mesa_exit(-1); + exit(-1); } } @@ -381,7 +379,7 @@ void r300SetupVertexProgram(r300ContextPtr rmesa) ((drm_r300_cmd_header_t *) rmesa->hw.vpi.cmd)->vpu.count = 0; ((drm_r300_cmd_header_t *) rmesa->hw.vps.cmd)->vpu.count = 0; - R300_STATECHANGE(rmesa, vap_flush); + R300_STATECHANGE(rmesa, vap_cntl); R300_STATECHANGE(rmesa, vpp); param_count = r300VertexProgUpdateParams(ctx, prog, (float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]); bump_vpu_count(rmesa->hw.vpp.cmd, param_count); diff --git a/r600/Makefile.am b/r600/Makefile.am index c2ce9c8..f0378ba 100644 --- a/r600/Makefile.am +++ b/r600/Makefile.am @@ -21,6 +21,7 @@ r600_dri_la_SOURCES = \ ../radeon/radeon_span.c \ ../radeon/radeon_texture.c \ ../radeon/radeon_queryobj.c \ + ../radeon/radeon_tex_copy.c \ ../radeon/radeon_screen.c \ r600_context.c \ r600_cmdbuf.c \ @@ -38,6 +39,7 @@ r600_dri_la_SOURCES = \ r700_render.c \ r600_tex.c \ r600_texstate.c \ + r600_blit.c \ r700_debug.c if HAVE_LIBDRM_RADEON diff --git a/r600/r600_blit.c b/r600/r600_blit.c new file mode 100644 index 0000000..244fdc4 --- /dev/null +++ b/r600/r600_blit.c @@ -0,0 +1,1659 @@ +/* + * Copyright (C) 2009 Advanced Micro Devices, Inc. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_common.h"
+#include "r600_context.h"
+
+#include "r600_blit.h"
+#include "r600_blit_shaders.h"
+#include "r600_cmdbuf.h"
+
+/**
+ * Report whether a Mesa format can be used by the blit path.
+ *
+ * The listed formats are the common ones supported as both textures and
+ * render targets.  A listed format is still rejected when
+ * _mesa_get_format_bits() reports depth bits for it, because blitting to
+ * depth has not been verified yet.
+ *
+ * \param mesa_format  format to test
+ * \return 1 if the format is accepted, 0 otherwise
+ */
+unsigned r600_check_blit(gl_format mesa_format)
+{
+    switch (mesa_format) {
+    case MESA_FORMAT_RGBA8888:
+    case MESA_FORMAT_SIGNED_RGBA8888:
+    case MESA_FORMAT_RGBA8888_REV:
+    case MESA_FORMAT_SIGNED_RGBA8888_REV:
+    case MESA_FORMAT_ARGB8888:
+    case MESA_FORMAT_XRGB8888:
+    case MESA_FORMAT_ARGB8888_REV:
+    case MESA_FORMAT_XRGB8888_REV:
+    case MESA_FORMAT_RGB565:
+    case MESA_FORMAT_RGB565_REV:
+    case MESA_FORMAT_ARGB4444:
+    case MESA_FORMAT_ARGB4444_REV:
+    case MESA_FORMAT_ARGB1555:
+    case MESA_FORMAT_ARGB1555_REV:
+    case MESA_FORMAT_AL88:
+    case MESA_FORMAT_AL88_REV:
+    case MESA_FORMAT_RGB332:
+    case MESA_FORMAT_A8:
+    case MESA_FORMAT_I8:
+    case MESA_FORMAT_CI8:
+    case MESA_FORMAT_L8:
+    case MESA_FORMAT_RGBA_FLOAT32:
+    case MESA_FORMAT_RGBA_FLOAT16:
+    case MESA_FORMAT_ALPHA_FLOAT32:
+    case MESA_FORMAT_ALPHA_FLOAT16:
+    case MESA_FORMAT_LUMINANCE_FLOAT32:
+    case MESA_FORMAT_LUMINANCE_FLOAT16:
+    case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT32:
+    case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT16:
+    case MESA_FORMAT_INTENSITY_FLOAT32: /* X, X, X, X */
+    case MESA_FORMAT_INTENSITY_FLOAT16: /* X, X, X, X */
+    case MESA_FORMAT_X8_Z24:
+    case MESA_FORMAT_S8_Z24:
+    case MESA_FORMAT_Z24_S8:
+    case MESA_FORMAT_Z16:
+    case MESA_FORMAT_Z32:
+    case MESA_FORMAT_SRGBA8:
+    case MESA_FORMAT_SLA8:
+    case MESA_FORMAT_SL8:
+        break;
+    default:
+        return 0;
+    }
+
+    /* ??? */
+    /* not sure blit to depth works or not yet */
+    if (_mesa_get_format_bits(mesa_format, GL_DEPTH_BITS) > 0)
+        return 0;
+
+    return 1;
+}
+
+/**
+ * Emit the colour-buffer 0 state describing the blit destination.
+ *
+ * Builds CB_COLOR0_SIZE / CB_COLOR0_INFO from the pitch, height and format,
+ * then emits CB_COLOR0_BASE, CB_COLOR0_TILE and CB_COLOR0_FRAG (each with a
+ * relocation against \p bo) followed by SIZE, VIEW, INFO and MASK.
+ *
+ * \param context        driver context whose command stream is written
+ * \param bo             destination buffer object
+ * \param mesa_format    destination format; selects format, component swap,
+ *                       number type and array mode
+ * \param nPitchInPixel  destination pitch in pixels
+ * \param w              destination width (not used by this function)
+ * \param h              destination height
+ * \param dst_offset     byte offset of the destination; divided by 256 to
+ *                       form the base register value
+ *
+ * For an unhandled format a message is printed and nothing is emitted.
+ */
+static inline void
+set_render_target(context_t *context, struct radeon_bo *bo, gl_format mesa_format,
+                  int nPitchInPixel, int w, int h, intptr_t dst_offset)
+{
+    uint32_t cb_color0_base, cb_color0_size = 0, cb_color0_info = 0, cb_color0_view = 0;
+    int id = 0;
+    uint32_t comp_swap, format;
+    BATCH_LOCALS(&context->radeon);
+
+    cb_color0_base = dst_offset / 256;
+
+    SETfield(cb_color0_size, (nPitchInPixel / 8) - 1,
+             PITCH_TILE_MAX_shift, PITCH_TILE_MAX_mask);
+    SETfield(cb_color0_size, ((nPitchInPixel * h) / 64) - 1,
+             SLICE_TILE_MAX_shift, SLICE_TILE_MAX_mask);
+
+    SETfield(cb_color0_info, ENDIAN_NONE, ENDIAN_shift, ENDIAN_mask);
+    /* Default array mode; the depth/stencil cases below override it. */
+    SETfield(cb_color0_info, ARRAY_LINEAR_GENERAL,
+             CB_COLOR0_INFO__ARRAY_MODE_shift, CB_COLOR0_INFO__ARRAY_MODE_mask);
+
+    SETbit(cb_color0_info, BLEND_BYPASS_bit);
+
+    switch(mesa_format) {
+    case MESA_FORMAT_RGBA8888:
+        format = COLOR_8_8_8_8;
+        comp_swap = SWAP_STD_REV;
+        SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_SIGNED_RGBA8888:
+        format = COLOR_8_8_8_8;
+        comp_swap = SWAP_STD_REV;
+        SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_SNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_RGBA8888_REV:
+        format = COLOR_8_8_8_8;
+        comp_swap = SWAP_STD;
+        SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_SIGNED_RGBA8888_REV:
+        format = COLOR_8_8_8_8;
+        comp_swap = SWAP_STD;
+        SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_SNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_ARGB8888:
+    case MESA_FORMAT_XRGB8888:
+        format = COLOR_8_8_8_8;
+        comp_swap = SWAP_ALT;
+        SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_ARGB8888_REV:
+    case MESA_FORMAT_XRGB8888_REV:
+        format = COLOR_8_8_8_8;
+        comp_swap = SWAP_ALT_REV;
+        SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_RGB565:
+        format = COLOR_5_6_5;
+        comp_swap = SWAP_STD_REV;
+        SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_RGB565_REV:
+        format = COLOR_5_6_5;
+        comp_swap = SWAP_STD;
+        SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_ARGB4444:
+        format = COLOR_4_4_4_4;
+        comp_swap = SWAP_ALT;
+        SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_ARGB4444_REV:
+        format = COLOR_4_4_4_4;
+        comp_swap = SWAP_ALT_REV;
+        SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_ARGB1555:
+        format = COLOR_1_5_5_5;
+        comp_swap = SWAP_ALT;
+        SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_ARGB1555_REV:
+        format = COLOR_1_5_5_5;
+        comp_swap = SWAP_ALT_REV;
+        SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_AL88:
+        format = COLOR_8_8;
+        comp_swap = SWAP_STD;
+        SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_AL88_REV:
+        format = COLOR_8_8;
+        comp_swap = SWAP_STD_REV;
+        SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_RGB332:
+        format = COLOR_3_3_2;
+        comp_swap = SWAP_STD_REV;
+        SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_A8:
+        format = COLOR_8;
+        comp_swap = SWAP_ALT_REV;
+        SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_I8:
+    case MESA_FORMAT_CI8:
+        format = COLOR_8;
+        comp_swap = SWAP_STD;
+        SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_L8:
+        format = COLOR_8;
+        comp_swap = SWAP_ALT;
+        SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_RGBA_FLOAT32:
+        format = COLOR_32_32_32_32_FLOAT;
+        comp_swap = SWAP_STD_REV;
+        SETbit(cb_color0_info, BLEND_FLOAT32_bit);
+        CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_RGBA_FLOAT16:
+        format = COLOR_16_16_16_16_FLOAT;
+        comp_swap = SWAP_STD_REV;
+        CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_ALPHA_FLOAT32:
+        format = COLOR_32_FLOAT;
+        comp_swap = SWAP_ALT_REV;
+        SETbit(cb_color0_info, BLEND_FLOAT32_bit);
+        CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_ALPHA_FLOAT16:
+        format = COLOR_16_FLOAT;
+        comp_swap = SWAP_ALT_REV;
+        CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_LUMINANCE_FLOAT32:
+        format = COLOR_32_FLOAT;
+        comp_swap = SWAP_ALT;
+        SETbit(cb_color0_info, BLEND_FLOAT32_bit);
+        CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_LUMINANCE_FLOAT16:
+        format = COLOR_16_FLOAT;
+        comp_swap = SWAP_ALT;
+        CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT32:
+        format = COLOR_32_32_FLOAT;
+        comp_swap = SWAP_ALT_REV;
+        SETbit(cb_color0_info, BLEND_FLOAT32_bit);
+        CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT16:
+        format = COLOR_16_16_FLOAT;
+        comp_swap = SWAP_ALT_REV;
+        CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_INTENSITY_FLOAT32: /* X, X, X, X */
+        format = COLOR_32_FLOAT;
+        comp_swap = SWAP_STD;
+        SETbit(cb_color0_info, BLEND_FLOAT32_bit);
+        CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_INTENSITY_FLOAT16: /* X, X, X, X */
+        format = COLOR_16_FLOAT;
+        comp_swap = SWAP_STD;
+        CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_X8_Z24:
+    case MESA_FORMAT_S8_Z24:
+        format = COLOR_8_24;
+        comp_swap = SWAP_STD;
+        SETfield(cb_color0_info, ARRAY_1D_TILED_THIN1,
+                 CB_COLOR0_INFO__ARRAY_MODE_shift, CB_COLOR0_INFO__ARRAY_MODE_mask);
+        CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_Z24_S8:
+        format = COLOR_24_8;
+        comp_swap = SWAP_STD;
+        SETfield(cb_color0_info, ARRAY_1D_TILED_THIN1,
+                 CB_COLOR0_INFO__ARRAY_MODE_shift, CB_COLOR0_INFO__ARRAY_MODE_mask);
+        CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_Z16:
+        format = COLOR_16;
+        comp_swap = SWAP_STD;
+        SETfield(cb_color0_info, ARRAY_1D_TILED_THIN1,
+                 CB_COLOR0_INFO__ARRAY_MODE_shift, CB_COLOR0_INFO__ARRAY_MODE_mask);
+        CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_Z32:
+        format = COLOR_32;
+        comp_swap = SWAP_STD;
+        SETfield(cb_color0_info, ARRAY_1D_TILED_THIN1,
+                 CB_COLOR0_INFO__ARRAY_MODE_shift, CB_COLOR0_INFO__ARRAY_MODE_mask);
+        CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_SRGBA8:
+        format = COLOR_8_8_8_8;
+        comp_swap = SWAP_STD_REV;
+        SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_SRGB, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_SLA8:
+        format = COLOR_8_8;
+        comp_swap = SWAP_ALT_REV;
+        SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_SRGB, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_SL8:
+        format = COLOR_8;
+        comp_swap = SWAP_ALT_REV;
+        SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_SRGB, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    default:
+        fprintf(stderr,"Invalid format for copy %s\n",_mesa_get_format_name(mesa_format));
+        /* A string literal is never NULL, so it has to be negated for the
+         * assertion to trip; without NDEBUG this now aborts, otherwise we
+         * fall through to the early return. */
+        assert(!"Invalid format for US output\n");
+        return;
+    }
+
+    SETfield(cb_color0_info, format, CB_COLOR0_INFO__FORMAT_shift,
+             CB_COLOR0_INFO__FORMAT_mask);
+    SETfield(cb_color0_info, comp_swap, COMP_SWAP_shift, COMP_SWAP_mask);
+
+    BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+    R600_OUT_BATCH_REGSEQ(CB_COLOR0_BASE + (4 * id), 1);
+    R600_OUT_BATCH(cb_color0_base);
+    R600_OUT_BATCH_RELOC(0,
+                         bo,
+                         0,
+                         0, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT, 0);
+    END_BATCH();
+
+    /* Extra SURFACE_BASE_UPDATE packet only for families strictly between
+     * R600 and RV770. */
+    if ((context->radeon.radeonScreen->chip_family > CHIP_FAMILY_R600) &&
+        (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)) {
+        BEGIN_BATCH_NO_AUTOSTATE(2);
+        R600_OUT_BATCH(CP_PACKET3(R600_IT_SURFACE_BASE_UPDATE, 0));
+        R600_OUT_BATCH((2 << id));
+        END_BATCH();
+    }
+
+    /* Set CMASK & TILE buffer to the offset of color buffer as
+     * we don't use those this shouldn't cause any issue and we
+     * then have a valid cmd stream
+     */
+    BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+    R600_OUT_BATCH_REGSEQ(CB_COLOR0_TILE + (4 * id), 1);
+    R600_OUT_BATCH(cb_color0_base);
+    R600_OUT_BATCH_RELOC(0,
+                         bo,
+                         0,
+                         0, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT, 0);
+    END_BATCH();
+    BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+    R600_OUT_BATCH_REGSEQ(CB_COLOR0_FRAG + (4 * id), 1);
+    R600_OUT_BATCH(cb_color0_base);
+    R600_OUT_BATCH_RELOC(0,
+                         bo,
+                         0,
+                         0, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT, 0);
+    END_BATCH();
+
+    BEGIN_BATCH_NO_AUTOSTATE(12);
+    R600_OUT_BATCH_REGVAL(CB_COLOR0_SIZE + (4 * id), cb_color0_size);
+    R600_OUT_BATCH_REGVAL(CB_COLOR0_VIEW + (4 * id), cb_color0_view);
+    R600_OUT_BATCH_REGVAL(CB_COLOR0_INFO + (4 * id), cb_color0_info);
+    R600_OUT_BATCH_REGVAL(CB_COLOR0_MASK + (4 * id), 0);
+    END_BATCH();
+
+    COMMIT_BATCH();
+
+}
+
+/**
+ * Create the blit buffer object once and copy the blit shaders into it.
+ *
+ * Opens a 4096-byte GTT buffer, maps it for writing and copies r6xx_vs to
+ * dword 128 (byte 512) and r6xx_ps to dword 256 (byte 1024); these are the
+ * byte offsets set_shaders() programs as program start addresses.  Does
+ * nothing when context->blit_bo_loaded is already 1.
+ *
+ * If the buffer cannot be opened or mapped, a message is printed and
+ * blit_bo_loaded is left unset so a later call retries.  The function
+ * returns void, so callers can only detect failure through that flag.
+ *
+ * \param ctx  GL context owning the blit state
+ */
+static inline void load_shaders(GLcontext * ctx)
+{
+
+    radeonContextPtr radeonctx = RADEON_CONTEXT(ctx);
+    context_t *context = R700_CONTEXT(ctx);
+    unsigned int i;
+    int size;
+    uint32_t *shader;
+
+    if (context->blit_bo_loaded == 1)
+        return;
+
+    size = 4096;
+    context->blit_bo = radeon_bo_open(radeonctx->radeonScreen->bom, 0,
+                                      size, 256, RADEON_GEM_DOMAIN_GTT, 0);
+    if (!context->blit_bo) {
+        fprintf(stderr, "%s: failed to allocate blit shader buffer\n", __FUNCTION__);
+        return;
+    }
+
+    if (radeon_bo_map(context->blit_bo, 1)) {
+        fprintf(stderr, "%s: failed to map blit shader buffer\n", __FUNCTION__);
+        /* Drop the reference so the retry does not leak this buffer. */
+        radeon_bo_unref(context->blit_bo);
+        context->blit_bo = NULL;
+        return;
+    }
+    shader = context->blit_bo->ptr;
+
+    for(i=0; i<sizeof(r6xx_vs)/4; i++) {
+        shader[128+i] = r6xx_vs[i];
+    }
+    for(i=0; i<sizeof(r6xx_ps)/4; i++) {
+        shader[256+i] = r6xx_ps[i];
+    }
+
+    radeon_bo_unmap(context->blit_bo);
+    context->blit_bo_loaded = 1;
+
+}
+
+/**
+ * Emit the shader-program and interpolator state used by the blit.
+ *
+ * Points the FS and VS program start at byte 512 and the PS program start at
+ * byte 1024 of context->blit_bo (register values are the offsets >> 8), each
+ * with a GTT relocation, then writes the matching RESOURCES / CF_OFFSET /
+ * EXPORTS registers and the SPI setup.
+ *
+ * \param context  driver context; context->blit_bo must already hold the
+ *                 shaders (see load_shaders())
+ */
+static inline void
+set_shaders(context_t *context)
+{
+    struct radeon_bo * pbo = context->blit_bo;
+    BATCH_LOCALS(&context->radeon);
+
+    uint32_t sq_pgm_start_fs = (512 >> 8);
+    uint32_t sq_pgm_resources_fs = 0;
+    uint32_t sq_pgm_cf_offset_fs = 0;
+
+    uint32_t sq_pgm_start_vs = (512 >> 8);
+    uint32_t sq_pgm_resources_vs = (1 << NUM_GPRS_shift);
+    uint32_t sq_pgm_cf_offset_vs = 0;
+
+    uint32_t sq_pgm_start_ps = (1024 >> 8);
+    uint32_t sq_pgm_resources_ps = (1 << NUM_GPRS_shift);
+    uint32_t sq_pgm_cf_offset_ps = 0;
+    uint32_t sq_pgm_exports_ps = (1 << 1);
+
+    r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit);
+
+    /* FS */
+    BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+    R600_OUT_BATCH_REGSEQ(SQ_PGM_START_FS, 1);
+    R600_OUT_BATCH(sq_pgm_start_fs);
+    R600_OUT_BATCH_RELOC(sq_pgm_start_fs,
+                         pbo,
+                         sq_pgm_start_fs,
+                         RADEON_GEM_DOMAIN_GTT, 0, 0);
+    END_BATCH();
+
+    BEGIN_BATCH_NO_AUTOSTATE(6);
+    R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_FS, sq_pgm_resources_fs);
+    R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_FS, sq_pgm_cf_offset_fs);
+    END_BATCH();
+
+    /* VS */
+    BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+    R600_OUT_BATCH_REGSEQ(SQ_PGM_START_VS, 1);
+    R600_OUT_BATCH(sq_pgm_start_vs);
+    R600_OUT_BATCH_RELOC(sq_pgm_start_vs,
+                         pbo,
+                         sq_pgm_start_vs,
+                         RADEON_GEM_DOMAIN_GTT, 0, 0);
+    END_BATCH();
+
+    BEGIN_BATCH_NO_AUTOSTATE(6);
+    R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_VS, sq_pgm_resources_vs);
+    R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_VS, sq_pgm_cf_offset_vs);
+    END_BATCH();
+
+    /* PS */
+    BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+    R600_OUT_BATCH_REGSEQ(SQ_PGM_START_PS, 1);
+    R600_OUT_BATCH(sq_pgm_start_ps);
+    R600_OUT_BATCH_RELOC(sq_pgm_start_ps,
+                         pbo,
+                         sq_pgm_start_ps,
+                         RADEON_GEM_DOMAIN_GTT, 0, 0);
+    END_BATCH();
+
+    BEGIN_BATCH_NO_AUTOSTATE(9);
+    R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_PS, sq_pgm_resources_ps);
+    R600_OUT_BATCH_REGVAL(SQ_PGM_EXPORTS_PS, sq_pgm_exports_ps);
+    R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_PS, sq_pgm_cf_offset_ps);
+    END_BATCH();
+
+    BEGIN_BATCH_NO_AUTOSTATE(18);
+    R600_OUT_BATCH_REGVAL(SPI_VS_OUT_CONFIG, 0); //EXPORT_COUNT is - 1
+    R600_OUT_BATCH_REGVAL(SPI_VS_OUT_ID_0, 0);
+    R600_OUT_BATCH_REGVAL(SPI_PS_INPUT_CNTL_0, SEL_CENTROID_bit);
+    R600_OUT_BATCH_REGVAL(SPI_PS_IN_CONTROL_0, (1 << NUM_INTERP_shift));
+    R600_OUT_BATCH_REGVAL(SPI_PS_IN_CONTROL_1, 0);
+    R600_OUT_BATCH_REGVAL(SPI_INTERP_CONTROL_0, 0);
+    END_BATCH();
+
+    COMMIT_BATCH();
+
+}
+
+/**
+ * Emit the vertex-fetch resource that reads from context->blit_bo.
+ *
+ * Zeroes the SQ_VTX_BASE_VTX_LOC and SQ_VTX_START_INST_LOC control
+ * constants, flushes the relevant cache for the buffer (TC on RV610, RV620,
+ * RS780, RS880 and RV710; VC on every other family) and then sets the VS
+ * fetch resource with a GTT relocation against the blit buffer.
+ *
+ * \param context  driver context whose command stream is written
+ */
+static inline void
+set_vtx_resource(context_t *context)
+{
+    struct radeon_bo *bo = context->blit_bo;
+    BATCH_LOCALS(&context->radeon);
+
+    BEGIN_BATCH_NO_AUTOSTATE(6);
+    R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 1));
+    R600_OUT_BATCH(mmSQ_VTX_BASE_VTX_LOC - ASIC_CTL_CONST_BASE_INDEX);
+    R600_OUT_BATCH(0);
+
+    R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 1));
+    R600_OUT_BATCH(mmSQ_VTX_START_INST_LOC - ASIC_CTL_CONST_BASE_INDEX);
+    R600_OUT_BATCH(0);
+    END_BATCH();
+    COMMIT_BATCH();
+
+    if ((context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV610) ||
+        (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV620) ||
+        (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RS780) ||
+        (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RS880) ||
+        (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV710))
+        r700SyncSurf(context, bo, RADEON_GEM_DOMAIN_GTT, 0, TC_ACTION_ENA_bit);
+    else
+        r700SyncSurf(context, bo, RADEON_GEM_DOMAIN_GTT, 0, VC_ACTION_ENA_bit);
+
+    BEGIN_BATCH_NO_AUTOSTATE(9 + 2);
+
+    /* NOTE(review): 48 - 1 with a stride of 16 looks like three 16-byte
+     * vertices at offset 0 of the buffer - confirm against the code that
+     * writes the vertex data. */
+    R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
+    R600_OUT_BATCH(SQ_FETCH_RESOURCE_VS_OFFSET * FETCH_RESOURCE_STRIDE);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(48 - 1);
+    R600_OUT_BATCH(16 << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift);
+    R600_OUT_BATCH(1 << MEM_REQUEST_SIZE_shift);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(SQ_TEX_VTX_VALID_BUFFER << SQ_TEX_RESOURCE_WORD6_0__TYPE_shift);
+    R600_OUT_BATCH_RELOC(SQ_VTX_CONSTANT_WORD0_0,
+                         bo,
+                         SQ_VTX_CONSTANT_WORD0_0,
+                         RADEON_GEM_DOMAIN_GTT, 0, 0);
+    END_BATCH();
+    COMMIT_BATCH();
+
+}
+
+static inline void +set_tex_resource(context_t * context, + gl_format mesa_format, struct radeon_bo *bo, int w, int h, + int TexelPitch, intptr_t src_offset) +{ + uint32_t sq_tex_resource0, sq_tex_resource1, sq_tex_resource2, sq_tex_resource4, sq_tex_resource6; + + sq_tex_resource0 = sq_tex_resource1 = sq_tex_resource2 = sq_tex_resource4 = sq_tex_resource6 = 0; + BATCH_LOCALS(&context->radeon); + + SETfield(sq_tex_resource0, SQ_TEX_DIM_2D, DIM_shift, DIM_mask); + SETfield(sq_tex_resource0, ARRAY_LINEAR_GENERAL, + SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift, + SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask); + + switch (mesa_format) { + case MESA_FORMAT_RGBA8888: + case MESA_FORMAT_SIGNED_RGBA8888: + SETfield(sq_tex_resource1, FMT_8_8_8_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + if (mesa_format == MESA_FORMAT_SIGNED_RGBA8888) { + SETfield(sq_tex_resource4, SQ_FORMAT_COMP_SIGNED, + FORMAT_COMP_X_shift, FORMAT_COMP_X_mask); + SETfield(sq_tex_resource4, SQ_FORMAT_COMP_SIGNED, + FORMAT_COMP_Y_shift, FORMAT_COMP_Y_mask); + SETfield(sq_tex_resource4, SQ_FORMAT_COMP_SIGNED, + FORMAT_COMP_Z_shift, FORMAT_COMP_Z_mask); + SETfield(sq_tex_resource4, SQ_FORMAT_COMP_SIGNED, + FORMAT_COMP_W_shift, FORMAT_COMP_W_mask); + } + break; + case MESA_FORMAT_RGBA8888_REV: + case MESA_FORMAT_SIGNED_RGBA8888_REV: + SETfield(sq_tex_resource1, FMT_8_8_8_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + 
SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + if (mesa_format == MESA_FORMAT_SIGNED_RGBA8888_REV) { + SETfield(sq_tex_resource4, SQ_FORMAT_COMP_SIGNED, + FORMAT_COMP_X_shift, FORMAT_COMP_X_mask); + SETfield(sq_tex_resource4, SQ_FORMAT_COMP_SIGNED, + FORMAT_COMP_Y_shift, FORMAT_COMP_Y_mask); + SETfield(sq_tex_resource4, SQ_FORMAT_COMP_SIGNED, + FORMAT_COMP_Z_shift, FORMAT_COMP_Z_mask); + SETfield(sq_tex_resource4, SQ_FORMAT_COMP_SIGNED, + FORMAT_COMP_W_shift, FORMAT_COMP_W_mask); + } + break; + case MESA_FORMAT_ARGB8888: + SETfield(sq_tex_resource1, FMT_8_8_8_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_XRGB8888: + SETfield(sq_tex_resource1, FMT_8_8_8_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, 
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_ARGB8888_REV: + SETfield(sq_tex_resource1, FMT_8_8_8_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_XRGB8888_REV: + SETfield(sq_tex_resource1, FMT_8_8_8_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_RGB565: + SETfield(sq_tex_resource1, FMT_5_6_5, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Y, + 
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_RGB565_REV: + SETfield(sq_tex_resource1, FMT_5_6_5, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_ARGB4444: + SETfield(sq_tex_resource1, FMT_4_4_4_4, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_ARGB4444_REV: + SETfield(sq_tex_resource1, FMT_4_4_4_4, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + 
SETfield(sq_tex_resource4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_ARGB1555: + SETfield(sq_tex_resource1, FMT_1_5_5_5, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_ARGB1555_REV: + SETfield(sq_tex_resource1, FMT_1_5_5_5, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_AL88: + case MESA_FORMAT_AL88_REV: /* TODO : Check this. 
*/ + SETfield(sq_tex_resource1, FMT_8_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_RGB332: + SETfield(sq_tex_resource1, FMT_3_3_2, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_A8: /* ZERO, ZERO, ZERO, X */ + SETfield(sq_tex_resource1, FMT_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + 
case MESA_FORMAT_L8: /* X, X, X, ONE */ + SETfield(sq_tex_resource1, FMT_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_I8: /* X, X, X, X */ + case MESA_FORMAT_CI8: + SETfield(sq_tex_resource1, FMT_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_RGBA_FLOAT32: + SETfield(sq_tex_resource1, FMT_32_32_32_32_FLOAT, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_W, + 
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_RGBA_FLOAT16: + SETfield(sq_tex_resource1, FMT_16_16_16_16_FLOAT, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_ALPHA_FLOAT32: /* ZERO, ZERO, ZERO, X */ + SETfield(sq_tex_resource1, FMT_32_FLOAT, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_ALPHA_FLOAT16: /* ZERO, ZERO, ZERO, X */ + SETfield(sq_tex_resource1, FMT_16_FLOAT, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_0, + 
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_LUMINANCE_FLOAT32: /* X, X, X, ONE */ + SETfield(sq_tex_resource1, FMT_32_FLOAT, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_LUMINANCE_FLOAT16: /* X, X, X, ONE */ + SETfield(sq_tex_resource1, FMT_16_FLOAT, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT32: + SETfield(sq_tex_resource1, FMT_32_32_FLOAT, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + 
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT16: + SETfield(sq_tex_resource1, FMT_16_16_FLOAT, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_INTENSITY_FLOAT32: /* X, X, X, X */ + SETfield(sq_tex_resource1, FMT_32_FLOAT, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_INTENSITY_FLOAT16: /* X, X, X, X */ + SETfield(sq_tex_resource1, FMT_16_FLOAT, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_X, + 
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_Z16: + SETbit(sq_tex_resource0, TILE_TYPE_bit); + SETfield(sq_tex_resource0, ARRAY_1D_TILED_THIN1, + SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift, + SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask); + SETfield(sq_tex_resource1, FMT_16, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_X8_Z24: + SETbit(sq_tex_resource0, TILE_TYPE_bit); + SETfield(sq_tex_resource0, ARRAY_1D_TILED_THIN1, + SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift, + SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask); + SETfield(sq_tex_resource1, FMT_8_24, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, 
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_S8_Z24: + SETbit(sq_tex_resource0, TILE_TYPE_bit); + SETfield(sq_tex_resource0, ARRAY_1D_TILED_THIN1, + SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift, + SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask); + SETfield(sq_tex_resource1, FMT_8_24, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_Z24_S8: + SETbit(sq_tex_resource0, TILE_TYPE_bit); + SETfield(sq_tex_resource0, ARRAY_1D_TILED_THIN1, + SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift, + SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask); + SETfield(sq_tex_resource1, FMT_24_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_Z32: + SETbit(sq_tex_resource0, TILE_TYPE_bit); + SETfield(sq_tex_resource0, ARRAY_1D_TILED_THIN1, + 
SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift, + SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask); + SETfield(sq_tex_resource1, FMT_32, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_S8: + SETbit(sq_tex_resource0, TILE_TYPE_bit); + SETfield(sq_tex_resource0, ARRAY_1D_TILED_THIN1, + SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift, + SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask); + SETfield(sq_tex_resource1, FMT_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_SRGBA8: + SETfield(sq_tex_resource1, FMT_8_8_8_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + 
SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + SETbit(sq_tex_resource4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit); + break; + case MESA_FORMAT_SLA8: + SETfield(sq_tex_resource1, FMT_8_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + SETbit(sq_tex_resource4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit); + break; + case MESA_FORMAT_SL8: /* X, X, X, ONE */ + SETfield(sq_tex_resource1, FMT_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + SETbit(sq_tex_resource4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit); + break; + default: + fprintf(stderr,"Invalid format for copy %s\n",_mesa_get_format_name(mesa_format)); + assert("Invalid format for US output\n"); + return; + }; + + 
SETfield(sq_tex_resource0, (TexelPitch/8)-1, PITCH_shift, PITCH_mask); + SETfield(sq_tex_resource0, w - 1, TEX_WIDTH_shift, TEX_WIDTH_mask); + SETfield(sq_tex_resource1, h - 1, TEX_HEIGHT_shift, TEX_HEIGHT_mask); + + sq_tex_resource2 = src_offset / 256; + + SETfield(sq_tex_resource6, SQ_TEX_VTX_VALID_TEXTURE, + SQ_TEX_RESOURCE_WORD6_0__TYPE_shift, + SQ_TEX_RESOURCE_WORD6_0__TYPE_mask); + + r700SyncSurf(context, bo, + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, + 0, TC_ACTION_ENA_bit); + + BEGIN_BATCH_NO_AUTOSTATE(9 + 4); + R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7)); + R600_OUT_BATCH(0 * 7); + + R600_OUT_BATCH(sq_tex_resource0); + R600_OUT_BATCH(sq_tex_resource1); + R600_OUT_BATCH(sq_tex_resource2); + R600_OUT_BATCH(0); //SQ_TEX_RESOURCE3 + R600_OUT_BATCH(sq_tex_resource4); + R600_OUT_BATCH(0); //SQ_TEX_RESOURCE5 + R600_OUT_BATCH(sq_tex_resource6); + R600_OUT_BATCH_RELOC(0, + bo, + 0, + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); + R600_OUT_BATCH_RELOC(0, + bo, + 0, + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); + END_BATCH(); + COMMIT_BATCH(); +} + +static inline void +set_tex_sampler(context_t * context) +{ + uint32_t sq_tex_sampler_word0 = 0, sq_tex_sampler_word1 = 0, sq_tex_sampler_word2 = 0; + int i = 0; + + SETbit(sq_tex_sampler_word2, SQ_TEX_SAMPLER_WORD2_0__TYPE_bit); + + BATCH_LOCALS(&context->radeon); + + BEGIN_BATCH_NO_AUTOSTATE(5); + R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_SAMPLER, 3)); + R600_OUT_BATCH(i * 3); + R600_OUT_BATCH(sq_tex_sampler_word0); + R600_OUT_BATCH(sq_tex_sampler_word1); + R600_OUT_BATCH(sq_tex_sampler_word2); + END_BATCH(); + +} + +static inline void +set_scissors(context_t *context, int x1, int y1, int x2, int y2) +{ + BATCH_LOCALS(&context->radeon); + + BEGIN_BATCH_NO_AUTOSTATE(17); + R600_OUT_BATCH_REGSEQ(PA_SC_SCREEN_SCISSOR_TL, 2); + R600_OUT_BATCH((x1 << 0) | (y1 << 16)); + R600_OUT_BATCH((x2 << 0) | (y2 << 16)); + + R600_OUT_BATCH_REGSEQ(PA_SC_WINDOW_OFFSET, 3); + R600_OUT_BATCH(0); 
//PA_SC_WINDOW_OFFSET + R600_OUT_BATCH((x1 << 0) | (y1 << 16) | (WINDOW_OFFSET_DISABLE_bit)); //PA_SC_WINDOW_SCISSOR_TL + R600_OUT_BATCH((x2 << 0) | (y2 << 16)); + + R600_OUT_BATCH_REGSEQ(PA_SC_GENERIC_SCISSOR_TL, 2); + R600_OUT_BATCH((x1 << 0) | (y1 << 16) | (WINDOW_OFFSET_DISABLE_bit)); + R600_OUT_BATCH((x2 << 0) | (y2 << 16)); + + /* XXX 16 of these PA_SC_VPORT_SCISSOR_0_TL_num ... */ + R600_OUT_BATCH_REGSEQ(PA_SC_VPORT_SCISSOR_0_TL, 2 ); + R600_OUT_BATCH((x1 << 0) | (y1 << 16) | (WINDOW_OFFSET_DISABLE_bit)); + R600_OUT_BATCH((x2 << 0) | (y2 << 16)); + END_BATCH(); + + COMMIT_BATCH(); + +} + +static inline void +set_vb_data(context_t * context, int src_x, int src_y, int dst_x, int dst_y, + int w, int h, int src_h, unsigned flip_y) +{ + float *vb; + radeon_bo_map(context->blit_bo, 1); + vb = context->blit_bo->ptr; + + vb[0] = (float)(dst_x); + vb[1] = (float)(dst_y); + vb[2] = (float)(src_x); + vb[3] = (flip_y) ? (float)(src_h - src_y) : (float)src_y; + + vb[4] = (float)(dst_x); + vb[5] = (float)(dst_y + h); + vb[6] = (float)(src_x); + vb[7] = (flip_y) ? (float)(src_h - (src_y + h)) : (float)(src_y + h); + + vb[8] = (float)(dst_x + w); + vb[9] = (float)(dst_y + h); + vb[10] = (float)(src_x + w); + vb[11] = (flip_y) ? 
(float)(src_h - (src_y + h)) : (float)(src_y + h); + + radeon_bo_unmap(context->blit_bo); + +} + +static inline void +draw_auto(context_t *context) +{ + BATCH_LOCALS(&context->radeon); + uint32_t vgt_primitive_type = 0, vgt_index_type = 0, vgt_draw_initiator = 0, vgt_num_indices; + + SETfield(vgt_primitive_type, DI_PT_RECTLIST, + VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, + VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask); + SETfield(vgt_index_type, DI_INDEX_SIZE_16_BIT, INDEX_TYPE_shift, + INDEX_TYPE_mask); + SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, + MAJOR_MODE_mask); + SETfield(vgt_draw_initiator, DI_SRC_SEL_AUTO_INDEX, SOURCE_SELECT_shift, + SOURCE_SELECT_mask); + + vgt_num_indices = 3; + + BEGIN_BATCH_NO_AUTOSTATE(10); + // prim + R600_OUT_BATCH_REGSEQ(VGT_PRIMITIVE_TYPE, 1); + R600_OUT_BATCH(vgt_primitive_type); + // index type + R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0)); + R600_OUT_BATCH(vgt_index_type); + // num instances + R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0)); + R600_OUT_BATCH(1); + // + R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1)); + R600_OUT_BATCH(vgt_num_indices); + R600_OUT_BATCH(vgt_draw_initiator); + + END_BATCH(); + COMMIT_BATCH(); +} + +static inline void +set_default_state(context_t *context) +{ + int ps_prio = 0; + int vs_prio = 1; + int gs_prio = 2; + int es_prio = 3; + int num_ps_gprs; + int num_vs_gprs; + int num_gs_gprs; + int num_es_gprs; + int num_temp_gprs; + int num_ps_threads; + int num_vs_threads; + int num_gs_threads; + int num_es_threads; + int num_ps_stack_entries; + int num_vs_stack_entries; + int num_gs_stack_entries; + int num_es_stack_entries; + uint32_t sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2; + uint32_t sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2; + uint32_t ta_cntl_aux, db_watermarks, sq_dyn_gpr_cntl_ps_flush_req, db_debug; + BATCH_LOCALS(&context->radeon); + + switch (context->radeon.radeonScreen->chip_family) { + case CHIP_FAMILY_R600: + 
num_ps_gprs = 192; + num_vs_gprs = 56; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 136; + num_vs_threads = 48; + num_gs_threads = 4; + num_es_threads = 4; + num_ps_stack_entries = 128; + num_vs_stack_entries = 128; + num_gs_stack_entries = 0; + num_es_stack_entries = 0; + break; + case CHIP_FAMILY_RV630: + case CHIP_FAMILY_RV635: + num_ps_gprs = 84; + num_vs_gprs = 36; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 144; + num_vs_threads = 40; + num_gs_threads = 4; + num_es_threads = 4; + num_ps_stack_entries = 40; + num_vs_stack_entries = 40; + num_gs_stack_entries = 32; + num_es_stack_entries = 16; + break; + case CHIP_FAMILY_RV610: + case CHIP_FAMILY_RV620: + case CHIP_FAMILY_RS780: + case CHIP_FAMILY_RS880: + default: + num_ps_gprs = 84; + num_vs_gprs = 36; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 136; + num_vs_threads = 48; + num_gs_threads = 4; + num_es_threads = 4; + num_ps_stack_entries = 40; + num_vs_stack_entries = 40; + num_gs_stack_entries = 32; + num_es_stack_entries = 16; + break; + case CHIP_FAMILY_RV670: + num_ps_gprs = 144; + num_vs_gprs = 40; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 136; + num_vs_threads = 48; + num_gs_threads = 4; + num_es_threads = 4; + num_ps_stack_entries = 40; + num_vs_stack_entries = 40; + num_gs_stack_entries = 32; + num_es_stack_entries = 16; + break; + case CHIP_FAMILY_RV770: + num_ps_gprs = 192; + num_vs_gprs = 56; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 188; + num_vs_threads = 60; + num_gs_threads = 0; + num_es_threads = 0; + num_ps_stack_entries = 256; + num_vs_stack_entries = 256; + num_gs_stack_entries = 0; + num_es_stack_entries = 0; + break; + case CHIP_FAMILY_RV730: + case CHIP_FAMILY_RV740: + num_ps_gprs = 84; + num_vs_gprs = 36; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 188; + num_vs_threads = 60; + 
num_gs_threads = 0; + num_es_threads = 0; + num_ps_stack_entries = 128; + num_vs_stack_entries = 128; + num_gs_stack_entries = 0; + num_es_stack_entries = 0; + break; + case CHIP_FAMILY_RV710: + num_ps_gprs = 192; + num_vs_gprs = 56; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 144; + num_vs_threads = 48; + num_gs_threads = 0; + num_es_threads = 0; + num_ps_stack_entries = 128; + num_vs_stack_entries = 128; + num_gs_stack_entries = 0; + num_es_stack_entries = 0; + break; + } + + sq_config = 0; + if ((context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV610) || + (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV620) || + (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RS780) || + (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RS880) || + (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV710)) + CLEARbit(sq_config, VC_ENABLE_bit); + else + SETbit(sq_config, VC_ENABLE_bit); + SETbit(sq_config, DX9_CONSTS_bit); + SETbit(sq_config, ALU_INST_PREFER_VECTOR_bit); + SETfield(sq_config, ps_prio, PS_PRIO_shift, PS_PRIO_mask); + SETfield(sq_config, vs_prio, VS_PRIO_shift, VS_PRIO_mask); + SETfield(sq_config, gs_prio, GS_PRIO_shift, GS_PRIO_mask); + SETfield(sq_config, es_prio, ES_PRIO_shift, ES_PRIO_mask); + + sq_gpr_resource_mgmt_1 = 0; + SETfield(sq_gpr_resource_mgmt_1, num_ps_gprs, NUM_PS_GPRS_shift, NUM_PS_GPRS_mask); + SETfield(sq_gpr_resource_mgmt_1, num_vs_gprs, NUM_VS_GPRS_shift, NUM_VS_GPRS_mask); + SETfield(sq_gpr_resource_mgmt_1, num_temp_gprs, + NUM_CLAUSE_TEMP_GPRS_shift, NUM_CLAUSE_TEMP_GPRS_mask); + + sq_gpr_resource_mgmt_2 = 0; + SETfield(sq_gpr_resource_mgmt_2, num_gs_gprs, NUM_GS_GPRS_shift, NUM_GS_GPRS_mask); + SETfield(sq_gpr_resource_mgmt_2, num_es_gprs, NUM_ES_GPRS_shift, NUM_ES_GPRS_mask); + + sq_thread_resource_mgmt = 0; + SETfield(sq_thread_resource_mgmt, num_ps_threads, + NUM_PS_THREADS_shift, NUM_PS_THREADS_mask); + SETfield(sq_thread_resource_mgmt, num_vs_threads, + 
NUM_VS_THREADS_shift, NUM_VS_THREADS_mask); + SETfield(sq_thread_resource_mgmt, num_gs_threads, + NUM_GS_THREADS_shift, NUM_GS_THREADS_mask); + SETfield(sq_thread_resource_mgmt, num_es_threads, + NUM_ES_THREADS_shift, NUM_ES_THREADS_mask); + + sq_stack_resource_mgmt_1 = 0; + SETfield(sq_stack_resource_mgmt_1, num_ps_stack_entries, + NUM_PS_STACK_ENTRIES_shift, NUM_PS_STACK_ENTRIES_mask); + SETfield(sq_stack_resource_mgmt_1, num_vs_stack_entries, + NUM_VS_STACK_ENTRIES_shift, NUM_VS_STACK_ENTRIES_mask); + + sq_stack_resource_mgmt_2 = 0; + SETfield(sq_stack_resource_mgmt_2, num_gs_stack_entries, + NUM_GS_STACK_ENTRIES_shift, NUM_GS_STACK_ENTRIES_mask); + SETfield(sq_stack_resource_mgmt_2, num_es_stack_entries, + NUM_ES_STACK_ENTRIES_shift, NUM_ES_STACK_ENTRIES_mask); + + ta_cntl_aux = 0; + SETfield(ta_cntl_aux, 28, TD_FIFO_CREDIT_shift, TD_FIFO_CREDIT_mask); + db_watermarks = 0; + SETfield(db_watermarks, 4, DEPTH_FREE_shift, DEPTH_FREE_mask); + SETfield(db_watermarks, 16, DEPTH_FLUSH_shift, DEPTH_FLUSH_mask); + SETfield(db_watermarks, 0, FORCE_SUMMARIZE_shift, FORCE_SUMMARIZE_mask); + SETfield(db_watermarks, 4, DEPTH_PENDING_FREE_shift, DEPTH_PENDING_FREE_mask); + sq_dyn_gpr_cntl_ps_flush_req = 0; + db_debug = 0; + if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) { + SETfield(ta_cntl_aux, 3, GRADIENT_CREDIT_shift, GRADIENT_CREDIT_mask); + db_debug = 0x82000000; + SETfield(db_watermarks, 16, DEPTH_CACHELINE_FREE_shift, DEPTH_CACHELINE_FREE_mask); + } else { + SETfield(ta_cntl_aux, 2, GRADIENT_CREDIT_shift, GRADIENT_CREDIT_mask); + SETfield(db_watermarks, 4, DEPTH_CACHELINE_FREE_shift, DEPTH_CACHELINE_FREE_mask); + SETbit(sq_dyn_gpr_cntl_ps_flush_req, VS_PC_LIMIT_ENABLE_bit); + } + + BEGIN_BATCH_NO_AUTOSTATE(117); + R600_OUT_BATCH_REGSEQ(SQ_CONFIG, 6); + R600_OUT_BATCH(sq_config); + R600_OUT_BATCH(sq_gpr_resource_mgmt_1); + R600_OUT_BATCH(sq_gpr_resource_mgmt_2); + R600_OUT_BATCH(sq_thread_resource_mgmt); + R600_OUT_BATCH(sq_stack_resource_mgmt_1); + 
R600_OUT_BATCH(sq_stack_resource_mgmt_2); + + R600_OUT_BATCH_REGVAL(TA_CNTL_AUX, ta_cntl_aux); + R600_OUT_BATCH_REGVAL(VC_ENHANCE, 0); + R600_OUT_BATCH_REGVAL(R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, sq_dyn_gpr_cntl_ps_flush_req); + R600_OUT_BATCH_REGVAL(DB_DEBUG, db_debug); + R600_OUT_BATCH_REGVAL(DB_WATERMARKS, db_watermarks); + + R600_OUT_BATCH_REGSEQ(SQ_ESGS_RING_ITEMSIZE, 9); + R600_OUT_BATCH(0); + R600_OUT_BATCH(0); + R600_OUT_BATCH(0); + R600_OUT_BATCH(0); + R600_OUT_BATCH(0); + R600_OUT_BATCH(0); + R600_OUT_BATCH(0); + R600_OUT_BATCH(0); + R600_OUT_BATCH(0); + + R600_OUT_BATCH_REGVAL(CB_CLRCMP_CONTROL, + (CLRCMP_SEL_SRC << CLRCMP_FCN_SEL_shift)); + R600_OUT_BATCH_REGVAL(SQ_VTX_BASE_VTX_LOC, 0); + R600_OUT_BATCH_REGVAL(SQ_VTX_START_INST_LOC, 0); + R600_OUT_BATCH_REGVAL(DB_DEPTH_INFO, 0); + R600_OUT_BATCH_REGVAL(DB_DEPTH_CONTROL, 0); + R600_OUT_BATCH_REGVAL(CB_SHADER_MASK, (OUTPUT0_ENABLE_mask)); + R600_OUT_BATCH_REGVAL(CB_TARGET_MASK, (TARGET0_ENABLE_mask)); + R600_OUT_BATCH_REGVAL(R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit)); + R600_OUT_BATCH_REGVAL(CB_COLOR_CONTROL, (0xcc << ROP3_shift)); + + R600_OUT_BATCH_REGVAL(PA_CL_VTE_CNTL, VTX_XY_FMT_bit); + R600_OUT_BATCH_REGVAL(PA_CL_VS_OUT_CNTL, 0); + R600_OUT_BATCH_REGVAL(PA_CL_CLIP_CNTL, CLIP_DISABLE_bit); + R600_OUT_BATCH_REGVAL(PA_SU_SC_MODE_CNTL, (FACE_bit) | + (POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift) | + (POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift)); + R600_OUT_BATCH_REGVAL(PA_SU_VTX_CNTL, (PIX_CENTER_bit) | + (X_ROUND_TO_EVEN << PA_SU_VTX_CNTL__ROUND_MODE_shift) | + (X_1_256TH << QUANT_MODE_shift)); + + R600_OUT_BATCH_REGSEQ(VGT_MAX_VTX_INDX, 4); + R600_OUT_BATCH(2048); + R600_OUT_BATCH(0); + R600_OUT_BATCH(0); + R600_OUT_BATCH(0); + + R600_OUT_BATCH_REGSEQ(VGT_OUTPUT_PATH_CNTL, 13); + R600_OUT_BATCH(0); + R600_OUT_BATCH(0); + R600_OUT_BATCH(0); + R600_OUT_BATCH(0); + R600_OUT_BATCH(0); + R600_OUT_BATCH(0); + R600_OUT_BATCH(0); + R600_OUT_BATCH(0); + R600_OUT_BATCH(0); + 
R600_OUT_BATCH(0); + R600_OUT_BATCH(0); + R600_OUT_BATCH(0); + R600_OUT_BATCH(0); + + R600_OUT_BATCH_REGVAL(VGT_PRIMITIVEID_EN, 0); + R600_OUT_BATCH_REGVAL(VGT_MULTI_PRIM_IB_RESET_EN, 0); + R600_OUT_BATCH_REGVAL(VGT_INSTANCE_STEP_RATE_0, 0); + R600_OUT_BATCH_REGVAL(VGT_INSTANCE_STEP_RATE_1, 0); + + R600_OUT_BATCH_REGSEQ(VGT_STRMOUT_EN, 3); + R600_OUT_BATCH(0); + R600_OUT_BATCH(0); + R600_OUT_BATCH(0); + + R600_OUT_BATCH_REGVAL(VGT_STRMOUT_BUFFER_EN, 0); + + END_BATCH(); + COMMIT_BATCH(); +} + +static GLboolean validate_buffers(context_t *rmesa, + struct radeon_bo *src_bo, + struct radeon_bo *dst_bo) +{ + int ret; + + radeon_cs_space_reset_bos(rmesa->radeon.cmdbuf.cs); + + ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs, + src_bo, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT, 0); + if (ret) + return GL_FALSE; + + ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs, + dst_bo, 0, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT); + if (ret) + return GL_FALSE; + + ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs, + rmesa->blit_bo, + RADEON_GEM_DOMAIN_GTT, 0); + if (ret) + return GL_FALSE; + + return GL_TRUE; +} + +unsigned r600_blit(GLcontext *ctx, + struct radeon_bo *src_bo, + intptr_t src_offset, + gl_format src_mesaformat, + unsigned src_pitch, + unsigned src_width, + unsigned src_height, + unsigned src_x, + unsigned src_y, + struct radeon_bo *dst_bo, + intptr_t dst_offset, + gl_format dst_mesaformat, + unsigned dst_pitch, + unsigned dst_width, + unsigned dst_height, + unsigned dst_x, + unsigned dst_y, + unsigned w, + unsigned h, + unsigned flip_y) +{ + context_t *context = R700_CONTEXT(ctx); + int id = 0; + + if (!r600_check_blit(dst_mesaformat)) + return GL_FALSE; + + if (src_bo == dst_bo) { + return GL_FALSE; + } + + if (src_offset % 256 || dst_offset % 256) { + return GL_FALSE; + } + + if (0) { + fprintf(stderr, "src: width %d, height %d, pitch %d vs %d, format %s\n", + src_width, src_height, src_pitch, + 
_mesa_format_row_stride(src_mesaformat, src_width), + _mesa_get_format_name(src_mesaformat)); + fprintf(stderr, "dst: width %d, height %d, pitch %d, format %s\n", + dst_width, dst_height, + _mesa_format_row_stride(dst_mesaformat, dst_width), + _mesa_get_format_name(dst_mesaformat)); + } + + /* Flush is needed to make sure that source buffer has correct data */ + radeonFlush(ctx); + + rcommonEnsureCmdBufSpace(&context->radeon, 304, __FUNCTION__); + + /* load shaders */ + load_shaders(context->radeon.glCtx); + + if (!validate_buffers(context, src_bo, dst_bo)) + return GL_FALSE; + + /* set clear state */ + /* 117 */ + set_default_state(context); + + /* shaders */ + /* 72 */ + set_shaders(context); + + /* src */ + /* 20 */ + set_tex_resource(context, src_mesaformat, src_bo, + src_width, src_height, src_pitch, src_offset); + + /* 5 */ + set_tex_sampler(context); + + /* dst */ + /* 27 */ + set_render_target(context, dst_bo, dst_mesaformat, + dst_pitch, dst_width, dst_height, dst_offset); + /* scissors */ + /* 17 */ + set_scissors(context, dst_x, dst_y, dst_x + dst_width, dst_y + dst_height); + + set_vb_data(context, src_x, src_y, dst_x, dst_y, w, h, src_height, flip_y); + /* Vertex buffer setup */ + /* 24 */ + set_vtx_resource(context); + + /* draw */ + /* 10 */ + draw_auto(context); + + /* 7 */ + r700SyncSurf(context, dst_bo, 0, + RADEON_GEM_DOMAIN_VRAM|RADEON_GEM_DOMAIN_GTT, + CB_ACTION_ENA_bit | (1 << (id + 6))); + + /* 5 */ + /* XXX drm should handle this in fence submit */ + r700WaitForIdleClean(context); + + radeonFlush(ctx); + + return GL_TRUE; +} diff --git a/r600/r600_blit.h b/r600/r600_blit.h new file mode 100644 index 0000000..d56b21b --- /dev/null +++ b/r600/r600_blit.h @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2009 Advanced Micro Devices, Inc. + * + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef R600_BLIT_H +#define R600_BLIT_H + +unsigned r600_check_blit(gl_format mesa_format); + +unsigned r600_blit(GLcontext *ctx, + struct radeon_bo *src_bo, + intptr_t src_offset, + gl_format src_mesaformat, + unsigned src_pitch, + unsigned src_width, + unsigned src_height, + unsigned src_x_offset, + unsigned src_y_offset, + struct radeon_bo *dst_bo, + intptr_t dst_offset, + gl_format dst_mesaformat, + unsigned dst_pitch, + unsigned dst_width, + unsigned dst_height, + unsigned dst_x_offset, + unsigned dst_y_offset, + unsigned w, + unsigned h, + unsigned flip_y); + +#endif // R600_BLIT_H diff --git a/r600/r600_blit_shaders.h b/r600/r600_blit_shaders.h new file mode 100644 index 0000000..492dde9 --- /dev/null +++ b/r600/r600_blit_shaders.h @@ -0,0 +1,28 @@ +const uint32_t r6xx_vs[] = +{ + 0x00000004, // CF_DWORD0(ADDR(4)) + 0x81000000, // SQ_CF_INST_VTX COUNT(1) + 0x0000203c, // CF_EXP_IMP CF_POS0 SQ_EXPORT_POS RW_GPR(0) ELEM_SIZE(0) + 0x94000b08, // SQ_CF_INST_EXPORT_DONE SWZ XY01 BARRIER(1) + 0x00004000, // CF_EXP_IMP 0 SQ_EXPORT_PARAM RW_GPR(0) ELEM_SIZE(0) + 0x14200b1a, // SQ_CF_INST_EXPORT_DONE SWZ ZW01 EOP(1) BARRIER(0) + 0x00000000, + 0x00000000, + 0x3c000000, // SQ_VTX_INST_FETCH BUFFER_ID(0) MEGA_FETCH_COUNT(16) + 0x68cd1000, // DST_GPR(0) DST_SWZ: XYZW DATA_FORMAT(35) SQ_NUM_FORMAT_SCALED SQ_FORMAT_COMP_SIGNED + 0x00080000, // ENDIAN_SWAP(SQ_ENDIAN_NONE) MEGA_FETCH(1) + 0x00000000, // VTX_DWORD_PAD +}; + +const uint32_t r6xx_ps[] = +{ + 0x00000002, // CF_DWORD0 AADR(2) + 0x80800000, // SQ_CF_INST_TEX COUNT(1) + 0x00000000, // CF_ALLOC_IMP_EXP0 SQ_EXPORT_PIXEL RW_GPR(0) ELEM_SIZE(0) + 0x94200688, // SQ_CF_INST_EXPORT_DONE EOP(1) BARRIER(1) SWZ: XYZW + 0x00000010, // SQ_TEX_INST_SAMPLE SRC_GPR(0) RESOURCE_ID(0) + 0x000d1000, // DST_GPR(0) SWZ: XYZW TEX_UNNORMALIZED + 0xb0800000, // SAMPLER_ID(0) SRC_SWZ XYZW + 0x00000000, // TEX_DWORD_PAD +}; + diff --git a/r600/r600_cmdbuf.c b/r600/r600_cmdbuf.c index 370bb04..afe2d55 100644 --- 
a/r600/r600_cmdbuf.c +++ b/r600/r600_cmdbuf.c @@ -39,7 +39,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/macros.h" #include "main/context.h" #include "main/simple_list.h" -#include "swrast/swrast.h" #include "drm.h" #include "radeon_drm.h" @@ -49,7 +48,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r600_cmdbuf.h" #include "r600_emit.h" #include "radeon_bocs_wrapper.h" -#include "radeon_mipmap_tree.h" #include "radeon_reg.h" #ifdef HAVE_LIBDRM_RADEON diff --git a/r600/r600_context.c b/r600/r600_context.c index dbd2337..134e97e 100644 --- a/r600/r600_context.c +++ b/r600/r600_context.c @@ -40,9 +40,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/context.h" #include "main/simple_list.h" #include "main/imports.h" -#include "main/matrix.h" #include "main/extensions.h" -#include "main/state.h" #include "main/bufferobj.h" #include "main/texobj.h" @@ -52,7 +50,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "tnl/tnl.h" #include "tnl/t_pipeline.h" -#include "tnl/t_vp_build.h" #include "drivers/common/driverfuncs.h" @@ -65,14 +62,15 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "r600_emit.h" #include "radeon_bocs_wrapper.h" #include "radeon_queryobj.h" +#include "r600_blit.h" #include "r700_state.h" #include "r700_ioctl.h" -#include "vblank.h" #include "utils.h" -#include "xmlpool.h" /* for symbolic values of enum-type options */ + +#define R600_ENABLE_GLSL_TEST 1 #define need_GL_VERSION_2_0 #define need_GL_ARB_occlusion_query @@ -97,6 +95,7 @@ static const struct dri_extension card_extensions[] = { {"GL_ARB_depth_clamp", NULL}, {"GL_ARB_depth_texture", NULL}, {"GL_ARB_fragment_program", NULL}, + {"GL_ARB_fragment_program_shadow", NULL}, {"GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions}, {"GL_ARB_multitexture", NULL}, {"GL_ARB_point_parameters", GL_ARB_point_parameters_functions}, @@ -109,6 +108,7 @@ static const struct dri_extension card_extensions[] = { {"GL_ARB_texture_env_crossbar", NULL}, {"GL_ARB_texture_env_dot3", NULL}, {"GL_ARB_texture_mirrored_repeat", NULL}, + {"GL_ARB_texture_non_power_of_two", NULL}, {"GL_ARB_vertex_program", GL_ARB_vertex_program_functions}, {"GL_EXT_blend_equation_separate", GL_EXT_blend_equation_separate_functions}, {"GL_EXT_blend_func_separate", GL_EXT_blend_func_separate_functions}, @@ -140,6 +140,7 @@ static const struct dri_extension card_extensions[] = { {"GL_NV_blend_square", NULL}, {"GL_NV_vertex_program", GL_NV_vertex_program_functions}, {"GL_SGIS_generate_mipmap", NULL}, + {"GL_ARB_pixel_buffer_object", NULL}, {NULL, NULL} /* *INDENT-ON* */ }; @@ -155,7 +156,12 @@ static const struct dri_extension mm_extensions[] = { * functions added by GL_ATI_separate_stencil. 
*/ static const struct dri_extension gl_20_extension[] = { +#ifdef R600_ENABLE_GLSL_TEST + {"GL_ARB_shading_language_100", GL_VERSION_2_0_functions }, +#else {"GL_VERSION_2_0", GL_VERSION_2_0_functions }, +#endif /* R600_ENABLE_GLSL_TEST */ + {NULL, NULL} }; static const struct tnl_pipeline_stage *r600_pipeline[] = { @@ -231,19 +237,24 @@ static void r600_init_vtbl(radeonContextPtr radeon) radeon->vtbl.pre_emit_atoms = r600_vtbl_pre_emit_atoms; radeon->vtbl.fallback = r600_fallback; radeon->vtbl.emit_query_finish = r600_emit_query_finish; + radeon->vtbl.check_blit = r600_check_blit; + radeon->vtbl.blit = r600_blit; } static void r600InitConstValues(GLcontext *ctx, radeonScreenPtr screen) { - context_t *r600 = R700_CONTEXT(ctx); - - ctx->Const.MaxTextureImageUnits = - driQueryOptioni(&r600->radeon.optionCache, "texture_image_units"); - ctx->Const.MaxTextureCoordUnits = - driQueryOptioni(&r600->radeon.optionCache, "texture_coord_units"); + ctx->Const.MaxTextureImageUnits = 16; + /* 8 per clause on r6xx, 16 on r7xx + * but I think mesa only supports 8 at the moment + */ + ctx->Const.MaxTextureCoordUnits = 8; ctx->Const.MaxTextureUnits = MIN2(ctx->Const.MaxTextureImageUnits, ctx->Const.MaxTextureCoordUnits); + ctx->Const.MaxCombinedTextureImageUnits = + ctx->Const.MaxVertexTextureImageUnits + + ctx->Const.MaxTextureImageUnits; + ctx->Const.MaxTextureMaxAnisotropy = 16.0; ctx->Const.MaxTextureLodBias = 16.0; @@ -261,6 +272,8 @@ static void r600InitConstValues(GLcontext *ctx, radeonScreenPtr screen) ctx->Const.MaxLineWidthAA = 0xffff / 8.0; ctx->Const.MaxDrawBuffers = 1; /* hw supports 8 */ + ctx->Const.MaxColorAttachments = 1; + ctx->Const.MaxRenderbufferSize = 4096; /* 256 for reg-based consts, inline consts also supported */ ctx->Const.VertexProgram.MaxInstructions = 8192; /* in theory no limit */ @@ -275,9 +288,8 @@ static void r600InitConstValues(GLcontext *ctx, radeonScreenPtr screen) ctx->Const.FragmentProgram.MaxNativeAttribs = 32; 
ctx->Const.FragmentProgram.MaxNativeParameters = 256; ctx->Const.FragmentProgram.MaxNativeAluInstructions = 8192; - /* 8 per clause on r6xx, 16 on rv670/r7xx */ - if ((screen->chip_family == CHIP_FAMILY_RV670) || - (screen->chip_family >= CHIP_FAMILY_RV770)) + /* 8 per clause on r6xx, 16 on r7xx */ + if (screen->chip_family >= CHIP_FAMILY_RV770) ctx->Const.FragmentProgram.MaxNativeTexInstructions = 16; else ctx->Const.FragmentProgram.MaxNativeTexInstructions = 8; @@ -308,6 +320,14 @@ static void r600InitGLExtensions(GLcontext *ctx) if (r600->radeon.radeonScreen->kernel_mm) driInitExtensions(ctx, mm_extensions, GL_FALSE); +#ifdef R600_ENABLE_GLSL_TEST + driInitExtensions(ctx, gl_20_extension, GL_TRUE); + _mesa_enable_2_0_extensions(ctx); + + /* glsl compiler has problem if this is not GL_TRUE */ + ctx->Shader.EmitCondCodes = GL_TRUE; +#endif /* R600_ENABLE_GLSL_TEST */ + if (driQueryOptionb (&r600->radeon.optionCache, "disable_stencil_two_side")) _mesa_disable_extension(ctx, "GL_EXT_stencil_two_side"); @@ -322,18 +342,21 @@ static void r600InitGLExtensions(GLcontext *ctx) _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc"); } - /* XXX: RV740 only seems to report results from half of its DBs */ - if (r600->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV740) - _mesa_disable_extension(ctx, "GL_ARB_occlusion_query"); + /* RV740 had a broken pipe config prior to drm 1.32 */ + if (!r600->radeon.radeonScreen->kernel_mm) { + if ((r600->radeon.dri.drmMinor < 32) && + (r600->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV740)) + _mesa_disable_extension(ctx, "GL_ARB_occlusion_query"); + } } /* Create the device specific rendering context. 
*/ GLboolean r600CreateContext(const __GLcontextModes * glVisual, - __DRIcontextPrivate * driContextPriv, + __DRIcontext * driContextPriv, void *sharedContextPrivate) { - __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv; + __DRIscreen *sPriv = driContextPriv->driScreenPriv; radeonScreenPtr screen = (radeonScreenPtr) (sPriv->private); struct dd_function_table functions; context_t *r600; @@ -361,7 +384,7 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, _mesa_init_driver_functions(&functions); r700InitStateFuncs(&functions); - r600InitTextureFuncs(&functions); + r600InitTextureFuncs(&r600->radeon, &functions); r700InitShaderFuncs(&functions); radeonInitQueryObjFunctions(&functions); r700InitIoctlFuncs(&functions); diff --git a/r600/r600_context.h b/r600/r600_context.h index 394fd75..72c8c86 100644 --- a/r600/r600_context.h +++ b/r600/r600_context.h @@ -108,6 +108,7 @@ typedef struct StreamDesc GLint size; //number of data element GLenum type; //data element type GLsizei stride; + GLenum format; // GL_RGBA,GLBGRA struct radeon_bo *bo; GLint bo_offset; @@ -147,13 +148,15 @@ struct r600_context { GLint nNumActiveAos; StreamDesc stream_desc[VERT_ATTRIB_MAX]; struct r700_index_buffer ind_buf; + struct radeon_bo *blit_bo; + GLboolean blit_bo_loaded; }; #define R700_CONTEXT(ctx) ((context_t *)(ctx->DriverCtx)) #define GL_CONTEXT(context) ((GLcontext *)(context->radeon.glCtx)) extern GLboolean r600CreateContext(const __GLcontextModes * glVisual, - __DRIcontextPrivate * driContextPriv, + __DRIcontext * driContextPriv, void *sharedContextPrivate); #define R700_CONTEXT_STATES(context) ((R700_CHIP_CONTEXT *)(&context->hw)) @@ -177,6 +180,8 @@ extern GLboolean r700SyncSurf(context_t *context, uint32_t write_domain, uint32_t sync_type); +extern void r700WaitForIdleClean(context_t *context); + extern void r700Start3D(context_t *context); extern void r600InitAtoms(context_t *context); extern void r700InitDraw(GLcontext *ctx); diff --git a/r600/r600_emit.c 
b/r600/r600_emit.c index 5c250c2..1eb89a5 100644 --- a/r600/r600_emit.c +++ b/r600/r600_emit.c @@ -37,10 +37,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/colormac.h" #include "main/imports.h" #include "main/macros.h" -#include "main/image.h" #include "swrast_setup/swrast_setup.h" -#include "math/m_translate.h" #include "tnl/tnl.h" #include "tnl/t_context.h" diff --git a/r600/r600_tex.c b/r600/r600_tex.c index 9d83a64..36a6e6e 100644 --- a/r600/r600_tex.c +++ b/r600/r600_tex.c @@ -41,18 +41,14 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/mipmap.h" #include "main/simple_list.h" #include "main/texstore.h" -#include "main/teximage.h" #include "main/texobj.h" #include "texmem.h" #include "r600_context.h" -#include "r700_state.h" #include "radeon_mipmap_tree.h" #include "r600_tex.h" -#include "xmlpool.h" - static unsigned int translate_wrap_mode(GLenum wrapmode) { @@ -305,7 +301,7 @@ static void r600TexParameter(GLcontext * ctx, GLenum target, break; case GL_TEXTURE_BORDER_COLOR: - r600SetTexBorderColor(t, texObj->BorderColor); + r600SetTexBorderColor(t, texObj->BorderColor.f); break; case GL_TEXTURE_BASE_LEVEL: @@ -391,12 +387,12 @@ static struct gl_texture_object *r600NewTextureObject(GLcontext * ctx, r600SetTexDefaultState(t); r600UpdateTexWrap(t); r600SetTexFilter(t, t->base.MinFilter, t->base.MagFilter, t->base.MaxAnisotropy); - r600SetTexBorderColor(t, t->base.BorderColor); + r600SetTexBorderColor(t, t->base.BorderColor.f); return &t->base; } -void r600InitTextureFuncs(struct dd_function_table *functions) +void r600InitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *functions) { /* Note: we only plug in the functions we implement in the driver * since _mesa_init_driver_functions() was already called. 
@@ -424,6 +420,11 @@ void r600InitTextureFuncs(struct dd_function_table *functions) functions->CompressedTexImage2D = radeonCompressedTexImage2D; functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; + if (radeon->radeonScreen->kernel_mm) { + functions->CopyTexImage2D = radeonCopyTexImage2D; + functions->CopyTexSubImage2D = radeonCopyTexSubImage2D; + } + functions->GenerateMipmap = radeonGenerateMipmap; driInitTextureFormats(); diff --git a/r600/r600_tex.h b/r600/r600_tex.h index fb0e1a0..1d75a2e 100644 --- a/r600/r600_tex.h +++ b/r600/r600_tex.h @@ -42,7 +42,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. /* Texel pitch is 8 alignment. */ #define R700_TEXEL_PITCH_ALIGNMENT_MASK 0x7 -#define R700_MAX_TEXTURE_UNITS 8 /* TODO : should be 16, lets make it work, review later */ +#define R700_MAX_TEXTURE_UNITS 16 extern void r600SetDepthTexMode(struct gl_texture_object *tObj); @@ -58,6 +58,6 @@ extern void r600SetTexOffset(__DRIcontext *pDRICtx, GLint texname, extern GLboolean r600ValidateBuffers(GLcontext * ctx); -extern void r600InitTextureFuncs(struct dd_function_table *functions); +extern void r600InitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *functions); #endif /* __r600_TEX_H__ */ diff --git a/r600/r600_texstate.c b/r600/r600_texstate.c index 4ec315b..1600033 100644 --- a/r600/r600_texstate.c +++ b/r600/r600_texstate.c @@ -45,7 +45,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "main/simple_list.h" #include "r600_context.h" -#include "r700_state.h" #include "radeon_mipmap_tree.h" #include "r600_tex.h" #include "r700_fragprog.h" @@ -85,16 +84,22 @@ static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, gl_format mesa CLEARfield(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); CLEARfield(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); CLEARfield(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + CLEARbit(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit); SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_UNSIGNED, FORMAT_COMP_X_shift, FORMAT_COMP_X_mask); SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_UNSIGNED, FORMAT_COMP_Y_shift, FORMAT_COMP_Y_mask); SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_UNSIGNED, - FORMAT_COMP_X_shift, FORMAT_COMP_Z_mask); + FORMAT_COMP_Z_shift, FORMAT_COMP_Z_mask); SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_UNSIGNED, FORMAT_COMP_W_shift, FORMAT_COMP_W_mask); + CLEARbit(t->SQ_TEX_RESOURCE0, TILE_TYPE_bit); + SETfield(t->SQ_TEX_RESOURCE0, ARRAY_LINEAR_GENERAL, + SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift, + SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask); + switch (mesa_format) /* This is mesa format. 
*/ { case MESA_FORMAT_RGBA8888: @@ -158,6 +163,32 @@ static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, gl_format mesa SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); break; + case MESA_FORMAT_XRGB8888: + SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_XRGB8888_REV: + SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; case MESA_FORMAT_ARGB8888_REV: SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); @@ -342,52 +373,46 @@ static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, gl_format mesa break; */ case MESA_FORMAT_RGB_DXT1: /* not supported yet */ - - break; case 
MESA_FORMAT_RGBA_DXT1: /* not supported yet */ - - break; case MESA_FORMAT_RGBA_DXT3: /* not supported yet */ - - break; case MESA_FORMAT_RGBA_DXT5: /* not supported yet */ + return GL_FALSE; - break; case MESA_FORMAT_RGBA_FLOAT32: SETfield(t->SQ_TEX_RESOURCE1, FMT_32_32_32_32_FLOAT, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); - SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W, + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); - SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, - SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); - SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); break; case MESA_FORMAT_RGBA_FLOAT16: SETfield(t->SQ_TEX_RESOURCE1, FMT_16_16_16_16_FLOAT, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); - SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W, + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); - SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, - SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); - SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); break; case 
MESA_FORMAT_RGB_FLOAT32: /* X, Y, Z, ONE */ SETfield(t->SQ_TEX_RESOURCE1, FMT_32_32_32_FLOAT, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); - SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); - SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); @@ -396,11 +421,11 @@ static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, gl_format mesa SETfield(t->SQ_TEX_RESOURCE1, FMT_16_16_16_FLOAT, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); - SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); - SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); @@ -461,26 +486,26 @@ static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, gl_format mesa SETfield(t->SQ_TEX_RESOURCE1, FMT_32_32_FLOAT, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); - SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); - 
SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); - SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, - SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); break; case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT16: SETfield(t->SQ_TEX_RESOURCE1, FMT_16_16_FLOAT, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); - SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); - SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); - SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, - SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); break; case MESA_FORMAT_INTENSITY_FLOAT32: /* X, X, X, X */ @@ -515,6 +540,10 @@ static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, gl_format mesa case MESA_FORMAT_Z24_S8: case MESA_FORMAT_Z32: case MESA_FORMAT_S8: + SETbit(t->SQ_TEX_RESOURCE0, TILE_TYPE_bit); + SETfield(t->SQ_TEX_RESOURCE0, ARRAY_1D_TILED_THIN1, + SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift, + SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask); switch (mesa_format) { case MESA_FORMAT_Z16: SETfield(t->SQ_TEX_RESOURCE1, FMT_16, @@ -626,6 +655,37 @@ static GLboolean 
r600GetTexFormat(struct gl_texture_object *tObj, gl_format mesa return GL_TRUE; } +static GLuint r600_translate_shadow_func(GLenum func) +{ + switch (func) { + case GL_NEVER: + return SQ_TEX_DEPTH_COMPARE_NEVER; + case GL_LESS: + return SQ_TEX_DEPTH_COMPARE_LESS; + case GL_LEQUAL: + return SQ_TEX_DEPTH_COMPARE_LESSEQUAL; + case GL_GREATER: + return SQ_TEX_DEPTH_COMPARE_GREATER; + case GL_GEQUAL: + return SQ_TEX_DEPTH_COMPARE_GREATEREQUAL; + case GL_NOTEQUAL: + return SQ_TEX_DEPTH_COMPARE_NOTEQUAL; + case GL_EQUAL: + return SQ_TEX_DEPTH_COMPARE_EQUAL; + case GL_ALWAYS: + return SQ_TEX_DEPTH_COMPARE_ALWAYS; + default: + WARN_ONCE("Unknown shadow compare function! %d", func); + return 0; + } +} + +static INLINE uint32_t +S_FIXED(float value, uint32_t frac_bits) +{ + return value * (1 << frac_bits); +} + void r600SetDepthTexMode(struct gl_texture_object *tObj) { radeonTexObjPtr t; @@ -635,8 +695,8 @@ void r600SetDepthTexMode(struct gl_texture_object *tObj) t = radeon_tex_obj(tObj); - r600GetTexFormat(tObj, tObj->Image[0][tObj->BaseLevel]->TexFormat); - + if(!r600GetTexFormat(tObj, tObj->Image[0][tObj->BaseLevel]->TexFormat)) + t->validated = GL_FALSE; } /** @@ -645,8 +705,9 @@ void r600SetDepthTexMode(struct gl_texture_object *tObj) * \param rmesa Context pointer * \param t the r300 texture object */ -static void setup_hardware_state(context_t *rmesa, struct gl_texture_object *texObj) +static GLboolean setup_hardware_state(GLcontext * ctx, struct gl_texture_object *texObj, int unit) { + context_t *rmesa = R700_CONTEXT(ctx); radeonTexObj *t = radeon_tex_obj(texObj); const struct gl_texture_image *firstImage; GLuint uTexelPitch, row_align; @@ -654,15 +715,15 @@ static void setup_hardware_state(context_t *rmesa, struct gl_texture_object *tex if (rmesa->radeon.radeonScreen->driScreen->dri2.enabled && t->image_override && t->bo) - return; + return GL_TRUE; firstImage = t->base.Image[0][t->minLod]; if (!t->image_override) { if (!r600GetTexFormat(texObj, 
firstImage->TexFormat)) { - radeon_error("unexpected texture format in %s\n", - __FUNCTION__); - return; + radeon_warning("unsupported texture format in %s\n", + __FUNCTION__); + return GL_FALSE; } } @@ -687,7 +748,7 @@ static void setup_hardware_state(context_t *rmesa, struct gl_texture_object *tex break; default: radeon_error("unexpected texture target type in %s\n", __FUNCTION__); - return; + return GL_FALSE; } row_align = rmesa->radeon.texture_row_align - 1; @@ -706,11 +767,33 @@ static void setup_hardware_state(context_t *rmesa, struct gl_texture_object *tex SETfield(t->SQ_TEX_RESOURCE1, firstImage->Height - 1, TEX_HEIGHT_shift, TEX_HEIGHT_mask); - if ((t->maxLod - t->minLod) > 0) { - t->SQ_TEX_RESOURCE3 = t->mt->levels[t->minLod].size / 256; - SETfield(t->SQ_TEX_RESOURCE4, 0, BASE_LEVEL_shift, BASE_LEVEL_mask); - SETfield(t->SQ_TEX_RESOURCE5, t->maxLod - t->minLod, LAST_LEVEL_shift, LAST_LEVEL_mask); + t->SQ_TEX_RESOURCE2 = get_base_teximage_offset(t) / 256; + + t->SQ_TEX_RESOURCE3 = radeon_miptree_image_offset(t->mt, 0, t->minLod + 1) / 256; + + SETfield(t->SQ_TEX_RESOURCE4, 0, BASE_LEVEL_shift, BASE_LEVEL_mask); + SETfield(t->SQ_TEX_RESOURCE5, t->maxLod - t->minLod, LAST_LEVEL_shift, LAST_LEVEL_mask); + + SETfield(t->SQ_TEX_SAMPLER1, + S_FIXED(CLAMP(t->base.MinLod - t->minLod, 0, 15), 6), + MIN_LOD_shift, MIN_LOD_mask); + SETfield(t->SQ_TEX_SAMPLER1, + S_FIXED(CLAMP(t->base.MaxLod - t->minLod, 0, 15), 6), + MAX_LOD_shift, MAX_LOD_mask); + SETfield(t->SQ_TEX_SAMPLER1, + S_FIXED(CLAMP(ctx->Texture.Unit[unit].LodBias + t->base.LodBias, -16, 16), 6), + SQ_TEX_SAMPLER_WORD1_0__LOD_BIAS_shift, SQ_TEX_SAMPLER_WORD1_0__LOD_BIAS_mask); + + if(texObj->CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB) + { + SETfield(t->SQ_TEX_SAMPLER0, r600_translate_shadow_func(texObj->CompareFunc), DEPTH_COMPARE_FUNCTION_shift, DEPTH_COMPARE_FUNCTION_mask); + } + else + { + CLEARfield(t->SQ_TEX_SAMPLER0, DEPTH_COMPARE_FUNCTION_mask); } + + return GL_TRUE; } /** @@ -718,9 +801,8 @@ static 
void setup_hardware_state(context_t *rmesa, struct gl_texture_object *tex * * Mostly this means populating the texture object's mipmap tree. */ -static GLboolean r600_validate_texture(GLcontext * ctx, struct gl_texture_object *texObj) +static GLboolean r600_validate_texture(GLcontext * ctx, struct gl_texture_object *texObj, int unit) { - context_t *rmesa = R700_CONTEXT(ctx); radeonTexObj *t = radeon_tex_obj(texObj); if (!radeon_validate_texture_miptree(ctx, texObj)) @@ -728,7 +810,8 @@ static GLboolean r600_validate_texture(GLcontext * ctx, struct gl_texture_object /* Configure the hardware registers (more precisely, the cached version * of the hardware registers). */ - setup_hardware_state(rmesa, texObj); + if (!setup_hardware_state(ctx, texObj, unit)) + return GL_FALSE; t->validated = GL_TRUE; return GL_TRUE; @@ -769,7 +852,7 @@ GLboolean r600ValidateBuffers(GLcontext * ctx) if (!ctx->Texture.Unit[i]._ReallyEnabled) continue; - if (!r600_validate_texture(ctx, ctx->Texture.Unit[i]._Current)) { + if (!r600_validate_texture(ctx, ctx->Texture.Unit[i]._Current, i)) { radeon_warning("failed to validate texture for unit %d.\n", i); } t = radeon_tex_obj(ctx->Texture.Unit[i]._Current); @@ -901,7 +984,7 @@ void r600SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo type = GL_BGRA; format = GL_UNSIGNED_BYTE; - internalFormat = (glx_texture_format == GLX_TEXTURE_FORMAT_RGB_EXT ? 3 : 4); + internalFormat = (glx_texture_format == __DRI_TEXTURE_FORMAT_RGB ? 
3 : 4); radeon = pDRICtx->driverPrivate; rmesa = pDRICtx->driverPrivate; @@ -917,18 +1000,7 @@ void r600SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo return; } - radeon_update_renderbuffers(pDRICtx, dPriv); - /* back & depth buffer are useless free them right away */ - rb = (void*)rfb->base.Attachment[BUFFER_DEPTH].Renderbuffer; - if (rb && rb->bo) { - radeon_bo_unref(rb->bo); - rb->bo = NULL; - } - rb = (void*)rfb->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer; - if (rb && rb->bo) { - radeon_bo_unref(rb->bo); - rb->bo = NULL; - } + radeon_update_renderbuffers(pDRICtx, dPriv, GL_TRUE); rb = rfb->color_rb[0]; if (rb->bo == NULL) { /* Failed to BO for the buffer */ @@ -961,7 +1033,7 @@ void r600SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo pitch_val = rb->pitch; switch (rb->cpp) { case 4: - if (glx_texture_format == GLX_TEXTURE_FORMAT_RGB_EXT) { + if (glx_texture_format == __DRI_TEXTURE_FORMAT_RGB) { SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); @@ -1040,5 +1112,5 @@ void r600SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo void r600SetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv) { - r600SetTexBuffer2(pDRICtx, target, GLX_TEXTURE_FORMAT_RGBA_EXT, dPriv); + r600SetTexBuffer2(pDRICtx, target, __DRI_TEXTURE_FORMAT_RGBA, dPriv); } diff --git a/r600/r700_assembler.c b/r600/r700_assembler.c index 67e0ee7..834bcc6 100644 --- a/r600/r700_assembler.c +++ b/r600/r700_assembler.c @@ -32,12 +32,49 @@ #include "main/mtypes.h" #include "main/imports.h" +#include "shader/prog_parameter.h" #include "radeon_debug.h" #include "r600_context.h" #include "r700_assembler.h" +#define USE_CF_FOR_CONTINUE_BREAK 1 +#define USE_CF_FOR_POP_AFTER 1 + +struct prog_instruction noise1_insts[12] = { + {OPCODE_BGNSUB , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 
0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_MOV , {{0, 0, 0, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 2, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_MOV , {{8, 0, 0, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 4, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_MOV , {{8, 0, 585, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 8, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_SGT , {{0, 0, 585, 0, 0, 0}, {8, 0, 1170, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 1, 1, 0, 8, 1672, 0}, 1, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_IF , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 7, 0, 0}, 0, 0, 0, 1, 0, 0, 0, 15, 0, 0, 0}, + {OPCODE_MOV , {{0, 0, 1755, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 1, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_RET , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_ENDIF , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_MOV , {{0, 0, 1170, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 1, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_RET , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_ENDSUB , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0} +}; +float noise1_const[2][4] = { + {0.300000f, 0.900000f, 0.500000f, 0.300000f} +}; + +COMPILED_SUB noise1_presub = { + &(noise1_insts[0]), + 12, + 2, + 1, + 0, + &(noise1_const[0]), + SWIZZLE_X, + SWIZZLE_X, + SWIZZLE_X, + SWIZZLE_X, + {0,0,0}, + 0 +}; + BITS addrmode_PVSDST(PVSDST * pPVSDST) { return 
pPVSDST->addrmode0 | ((BITS)pPVSDST->addrmode1 << 1); @@ -327,22 +364,27 @@ GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size) return(format); } -unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm) +unsigned int r700GetNumOperands(GLuint opcode, GLuint nIsOp3) { - if(pAsm->D.dst.op3) + if(nIsOp3 > 0) { return 3; } - switch (pAsm->D.dst.opcode) + switch (opcode) { case SQ_OP2_INST_ADD: + case SQ_OP2_INST_KILLE: case SQ_OP2_INST_KILLGT: + case SQ_OP2_INST_KILLGE: + case SQ_OP2_INST_KILLNE: case SQ_OP2_INST_MUL: case SQ_OP2_INST_MAX: case SQ_OP2_INST_MIN: //case SQ_OP2_INST_MAX_DX10: //case SQ_OP2_INST_MIN_DX10: + case SQ_OP2_INST_SETE: + case SQ_OP2_INST_SETNE: case SQ_OP2_INST_SETGT: case SQ_OP2_INST_SETGE: case SQ_OP2_INST_PRED_SETE: @@ -358,6 +400,7 @@ unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm) case SQ_OP2_INST_MOVA_FLOOR: case SQ_OP2_INST_FRACT: case SQ_OP2_INST_FLOOR: + case SQ_OP2_INST_TRUNC: case SQ_OP2_INST_EXP_IEEE: case SQ_OP2_INST_LOG_CLAMPED: case SQ_OP2_INST_LOG_IEEE: @@ -369,7 +412,7 @@ unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm) return 1; default: radeon_error( - "Need instruction operand number for %x.\n", pAsm->D.dst.opcode); + "Need instruction operand number for %x.\n", opcode); }; return 3; @@ -383,103 +426,128 @@ int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700 pAsm->pR700Shader = pShader; pAsm->currentShaderType = spt; - pAsm->cf_last_export_ptr = NULL; + pAsm->cf_last_export_ptr = NULL; + + pAsm->cf_current_export_clause_ptr = NULL; + pAsm->cf_current_alu_clause_ptr = NULL; + pAsm->cf_current_tex_clause_ptr = NULL; + pAsm->cf_current_vtx_clause_ptr = NULL; + pAsm->cf_current_cf_clause_ptr = NULL; - pAsm->cf_current_export_clause_ptr = NULL; - pAsm->cf_current_alu_clause_ptr = NULL; - pAsm->cf_current_tex_clause_ptr = NULL; - pAsm->cf_current_vtx_clause_ptr = NULL; - pAsm->cf_current_cf_clause_ptr = NULL; + // No clause has been created yet + 
pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE; - // No clause has been created yet - pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE; + pAsm->number_of_colorandz_exports = 0; + pAsm->number_of_exports = 0; + pAsm->number_of_export_opcodes = 0; - pAsm->number_of_colorandz_exports = 0; - pAsm->number_of_exports = 0; - pAsm->number_of_export_opcodes = 0; + pAsm->alu_x_opcode = 0; + pAsm->D2.bits = 0; - pAsm->D.bits = 0; - pAsm->S[0].bits = 0; - pAsm->S[1].bits = 0; - pAsm->S[2].bits = 0; + pAsm->D.bits = 0; + pAsm->S[0].bits = 0; + pAsm->S[1].bits = 0; + pAsm->S[2].bits = 0; - pAsm->uLastPosUpdate = 0; + pAsm->uLastPosUpdate = 0; - *(BITS *) &pAsm->fp_stOutFmt0 = 0; + *(BITS *) &pAsm->fp_stOutFmt0 = 0; - pAsm->uIIns = 0; - pAsm->uOIns = 0; - pAsm->number_used_registers = 0; - pAsm->uUsedConsts = 256; + pAsm->uIIns = 0; + pAsm->uOIns = 0; + pAsm->number_used_registers = 0; + pAsm->uUsedConsts = 256; - // Fragment programs - pAsm->uBoolConsts = 0; - pAsm->uIntConsts = 0; - pAsm->uInsts = 0; - pAsm->uConsts = 0; + // Fragment programs + pAsm->uBoolConsts = 0; + pAsm->uIntConsts = 0; + pAsm->uInsts = 0; + pAsm->uConsts = 0; - pAsm->FCSP = 0; - pAsm->fc_stack[0].type = FC_NONE; + pAsm->FCSP = 0; + pAsm->fc_stack[0].type = FC_NONE; - pAsm->branch_depth = 0; - pAsm->max_branch_depth = 0; + pAsm->aArgSubst[0] = + pAsm->aArgSubst[1] = + pAsm->aArgSubst[2] = + pAsm->aArgSubst[3] = (-1); - pAsm->aArgSubst[0] = - pAsm->aArgSubst[1] = - pAsm->aArgSubst[2] = - pAsm->aArgSubst[3] = (-1); + pAsm->uOutputs = 0; - pAsm->uOutputs = 0; + for (i=0; i<NUMBER_OF_OUTPUT_COLORS; i++) + { + pAsm->color_export_register_number[i] = (-1); + } - for (i=0; i<NUMBER_OF_OUTPUT_COLORS; i++) - { - pAsm->color_export_register_number[i] = (-1); - } + pAsm->depth_export_register_number = (-1); + pAsm->stencil_export_register_number = (-1); + pAsm->coverage_to_mask_export_register_number = (-1); + pAsm->mask_export_register_number = (-1); + + pAsm->starting_export_register_number = 0; + 
pAsm->starting_vfetch_register_number = 0; + pAsm->starting_temp_register_number = 0; + pAsm->uFirstHelpReg = 0; + + pAsm->input_position_is_used = GL_FALSE; + pAsm->input_normal_is_used = GL_FALSE; + + for (i=0; i<NUMBER_OF_INPUT_COLORS; i++) + { + pAsm->input_color_is_used[ i ] = GL_FALSE; + } + + for (i=0; i<NUMBER_OF_TEXTURE_UNITS; i++) + { + pAsm->input_texture_unit_is_used[ i ] = GL_FALSE; + } - pAsm->depth_export_register_number = (-1); - pAsm->stencil_export_register_number = (-1); - pAsm->coverage_to_mask_export_register_number = (-1); - pAsm->mask_export_register_number = (-1); + for (i=0; i<VERT_ATTRIB_MAX; i++) + { + pAsm->vfetch_instruction_ptr_array[ i ] = NULL; + } - pAsm->starting_export_register_number = 0; - pAsm->starting_vfetch_register_number = 0; - pAsm->starting_temp_register_number = 0; - pAsm->uFirstHelpReg = 0; + pAsm->number_of_inputs = 0; + pAsm->is_tex = GL_FALSE; + pAsm->need_tex_barrier = GL_FALSE; - pAsm->input_position_is_used = GL_FALSE; - pAsm->input_normal_is_used = GL_FALSE; + pAsm->subs = NULL; + pAsm->unSubArraySize = 0; + pAsm->unSubArrayPointer = 0; + pAsm->callers = NULL; + pAsm->unCallerArraySize = 0; + pAsm->unCallerArrayPointer = 0; + pAsm->CALLSP = 0; + pAsm->CALLSTACK[0].FCSP_BeforeEntry = 0; + pAsm->CALLSTACK[0].plstCFInstructions_local + = &(pAsm->pR700Shader->lstCFInstructions); - for (i=0; i<NUMBER_OF_INPUT_COLORS; i++) - { - pAsm->input_color_is_used[ i ] = GL_FALSE; - } + pAsm->CALLSTACK[0].max = 0; + pAsm->CALLSTACK[0].current = 0; - for (i=0; i<NUMBER_OF_TEXTURE_UNITS; i++) - { - pAsm->input_texture_unit_is_used[ i ] = GL_FALSE; - } + SetActiveCFlist(pAsm->pR700Shader, pAsm->CALLSTACK[0].plstCFInstructions_local); - for (i=0; i<VERT_ATTRIB_MAX; i++) - { - pAsm->vfetch_instruction_ptr_array[ i ] = NULL; - } + pAsm->unCFflags = 0; - pAsm->number_of_inputs = 0; + pAsm->presubs = NULL; + pAsm->unPresubArraySize = 0; + pAsm->unNumPresub = 0; + pAsm->unCurNumILInsts = 0; - pAsm->is_tex = GL_FALSE; - 
pAsm->need_tex_barrier = GL_FALSE; + pAsm->unVetTexBits = 0; - return 0; + return 0; } GLboolean IsTex(gl_inst_opcode Opcode) { - if( (OPCODE_TEX==Opcode) || (OPCODE_TXP==Opcode) || (OPCODE_TXB==Opcode) ) + if( (OPCODE_TEX==Opcode) || (OPCODE_TXP==Opcode) || (OPCODE_TXB==Opcode) || + (OPCODE_DDX==Opcode) || (OPCODE_DDY==Opcode) ) { return GL_TRUE; } @@ -592,6 +660,31 @@ int check_current_clause(r700_AssemblerBase* pAsm, return GL_TRUE; } +GLboolean add_cf_instruction(r700_AssemblerBase* pAsm) +{ + if(GL_FALSE == check_current_clause(pAsm, CF_OTHER_CLAUSE)) + { + return GL_FALSE; + } + + pAsm->cf_current_cf_clause_ptr = + (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause); + + if (pAsm->cf_current_cf_clause_ptr != NULL) + { + Init_R700ControlFlowGenericClause(pAsm->cf_current_cf_clause_ptr); + AddCFInstruction( pAsm->pR700Shader, + (R700ControlFlowInstruction *)pAsm->cf_current_cf_clause_ptr ); + } + else + { + radeon_error("Could not allocate a new VFetch CF instruction.\n"); + return GL_FALSE; + } + + return GL_TRUE; +} + GLboolean add_vfetch_instruction(r700_AssemblerBase* pAsm, R700VertexInstruction* vertex_instruction_ptr) { @@ -737,6 +830,8 @@ GLboolean assemble_vfetch_instruction(r700_AssemblerBase* pAsm, if(GL_TRUE == pFetchMethod->bEnableMini) //More conditions here { //TODO : mini fetch + mega_fetch_count = 0; + is_mega_fetch_flag = 0; } else { @@ -798,6 +893,7 @@ GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm, GLubyte element, GLuint _signed, GLboolean normalize, + GLenum format, VTX_FETCH_METHOD * pFetchMethod) { GLuint client_size_inbyte; @@ -828,6 +924,8 @@ GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm, if(GL_TRUE == pFetchMethod->bEnableMini) //More conditions here { //TODO : mini fetch + mega_fetch_count = 0; + is_mega_fetch_flag = 0; } else { @@ -846,10 +944,21 @@ GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm, vfetch_instruction_ptr->m_Word0.f.src_sel_x = SQ_SEL_X; 
vfetch_instruction_ptr->m_Word0.f.mega_fetch_count = mega_fetch_count; - vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (size < 1) ? SQ_SEL_0 : SQ_SEL_X; - vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y; - vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (size < 3) ? SQ_SEL_0 : SQ_SEL_Z; - vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (size < 4) ? SQ_SEL_1 : SQ_SEL_W; + if(format == GL_BGRA) + { + vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (size < 1) ? SQ_SEL_0 : SQ_SEL_Z; + vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y; + vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (size < 3) ? SQ_SEL_0 : SQ_SEL_X; + vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (size < 4) ? SQ_SEL_1 : SQ_SEL_W; + } + else + { + vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (size < 1) ? SQ_SEL_0 : SQ_SEL_X; + vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y; + vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (size < 3) ? SQ_SEL_0 : SQ_SEL_Z; + vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (size < 4) ? 
SQ_SEL_1 : SQ_SEL_W; + + } vfetch_instruction_ptr->m_Word1.f.use_const_fields = 1; vfetch_instruction_ptr->m_Word1.f.data_format = data_format; @@ -987,7 +1096,8 @@ GLboolean checkop2(r700_AssemblerBase* pAsm) checkop_init(pAsm); - if( (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) || + if( (pILInst->SrcReg[0].File == PROGRAM_UNIFORM) || + (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) || (pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) || (pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) || (pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) ) @@ -998,7 +1108,8 @@ GLboolean checkop2(r700_AssemblerBase* pAsm) { bSrcConst[0] = GL_FALSE; } - if( (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) || + if( (pILInst->SrcReg[1].File == PROGRAM_UNIFORM) || + (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) || (pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) || (pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) || (pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) ) @@ -1031,7 +1142,8 @@ GLboolean checkop3(r700_AssemblerBase* pAsm) checkop_init(pAsm); - if( (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) || + if( (pILInst->SrcReg[0].File == PROGRAM_UNIFORM) || + (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) || (pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) || (pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) || (pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) ) @@ -1042,7 +1154,8 @@ GLboolean checkop3(r700_AssemblerBase* pAsm) { bSrcConst[0] = GL_FALSE; } - if( (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) || + if( (pILInst->SrcReg[1].File == PROGRAM_UNIFORM) || + (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) || (pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) || (pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) || (pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) ) @@ -1053,7 +1166,8 @@ GLboolean checkop3(r700_AssemblerBase* pAsm) { bSrcConst[1] = GL_FALSE; } - if( (pILInst->SrcReg[2].File == PROGRAM_CONSTANT) || + if( (pILInst->SrcReg[2].File == PROGRAM_UNIFORM) || + (pILInst->SrcReg[2].File == 
PROGRAM_CONSTANT) || (pILInst->SrcReg[2].File == PROGRAM_LOCAL_PARAM) || (pILInst->SrcReg[2].File == PROGRAM_ENV_PARAM) || (pILInst->SrcReg[2].File == PROGRAM_STATE_VAR) ) @@ -1136,6 +1250,7 @@ GLboolean assemble_src(r700_AssemblerBase *pAsm, if(pAsm->aArgSubst[1+src] >= 0) { + assert(fld >= 0); setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE); pAsm->S[fld].src.rtype = SRC_REG_TEMPORARY; pAsm->S[fld].src.reg = pAsm->aArgSubst[1+src]; @@ -1153,6 +1268,7 @@ GLboolean assemble_src(r700_AssemblerBase *pAsm, case PROGRAM_LOCAL_PARAM: case PROGRAM_ENV_PARAM: case PROGRAM_STATE_VAR: + case PROGRAM_UNIFORM: if (1 == pILInst->SrcReg[src].RelAddr) { setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_RELATIVE_A0); @@ -1163,10 +1279,18 @@ GLboolean assemble_src(r700_AssemblerBase *pAsm, } pAsm->S[fld].src.rtype = SRC_REG_CONSTANT; - pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index; + if(pILInst->SrcReg[src].Index < 0) + { + WARN_ONCE("Negative register offsets not supported yet!\n"); + pAsm->S[fld].src.reg = 0; + } + else + { + pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index; + } break; case PROGRAM_INPUT: - setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE); + setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE); pAsm->S[fld].src.rtype = SRC_REG_INPUT; switch (pAsm->currentShaderType) { @@ -1179,7 +1303,7 @@ GLboolean assemble_src(r700_AssemblerBase *pAsm, } break; default: - radeon_error("Invalid source argument type\n"); + radeon_error("Invalid source argument type : %d \n", pILInst->SrcReg[src].File); return GL_FALSE; } } @@ -1235,6 +1359,15 @@ GLboolean assemble_dst(r700_AssemblerBase *pAsm) pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1; pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1; + if(pILInst->SaturateMode == SATURATE_ZERO_ONE) + { + pAsm->D2.dst2.SaturateMode = 1; + } + else + { + pAsm->D2.dst2.SaturateMode = 0; + } + return GL_TRUE; } @@ -1294,6 +1427,7 @@ GLboolean tex_src(r700_AssemblerBase *pAsm) else { switch 
(pILInst->SrcReg[0].File) { + case PROGRAM_UNIFORM: case PROGRAM_CONSTANT: case PROGRAM_LOCAL_PARAM: case PROGRAM_ENV_PARAM: @@ -1306,36 +1440,65 @@ GLboolean tex_src(r700_AssemblerBase *pAsm) pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; break; case PROGRAM_INPUT: - switch (pILInst->SrcReg[0].Index) + if(SPT_VP == pAsm->currentShaderType) { - case FRAG_ATTRIB_WPOS: - case FRAG_ATTRIB_COL0: - case FRAG_ATTRIB_COL1: - case FRAG_ATTRIB_FOGC: - case FRAG_ATTRIB_TEX0: - case FRAG_ATTRIB_TEX1: - case FRAG_ATTRIB_TEX2: - case FRAG_ATTRIB_TEX3: - case FRAG_ATTRIB_TEX4: - case FRAG_ATTRIB_TEX5: - case FRAG_ATTRIB_TEX6: - case FRAG_ATTRIB_TEX7: - bValidTexCoord = GL_TRUE; + switch (pILInst->SrcReg[0].Index) + { + case VERT_ATTRIB_TEX0: + case VERT_ATTRIB_TEX1: + case VERT_ATTRIB_TEX2: + case VERT_ATTRIB_TEX3: + case VERT_ATTRIB_TEX4: + case VERT_ATTRIB_TEX5: + case VERT_ATTRIB_TEX6: + case VERT_ATTRIB_TEX7: + bValidTexCoord = GL_TRUE; + pAsm->S[0].src.reg = + pAsm->ucVP_AttributeMap[pILInst->SrcReg[0].Index]; + pAsm->S[0].src.rtype = SRC_REG_INPUT; + break; + } + } + else + { + switch (pILInst->SrcReg[0].Index) + { + case FRAG_ATTRIB_WPOS: + case FRAG_ATTRIB_COL0: + case FRAG_ATTRIB_COL1: + case FRAG_ATTRIB_FOGC: + case FRAG_ATTRIB_TEX0: + case FRAG_ATTRIB_TEX1: + case FRAG_ATTRIB_TEX2: + case FRAG_ATTRIB_TEX3: + case FRAG_ATTRIB_TEX4: + case FRAG_ATTRIB_TEX5: + case FRAG_ATTRIB_TEX6: + case FRAG_ATTRIB_TEX7: + bValidTexCoord = GL_TRUE; + pAsm->S[0].src.reg = + pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index]; + pAsm->S[0].src.rtype = SRC_REG_INPUT; + break; + case FRAG_ATTRIB_FACE: + fprintf(stderr, "FRAG_ATTRIB_FACE unsupported\n"); + break; + case FRAG_ATTRIB_PNTC: + fprintf(stderr, "FRAG_ATTRIB_PNTC unsupported\n"); + break; + } + + if( (pILInst->SrcReg[0].Index >= FRAG_ATTRIB_VAR0) || + (pILInst->SrcReg[0].Index < FRAG_ATTRIB_MAX) ) + { + bValidTexCoord = GL_TRUE; pAsm->S[0].src.reg = pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index]; pAsm->S[0].src.rtype = SRC_REG_INPUT; 
- break; - case FRAG_ATTRIB_FACE: - fprintf(stderr, "FRAG_ATTRIB_FACE unsupported\n"); - break; - case FRAG_ATTRIB_PNTC: - fprintf(stderr, "FRAG_ATTRIB_PNTC unsupported\n"); - break; - case FRAG_ATTRIB_VAR0: - fprintf(stderr, "FRAG_ATTRIB_VAR0 unsupported\n"); - break; + } } - break; + + break; } } @@ -1380,8 +1543,17 @@ GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalize tex_instruction_ptr->m_Word0.f.tex_inst = pAsm->D.dst.opcode; tex_instruction_ptr->m_Word0.f.bc_frac_mode = 0x0; tex_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0; + tex_instruction_ptr->m_Word0.f.alt_const = 0; - tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg; + if(SPT_VP == pAsm->currentShaderType) + { + tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg + VERT_ATTRIB_MAX; + pAsm->unVetTexBits |= 1 << texture_unit_source->reg; + } + else + { + tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg; + } tex_instruction_ptr->m_Word1.f.lod_bias = 0x0; if (normalized) { @@ -1400,7 +1572,6 @@ GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalize tex_instruction_ptr->m_Word2.f.offset_x = 0x0; tex_instruction_ptr->m_Word2.f.offset_y = 0x0; tex_instruction_ptr->m_Word2.f.offset_z = 0x0; - tex_instruction_ptr->m_Word2.f.sampler_id = texture_unit_source->reg; // dst @@ -1517,6 +1688,10 @@ GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr, { src_sel = pSource->reg + CFILE_REGISTER_OFFSET; } + else if (pSource->rtype == SRC_REC_LITERAL) + { + src_sel = SQ_ALU_SRC_LITERAL; + } else { radeon_error("Source (%d) register type (%d) not one of TEMP, INPUT, or CONSTANT.\n", @@ -1571,18 +1746,21 @@ GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr, switch (source_index) { case 0: + assert(alu_instruction_ptr); alu_instruction_ptr->m_Word0.f.src0_sel = src_sel; alu_instruction_ptr->m_Word0.f.src0_rel = src_rel; alu_instruction_ptr->m_Word0.f.src0_chan = src_chan; 
alu_instruction_ptr->m_Word0.f.src0_neg = src_neg; break; case 1: + assert(alu_instruction_ptr); alu_instruction_ptr->m_Word0.f.src1_sel = src_sel; alu_instruction_ptr->m_Word0.f.src1_rel = src_rel; alu_instruction_ptr->m_Word0.f.src1_chan = src_chan; alu_instruction_ptr->m_Word0.f.src1_neg = src_neg; break; case 2: + assert(alu_instruction_ptr); alu_instruction_ptr->m_Word1_OP3.f.src2_sel = src_sel; alu_instruction_ptr->m_Word1_OP3.f.src2_rel = src_rel; alu_instruction_ptr->m_Word1_OP3.f.src2_chan = src_chan; @@ -1606,7 +1784,8 @@ GLboolean add_alu_instruction(r700_AssemblerBase* pAsm, return GL_FALSE; } - if ( pAsm->cf_current_alu_clause_ptr == NULL || + if ( pAsm->alu_x_opcode != 0 || + pAsm->cf_current_alu_clause_ptr == NULL || ( (pAsm->cf_current_alu_clause_ptr != NULL) && (pAsm->cf_current_alu_clause_ptr->m_Word1.f.count >= (GetCFMaxInstructions(pAsm->cf_current_alu_clause_ptr->m_ShaderInstType)-contiguous_slots_needed-1) ) ) ) @@ -1636,9 +1815,17 @@ GLboolean add_alu_instruction(r700_AssemblerBase* pAsm, pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr0 = 0x0; pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr1 = 0x0; - //cf_current_alu_clause_ptr->m_Word1.f.count = number_of_scalar_operations - 1; pAsm->cf_current_alu_clause_ptr->m_Word1.f.count = 0x0; - pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ALU; + + if(pAsm->alu_x_opcode != 0) + { + pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst = pAsm->alu_x_opcode; + pAsm->alu_x_opcode = 0; + } + else + { + pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ALU; + } pAsm->cf_current_alu_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; @@ -1646,7 +1833,7 @@ GLboolean add_alu_instruction(r700_AssemblerBase* pAsm, } else { - pAsm->cf_current_alu_clause_ptr->m_Word1.f.count++; + pAsm->cf_current_alu_clause_ptr->m_Word1.f.count += (GetInstructionSize(alu_instruction_ptr->m_ShaderInstType) / 2); } // If this clause constains any instruction that is forward dependent on a TEX 
instruction, @@ -1923,7 +2110,7 @@ GLboolean check_scalar(r700_AssemblerBase* pAsm, GLuint swizzle_key; - GLuint number_of_operands = r700GetNumOperands(pAsm); + GLuint number_of_operands = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3); for (src=0; src<number_of_operands; src++) { @@ -2012,7 +2199,7 @@ GLboolean check_vector(r700_AssemblerBase* pAsm, GLuint swizzle_key; - GLuint number_of_operands = r700GetNumOperands(pAsm); + GLuint number_of_operands = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3); for (src=0; src<number_of_operands; src++) { @@ -2045,7 +2232,7 @@ GLboolean check_vector(r700_AssemblerBase* pAsm, if( is_gpr(sel) ) { if( GL_FALSE == cycle_for_vector_bank_swizzle(bank_swizzle, src, &cycle) ) - { + { return GL_FALSE; } @@ -2057,7 +2244,7 @@ GLboolean check_vector(r700_AssemblerBase* pAsm, else { if( GL_FALSE == reserve_gpr(pAsm, sel, chan, cycle) ) - { + { return GL_FALSE; } } @@ -2069,7 +2256,7 @@ GLboolean check_vector(r700_AssemblerBase* pAsm, if( is_cfile(sel) ) { if( GL_FALSE == reserve_cfile(pAsm, sel, chan) ) - { + { return GL_FALSE; } } @@ -2081,6 +2268,10 @@ GLboolean check_vector(r700_AssemblerBase* pAsm, GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) { + R700ALUInstruction * alu_instruction_ptr = NULL; + R700ALUInstructionHalfLiteral * alu_instruction_ptr_hl; + R700ALUInstructionFullLiteral * alu_instruction_ptr_fl; + GLuint number_of_scalar_operations; GLboolean is_single_scalar_operation; GLuint scalar_channel_index; @@ -2089,7 +2280,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) int current_source_index; GLuint contiguous_slots_needed; - GLuint uNumSrc = r700GetNumOperands(pAsm); + GLuint uNumSrc = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3); //GLuint channel_swizzle, j; //GLuint chan_counter[4] = {0, 0, 0, 0}; //PVSSRC * pSource[3]; @@ -2146,23 +2337,44 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) contiguous_slots_needed = 0; - if(GL_TRUE == 
is_reduction_opcode(&(pAsm->D)) ) + if(!is_single_scalar_operation) { contiguous_slots_needed = 4; } + contiguous_slots_needed += pAsm->D2.dst2.literal_slots; + initialize(pAsm); for (scalar_channel_index=0; scalar_channel_index < number_of_scalar_operations; scalar_channel_index++) { - R700ALUInstruction* alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction); - if (alu_instruction_ptr == NULL) - { - return GL_FALSE; - } - Init_R700ALUInstruction(alu_instruction_ptr); + if(scalar_channel_index == (number_of_scalar_operations-1)) + { + switch(pAsm->D2.dst2.literal_slots) + { + case 0: + alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction); + Init_R700ALUInstruction(alu_instruction_ptr); + break; + case 1: + alu_instruction_ptr_hl = (R700ALUInstructionHalfLiteral*) CALLOC_STRUCT(R700ALUInstructionHalfLiteral); + Init_R700ALUInstructionHalfLiteral(alu_instruction_ptr_hl, pAsm->C[0].f, pAsm->C[1].f); + alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_hl; + break; + case 2: + alu_instruction_ptr_fl = (R700ALUInstructionFullLiteral*) CALLOC_STRUCT(R700ALUInstructionFullLiteral); + Init_R700ALUInstructionFullLiteral(alu_instruction_ptr_fl,pAsm->C[0].f, pAsm->C[1].f, pAsm->C[2].f, pAsm->C[3].f); + alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_fl; + break; + }; + } + else + { + alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction); + Init_R700ALUInstruction(alu_instruction_ptr); + } //src 0 current_source_index = 0; @@ -2172,7 +2384,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) current_source_index, pcurrent_source, scalar_channel_index) ) - { + { return GL_FALSE; } @@ -2186,13 +2398,13 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) current_source_index, pcurrent_source, scalar_channel_index) ) - { + { return GL_FALSE; } } //other bits - alu_instruction_ptr->m_Word0.f.index_mode = SQ_INDEX_AR_X; + 
alu_instruction_ptr->m_Word0.f.index_mode = pAsm->D2.dst2.index_mode; if( (is_single_scalar_operation == GL_TRUE) || (GL_TRUE == bSplitInst) ) @@ -2204,9 +2416,17 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) alu_instruction_ptr->m_Word0.f.last = (scalar_channel_index == 3) ? 1 : 0; } - alu_instruction_ptr->m_Word0.f.pred_sel = 0x0; - alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0; - alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0; + alu_instruction_ptr->m_Word0.f.pred_sel = (pAsm->D.dst.pred_inv > 0) ? 1 : 0; + if(1 == pAsm->D.dst.predicated) + { + alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x1; + alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x1; + } + else + { + alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0; + alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0; + } // dst if( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) || @@ -2215,7 +2435,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) alu_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg; } else - { + { radeon_error("Only temp destination registers supported for ALU dest regs.\n"); return GL_FALSE; } @@ -2245,7 +2465,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) alu_instruction_ptr->m_Word1.f.dst_chan = scalar_channel_index; - alu_instruction_ptr->m_Word1.f.clamp = pAsm->pILInst[pAsm->uiCurInst].SaturateMode; + alu_instruction_ptr->m_Word1.f.clamp = pAsm->D2.dst2.SaturateMode; if (pAsm->D.dst.op3) { @@ -2272,8 +2492,8 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) { alu_instruction_ptr->m_Word1_OP2.f6.alu_inst = pAsm->D.dst.opcode; - alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = 0x0; - alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = 0x0; + alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = pAsm->S[0].src.abs; + alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = pAsm->S[1].src.abs; //alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0; //alu_instruction_ptr->m_Word1_OP2.f6.update_pred = 
0x0; @@ -2301,8 +2521,8 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) { alu_instruction_ptr->m_Word1_OP2.f.alu_inst = pAsm->D.dst.opcode; - alu_instruction_ptr->m_Word1_OP2.f.src0_abs = 0x0; - alu_instruction_ptr->m_Word1_OP2.f.src1_abs = 0x0; + alu_instruction_ptr->m_Word1_OP2.f.src0_abs = pAsm->S[0].src.abs; + alu_instruction_ptr->m_Word1_OP2.f.src1_abs = pAsm->S[1].src.abs; //alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0; //alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0; @@ -2329,7 +2549,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) } if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) ) - { + { return GL_FALSE; } @@ -2340,19 +2560,19 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) if (is_single_scalar_operation) { if(GL_FALSE == check_scalar(pAsm, alu_instruction_ptr) ) - { + { return GL_FALSE; } } else { if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) ) - { - return 1; + { + return GL_FALSE; } } - contiguous_slots_needed = 0; + contiguous_slots_needed -= 1; } return GL_TRUE; @@ -2403,11 +2623,14 @@ GLboolean next_ins(r700_AssemblerBase *pAsm) //reset for next inst. 
pAsm->D.bits = 0; + pAsm->D2.bits = 0; pAsm->S[0].bits = 0; pAsm->S[1].bits = 0; pAsm->S[2].bits = 0; pAsm->is_tex = GL_FALSE; pAsm->need_tex_barrier = GL_FALSE; + pAsm->D2.bits = 0; + pAsm->C[0].bits = pAsm->C[1].bits = pAsm->C[2].bits = pAsm->C[3].bits = 0; return GL_TRUE; } @@ -2640,9 +2863,44 @@ GLboolean assemble_CMP(r700_AssemblerBase *pAsm) return GL_TRUE; } -GLboolean assemble_COS(r700_AssemblerBase *pAsm) +GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode) { - return assemble_math_function(pAsm, SQ_OP2_INST_COS); + int tmp; + checkop1(pAsm); + + tmp = gethelpr(pAsm); + + pAsm->D.dst.opcode = SQ_OP2_INST_MUL; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writex = 1; + + assemble_src(pAsm, 0, -1); + + pAsm->S[1].src.rtype = SRC_REC_LITERAL; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X); + pAsm->D2.dst2.literal_slots = 1; + pAsm->C[0].f = 1/(3.1415926535 * 2); + pAsm->C[1].f = 0.0F; + next_ins(pAsm); + + pAsm->D.dst.opcode = opcode; + pAsm->D.dst.math = 1; + + assemble_dst(pAsm); + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + next_ins(pAsm); + + //TODO - replicate if more channels set in WriteMask + return GL_TRUE; + } GLboolean assemble_DOT(r700_AssemblerBase *pAsm) @@ -2910,13 +3168,15 @@ GLboolean assemble_FRC(r700_AssemblerBase *pAsm) return GL_TRUE; } -GLboolean assemble_KIL(r700_AssemblerBase *pAsm) -{ - /* TODO: doc says KILL has to be last(end) ALU clause */ - - checkop1(pAsm); +GLboolean assemble_KIL(r700_AssemblerBase *pAsm, GLuint opcode) +{ + struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); - pAsm->D.dst.opcode = SQ_OP2_INST_KILLGT; + if(pILInst->Opcode == OPCODE_KIL) + checkop1(pAsm); + + pAsm->D.dst.opcode = opcode; + //pAsm->D.dst.math = 1; 
setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); pAsm->D.dst.rtype = DST_REG_TEMPORARY; @@ -2929,21 +3189,34 @@ GLboolean assemble_KIL(r700_AssemblerBase *pAsm) setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; pAsm->S[0].src.reg = 0; - setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_0); noneg_PVSSRC(&(pAsm->S[0].src)); - if ( GL_FALSE == assemble_src(pAsm, 0, 1) ) + if(pILInst->Opcode == OPCODE_KIL_NV) { - return GL_FALSE; + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[1].src.reg = 0; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_1); + neg_PVSSRC(&(pAsm->S[1].src)); } - + else + { + if( GL_FALSE == assemble_src(pAsm, 0, 1) ) + { + return GL_FALSE; + } + + } + if ( GL_FALSE == next_ins(pAsm) ) { return GL_FALSE; } + /* Doc says KILL has to be last(end) ALU clause */ pAsm->pR700Shader->killIsUsed = GL_TRUE; + pAsm->alu_x_opcode = SQ_CF_INST_ALU; return GL_TRUE; } @@ -3007,6 +3280,7 @@ GLboolean assemble_LRP(r700_AssemblerBase *pAsm) { return GL_FALSE; } + if( GL_FALSE == assemble_src(pAsm, 2, -1) ) { return GL_FALSE; @@ -3742,77 +4016,137 @@ GLboolean assemble_RSQ(r700_AssemblerBase *pAsm) return assemble_math_function(pAsm, SQ_OP2_INST_RECIPSQRT_IEEE); } -GLboolean assemble_SIN(r700_AssemblerBase *pAsm) -{ - return assemble_math_function(pAsm, SQ_OP2_INST_SIN); -} - GLboolean assemble_SCS(r700_AssemblerBase *pAsm) { BITS tmp; - checkop1(pAsm); + checkop1(pAsm); - tmp = gethelpr(pAsm); + tmp = gethelpr(pAsm); + /* tmp.x = src /2*PI */ + pAsm->D.dst.opcode = SQ_OP2_INST_MUL; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writex = 1; - // COS tmp.x, a.x - pAsm->D.dst.opcode = SQ_OP2_INST_COS; - pAsm->D.dst.math = 1; + assemble_src(pAsm, 0, -1); - setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); - pAsm->D.dst.rtype = DST_REG_TEMPORARY; - pAsm->D.dst.reg = tmp; - 
pAsm->D.dst.writex = 1; + pAsm->S[1].src.rtype = SRC_REC_LITERAL; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X); + pAsm->D2.dst2.literal_slots = 1; + pAsm->C[0].f = 1/(3.1415926535 * 2); + pAsm->C[1].f = 0.0F; - if( GL_FALSE == assemble_src(pAsm, 0, -1) ) - { - return GL_FALSE; - } + next_ins(pAsm); - if ( GL_FALSE == next_ins(pAsm) ) - { - return GL_FALSE; - } + // COS dst.x, a.x + pAsm->D.dst.opcode = SQ_OP2_INST_COS; + pAsm->D.dst.math = 1; - // SIN tmp.y, a.x - pAsm->D.dst.opcode = SQ_OP2_INST_SIN; - pAsm->D.dst.math = 1; + assemble_dst(pAsm); + /* mask y */ + pAsm->D.dst.writey = 0; - setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); - pAsm->D.dst.rtype = DST_REG_TEMPORARY; - pAsm->D.dst.reg = tmp; - pAsm->D.dst.writey = 1; + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); - if( GL_FALSE == assemble_src(pAsm, 0, -1) ) - { - return GL_FALSE; - } + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } - if( GL_FALSE == next_ins(pAsm) ) - { - return GL_FALSE; - } + // SIN dst.y, a.x + pAsm->D.dst.opcode = SQ_OP2_INST_SIN; + pAsm->D.dst.math = 1; - // MOV dst.mask, tmp - pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + assemble_dst(pAsm); + /* mask x */ + pAsm->D.dst.writex = 0; - if( GL_FALSE == assemble_dst(pAsm) ) - { - return GL_FALSE; - } + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } - setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); - pAsm->S[0].src.rtype = DST_REG_TEMPORARY; - pAsm->S[0].src.reg = tmp; + return GL_TRUE; +} - noswizzle_PVSSRC(&(pAsm->S[0].src)); - pAsm->S[0].src.swizzlez = SQ_SEL_0; - pAsm->S[0].src.swizzlew = SQ_SEL_0; +GLboolean 
assemble_LOGIC(r700_AssemblerBase *pAsm, BITS opcode) +{ + if( GL_FALSE == checkop2(pAsm) ) + { + return GL_FALSE; + } - if ( GL_FALSE == next_ins(pAsm) ) - { - return GL_FALSE; - } + pAsm->D.dst.opcode = opcode; + //pAsm->D.dst.math = 1; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 1, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_LOGIC_PRED(r700_AssemblerBase *pAsm, BITS opcode) +{ + struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); + + pAsm->D.dst.opcode = opcode; + pAsm->D.dst.math = 1; + pAsm->D.dst.predicated = 1; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = pAsm->uHelpReg; + pAsm->D.dst.writex = 1; + pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = pAsm->last_cond_register + pAsm->starting_temp_register_number; + pAsm->S[0].src.swizzlex = pILInst->DstReg.CondSwizzle & 0x7; + noneg_PVSSRC(&(pAsm->S[0].src)); + + pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[1].src.reg = pAsm->uHelpReg; + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + noneg_PVSSRC(&(pAsm->S[1].src)); + pAsm->S[1].src.swizzlex = SQ_SEL_0; + pAsm->S[1].src.swizzley = SQ_SEL_0; + pAsm->S[1].src.swizzlez = SQ_SEL_0; + pAsm->S[1].src.swizzlew = SQ_SEL_0; + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } return GL_TRUE; } @@ -3895,6 +4229,7 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) switch (pAsm->pILInst[pAsm->uiCurInst].SrcReg[0].File) { + case PROGRAM_UNIFORM: case PROGRAM_CONSTANT: case PROGRAM_LOCAL_PARAM: case PROGRAM_ENV_PARAM: @@ -3992,24 +4327,6 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) 
return GL_FALSE; } - /* tmp1.z = ABS(tmp1.z) dont have abs support in assembler currently - * have to do explicit instruction - */ - pAsm->D.dst.opcode = SQ_OP2_INST_MAX; - setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); - pAsm->D.dst.rtype = DST_REG_TEMPORARY; - pAsm->D.dst.reg = tmp1; - pAsm->D.dst.writez = 1; - - setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); - pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; - pAsm->S[0].src.reg = tmp1; - noswizzle_PVSSRC(&(pAsm->S[0].src)); - pAsm->S[1].bits = pAsm->S[0].bits; - flipneg_PVSSRC(&(pAsm->S[1].src)); - - next_ins(pAsm); - /* tmp1.z = RCP_e(|tmp1.z|) */ pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE; pAsm->D.dst.math = 1; @@ -4022,13 +4339,13 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; pAsm->S[0].src.reg = tmp1; pAsm->S[0].src.swizzlex = SQ_SEL_Z; + pAsm->S[0].src.abs = 1; next_ins(pAsm); /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x * muladd has no writemask, have to use another temp - * also no support for imm constants, so add 1 here */ pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; pAsm->D.dst.op3 = 1; @@ -4045,30 +4362,12 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) pAsm->S[1].src.reg = tmp1; setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z); setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE); - pAsm->S[2].src.rtype = SRC_REG_TEMPORARY; + /* immediate c 1.5 */ + pAsm->D2.dst2.literal_slots = 1; + pAsm->C[0].f = 1.5F; + pAsm->S[2].src.rtype = SRC_REC_LITERAL; pAsm->S[2].src.reg = tmp1; - setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_1); - - next_ins(pAsm); - - /* ADD the remaining .5 */ - pAsm->D.dst.opcode = SQ_OP2_INST_ADD; - setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); - pAsm->D.dst.rtype = DST_REG_TEMPORARY; - pAsm->D.dst.reg = tmp2; - pAsm->D.dst.writex = 1; - pAsm->D.dst.writey = 1; - pAsm->D.dst.writez = 0; - pAsm->D.dst.writew = 0; - - setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); - 
pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; - pAsm->S[0].src.reg = tmp2; - noswizzle_PVSSRC(&(pAsm->S[0].src)); - setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); - pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; - pAsm->S[1].src.reg = 252; // SQ_ALU_SRC_0_5 - noswizzle_PVSSRC(&(pAsm->S[1].src)); + setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_X); next_ins(pAsm); @@ -4093,22 +4392,35 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) } - if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXB) + switch(pAsm->pILInst[pAsm->uiCurInst].Opcode) { - pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE_L; - } - else - { - pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; + case OPCODE_DDX: + /* will these need WQM(1) on CF inst ? */ + pAsm->D.dst.opcode = SQ_TEX_INST_GET_GRADIENTS_H; + break; + case OPCODE_DDY: + pAsm->D.dst.opcode = SQ_TEX_INST_GET_GRADIENTS_V; + break; + case OPCODE_TXB: + pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE_L; + break; + default: + if(pAsm->pILInst[pAsm->uiCurInst].TexShadow == 1) + pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE_C; + else + pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; } pAsm->is_tex = GL_TRUE; if ( GL_TRUE == need_barrier ) + + pAsm->is_tex = GL_TRUE; + if ( GL_TRUE == need_barrier ) { pAsm->need_tex_barrier = GL_TRUE; } // Set src1 to tex unit id - pAsm->S[1].src.reg = pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit; + pAsm->S[1].src.reg = pAsm->SamplerUnits[pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit]; pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; //No sw info from mesa compiler, so hard code here. @@ -4142,30 +4454,66 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) pAsm->S[0].src.swizzlew = SQ_SEL_Y; } + if(pAsm->pILInst[pAsm->uiCurInst].TexShadow == 1) + { + /* compare value goes to w chan ? 
*/ + pAsm->S[0].src.swizzlew = SQ_SEL_Z; + } + if ( GL_FALSE == next_ins(pAsm) ) { return GL_FALSE; } + /* add ARB shadow ambient but clamp to 0..1 */ + if(pAsm->pILInst[pAsm->uiCurInst].TexShadow == 1) + { + /* ADD_SAT dst, dst, ambient[texunit] */ + pAsm->D.dst.opcode = SQ_OP2_INST_ADD; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + pAsm->D2.dst2.SaturateMode = 1; + + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = pAsm->D.dst.reg; + noswizzle_PVSSRC(&(pAsm->S[0].src)); + noneg_PVSSRC(&(pAsm->S[0].src)); + + pAsm->S[1].src.rtype = SRC_REG_CONSTANT; + pAsm->S[1].src.reg = pAsm->shadow_regs[pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit]; + noswizzle_PVSSRC(&(pAsm->S[1].src)); + noneg_PVSSRC(&(pAsm->S[1].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + } + return GL_TRUE; } GLboolean assemble_XPD(r700_AssemblerBase *pAsm) { - BITS tmp; + BITS tmp1; + BITS tmp2 = 0; if( GL_FALSE == checkop2(pAsm) ) { return GL_FALSE; } - tmp = gethelpr(pAsm); + tmp1 = gethelpr(pAsm); pAsm->D.dst.opcode = SQ_OP2_INST_MUL; setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); pAsm->D.dst.rtype = DST_REG_TEMPORARY; - pAsm->D.dst.reg = tmp; + pAsm->D.dst.reg = tmp1; nomask_PVSDST(&(pAsm->D.dst)); if( GL_FALSE == assemble_src(pAsm, 0, -1) ) @@ -4191,11 +4539,11 @@ GLboolean assemble_XPD(r700_AssemblerBase *pAsm) if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask) { - tmp = gethelpr(pAsm); + tmp2 = gethelpr(pAsm); setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); pAsm->D.dst.rtype = DST_REG_TEMPORARY; - pAsm->D.dst.reg = tmp; + pAsm->D.dst.reg = tmp2; nomask_PVSDST(&(pAsm->D.dst)); } @@ -4223,7 +4571,7 @@ GLboolean assemble_XPD(r700_AssemblerBase *pAsm) // result1 + (neg) result0 setaddrmode_PVSSRC(&(pAsm->S[2].src),ADDR_ABSOLUTE); pAsm->S[2].src.rtype = SRC_REG_TEMPORARY; - pAsm->S[2].src.reg = tmp; + pAsm->S[2].src.reg = tmp1; neg_PVSSRC(&(pAsm->S[2].src)); noswizzle_PVSSRC(&(pAsm->S[2].src)); @@ -4246,7 +4594,7 @@ 
GLboolean assemble_XPD(r700_AssemblerBase *pAsm) // Use tmp as source setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; - pAsm->S[0].src.reg = tmp; + pAsm->S[0].src.reg = tmp2; noneg_PVSSRC(&(pAsm->S[0].src)); noswizzle_PVSSRC(&(pAsm->S[0].src)); @@ -4265,27 +4613,909 @@ GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm) return GL_TRUE; } -GLboolean assemble_IF(r700_AssemblerBase *pAsm) +static inline void decreaseCurrent(r700_AssemblerBase *pAsm, GLuint uReason) { + switch (uReason) + { + case FC_PUSH_VPM: + pAsm->CALLSTACK[pAsm->CALLSP].current--; + break; + case FC_PUSH_WQM: + pAsm->CALLSTACK[pAsm->CALLSP].current -= 4; + break; + case FC_LOOP: + pAsm->CALLSTACK[pAsm->CALLSP].current -= 4; + break; + case FC_REP: + /* TODO : for 16 vp asic, should -= 2; */ + pAsm->CALLSTACK[pAsm->CALLSP].current -= 1; + break; + }; +} + +static inline void checkStackDepth(r700_AssemblerBase *pAsm, GLuint uReason, GLboolean bCheckMaxOnly) +{ + if(GL_TRUE == bCheckMaxOnly) + { + switch (uReason) + { + case FC_PUSH_VPM: + if((pAsm->CALLSTACK[pAsm->CALLSP].current + 1) + > pAsm->CALLSTACK[pAsm->CALLSP].max) + { + pAsm->CALLSTACK[pAsm->CALLSP].max = + pAsm->CALLSTACK[pAsm->CALLSP].current + 1; + } + break; + case FC_PUSH_WQM: + if((pAsm->CALLSTACK[pAsm->CALLSP].current + 4) + > pAsm->CALLSTACK[pAsm->CALLSP].max) + { + pAsm->CALLSTACK[pAsm->CALLSP].max = + pAsm->CALLSTACK[pAsm->CALLSP].current + 4; + } + break; + } + return; + } + + switch (uReason) + { + case FC_PUSH_VPM: + pAsm->CALLSTACK[pAsm->CALLSP].current++; + break; + case FC_PUSH_WQM: + pAsm->CALLSTACK[pAsm->CALLSP].current += 4; + break; + case FC_LOOP: + pAsm->CALLSTACK[pAsm->CALLSP].current += 4; + break; + case FC_REP: + /* TODO : for 16 vp asic, should += 2; */ + pAsm->CALLSTACK[pAsm->CALLSP].current += 1; + break; + }; + + if(pAsm->CALLSTACK[pAsm->CALLSP].current + > pAsm->CALLSTACK[pAsm->CALLSP].max) + { + pAsm->CALLSTACK[pAsm->CALLSP].max = + 
pAsm->CALLSTACK[pAsm->CALLSP].current; + } +} + +GLboolean jumpToOffest(r700_AssemblerBase *pAsm, GLuint pops, GLint offset) +{ + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = pops; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_JUMP; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + + pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + offset; + + return GL_TRUE; +} + +GLboolean pops(r700_AssemblerBase *pAsm, GLuint pops) +{ + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = pops; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1; + + return GL_TRUE; +} + +GLboolean assemble_IF(r700_AssemblerBase *pAsm, GLboolean bHasElse) +{ + pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; + + assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE); + + + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + + if(GL_TRUE != bHasElse) + { + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; + } + else + { + 
pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0; + } + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_JUMP; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + + pAsm->FCSP++; + pAsm->fc_stack[pAsm->FCSP].type = FC_IF; + pAsm->fc_stack[pAsm->FCSP].mid = NULL; + pAsm->fc_stack[pAsm->FCSP].midLen= 0; + pAsm->fc_stack[pAsm->FCSP].first = pAsm->cf_current_cf_clause_ptr; + +#ifndef USE_CF_FOR_POP_AFTER + if(GL_TRUE != bHasElse) + { + pAsm->alu_x_opcode = SQ_CF_INST_ALU_POP_AFTER; + } +#endif /* USE_CF_FOR_POP_AFTER */ + + checkStackDepth(pAsm, FC_PUSH_VPM, GL_FALSE); + + return GL_TRUE; +} + +GLboolean assemble_ELSE(r700_AssemblerBase *pAsm) +{ + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; /// + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ELSE; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + + pAsm->fc_stack[pAsm->FCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc( (void *)pAsm->fc_stack[pAsm->FCSP].mid, + 0, + sizeof(R700ControlFlowGenericClause *) ); + pAsm->fc_stack[pAsm->FCSP].mid[0] = pAsm->cf_current_cf_clause_ptr; + //pAsm->fc_stack[pAsm->FCSP].unNumMid = 1; + +#ifndef USE_CF_FOR_POP_AFTER + pAsm->alu_x_opcode = SQ_CF_INST_ALU_POP_AFTER; +#endif /* 
USE_CF_FOR_POP_AFTER */ + + pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode - 1; + return GL_TRUE; } GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm) { +#ifdef USE_CF_FOR_POP_AFTER + pops(pAsm, 1); +#endif /* USE_CF_FOR_POP_AFTER */ + + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + + if(NULL == pAsm->fc_stack[pAsm->FCSP].mid) + { + /* no else in between */ + pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode; + } + else + { + pAsm->fc_stack[pAsm->FCSP].mid[0]->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode; + } + + if(NULL != pAsm->fc_stack[pAsm->FCSP].mid) + { + FREE(pAsm->fc_stack[pAsm->FCSP].mid); + } + + if(pAsm->fc_stack[pAsm->FCSP].type != FC_IF) + { + radeon_error("if/endif in shader code are not paired. \n"); + return GL_FALSE; + } + + pAsm->FCSP--; + + decreaseCurrent(pAsm, FC_PUSH_VPM); + + return GL_TRUE; +} + +GLboolean assemble_BGNLOOP(r700_AssemblerBase *pAsm) +{ + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_START_NO_AL; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + + pAsm->FCSP++; + pAsm->fc_stack[pAsm->FCSP].type = FC_LOOP; + pAsm->fc_stack[pAsm->FCSP].mid = NULL; + pAsm->fc_stack[pAsm->FCSP].unNumMid = 0; + pAsm->fc_stack[pAsm->FCSP].midLen = 0; + pAsm->fc_stack[pAsm->FCSP].first = pAsm->cf_current_cf_clause_ptr; + + checkStackDepth(pAsm, FC_LOOP, GL_FALSE); + + return GL_TRUE; +} + +GLboolean 
assemble_BRK(r700_AssemblerBase *pAsm) +{ +#ifdef USE_CF_FOR_CONTINUE_BREAK + + pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; + + assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE); + + unsigned int unFCSP; + for(unFCSP=pAsm->FCSP; unFCSP>0; unFCSP--) + { + if(FC_LOOP == pAsm->fc_stack[unFCSP].type) + { + break; + } + } + if(0 == FC_LOOP) + { + radeon_error("Break is not inside loop/endloop pair.\n"); + return GL_FALSE; + } + + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_BREAK; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + + pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc( + (void *)pAsm->fc_stack[unFCSP].mid, + sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid, + sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) ); + pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr; + pAsm->fc_stack[unFCSP].unNumMid++; + + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + 
pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1; + + checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE); + +#endif //USE_CF_FOR_CONTINUE_BREAK + return GL_TRUE; +} + +GLboolean assemble_CONT(r700_AssemblerBase *pAsm) +{ +#ifdef USE_CF_FOR_CONTINUE_BREAK + pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; + + assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE); + + unsigned int unFCSP; + for(unFCSP=pAsm->FCSP; unFCSP>0; unFCSP--) + { + if(FC_LOOP == pAsm->fc_stack[unFCSP].type) + { + break; + } + } + if(0 == FC_LOOP) + { + radeon_error("Continue is not inside loop/endloop pair.\n"); + return GL_FALSE; + } + + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_CONTINUE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + + pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc( + (void *)pAsm->fc_stack[unFCSP].mid, + sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid, + sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) ); + pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr; + pAsm->fc_stack[unFCSP].unNumMid++; + + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + 
pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1; + + checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE); + +#endif /* USE_CF_FOR_CONTINUE_BREAK */ + + return GL_TRUE; +} + +GLboolean assemble_ENDLOOP(r700_AssemblerBase *pAsm) +{ + GLuint i; + + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_END; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + + pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->fc_stack[pAsm->FCSP].first->m_uIndex + 1; + pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1; + +#ifdef USE_CF_FOR_CONTINUE_BREAK + for(i=0; i<pAsm->fc_stack[pAsm->FCSP].unNumMid; i++) + { + pAsm->fc_stack[pAsm->FCSP].mid[i]->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex; + } + if(NULL != pAsm->fc_stack[pAsm->FCSP].mid) + { + FREE(pAsm->fc_stack[pAsm->FCSP].mid); + } +#endif + + if(pAsm->fc_stack[pAsm->FCSP].type != FC_LOOP) + { + radeon_error("loop/endloop in shader code are not paired. 
\n"); + return GL_FALSE; + } + + GLuint unFCSP; + GLuint unIF = 0; + if((pAsm->unCFflags & HAS_CURRENT_LOOPRET) > 0) + { + for(unFCSP=(pAsm->FCSP-1); unFCSP>pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry; unFCSP--) + { + if(FC_LOOP == pAsm->fc_stack[unFCSP].type) + { + breakLoopOnFlag(pAsm, unFCSP); + break; + } + else if(FC_IF == pAsm->fc_stack[unFCSP].type) + { + unIF++; + } + } + if(unFCSP <= pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry) + { +#ifdef USE_CF_FOR_POP_AFTER + returnOnFlag(pAsm, unIF); +#else + returnOnFlag(pAsm, 0); +#endif /* USE_CF_FOR_POP_AFTER */ + pAsm->unCFflags &= ~HAS_CURRENT_LOOPRET; + } + } + + pAsm->FCSP--; + + decreaseCurrent(pAsm, FC_LOOP); + + return GL_TRUE; +} + +void add_return_inst(r700_AssemblerBase *pAsm) +{ + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return; + } + //pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_RETURN; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; +} + +GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex, GLuint uiIL_Shift) +{ + /* Put in sub */ + if( (pAsm->unSubArrayPointer + 1) > pAsm->unSubArraySize ) + { + pAsm->subs = (SUB_OFFSET*)_mesa_realloc( (void *)pAsm->subs, + sizeof(SUB_OFFSET) * pAsm->unSubArraySize, + sizeof(SUB_OFFSET) * (pAsm->unSubArraySize + 10) ); + if(NULL == pAsm->subs) + { + return GL_FALSE; + } + pAsm->unSubArraySize += 10; + } + + pAsm->subs[pAsm->unSubArrayPointer].subIL_Offset = nILindex + uiIL_Shift; + pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pHead=NULL; + 
pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pTail=NULL; + pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.uNumOfNode=0; + + pAsm->CALLSP++; + pAsm->CALLSTACK[pAsm->CALLSP].subDescIndex = pAsm->unSubArrayPointer; + pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry = pAsm->FCSP; + pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local + = &(pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local); + pAsm->CALLSTACK[pAsm->CALLSP].max = 0; + pAsm->CALLSTACK[pAsm->CALLSP].current = 0; + SetActiveCFlist(pAsm->pR700Shader, + pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local); + + pAsm->unSubArrayPointer++; + + /* start sub */ + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + + pAsm->FCSP++; + pAsm->fc_stack[pAsm->FCSP].type = FC_REP; + + checkStackDepth(pAsm, FC_REP, GL_FALSE); + + return GL_TRUE; +} + +GLboolean assemble_ENDSUB(r700_AssemblerBase *pAsm) +{ + if(pAsm->fc_stack[pAsm->FCSP].type != FC_REP) + { + radeon_error("BGNSUB/ENDSUB in shader code are not paired. \n"); + return GL_FALSE; + } + + /* copy max to sub structure */ + pAsm->subs[pAsm->CALLSTACK[pAsm->CALLSP].subDescIndex].unStackDepthMax + = pAsm->CALLSTACK[pAsm->CALLSP].max; + + decreaseCurrent(pAsm, FC_REP); + + pAsm->CALLSP--; + SetActiveCFlist(pAsm->pR700Shader, + pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local); + + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + + pAsm->FCSP--; + + return GL_TRUE; +} + +GLboolean assemble_RET(r700_AssemblerBase *pAsm) +{ + GLuint unIF = 0; + + if(pAsm->CALLSP > 0) + { /* in sub */ + GLuint unFCSP; + for(unFCSP=pAsm->FCSP; unFCSP>pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry; unFCSP--) + { + if(FC_LOOP == pAsm->fc_stack[unFCSP].type) + { + setRetInLoopFlag(pAsm, SQ_SEL_1); + breakLoopOnFlag(pAsm, unFCSP); + pAsm->unCFflags |= LOOPRET_FLAGS; + + return GL_TRUE; + } + else if(FC_IF == pAsm->fc_stack[unFCSP].type) + { + unIF++; + } + } + } + +#ifdef USE_CF_FOR_POP_AFTER + if(unIF > 0) + { + pops(pAsm, unIF); + } +#endif /* USE_CF_FOR_POP_AFTER */ 
+ + add_return_inst(pAsm); + + return GL_TRUE; +} + +GLboolean assemble_CAL(r700_AssemblerBase *pAsm, + GLint nILindex, + GLuint uiIL_Shift, + GLuint uiNumberInsts, + struct prog_instruction *pILInst, + PRESUB_DESC * pPresubDesc) +{ + GLint uiIL_Offset; + + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.call_count = 1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_CALL; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + + /* Put in caller */ + if( (pAsm->unCallerArrayPointer + 1) > pAsm->unCallerArraySize ) + { + pAsm->callers = (CALLER_POINTER*)_mesa_realloc( (void *)pAsm->callers, + sizeof(CALLER_POINTER) * pAsm->unCallerArraySize, + sizeof(CALLER_POINTER) * (pAsm->unCallerArraySize + 10) ); + if(NULL == pAsm->callers) + { + return GL_FALSE; + } + pAsm->unCallerArraySize += 10; + } + + uiIL_Offset = nILindex + uiIL_Shift; + pAsm->callers[pAsm->unCallerArrayPointer].subIL_Offset = uiIL_Offset; + pAsm->callers[pAsm->unCallerArrayPointer].cf_ptr = pAsm->cf_current_cf_clause_ptr; + + pAsm->callers[pAsm->unCallerArrayPointer].finale_cf_ptr = NULL; + pAsm->callers[pAsm->unCallerArrayPointer].prelude_cf_ptr = NULL; + + pAsm->unCallerArrayPointer++; + + int j; + GLuint max; + GLuint unSubID; + GLboolean bRet; + for(j=0; j<pAsm->unSubArrayPointer; j++) + { + if(uiIL_Offset == pAsm->subs[j].subIL_Offset) + { /* compiled before */ + + max = pAsm->subs[j].unStackDepthMax + + pAsm->CALLSTACK[pAsm->CALLSP].current; + if(max > 
pAsm->CALLSTACK[pAsm->CALLSP].max) + { + pAsm->CALLSTACK[pAsm->CALLSP].max = max; + } + + pAsm->callers[pAsm->unCallerArrayPointer - 1].subDescIndex = j; + return GL_TRUE; + } + } + + pAsm->callers[pAsm->unCallerArrayPointer - 1].subDescIndex = pAsm->unSubArrayPointer; + unSubID = pAsm->unSubArrayPointer; + + bRet = AssembleInstr(nILindex, uiIL_Shift, uiNumberInsts, pILInst, pAsm); + + if(GL_TRUE == bRet) + { + max = pAsm->subs[unSubID].unStackDepthMax + + pAsm->CALLSTACK[pAsm->CALLSP].current; + if(max > pAsm->CALLSTACK[pAsm->CALLSP].max) + { + pAsm->CALLSTACK[pAsm->CALLSP].max = max; + } + + pAsm->subs[unSubID].pPresubDesc = pPresubDesc; + } + + return bRet; +} + +GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue) +{ + /*GLfloat fLiteral[2] = {0.1, 0.0};*/ + + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + pAsm->D.dst.op3 = 0; + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = pAsm->flag_reg_index; + pAsm->D.dst.writex = 1; + pAsm->D.dst.writey = 0; + pAsm->D.dst.writez = 0; + pAsm->D.dst.writew = 0; + pAsm->D2.dst2.literal_slots = 1; + pAsm->D2.dst2.SaturateMode = SATURATE_OFF; + pAsm->D.dst.predicated = 0; + /* in reloc where dislink flag init inst, only one slot alu inst is handled. */ + pAsm->D.dst.math = 1; /* TODO : not math really, but one channel op, more generic alu assembler needed */ + pAsm->D2.dst2.index_mode = SQ_INDEX_LOOP; /* Check this ! 
*/ +#if 0 + pAsm->S[0].src.rtype = SRC_REC_LITERAL; + //pAsm->S[0].src.reg = 0; + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + noneg_PVSSRC(&(pAsm->S[0].src)); + pAsm->S[0].src.swizzlex = SQ_SEL_X; + pAsm->S[0].src.swizzley = SQ_SEL_Y; + pAsm->S[0].src.swizzlez = SQ_SEL_Z; + pAsm->S[0].src.swizzlew = SQ_SEL_W; + + if( GL_FALSE == next_ins_literal(pAsm, &(fLiteral[0])) ) + { + return GL_FALSE; + } +#else + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = 0; + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + noneg_PVSSRC(&(pAsm->S[0].src)); + pAsm->S[0].src.swizzlex = flagValue; + pAsm->S[0].src.swizzley = flagValue; + pAsm->S[0].src.swizzlez = flagValue; + pAsm->S[0].src.swizzlew = flagValue; + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } +#endif + return GL_TRUE; } -GLboolean AssembleInstr(GLuint uiNumberInsts, +GLboolean testFlag(r700_AssemblerBase *pAsm) +{ + /*GLfloat fLiteral[2] = {0.1, 0.0};*/ + + //Test flag + GLuint tmp = gethelpr(pAsm); + pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; + + pAsm->D.dst.opcode = SQ_OP2_INST_PRED_SETE; + pAsm->D.dst.math = 1; + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writex = 1; + pAsm->D.dst.writey = 0; + pAsm->D.dst.writez = 0; + pAsm->D.dst.writew = 0; + pAsm->D2.dst2.literal_slots = 1; + pAsm->D2.dst2.SaturateMode = SATURATE_OFF; + pAsm->D.dst.predicated = 1; + pAsm->D2.dst2.index_mode = SQ_INDEX_LOOP; /* Check this ! 
*/ + + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = pAsm->flag_reg_index; + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + noneg_PVSSRC(&(pAsm->S[0].src)); + pAsm->S[0].src.swizzlex = SQ_SEL_X; + pAsm->S[0].src.swizzley = SQ_SEL_Y; + pAsm->S[0].src.swizzlez = SQ_SEL_Z; + pAsm->S[0].src.swizzlew = SQ_SEL_W; +#if 0 + pAsm->S[1].src.rtype = SRC_REC_LITERAL; + //pAsm->S[1].src.reg = 0; + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + noneg_PVSSRC(&(pAsm->S[1].src)); + pAsm->S[1].src.swizzlex = SQ_SEL_X; + pAsm->S[1].src.swizzley = SQ_SEL_Y; + pAsm->S[1].src.swizzlez = SQ_SEL_Z; + pAsm->S[1].src.swizzlew = SQ_SEL_W; + + if( GL_FALSE == next_ins_literal(pAsm, &(fLiteral[0])) ) + { + return GL_FALSE; + } +#else + pAsm->S[1].src.rtype = DST_REG_TEMPORARY; + pAsm->S[1].src.reg = 0; + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + noneg_PVSSRC(&(pAsm->S[1].src)); + pAsm->S[1].src.swizzlex = SQ_SEL_1; + pAsm->S[1].src.swizzley = SQ_SEL_1; + pAsm->S[1].src.swizzlez = SQ_SEL_1; + pAsm->S[1].src.swizzlew = SQ_SEL_1; + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } +#endif + + checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE); + + return GL_TRUE; +} + +GLboolean returnOnFlag(r700_AssemblerBase *pAsm, GLuint unIF) +{ + testFlag(pAsm); + jumpToOffest(pAsm, 1, 4); + setRetInLoopFlag(pAsm, SQ_SEL_0); + pops(pAsm, unIF + 1); + add_return_inst(pAsm); + + return GL_TRUE; +} + +GLboolean breakLoopOnFlag(r700_AssemblerBase *pAsm, GLuint unFCSP) +{ + testFlag(pAsm); + + //break + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = 
SQ_CF_INST_LOOP_BREAK; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + + pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc( + (void *)pAsm->fc_stack[unFCSP].mid, + sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid, + sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) ); + pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr; + pAsm->fc_stack[unFCSP].unNumMid++; + + pops(pAsm, 1); + + return GL_TRUE; +} + +GLboolean AssembleInstr(GLuint uiFirstInst, + GLuint uiIL_Shift, + GLuint uiNumberInsts, struct prog_instruction *pILInst, r700_AssemblerBase *pR700AsmCode) { GLuint i; pR700AsmCode->pILInst = pILInst; - for(i=0; i<uiNumberInsts; i++) + for(i=uiFirstInst; i<uiNumberInsts; i++) { pR700AsmCode->uiCurInst = i; +#ifndef USE_CF_FOR_CONTINUE_BREAK + if(OPCODE_BRK == pILInst[i+1].Opcode) + { + switch(pILInst[i].Opcode) + { + case OPCODE_SLE: + pILInst[i].Opcode = OPCODE_SGT; + break; + case OPCODE_SLT: + pILInst[i].Opcode = OPCODE_SGE; + break; + case OPCODE_SGE: + pILInst[i].Opcode = OPCODE_SLT; + break; + case OPCODE_SGT: + pILInst[i].Opcode = OPCODE_SLE; + break; + case OPCODE_SEQ: + pILInst[i].Opcode = OPCODE_SNE; + break; + case OPCODE_SNE: + pILInst[i].Opcode = OPCODE_SEQ; + break; + default: + break; + } + } +#endif + if(pILInst[i].CondUpdate == 1) + { + /* remember dest register used for cond evaluation */ + /* XXX also handle PROGRAM_OUTPUT registers here? 
*/ + pR700AsmCode->last_cond_register = pILInst[i].DstReg.Index; + } + switch (pILInst[i].Opcode) { case OPCODE_ABS: @@ -4313,7 +5543,7 @@ GLboolean AssembleInstr(GLuint uiNumberInsts, return GL_FALSE; break; case OPCODE_COS: - if ( GL_FALSE == assemble_COS(pR700AsmCode) ) + if ( GL_FALSE == assemble_TRIG(pR700AsmCode, SQ_OP2_INST_COS) ) return GL_FALSE; break; @@ -4342,7 +5572,8 @@ GLboolean AssembleInstr(GLuint uiNumberInsts, if ( GL_FALSE == assemble_FLR(pR700AsmCode) ) return GL_FALSE; break; - //case OP_FLR_INT: + //case OP_FLR_INT: ; + // if ( GL_FALSE == assemble_FLR_INT() ) // return GL_FALSE; // break; @@ -4353,7 +5584,8 @@ GLboolean AssembleInstr(GLuint uiNumberInsts, break; case OPCODE_KIL: - if ( GL_FALSE == assemble_KIL(pR700AsmCode) ) + case OPCODE_KIL_NV: + if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLGT) ) return GL_FALSE; break; case OPCODE_LG2: @@ -4393,6 +5625,26 @@ GLboolean AssembleInstr(GLuint uiNumberInsts, case OPCODE_MUL: if ( GL_FALSE == assemble_MUL(pR700AsmCode) ) return GL_FALSE; + break; + + case OPCODE_NOISE1: + { + callPreSub(pR700AsmCode, + GLSL_NOISE1, + &noise1_presub, + pILInst->DstReg.Index + pR700AsmCode->starting_temp_register_number, + 1); + radeon_error("noise1: not yet supported shader instruction\n"); + }; + break; + case OPCODE_NOISE2: + radeon_error("noise2: not yet supported shader instruction\n"); + break; + case OPCODE_NOISE3: + radeon_error("noise3: not yet supported shader instruction\n"); + break; + case OPCODE_NOISE4: + radeon_error("noise4: not yet supported shader instruction\n"); break; case OPCODE_POW: @@ -4408,22 +5660,78 @@ GLboolean AssembleInstr(GLuint uiNumberInsts, return GL_FALSE; break; case OPCODE_SIN: - if ( GL_FALSE == assemble_SIN(pR700AsmCode) ) + if ( GL_FALSE == assemble_TRIG(pR700AsmCode, SQ_OP2_INST_SIN) ) return GL_FALSE; break; case OPCODE_SCS: if ( GL_FALSE == assemble_SCS(pR700AsmCode) ) return GL_FALSE; - break; + break; + + case OPCODE_SEQ: + if ( GL_FALSE == 
assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETE) ) + { + return GL_FALSE; + } + break; + + case OPCODE_SGT: + if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) ) + { + return GL_FALSE; + } + break; case OPCODE_SGE: if ( GL_FALSE == assemble_SGE(pR700AsmCode) ) + { return GL_FALSE; - break; + } + break; + + /* NO LT, LE, TODO : use GE => LE, GT => LT : reverse 2 src order would be simpliest. Or use SQ_CF_COND_FALSE for SQ_CF_COND_ACTIVE.*/ case OPCODE_SLT: - if ( GL_FALSE == assemble_SLT(pR700AsmCode) ) + { + struct prog_src_register SrcRegSave[2]; + SrcRegSave[0] = pILInst[i].SrcReg[0]; + SrcRegSave[1] = pILInst[i].SrcReg[1]; + pILInst[i].SrcReg[0] = SrcRegSave[1]; + pILInst[i].SrcReg[1] = SrcRegSave[0]; + if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) ) + { + pILInst[i].SrcReg[0] = SrcRegSave[0]; + pILInst[i].SrcReg[1] = SrcRegSave[1]; + return GL_FALSE; + } + pILInst[i].SrcReg[0] = SrcRegSave[0]; + pILInst[i].SrcReg[1] = SrcRegSave[1]; + } + break; + + case OPCODE_SLE: + { + struct prog_src_register SrcRegSave[2]; + SrcRegSave[0] = pILInst[i].SrcReg[0]; + SrcRegSave[1] = pILInst[i].SrcReg[1]; + pILInst[i].SrcReg[0] = SrcRegSave[1]; + pILInst[i].SrcReg[1] = SrcRegSave[0]; + if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGE) ) + { + pILInst[i].SrcReg[0] = SrcRegSave[0]; + pILInst[i].SrcReg[1] = SrcRegSave[1]; + return GL_FALSE; + } + pILInst[i].SrcReg[0] = SrcRegSave[0]; + pILInst[i].SrcReg[1] = SrcRegSave[1]; + } + break; + + case OPCODE_SNE: + if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETNE) ) + { return GL_FALSE; - break; + } + break; //case OP_STP: // if ( GL_FALSE == assemble_STP(pR700AsmCode) ) @@ -4449,7 +5757,8 @@ GLboolean AssembleInstr(GLuint uiNumberInsts, } } break; - + case OPCODE_DDX: + case OPCODE_DDY: case OPCODE_TEX: case OPCODE_TXB: case OPCODE_TXP: @@ -4457,30 +5766,104 @@ GLboolean AssembleInstr(GLuint uiNumberInsts, return GL_FALSE; break; + case OPCODE_TRUNC: + if ( GL_FALSE == 
assemble_math_function(pR700AsmCode, SQ_OP2_INST_TRUNC) ) + return GL_FALSE; + break; + case OPCODE_XPD: if ( GL_FALSE == assemble_XPD(pR700AsmCode) ) return GL_FALSE; break; - case OPCODE_IF : - if ( GL_FALSE == assemble_IF(pR700AsmCode) ) - return GL_FALSE; + case OPCODE_IF: + { + GLboolean bHasElse = GL_FALSE; + + if(pILInst[pILInst[i].BranchTarget].Opcode == OPCODE_ELSE) + { + bHasElse = GL_TRUE; + } + + if ( GL_FALSE == assemble_IF(pR700AsmCode, bHasElse) ) + { + return GL_FALSE; + } + } break; + case OPCODE_ELSE : - radeon_error("Not yet implemented instruction OPCODE_ELSE \n"); - //if ( GL_FALSE == assemble_BAD("ELSE") ) + if ( GL_FALSE == assemble_ELSE(pR700AsmCode) ) return GL_FALSE; break; + case OPCODE_ENDIF: if ( GL_FALSE == assemble_ENDIF(pR700AsmCode) ) return GL_FALSE; break; + case OPCODE_BGNLOOP: + if( GL_FALSE == assemble_BGNLOOP(pR700AsmCode) ) + { + return GL_FALSE; + } + break; + + case OPCODE_BRK: + if( GL_FALSE == assemble_BRK(pR700AsmCode) ) + { + return GL_FALSE; + } + break; + + case OPCODE_CONT: + if( GL_FALSE == assemble_CONT(pR700AsmCode) ) + { + return GL_FALSE; + } + break; + + case OPCODE_ENDLOOP: + if( GL_FALSE == assemble_ENDLOOP(pR700AsmCode) ) + { + return GL_FALSE; + } + break; + + case OPCODE_BGNSUB: + if( GL_FALSE == assemble_BGNSUB(pR700AsmCode, i, uiIL_Shift) ) + { + return GL_FALSE; + } + break; + + case OPCODE_RET: + if( GL_FALSE == assemble_RET(pR700AsmCode) ) + { + return GL_FALSE; + } + break; + + case OPCODE_CAL: + if( GL_FALSE == assemble_CAL(pR700AsmCode, + pILInst[i].BranchTarget, + uiIL_Shift, + uiNumberInsts, + pILInst, + NULL) ) + { + return GL_FALSE; + } + break; + //case OPCODE_EXPORT: // if ( GL_FALSE == assemble_EXPORT() ) // return GL_FALSE; // break; + case OPCODE_ENDSUB: + return assemble_ENDSUB(pR700AsmCode); + case OPCODE_END: //pR700AsmCode->uiCurInst = i; //This is to remaind that if in later exoort there is depth/stencil @@ -4497,6 +5880,417 @@ GLboolean AssembleInstr(GLuint uiNumberInsts, return 
GL_TRUE; } +GLboolean InitShaderProgram(r700_AssemblerBase * pAsm) +{ + setRetInLoopFlag(pAsm, SQ_SEL_0); + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + return GL_TRUE; +} + +GLboolean RelocProgram(r700_AssemblerBase * pAsm, struct gl_program * pILProg) +{ + GLuint i; + GLuint unCFoffset; + TypedShaderList * plstCFmain; + TypedShaderList * plstCFsub; + + R700ShaderInstruction * pInst; + R700ControlFlowGenericClause * pCFInst; + + R700ControlFlowALUClause * pCF_ALU; + R700ALUInstruction * pALU; + GLuint unConstOffset = 0; + GLuint unRegOffset; + GLuint unMinRegIndex; + + plstCFmain = pAsm->CALLSTACK[0].plstCFInstructions_local; + + /* remove flags init if they are not used */ + if((pAsm->unCFflags & HAS_LOOPRET) == 0) + { + R700ControlFlowALUClause * pCF_ALU; + pInst = plstCFmain->pHead; + while(pInst) + { + if(SIT_CF_ALU == pInst->m_ShaderInstType) + { + pCF_ALU = (R700ControlFlowALUClause *)pInst; + if(0 == pCF_ALU->m_Word1.f.count) + { + pCF_ALU->m_Word1.f.cf_inst = SQ_CF_INST_NOP; + } + else + { + R700ALUInstruction * pALU = pCF_ALU->m_pLinkedALUInstruction; + + pALU->m_pLinkedALUClause = NULL; + pALU = (R700ALUInstruction *)(pALU->pNextInst); + pALU->m_pLinkedALUClause = pCF_ALU; + pCF_ALU->m_pLinkedALUInstruction = pALU; + + pCF_ALU->m_Word1.f.count--; + } + break; + } + pInst = pInst->pNextInst; + }; + } + + if(pAsm->CALLSTACK[0].max > 0) + { + pAsm->pR700Shader->uStackSize = ((pAsm->CALLSTACK[0].max + 3)>>2) + 2; + } + + if(0 == pAsm->unSubArrayPointer) + { + return GL_TRUE; + } + + unCFoffset = plstCFmain->uNumOfNode; + + if(NULL != pILProg->Parameters) + { + unConstOffset = pILProg->Parameters->NumParameters; + } + + /* Reloc subs */ + for(i=0; i<pAsm->unSubArrayPointer; i++) + { + pAsm->subs[i].unCFoffset = unCFoffset; + plstCFsub = &(pAsm->subs[i].lstCFInstructions_local); + + pInst = plstCFsub->pHead; + + /* reloc instructions */ + while(pInst) + { + if(SIT_CF_GENERIC == pInst->m_ShaderInstType) + { + pCFInst = (R700ControlFlowGenericClause *)pInst; + + 
switch (pCFInst->m_Word1.f.cf_inst) + { + case SQ_CF_INST_POP: + case SQ_CF_INST_JUMP: + case SQ_CF_INST_ELSE: + case SQ_CF_INST_LOOP_END: + case SQ_CF_INST_LOOP_START: + case SQ_CF_INST_LOOP_START_NO_AL: + case SQ_CF_INST_LOOP_CONTINUE: + case SQ_CF_INST_LOOP_BREAK: + pCFInst->m_Word0.f.addr += unCFoffset; + break; + default: + break; + } + } + + pInst->m_uIndex += unCFoffset; + + pInst = pInst->pNextInst; + }; + + if(NULL != pAsm->subs[i].pPresubDesc) + { + GLuint uNumSrc; + + unMinRegIndex = pAsm->subs[i].pPresubDesc->pCompiledSub->MinRegIndex; + unRegOffset = pAsm->subs[i].pPresubDesc->maxStartReg; + unConstOffset += pAsm->subs[i].pPresubDesc->unConstantsStart; + + pInst = plstCFsub->pHead; + while(pInst) + { + if(SIT_CF_ALU == pInst->m_ShaderInstType) + { + pCF_ALU = (R700ControlFlowALUClause *)pInst; + + pALU = pCF_ALU->m_pLinkedALUInstruction; + for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++) + { + pALU->m_Word1.f.dst_gpr = pALU->m_Word1.f.dst_gpr + unRegOffset - unMinRegIndex; + + if(pALU->m_Word0.f.src0_sel < SQ_ALU_SRC_GPR_SIZE) + { + pALU->m_Word0.f.src0_sel = pALU->m_Word0.f.src0_sel + unRegOffset - unMinRegIndex; + } + else if(pALU->m_Word0.f.src0_sel >= SQ_ALU_SRC_CFILE_BASE) + { + pALU->m_Word0.f.src0_sel += unConstOffset; + } + + if( ((pALU->m_Word1.val >> SQ_ALU_WORD1_OP3_ALU_INST_SHIFT) & 0x0000001F) + >= SQ_OP3_INST_MUL_LIT ) + { /* op3 : 3 srcs */ + if(pALU->m_Word1_OP3.f.src2_sel < SQ_ALU_SRC_GPR_SIZE) + { + pALU->m_Word1_OP3.f.src2_sel = pALU->m_Word1_OP3.f.src2_sel + unRegOffset - unMinRegIndex; + } + else if(pALU->m_Word1_OP3.f.src2_sel >= SQ_ALU_SRC_CFILE_BASE) + { + pALU->m_Word1_OP3.f.src2_sel += unConstOffset; + } + if(pALU->m_Word0.f.src1_sel < SQ_ALU_SRC_GPR_SIZE) + { + pALU->m_Word0.f.src1_sel = pALU->m_Word0.f.src1_sel + unRegOffset - unMinRegIndex; + } + else if(pALU->m_Word0.f.src1_sel >= SQ_ALU_SRC_CFILE_BASE) + { + pALU->m_Word0.f.src1_sel += unConstOffset; + } + } + else + { + if(pAsm->bR6xx) + { + uNumSrc = 
r700GetNumOperands(pALU->m_Word1_OP2.f6.alu_inst, 0); + } + else + { + uNumSrc = r700GetNumOperands(pALU->m_Word1_OP2.f.alu_inst, 0); + } + if(2 == uNumSrc) + { /* 2 srcs */ + if(pALU->m_Word0.f.src1_sel < SQ_ALU_SRC_GPR_SIZE) + { + pALU->m_Word0.f.src1_sel = pALU->m_Word0.f.src1_sel + unRegOffset - unMinRegIndex; + } + else if(pALU->m_Word0.f.src1_sel >= SQ_ALU_SRC_CFILE_BASE) + { + pALU->m_Word0.f.src1_sel += unConstOffset; + } + } + } + pALU = (R700ALUInstruction*)(pALU->pNextInst); + } + } + pInst = pInst->pNextInst; + }; + } + + /* Put sub into main */ + plstCFmain->pTail->pNextInst = plstCFsub->pHead; + plstCFmain->pTail = plstCFsub->pTail; + plstCFmain->uNumOfNode += plstCFsub->uNumOfNode; + + unCFoffset += plstCFsub->uNumOfNode; + } + + /* reloc callers */ + for(i=0; i<pAsm->unCallerArrayPointer; i++) + { + pAsm->callers[i].cf_ptr->m_Word0.f.addr + = pAsm->subs[pAsm->callers[i].subDescIndex].unCFoffset; + + if(NULL != pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc) + { + unMinRegIndex = pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc->pCompiledSub->MinRegIndex; + unRegOffset = pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc->maxStartReg; + + if(NULL != pAsm->callers[i].prelude_cf_ptr) + { + pCF_ALU = (R700ControlFlowALUClause * )(pAsm->callers[i].prelude_cf_ptr); + pALU = pCF_ALU->m_pLinkedALUInstruction; + for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++) + { + pALU->m_Word1.f.dst_gpr = pALU->m_Word1.f.dst_gpr + unRegOffset - unMinRegIndex; + pALU = (R700ALUInstruction*)(pALU->pNextInst); + } + } + if(NULL != pAsm->callers[i].finale_cf_ptr) + { + pCF_ALU = (R700ControlFlowALUClause * )(pAsm->callers[i].finale_cf_ptr); + pALU = pCF_ALU->m_pLinkedALUInstruction; + for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++) + { + pALU->m_Word0.f.src0_sel = pALU->m_Word0.f.src0_sel + unRegOffset - unMinRegIndex; + pALU = (R700ALUInstruction*)(pALU->pNextInst); + } + } + } + } + + return GL_TRUE; +} + +GLboolean callPreSub(r700_AssemblerBase* pAsm, + 
LOADABLE_SCRIPT_SIGNITURE scriptSigniture, + COMPILED_SUB * pCompiledSub, + GLshort uOutReg, + GLshort uNumValidSrc) +{ + /* save assemble context */ + GLuint starting_temp_register_number_save; + GLuint number_used_registers_save; + GLuint uFirstHelpReg_save; + GLuint uHelpReg_save; + GLuint uiCurInst_save; + struct prog_instruction *pILInst_save; + PRESUB_DESC * pPresubDesc; + GLboolean bRet; + int i; + + R700ControlFlowGenericClause* prelude_cf_ptr = NULL; + + /* copy srcs to presub inputs */ + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + for(i=0; i<uNumValidSrc; i++) + { + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = pCompiledSub->srcRegIndex[i]; + pAsm->D.dst.writex = 1; + pAsm->D.dst.writey = 1; + pAsm->D.dst.writez = 1; + pAsm->D.dst.writew = 1; + + if( GL_FALSE == assemble_src(pAsm, i, 0) ) + { + return GL_FALSE; + } + + next_ins(pAsm); + } + if(uNumValidSrc > 0) + { + prelude_cf_ptr = pAsm->cf_current_alu_clause_ptr; + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + } + + /* browse thro existing presubs. */ + for(i=0; i<pAsm->unNumPresub; i++) + { + if(pAsm->presubs[i].sptSigniture == scriptSigniture) + { + break; + } + } + + if(i == pAsm->unNumPresub) + { /* not loaded yet */ + /* save assemble context */ + number_used_registers_save = pAsm->number_used_registers; + uFirstHelpReg_save = pAsm->uFirstHelpReg; + uHelpReg_save = pAsm->uHelpReg; + starting_temp_register_number_save = pAsm->starting_temp_register_number; + pILInst_save = pAsm->pILInst; + uiCurInst_save = pAsm->uiCurInst; + + /* alloc in presub */ + if( (pAsm->unNumPresub + 1) > pAsm->unPresubArraySize ) + { + pAsm->presubs = (PRESUB_DESC*)_mesa_realloc( (void *)pAsm->presubs, + sizeof(PRESUB_DESC) * pAsm->unPresubArraySize, + sizeof(PRESUB_DESC) * (pAsm->unPresubArraySize + 4) ); + if(NULL == pAsm->presubs) + { + radeon_error("No memeory to allocate built in shader function description structures. 
\n"); + return GL_FALSE; + } + pAsm->unPresubArraySize += 4; + } + + pPresubDesc = &(pAsm->presubs[i]); + pPresubDesc->sptSigniture = scriptSigniture; + + /* constants offsets need to be final resolved at reloc. */ + if(0 == pAsm->unNumPresub) + { + pPresubDesc->unConstantsStart = 0; + } + else + { + pPresubDesc->unConstantsStart = pAsm->presubs[i-1].unConstantsStart + + pAsm->presubs[i-1].pCompiledSub->NumParameters; + } + + pPresubDesc->pCompiledSub = pCompiledSub; + + pPresubDesc->subIL_Shift = pAsm->unCurNumILInsts; + pPresubDesc->maxStartReg = uFirstHelpReg_save; + pAsm->unCurNumILInsts += pCompiledSub->NumInstructions; + + pAsm->unNumPresub++; + + /* setup new assemble context */ + pAsm->starting_temp_register_number = 0; + pAsm->number_used_registers = pCompiledSub->NumTemporaries; + pAsm->uFirstHelpReg = pAsm->number_used_registers; + pAsm->uHelpReg = pAsm->uFirstHelpReg; + + bRet = assemble_CAL(pAsm, + 0, + pPresubDesc->subIL_Shift, + pCompiledSub->NumInstructions, + pCompiledSub->Instructions, + pPresubDesc); + + + pPresubDesc->number_used_registers = pAsm->number_used_registers; + + /* restore assemble context */ + pAsm->number_used_registers = number_used_registers_save; + pAsm->uFirstHelpReg = uFirstHelpReg_save; + pAsm->uHelpReg = uHelpReg_save; + pAsm->starting_temp_register_number = starting_temp_register_number_save; + pAsm->pILInst = pILInst_save; + pAsm->uiCurInst = uiCurInst_save; + } + else + { /* was loaded */ + pPresubDesc = &(pAsm->presubs[i]); + + bRet = assemble_CAL(pAsm, + 0, + pPresubDesc->subIL_Shift, + pCompiledSub->NumInstructions, + pCompiledSub->Instructions, + pPresubDesc); + } + + if(GL_FALSE == bRet) + { + radeon_error("Shader presub assemble failed. 
\n"); + } + else + { + /* copy presub output to real dst */ + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = pCompiledSub->dstRegIndex; + pAsm->S[0].src.swizzlex = pCompiledSub->outputSwizzleX; + pAsm->S[0].src.swizzley = pCompiledSub->outputSwizzleY; + pAsm->S[0].src.swizzlez = pCompiledSub->outputSwizzleZ; + pAsm->S[0].src.swizzlew = pCompiledSub->outputSwizzleW; + + next_ins(pAsm); + + pAsm->callers[pAsm->unCallerArrayPointer - 1].finale_cf_ptr = pAsm->cf_current_alu_clause_ptr; + pAsm->callers[pAsm->unCallerArrayPointer - 1].prelude_cf_ptr = prelude_cf_ptr; + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + } + + if( (pPresubDesc->number_used_registers + pAsm->uFirstHelpReg) > pAsm->number_used_registers ) + { + pAsm->number_used_registers = pPresubDesc->number_used_registers + pAsm->uFirstHelpReg; + } + if(pAsm->uFirstHelpReg > pPresubDesc->maxStartReg) + { + pPresubDesc->maxStartReg = pAsm->uFirstHelpReg; + } + + return bRet; +} + GLboolean Process_Export(r700_AssemblerBase* pAsm, GLuint type, GLuint export_starting_index, @@ -4791,6 +6585,25 @@ GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode, export_starting_index++; } } + + for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++) + { + unBit = 1 << i; + if(OutputsWritten & unBit) + { + if( GL_FALSE == Process_Export(pR700AsmCode, + SQ_EXPORT_PARAM, + export_starting_index, + 1, + pR700AsmCode->ucVP_OutputMap[i], + GL_FALSE) ) + { + return GL_FALSE; + } + + export_starting_index++; + } + } // At least one param should be exported if (export_count) @@ -4825,6 +6638,21 @@ GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode) { FREE(pR700AsmCode->pucOutMask); FREE(pR700AsmCode->pInstDeps); + + if(NULL != pR700AsmCode->subs) + { + FREE(pR700AsmCode->subs); + } + if(NULL != 
pR700AsmCode->callers) + { + FREE(pR700AsmCode->callers); + } + + if(NULL != pR700AsmCode->presubs) + { + FREE(pR700AsmCode->presubs); + } + return GL_TRUE; } diff --git a/r600/r700_assembler.h b/r600/r700_assembler.h index c66db50..0064d08 100644 --- a/r600/r700_assembler.h +++ b/r600/r700_assembler.h @@ -34,6 +34,45 @@ #include "r700_shaderinst.h" #include "r700_shader.h" +typedef enum LOADABLE_SCRIPT_SIGNITURE +{ + GLSL_NOISE1 = 0x10000001, + GLSL_NOISE2 = 0x10000002, + GLSL_NOISE3 = 0x10000003, + GLSL_NOISE4 = 0x10000004 +}LOADABLE_SCRIPT_SIGNITURE; + +typedef struct COMPILED_SUB +{ + struct prog_instruction *Instructions; + GLuint NumInstructions; + GLuint NumTemporaries; + GLuint NumParameters; + GLuint MinRegIndex; + GLfloat (*ParameterValues)[4]; + GLbyte outputSwizzleX; + GLbyte outputSwizzleY; + GLbyte outputSwizzleZ; + GLbyte outputSwizzleW; + GLshort srcRegIndex[3]; + GLushort dstRegIndex; +}COMPILED_SUB; + +typedef struct PRESUB_DESCtag +{ + LOADABLE_SCRIPT_SIGNITURE sptSigniture; + GLint subIL_Shift; + struct prog_src_register InReg[3]; + struct prog_dst_register OutReg; + + GLushort maxStartReg; + GLushort number_used_registers; + + GLuint unConstantsStart; + + COMPILED_SUB * pCompiledSub; +} PRESUB_DESC; + typedef enum SHADER_PIPE_TYPE { SPT_VP = 0, @@ -72,7 +111,8 @@ typedef enum SrcRegisterType SRC_REG_INPUT = 1, SRC_REG_CONSTANT = 2, SRC_REG_ALT_TEMPORARY = 3, - NUMBER_OF_SRC_REG_TYPE = 4 + SRC_REC_LITERAL = 4, + NUMBER_OF_SRC_REG_TYPE = 5 } SrcRegisterType; typedef enum DstRegisterType @@ -111,16 +151,24 @@ typedef struct PVSDSTtag BITS addrmode1:1; //32 } PVSDST; +typedef struct PVSINSTtag +{ + BITS literal_slots :2; + BITS SaturateMode :2; + BITS index_mode :3; +} PVSINST; + typedef struct PVSSRCtag { - BITS rtype:4; + BITS rtype:3; BITS addrmode0:1; - BITS reg:10; //15 (8) + BITS reg:10; //14 (8) BITS swizzlex:3; BITS swizzley:3; BITS swizzlez:3; - BITS swizzlew:3; //27 + BITS swizzlew:3; //26 + BITS abs:1; BITS negx:1; BITS negy:1; BITS 
negz:1; @@ -148,6 +196,7 @@ typedef union PVSDWORDtag { BITS bits; PVSDST dst; + PVSINST dst2; PVSSRC src; PVSMATH math; float f; @@ -251,6 +300,8 @@ enum FC_IF = 1, FC_LOOP = 2, FC_REP = 3, + FC_PUSH_VPM = 4, + FC_PUSH_WQM = 5, COND_NONE = 0, COND_BOOL = 1, @@ -263,22 +314,56 @@ enum typedef struct FC_LEVEL { - unsigned int first; ///< first fc instruction on level (if, rep, loop) - unsigned int* mid; ///< middle instructions - else or all breaks on this level - unsigned int midLen; - unsigned int type; - unsigned int cond; - unsigned int inv; - unsigned int bpush; ///< 1 if first instruction does branch stack push - int id; ///< id of bool or int variable + R700ControlFlowGenericClause * first; + R700ControlFlowGenericClause ** mid; + unsigned int unNumMid; + unsigned int midLen; + unsigned int type; + unsigned int cond; + unsigned int inv; + int id; ///< id of bool or int variable } FC_LEVEL; typedef struct VTX_FETCH_METHOD { - GLboolean bEnableMini; - GLuint mega_fetch_remainder; + GLboolean bEnableMini; + GLuint mega_fetch_remainder; } VTX_FETCH_METHOD; +typedef struct SUB_OFFSET +{ + GLint subIL_Offset; + GLuint unCFoffset; + GLuint unStackDepthMax; + PRESUB_DESC * pPresubDesc; + TypedShaderList lstCFInstructions_local; +} SUB_OFFSET; + +typedef struct CALLER_POINTER +{ + GLint subIL_Offset; + GLint subDescIndex; + R700ControlFlowGenericClause* cf_ptr; + + R700ControlFlowGenericClause* prelude_cf_ptr; + R700ControlFlowGenericClause* finale_cf_ptr; +} CALLER_POINTER; + +#define SQ_MAX_CALL_DEPTH 0x00000020 + +typedef struct CALL_LEVEL +{ + unsigned int FCSP_BeforeEntry; + GLint subDescIndex; + GLushort current; + GLushort max; + TypedShaderList * plstCFInstructions_local; +} CALL_LEVEL; + +#define HAS_CURRENT_LOOPRET 0x1L +#define HAS_LOOPRET 0x2L +#define LOOPRET_FLAGS HAS_LOOPRET | HAS_CURRENT_LOOPRET + typedef struct r700_AssemblerBase { R700ControlFlowSXClause* cf_last_export_ptr; @@ -294,14 +379,19 @@ typedef struct r700_AssemblerBase // No clause has 
been created yet CF_CLAUSE_TYPE cf_current_clause_type; + BITS alu_x_opcode; + GLuint number_of_exports; GLuint number_of_colorandz_exports; GLuint number_of_export_opcodes; PVSDWORD D; + PVSDWORD D2; PVSDWORD S[3]; + PVSDWORD C[4]; unsigned int uLastPosUpdate; + unsigned int last_cond_register; OUT_FRAGMENT_FMT_0 fp_stOutFmt0; @@ -310,6 +400,8 @@ typedef struct r700_AssemblerBase unsigned int number_used_registers; unsigned int uUsedConsts; + unsigned int flag_reg_index; + // Fragment programs unsigned int uiFP_AttributeMap[FRAG_ATTRIB_MAX]; unsigned int uiFP_OutputMap[FRAG_RESULT_MAX]; @@ -330,9 +422,6 @@ typedef struct r700_AssemblerBase unsigned int FCSP; FC_LEVEL fc_stack[32]; - unsigned int branch_depth; - unsigned int max_branch_depth; - //----------------------------------------------------------------------------------- // ArgSubst used in Assemble_Source() function //----------------------------------------------------------------------------------- @@ -373,11 +462,33 @@ typedef struct r700_AssemblerBase SHADER_PIPE_TYPE currentShaderType; struct prog_instruction * pILInst; GLuint uiCurInst; + GLubyte SamplerUnits[MAX_SAMPLERS]; GLboolean bR6xx; /* helper to decide which type of instruction to assemble */ GLboolean is_tex; /* we inserted helper intructions and need barrier on next TEX ins */ GLboolean need_tex_barrier; + + SUB_OFFSET * subs; + GLuint unSubArraySize; + GLuint unSubArrayPointer; + CALLER_POINTER * callers; + GLuint unCallerArraySize; + GLuint unCallerArrayPointer; + unsigned int CALLSP; + CALL_LEVEL CALLSTACK[SQ_MAX_CALL_DEPTH]; + + GLuint unCFflags; + + PRESUB_DESC * presubs; + GLuint unPresubArraySize; + GLuint unNumPresub; + GLuint unCurNumILInsts; + + GLuint unVetTexBits; + + GLuint shadow_regs[R700_MAX_TEXTURE_UNITS]; + } r700_AssemblerBase; //Internal use @@ -399,7 +510,7 @@ BITS is_depth_component_exported(OUT_FRAGMENT_FMT_0* pFPOutFmt) ; GLboolean is_reduction_opcode(PVSDWORD * dest); GLuint GetSurfaceFormat(GLenum eType, GLuint 
nChannels, GLuint * pClient_size); -unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm); +unsigned int r700GetNumOperands(GLuint opcode, GLuint nIsOp3); GLboolean IsTex(gl_inst_opcode Opcode); GLboolean IsAlu(gl_inst_opcode Opcode); @@ -422,6 +533,7 @@ GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm, GLubyte element, GLuint _signed, GLboolean normalize, + GLenum format, VTX_FETCH_METHOD * pFetchMethod); GLboolean cleanup_vfetch_instructions(r700_AssemblerBase* pAsm); GLuint gethelpr(r700_AssemblerBase* pAsm); @@ -446,6 +558,10 @@ GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr, GLboolean add_alu_instruction(r700_AssemblerBase* pAsm, R700ALUInstruction* alu_instruction_ptr, GLuint contiguous_slots_needed); + +GLboolean add_cf_instruction(r700_AssemblerBase* pAsm); +void add_return_inst(r700_AssemblerBase *pAsm); + void get_src_properties(R700ALUInstruction* alu_instruction_ptr, int source_index, BITS* psrc_sel, @@ -467,13 +583,20 @@ GLboolean check_vector(r700_AssemblerBase* pAsm, R700ALUInstruction* alu_instruction_ptr); GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm); GLboolean next_ins(r700_AssemblerBase *pAsm); + +GLboolean pops(r700_AssemblerBase *pAsm, GLuint pops); +GLboolean jumpToOffest(r700_AssemblerBase *pAsm, GLuint pops, GLint offset); +GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue); +GLboolean testFlag(r700_AssemblerBase *pAsm); +GLboolean breakLoopOnFlag(r700_AssemblerBase *pAsm, GLuint unFCSP); +GLboolean returnOnFlag(r700_AssemblerBase *pAsm, GLuint unIF); + GLboolean assemble_math_function(r700_AssemblerBase* pAsm, BITS opcode); GLboolean assemble_ABS(r700_AssemblerBase *pAsm); GLboolean assemble_ADD(r700_AssemblerBase *pAsm); GLboolean assemble_ARL(r700_AssemblerBase *pAsm); GLboolean assemble_BAD(char *opcode_str); GLboolean assemble_CMP(r700_AssemblerBase *pAsm); -GLboolean assemble_COS(r700_AssemblerBase *pAsm); GLboolean assemble_DOT(r700_AssemblerBase *pAsm); 
GLboolean assemble_DST(r700_AssemblerBase *pAsm); GLboolean assemble_EX2(r700_AssemblerBase *pAsm); @@ -481,7 +604,7 @@ GLboolean assemble_EXP(r700_AssemblerBase *pAsm); GLboolean assemble_FLR(r700_AssemblerBase *pAsm); GLboolean assemble_FLR_INT(r700_AssemblerBase *pAsm); GLboolean assemble_FRC(r700_AssemblerBase *pAsm); -GLboolean assemble_KIL(r700_AssemblerBase *pAsm); +GLboolean assemble_KIL(r700_AssemblerBase *pAsm, GLuint opcode); GLboolean assemble_LG2(r700_AssemblerBase *pAsm); GLboolean assemble_LRP(r700_AssemblerBase *pAsm); GLboolean assemble_LOG(r700_AssemblerBase *pAsm); @@ -494,17 +617,38 @@ GLboolean assemble_MUL(r700_AssemblerBase *pAsm); GLboolean assemble_POW(r700_AssemblerBase *pAsm); GLboolean assemble_RCP(r700_AssemblerBase *pAsm); GLboolean assemble_RSQ(r700_AssemblerBase *pAsm); -GLboolean assemble_SIN(r700_AssemblerBase *pAsm); GLboolean assemble_SCS(r700_AssemblerBase *pAsm); GLboolean assemble_SGE(r700_AssemblerBase *pAsm); +GLboolean assemble_CONT(r700_AssemblerBase *pAsm); + +GLboolean assemble_LOGIC(r700_AssemblerBase *pAsm, BITS opcode); +GLboolean assemble_LOGIC_PRED(r700_AssemblerBase *pAsm, BITS opcode); +GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode); + GLboolean assemble_SLT(r700_AssemblerBase *pAsm); GLboolean assemble_STP(r700_AssemblerBase *pAsm); GLboolean assemble_TEX(r700_AssemblerBase *pAsm); GLboolean assemble_XPD(r700_AssemblerBase *pAsm); GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm); -GLboolean assemble_IF(r700_AssemblerBase *pAsm); +GLboolean assemble_IF(r700_AssemblerBase *pAsm, GLboolean bHasElse); +GLboolean assemble_ELSE(r700_AssemblerBase *pAsm); GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm); +GLboolean assemble_BGNLOOP(r700_AssemblerBase *pAsm); +GLboolean assemble_BRK(r700_AssemblerBase *pAsm); +GLboolean assemble_COND(r700_AssemblerBase *pAsm); +GLboolean assemble_ENDLOOP(r700_AssemblerBase *pAsm); + +GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex, GLuint 
uiIL_Shift); +GLboolean assemble_ENDSUB(r700_AssemblerBase *pAsm); +GLboolean assemble_RET(r700_AssemblerBase *pAsm); +GLboolean assemble_CAL(r700_AssemblerBase *pAsm, + GLint nILindex, + GLuint uiIL_Offest, + GLuint uiNumberInsts, + struct prog_instruction *pILInst, + PRESUB_DESC * pPresubDesc); + GLboolean Process_Export(r700_AssemblerBase* pAsm, GLuint type, GLuint export_starting_index, @@ -514,14 +658,25 @@ GLboolean Process_Export(r700_AssemblerBase* pAsm, GLboolean Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase *pAsm, BITS depth_channel_select); +GLboolean callPreSub(r700_AssemblerBase* pAsm, + LOADABLE_SCRIPT_SIGNITURE scriptSigniture, + /* struct prog_instruction ** pILInstParent, */ + COMPILED_SUB * pCompiledSub, + GLshort uOutReg, + GLshort uNumValidSrc); //Interface -GLboolean AssembleInstr(GLuint uiNumberInsts, +GLboolean AssembleInstr(GLuint uiFirstInst, + GLuint uiIL_Shift, + GLuint uiNumberInsts, struct prog_instruction *pILInst, r700_AssemblerBase *pR700AsmCode); GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode, GLbitfield OutputsWritten); GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode, GLbitfield OutputsWritten); +GLboolean RelocProgram(r700_AssemblerBase * pAsm, struct gl_program * pILProg); +GLboolean InitShaderProgram(r700_AssemblerBase * pAsm); + int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700_Shader* pShader); GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode); diff --git a/r600/r700_chip.c b/r600/r700_chip.c index 02c56b9..63614b1 100644 --- a/r600/r700_chip.c +++ b/r600/r700_chip.c @@ -32,12 +32,10 @@ #include "r600_context.h" #include "r600_cmdbuf.h" -#include "r700_state.h" #include "r600_tex.h" #include "r700_oglprog.h" #include "r700_fragprog.h" #include "r700_vertprog.h" -#include "r700_ioctl.h" #include "radeon_mipmap_tree.h" @@ -45,6 +43,9 @@ static void r700SendTexState(GLcontext *ctx, struct radeon_state_atom *atom) { context_t *context = 
R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); + + struct r700_vertex_program *vp = context->selected_vp; + struct radeon_bo *bo = NULL; unsigned int i; BATCH_LOCALS(&context->radeon); @@ -52,16 +53,13 @@ static void r700SendTexState(GLcontext *ctx, struct radeon_state_atom *atom) radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) { - if (ctx->Texture.Unit[i]._ReallyEnabled) { + if (ctx->Texture.Unit[i]._ReallyEnabled) { radeonTexObj *t = r700->textures[i]; - uint32_t offset; if (t) { if (!t->image_override) { bo = t->mt->bo; - offset = get_base_teximage_offset(t); } else { bo = t->bo; - offset = 0; } if (bo) { @@ -71,7 +69,16 @@ static void r700SendTexState(GLcontext *ctx, struct radeon_state_atom *atom) BEGIN_BATCH_NO_AUTOSTATE(9 + 4); R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7)); - R600_OUT_BATCH(i * 7); + + if( (1<<i) & vp->r700AsmCode.unVetTexBits ) + { /* vs texture */ + R600_OUT_BATCH((i + VERT_ATTRIB_MAX + SQ_FETCH_RESOURCE_VS_OFFSET) * FETCH_RESOURCE_STRIDE); + } + else + { + R600_OUT_BATCH(i * 7); + } + R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE0); R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE1); R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE2); @@ -81,7 +88,7 @@ static void r700SendTexState(GLcontext *ctx, struct radeon_state_atom *atom) R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE6); R600_OUT_BATCH_RELOC(r700->textures[i]->SQ_TEX_RESOURCE2, bo, - offset, + r700->textures[i]->SQ_TEX_RESOURCE2, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); R600_OUT_BATCH_RELOC(r700->textures[i]->SQ_TEX_RESOURCE3, bo, @@ -95,21 +102,35 @@ static void r700SendTexState(GLcontext *ctx, struct radeon_state_atom *atom) } } +#define SAMPLER_STRIDE 3 + static void r700SendTexSamplerState(GLcontext *ctx, struct radeon_state_atom *atom) { context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); unsigned int i; + + 
struct r700_vertex_program *vp = context->selected_vp; + BATCH_LOCALS(&context->radeon); radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) { - if (ctx->Texture.Unit[i]._ReallyEnabled) { + if (ctx->Texture.Unit[i]._ReallyEnabled) { radeonTexObj *t = r700->textures[i]; if (t) { BEGIN_BATCH_NO_AUTOSTATE(5); R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_SAMPLER, 3)); - R600_OUT_BATCH(i * 3); + + if( (1<<i) & vp->r700AsmCode.unVetTexBits ) + { /* vs texture */ + R600_OUT_BATCH((i+SQ_TEX_SAMPLER_VS_OFFSET) * SAMPLER_STRIDE); //work 1 + } + else + { + R600_OUT_BATCH(i * 3); + } + R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER0); R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER1); R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER2); @@ -179,7 +200,8 @@ static void r700SetupVTXConstants(GLcontext * ctx, } else { - nVBsize = paos->count * pStreamDesc->stride; + nVBsize = (paos->count - 1) * pStreamDesc->stride + + pStreamDesc->size * getTypeSize(pStreamDesc->type); } uSQ_VTX_CONSTANT_WORD0_0 = paos->offset; @@ -197,11 +219,11 @@ static void r700SetupVTXConstants(GLcontext * ctx, SETfield(uSQ_VTX_CONSTANT_WORD2_0, SQ_NUM_FORMAT_NORM, SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift, SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask); } - //else - //{ - // SETfield(uSQ_VTX_CONSTANT_WORD2_0, SQ_NUM_FORMAT_INT, - // SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift, SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask); - //} + else + { + SETfield(uSQ_VTX_CONSTANT_WORD2_0, SQ_NUM_FORMAT_SCALED, + SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift, SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask); + } if(1 == pStreamDesc->_signed) { @@ -280,14 +302,13 @@ static void r700SetRenderTarget(context_t *context, int id) R600_STATECHANGE(context, cb_target); /* color buffer */ - r700->render_target[id].CB_COLOR0_BASE.u32All = context->radeon.state.color.draw_offset; + r700->render_target[id].CB_COLOR0_BASE.u32All = context->radeon.state.color.draw_offset / 256; 
nPitchInPixel = rrb->pitch/rrb->cpp; SETfield(r700->render_target[id].CB_COLOR0_SIZE.u32All, (nPitchInPixel/8)-1, PITCH_TILE_MAX_shift, PITCH_TILE_MAX_mask); SETfield(r700->render_target[id].CB_COLOR0_SIZE.u32All, ( (nPitchInPixel * context->radeon.radeonScreen->driScreen->fbHeight)/64 )-1, SLICE_TILE_MAX_shift, SLICE_TILE_MAX_mask); - r700->render_target[id].CB_COLOR0_BASE.u32All = 0; SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, ENDIAN_NONE, ENDIAN_shift, ENDIAN_mask); SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, ARRAY_LINEAR_GENERAL, CB_COLOR0_INFO__ARRAY_MODE_shift, CB_COLOR0_INFO__ARRAY_MODE_mask); @@ -430,13 +451,31 @@ static void r700SendRenderTargetState(GLcontext *ctx, struct radeon_state_atom * R600_OUT_BATCH((2 << id)); END_BATCH(); } + /* Set CMASK & TILE buffer to the offset of color buffer as + * we don't use those this shouldn't cause any issue and we + * then have a valid cmd stream + */ + BEGIN_BATCH_NO_AUTOSTATE(3 + 2); + R600_OUT_BATCH_REGSEQ(CB_COLOR0_TILE + (4 * id), 1); + R600_OUT_BATCH(r700->render_target[id].CB_COLOR0_TILE.u32All); + R600_OUT_BATCH_RELOC(r700->render_target[id].CB_COLOR0_BASE.u32All, + rrb->bo, + r700->render_target[id].CB_COLOR0_BASE.u32All, + 0, RADEON_GEM_DOMAIN_VRAM, 0); + END_BATCH(); + BEGIN_BATCH_NO_AUTOSTATE(3 + 2); + R600_OUT_BATCH_REGSEQ(CB_COLOR0_FRAG + (4 * id), 1); + R600_OUT_BATCH(r700->render_target[id].CB_COLOR0_FRAG.u32All); + R600_OUT_BATCH_RELOC(r700->render_target[id].CB_COLOR0_BASE.u32All, + rrb->bo, + r700->render_target[id].CB_COLOR0_BASE.u32All, + 0, RADEON_GEM_DOMAIN_VRAM, 0); + END_BATCH(); - BEGIN_BATCH_NO_AUTOSTATE(18); + BEGIN_BATCH_NO_AUTOSTATE(12); R600_OUT_BATCH_REGVAL(CB_COLOR0_SIZE + (4 * id), r700->render_target[id].CB_COLOR0_SIZE.u32All); R600_OUT_BATCH_REGVAL(CB_COLOR0_VIEW + (4 * id), r700->render_target[id].CB_COLOR0_VIEW.u32All); R600_OUT_BATCH_REGVAL(CB_COLOR0_INFO + (4 * id), r700->render_target[id].CB_COLOR0_INFO.u32All); - R600_OUT_BATCH_REGVAL(CB_COLOR0_TILE + 
(4 * id), r700->render_target[id].CB_COLOR0_TILE.u32All); - R600_OUT_BATCH_REGVAL(CB_COLOR0_FRAG + (4 * id), r700->render_target[id].CB_COLOR0_FRAG.u32All); R600_OUT_BATCH_REGVAL(CB_COLOR0_MASK + (4 * id), r700->render_target[id].CB_COLOR0_MASK.u32All); END_BATCH(); @@ -446,68 +485,77 @@ static void r700SendRenderTargetState(GLcontext *ctx, struct radeon_state_atom * static void r700SendPSState(GLcontext *ctx, struct radeon_state_atom *atom) { - context_t *context = R700_CONTEXT(ctx); - R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); - struct radeon_bo * pbo; - BATCH_LOCALS(&context->radeon); - radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); + context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); + struct radeon_bo * pbo; + BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); - pbo = (struct radeon_bo *)r700GetActiveFpShaderBo(GL_CONTEXT(context)); + pbo = (struct radeon_bo *)r700GetActiveFpShaderBo(GL_CONTEXT(context)); - if (!pbo) - return; + if (!pbo) + return; - r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit); + r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit); - BEGIN_BATCH_NO_AUTOSTATE(3 + 2); - R600_OUT_BATCH_REGSEQ(SQ_PGM_START_PS, 1); - R600_OUT_BATCH(r700->ps.SQ_PGM_START_PS.u32All); - R600_OUT_BATCH_RELOC(r700->ps.SQ_PGM_START_PS.u32All, - pbo, - r700->ps.SQ_PGM_START_PS.u32All, - RADEON_GEM_DOMAIN_GTT, 0, 0); - END_BATCH(); + BEGIN_BATCH_NO_AUTOSTATE(3 + 2); + R600_OUT_BATCH_REGSEQ(SQ_PGM_START_PS, 1); + R600_OUT_BATCH(r700->ps.SQ_PGM_START_PS.u32All); + R600_OUT_BATCH_RELOC(r700->ps.SQ_PGM_START_PS.u32All, + pbo, + r700->ps.SQ_PGM_START_PS.u32All, + RADEON_GEM_DOMAIN_GTT, 0, 0); + END_BATCH(); - BEGIN_BATCH_NO_AUTOSTATE(9); - R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_PS, r700->ps.SQ_PGM_RESOURCES_PS.u32All); - R600_OUT_BATCH_REGVAL(SQ_PGM_EXPORTS_PS, r700->ps.SQ_PGM_EXPORTS_PS.u32All); - 
R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_PS, r700->ps.SQ_PGM_CF_OFFSET_PS.u32All); - END_BATCH(); + BEGIN_BATCH_NO_AUTOSTATE(9); + R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_PS, r700->ps.SQ_PGM_RESOURCES_PS.u32All); + R600_OUT_BATCH_REGVAL(SQ_PGM_EXPORTS_PS, r700->ps.SQ_PGM_EXPORTS_PS.u32All); + R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_PS, r700->ps.SQ_PGM_CF_OFFSET_PS.u32All); + END_BATCH(); - COMMIT_BATCH(); + BEGIN_BATCH_NO_AUTOSTATE(3); + R600_OUT_BATCH_REGVAL(SQ_LOOP_CONST_0, 0x01000FFF); + END_BATCH(); + + COMMIT_BATCH(); } static void r700SendVSState(GLcontext *ctx, struct radeon_state_atom *atom) { - context_t *context = R700_CONTEXT(ctx); - R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); - struct radeon_bo * pbo; - BATCH_LOCALS(&context->radeon); - radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); + context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); + struct radeon_bo * pbo; + BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); - pbo = (struct radeon_bo *)r700GetActiveVpShaderBo(GL_CONTEXT(context)); + pbo = (struct radeon_bo *)r700GetActiveVpShaderBo(GL_CONTEXT(context)); - if (!pbo) - return; + if (!pbo) + return; - r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit); + r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit); - BEGIN_BATCH_NO_AUTOSTATE(3 + 2); - R600_OUT_BATCH_REGSEQ(SQ_PGM_START_VS, 1); - R600_OUT_BATCH(r700->vs.SQ_PGM_START_VS.u32All); - R600_OUT_BATCH_RELOC(r700->vs.SQ_PGM_START_VS.u32All, - pbo, - r700->vs.SQ_PGM_START_VS.u32All, - RADEON_GEM_DOMAIN_GTT, 0, 0); - END_BATCH(); + BEGIN_BATCH_NO_AUTOSTATE(3 + 2); + R600_OUT_BATCH_REGSEQ(SQ_PGM_START_VS, 1); + R600_OUT_BATCH(r700->vs.SQ_PGM_START_VS.u32All); + R600_OUT_BATCH_RELOC(r700->vs.SQ_PGM_START_VS.u32All, + pbo, + r700->vs.SQ_PGM_START_VS.u32All, + RADEON_GEM_DOMAIN_GTT, 0, 0); + END_BATCH(); - BEGIN_BATCH_NO_AUTOSTATE(6); - 
R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_VS, r700->vs.SQ_PGM_RESOURCES_VS.u32All); - R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_VS, r700->vs.SQ_PGM_CF_OFFSET_VS.u32All); - END_BATCH(); + BEGIN_BATCH_NO_AUTOSTATE(6); + R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_VS, r700->vs.SQ_PGM_RESOURCES_VS.u32All); + R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_VS, r700->vs.SQ_PGM_CF_OFFSET_VS.u32All); + END_BATCH(); - COMMIT_BATCH(); + BEGIN_BATCH_NO_AUTOSTATE(3); + R600_OUT_BATCH_REGVAL((SQ_LOOP_CONST_0 + 32*4), 0x0100000F); + //R600_OUT_BATCH_REGVAL((SQ_LOOP_CONST_0 + (SQ_LOOP_CONST_vs<2)), 0x0100000F); + END_BATCH(); + + COMMIT_BATCH(); } static void r700SendFSState(GLcontext *ctx, struct radeon_state_atom *atom) @@ -1301,7 +1349,7 @@ void r600InitAtoms(context_t *context) ALLOC_STATE(poly, always, 10, r700SendPolyState); ALLOC_STATE(cb, cb, 18, r700SendCBState); ALLOC_STATE(clrcmp, always, 6, r700SendCBCLRCMPState); - ALLOC_STATE(cb_target, always, 25, r700SendRenderTargetState); + ALLOC_STATE(cb_target, always, 29, r700SendRenderTargetState); ALLOC_STATE(blnd, blnd, (6 + (R700_MAX_RENDER_TARGETS * 3)), r700SendCBBlendState); ALLOC_STATE(blnd_clr, always, 6, r700SendCBBlendColorState); ALLOC_STATE(sx, always, 9, r700SendSXState); @@ -1309,8 +1357,8 @@ void r600InitAtoms(context_t *context) ALLOC_STATE(spi, always, (59 + R700_MAX_SHADER_EXPORTS), r700SendSPIState); ALLOC_STATE(vpt, always, 16, r700SendViewportState); ALLOC_STATE(fs, always, 18, r700SendFSState); - ALLOC_STATE(vs, always, 18, r700SendVSState); - ALLOC_STATE(ps, always, 21, r700SendPSState); + ALLOC_STATE(vs, always, 21, r700SendVSState); + ALLOC_STATE(ps, always, 24, r700SendPSState); ALLOC_STATE(vs_consts, vs_consts, (2 + (R700_MAX_DX9_CONSTS * 4)), r700SendVSConsts); ALLOC_STATE(ps_consts, ps_consts, (2 + (R700_MAX_DX9_CONSTS * 4)), r700SendPSConsts); ALLOC_STATE(vtx, vtx, (6 + (VERT_ATTRIB_MAX * 18)), r700SendVTXState); diff --git a/r600/r700_clear.c b/r600/r700_clear.c index c6546ab..09c4856 100644 --- 
a/r600/r700_clear.c +++ b/r600/r700_clear.c @@ -37,7 +37,6 @@ #include "r600_context.h" #include "r700_shaderinst.h" -#include "r600_emit.h" #include "r700_clear.h" static GLboolean r700ClearFast(context_t *context, GLbitfield mask) @@ -49,14 +48,18 @@ static GLboolean r700ClearFast(context_t *context, GLbitfield mask) void r700Clear(GLcontext * ctx, GLbitfield mask) { context_t *context = R700_CONTEXT(ctx); - __DRIdrawablePrivate *dPriv = radeon_get_drawable(&context->radeon); - const GLuint colorMask = *((GLuint *) & ctx->Color.ColorMask); + __DRIdrawable *dPriv = radeon_get_drawable(&context->radeon); + const GLuint colorMask = *((GLuint *) & ctx->Color.ColorMask[0]); GLbitfield swrast_mask = 0, tri_mask = 0; int i; struct gl_framebuffer *fb = ctx->DrawBuffer; radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s %x\n", __func__, mask); + if (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_FRONT_RIGHT)) { + context->radeon.front_buffer_dirty = GL_TRUE; + } + if( GL_TRUE == r700ClearFast(context, mask) ) { return; diff --git a/r600/r700_fragprog.c b/r600/r700_fragprog.c index ccafd43..84d51e6 100644 --- a/r600/r700_fragprog.c +++ b/r600/r700_fragprog.c @@ -34,6 +34,7 @@ #include "main/imports.h" #include "shader/prog_parameter.h" #include "shader/prog_statevars.h" +#include "shader/program.h" #include "r600_context.h" #include "r600_cmdbuf.h" @@ -42,14 +43,68 @@ #include "r700_debug.h" +void insert_wpos_code(GLcontext *ctx, struct gl_fragment_program *fprog) +{ + static const gl_state_index winstate[STATE_LENGTH] + = { STATE_INTERNAL, STATE_FB_SIZE, 0, 0, 0}; + struct prog_instruction *newInst, *inst; + GLint win_size; /* state reference */ + GLuint wpos_temp; /* temp register */ + int i, j; + + /* PARAM win_size = STATE_FB_SIZE */ + win_size = _mesa_add_state_reference(fprog->Base.Parameters, winstate); + + wpos_temp = fprog->Base.NumTemporaries++; + + /* scan program where WPOS is used and replace with wpos_temp */ + inst = fprog->Base.Instructions; + for (i = 0; i < 
fprog->Base.NumInstructions; i++) { + for (j=0; j < 3; j++) { + if(inst->SrcReg[j].File == PROGRAM_INPUT && + inst->SrcReg[j].Index == FRAG_ATTRIB_WPOS) { + inst->SrcReg[j].File = PROGRAM_TEMPORARY; + inst->SrcReg[j].Index = wpos_temp; + } + } + inst++; + } + + _mesa_insert_instructions(&(fprog->Base), 0, 1); + + newInst = fprog->Base.Instructions; + /* invert wpos.y + * wpos_temp.xyzw = wpos.x-yzw + winsize.0y00 */ + newInst[0].Opcode = OPCODE_ADD; + newInst[0].DstReg.File = PROGRAM_TEMPORARY; + newInst[0].DstReg.Index = wpos_temp; + newInst[0].DstReg.WriteMask = WRITEMASK_XYZW; + + newInst[0].SrcReg[0].File = PROGRAM_INPUT; + newInst[0].SrcReg[0].Index = FRAG_ATTRIB_WPOS; + newInst[0].SrcReg[0].Swizzle = SWIZZLE_XYZW; + newInst[0].SrcReg[0].Negate = NEGATE_Y; + + newInst[0].SrcReg[1].File = PROGRAM_STATE_VAR; + newInst[0].SrcReg[1].Index = win_size; + newInst[0].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_Y, SWIZZLE_ZERO, SWIZZLE_ZERO); + +} + //TODO : Validate FP input with VP output. void Map_Fragment_Program(r700_AssemblerBase *pAsm, - struct gl_fragment_program *mesa_fp) + struct gl_fragment_program *mesa_fp, + GLcontext *ctx) { unsigned int unBit; unsigned int i; GLuint ui; + /* match fp inputs with vp exports. 
*/ + struct r700_vertex_program_cont *vpc = + (struct r700_vertex_program_cont *)ctx->VertexProgram._Current; + GLbitfield OutputsWritten = vpc->mesa_program.Base.OutputsWritten; + pAsm->number_used_registers = 0; //Input mapping : mesa_fp->Base.InputsRead set the flag, set in @@ -61,32 +116,99 @@ void Map_Fragment_Program(r700_AssemblerBase *pAsm, pAsm->uiFP_AttributeMap[FRAG_ATTRIB_WPOS] = pAsm->number_used_registers++; } - unBit = 1 << FRAG_ATTRIB_COL0; - if(mesa_fp->Base.InputsRead & unBit) + unBit = 1 << VERT_RESULT_COL0; + if(OutputsWritten & unBit) { pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL0] = pAsm->number_used_registers++; } - unBit = 1 << FRAG_ATTRIB_COL1; - if(mesa_fp->Base.InputsRead & unBit) + unBit = 1 << VERT_RESULT_COL1; + if(OutputsWritten & unBit) { pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL1] = pAsm->number_used_registers++; } - unBit = 1 << FRAG_ATTRIB_FOGC; - if(mesa_fp->Base.InputsRead & unBit) - { - pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FOGC] = pAsm->number_used_registers++; - } + unBit = 1 << VERT_RESULT_FOGC; + if(OutputsWritten & unBit) + { + pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FOGC] = pAsm->number_used_registers++; + } for(i=0; i<8; i++) { - unBit = 1 << (FRAG_ATTRIB_TEX0 + i); - if(mesa_fp->Base.InputsRead & unBit) + unBit = 1 << (VERT_RESULT_TEX0 + i); + if(OutputsWritten & unBit) { pAsm->uiFP_AttributeMap[FRAG_ATTRIB_TEX0 + i] = pAsm->number_used_registers++; } } + +/* order has been taken care of */ +#if 1 + for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++) + { + unBit = 1 << i; + if(OutputsWritten & unBit) + { + pAsm->uiFP_AttributeMap[i-VERT_RESULT_VAR0+FRAG_ATTRIB_VAR0] = pAsm->number_used_registers++; + } + } +#else + if( (mesa_fp->Base.InputsRead >> FRAG_ATTRIB_VAR0) > 0 ) + { + struct r700_vertex_program_cont *vpc = + (struct r700_vertex_program_cont *)ctx->VertexProgram._Current; + struct gl_program_parameter_list * VsVarying = vpc->mesa_program.Base.Varying; + struct gl_program_parameter_list * PsVarying = mesa_fp->Base.Varying; + 
struct gl_program_parameter * pVsParam; + struct gl_program_parameter * pPsParam; + GLuint j, k; + GLuint unMaxVarying = 0; + + for(i=0; i<VsVarying->NumParameters; i++) + { + pAsm->uiFP_AttributeMap[i + FRAG_ATTRIB_VAR0] = 0; + } + + for(i=FRAG_ATTRIB_VAR0; i<FRAG_ATTRIB_MAX; i++) + { + unBit = 1 << i; + if(mesa_fp->Base.InputsRead & unBit) + { + j = i - FRAG_ATTRIB_VAR0; + pPsParam = PsVarying->Parameters + j; + + for(k=0; k<VsVarying->NumParameters; k++) + { + pVsParam = VsVarying->Parameters + k; + + if( strcmp(pPsParam->Name, pVsParam->Name) == 0) + { + pAsm->uiFP_AttributeMap[i] = pAsm->number_used_registers + k; + if(k > unMaxVarying) + { + unMaxVarying = k; + } + break; + } + } + } + } + + pAsm->number_used_registers += unMaxVarying + 1; + } +#endif + unBit = 1 << FRAG_ATTRIB_FACE; + if(mesa_fp->Base.InputsRead & unBit) + { + pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FACE] = pAsm->number_used_registers++; + } + + unBit = 1 << FRAG_ATTRIB_PNTC; + if(mesa_fp->Base.InputsRead & unBit) + { + pAsm->uiFP_AttributeMap[FRAG_ATTRIB_PNTC] = pAsm->number_used_registers++; + } /* Map temporary registers (GPRs) */ pAsm->starting_temp_register_number = pAsm->number_used_registers; @@ -127,6 +249,8 @@ void Map_Fragment_Program(r700_AssemblerBase *pAsm, pAsm->pucOutMask[ui] = 0x0; } + pAsm->flag_reg_index = pAsm->number_used_registers++; + pAsm->uFirstHelpReg = pAsm->number_used_registers; } @@ -233,22 +357,61 @@ GLboolean Find_Instruction_Dependencies_fp(struct r700_fragment_program *fp, } GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp, - struct gl_fragment_program *mesa_fp) + struct gl_fragment_program *mesa_fp, + GLcontext *ctx) { GLuint number_of_colors_exported; GLboolean z_enabled = GL_FALSE; - GLuint unBit; + GLuint unBit, shadow_unit; + int i; + struct prog_instruction *inst; + gl_state_index shadow_ambient[STATE_LENGTH] + = { STATE_INTERNAL, STATE_SHADOW_AMBIENT, 0, 0, 0}; //Init_Program Init_r700_AssemblerBase( SPT_FP, &(fp->r700AsmCode), 
&(fp->r700Shader) ); - Map_Fragment_Program(&(fp->r700AsmCode), mesa_fp); + + if(mesa_fp->Base.InputsRead & FRAG_BIT_WPOS) + { + insert_wpos_code(ctx, mesa_fp); + } + + /* add/map consts for ARB_shadow_ambient */ + if(mesa_fp->Base.ShadowSamplers) + { + inst = mesa_fp->Base.Instructions; + for (i = 0; i < mesa_fp->Base.NumInstructions; i++) + { + if(inst->TexShadow == 1) + { + shadow_unit = inst->TexSrcUnit; + shadow_ambient[2] = shadow_unit; + fp->r700AsmCode.shadow_regs[shadow_unit] = + _mesa_add_state_reference(mesa_fp->Base.Parameters, shadow_ambient); + } + inst++; + } + } + + Map_Fragment_Program(&(fp->r700AsmCode), mesa_fp, ctx); if( GL_FALSE == Find_Instruction_Dependencies_fp(fp, mesa_fp) ) { return GL_FALSE; } + + InitShaderProgram(&(fp->r700AsmCode)); - if( GL_FALSE == AssembleInstr(mesa_fp->Base.NumInstructions, + for(i=0; i < MAX_SAMPLERS; i++) + { + fp->r700AsmCode.SamplerUnits[i] = fp->mesa_program.Base.SamplerUnits[i]; + } + + fp->r700AsmCode.unCurNumILInsts = mesa_fp->Base.NumInstructions; + + if( GL_FALSE == AssembleInstr(0, + 0, + mesa_fp->Base.NumInstructions, &(mesa_fp->Base.Instructions[0]), &(fp->r700AsmCode)) ) { @@ -260,6 +423,11 @@ GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp, return GL_FALSE; } + if( GL_FALSE == RelocProgram(&(fp->r700AsmCode), &(mesa_fp->Base)) ) + { + return GL_FALSE; + } + fp->r700Shader.nRegs = (fp->r700AsmCode.number_used_registers == 0) ? 
0 : (fp->r700AsmCode.number_used_registers - 1); @@ -300,7 +468,7 @@ void r700SelectFragmentShader(GLcontext *ctx) } if (GL_FALSE == fp->translated) - r700TranslateFragmentShader(fp, &(fp->mesa_program)); + r700TranslateFragmentShader(fp, &(fp->mesa_program), ctx); } void * r700GetActiveFpShaderBo(GLcontext * ctx) @@ -325,6 +493,7 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) unsigned int unNumOfReg; unsigned int unBit; GLuint exportCount; + GLboolean point_sprite = GL_FALSE; if(GL_FALSE == fp->loaded) { @@ -378,6 +547,50 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) CLEARbit(r700->SPI_INPUT_Z.u32All, PROVIDE_Z_TO_SPI_bit); } + if (mesa_fp->Base.InputsRead & (1 << FRAG_ATTRIB_FACE)) + { + ui += 1; + SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, ui, NUM_INTERP_shift, NUM_INTERP_mask); + SETbit(r700->SPI_PS_IN_CONTROL_1.u32All, FRONT_FACE_ENA_bit); + SETbit(r700->SPI_PS_IN_CONTROL_1.u32All, FRONT_FACE_ALL_BITS_bit); + SETfield(r700->SPI_PS_IN_CONTROL_1.u32All, pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FACE], FRONT_FACE_ADDR_shift, FRONT_FACE_ADDR_mask); + } + else + { + CLEARbit(r700->SPI_PS_IN_CONTROL_1.u32All, FRONT_FACE_ENA_bit); + } + + /* see if we need any point_sprite replacements */ + for (i = VERT_RESULT_TEX0; i<= VERT_RESULT_TEX7; i++) + { + if(ctx->Point.CoordReplace[i - VERT_RESULT_TEX0] == GL_TRUE) + point_sprite = GL_TRUE; + } + + if ((mesa_fp->Base.InputsRead & (1 << FRAG_ATTRIB_PNTC)) || point_sprite) + { + /* for FRAG_ATTRIB_PNTC we need to increase num_interp */ + if(mesa_fp->Base.InputsRead & (1 << FRAG_ATTRIB_PNTC)) + { + ui++; + SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, ui, NUM_INTERP_shift, NUM_INTERP_mask); + } + SETbit(r700->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_ENA_bit); + SETfield(r700->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_S, PNT_SPRITE_OVRD_X_shift, PNT_SPRITE_OVRD_X_mask); + SETfield(r700->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_T, PNT_SPRITE_OVRD_Y_shift, PNT_SPRITE_OVRD_Y_mask); + 
SETfield(r700->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_0, PNT_SPRITE_OVRD_Z_shift, PNT_SPRITE_OVRD_Z_mask); + SETfield(r700->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_1, PNT_SPRITE_OVRD_W_shift, PNT_SPRITE_OVRD_W_mask); + if(ctx->Point.SpriteOrigin == GL_LOWER_LEFT) + SETbit(r700->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_TOP_1_bit); + else + CLEARbit(r700->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_TOP_1_bit); + } + else + { + CLEARbit(r700->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_ENA_bit); + } + + ui = (unNumOfReg < ui) ? ui : unNumOfReg; SETfield(r700->ps.SQ_PGM_RESOURCES_PS.u32All, ui, NUM_GPRS_shift, NUM_GPRS_mask); @@ -394,6 +607,13 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) EXPORT_MODE_shift, EXPORT_MODE_mask); // emit ps input map + struct r700_vertex_program_cont *vpc = + (struct r700_vertex_program_cont *)ctx->VertexProgram._Current; + GLbitfield OutputsWritten = vpc->mesa_program.Base.OutputsWritten; + + for(ui = 0; ui < R700_MAX_SHADER_EXPORTS; ui++) + r700->SPI_PS_INPUT_CNTL[ui].u32All = 0; + unBit = 1 << FRAG_ATTRIB_WPOS; if(mesa_fp->Base.InputsRead & unBit) { @@ -407,8 +627,8 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); } - unBit = 1 << FRAG_ATTRIB_COL0; - if(mesa_fp->Base.InputsRead & unBit) + unBit = 1 << VERT_RESULT_COL0; + if(OutputsWritten & unBit) { ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL0]; SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); @@ -420,8 +640,8 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); } - unBit = 1 << FRAG_ATTRIB_COL1; - if(mesa_fp->Base.InputsRead & unBit) + unBit = 1 << VERT_RESULT_COL1; + if(OutputsWritten & unBit) { ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL1]; SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); @@ -433,8 +653,8 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, 
FLAT_SHADE_bit); } - unBit = 1 << FRAG_ATTRIB_FOGC; - if(mesa_fp->Base.InputsRead & unBit) + unBit = 1 << VERT_RESULT_FOGC; + if(OutputsWritten & unBit) { ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FOGC]; SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); @@ -448,17 +668,67 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) for(i=0; i<8; i++) { - unBit = 1 << (FRAG_ATTRIB_TEX0 + i); - if(mesa_fp->Base.InputsRead & unBit) + unBit = 1 << (VERT_RESULT_TEX0 + i); + if(OutputsWritten & unBit) { ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_TEX0 + i]; SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui, SEMANTIC_shift, SEMANTIC_mask); CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + /* ARB_point_sprite */ + if(ctx->Point.CoordReplace[i] == GL_TRUE) + { + SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, PT_SPRITE_TEX_bit); + } } } + unBit = 1 << FRAG_ATTRIB_FACE; + if(mesa_fp->Base.InputsRead & unBit) + { + ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FACE]; + SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); + SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui, + SEMANTIC_shift, SEMANTIC_mask); + if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit) + SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + else + CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + } + unBit = 1 << FRAG_ATTRIB_PNTC; + if(mesa_fp->Base.InputsRead & unBit) + { + ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_PNTC]; + SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); + SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui, + SEMANTIC_shift, SEMANTIC_mask); + if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit) + SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + else + CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, PT_SPRITE_TEX_bit); + } + + + + + for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++) + { + unBit = 1 
<< i; + if(OutputsWritten & unBit) + { + ui = pAsm->uiFP_AttributeMap[i-VERT_RESULT_VAR0+FRAG_ATTRIB_VAR0]; + SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); + SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui, + SEMANTIC_shift, SEMANTIC_mask); + if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit) + SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + else + CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + } + } + exportCount = (r700->ps.SQ_PGM_EXPORTS_PS.u32All & EXPORT_MODE_mask) / (1 << EXPORT_MODE_shift); if (r700->CB_SHADER_CONTROL.u32All != ((1 << exportCount) - 1)) { @@ -469,7 +739,8 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) /* sent out shader constants. */ paramList = fp->mesa_program.Base.Parameters; - if(NULL != paramList) { + if(NULL != paramList) + { _mesa_load_state_parameters(ctx, paramList); if (paramList->NumParameters > R700_MAX_DX9_CONSTS) @@ -482,14 +753,33 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) unNumParamData = paramList->NumParameters; for(ui=0; ui<unNumParamData; ui++) { - r700->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0]; - r700->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1]; - r700->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2]; - r700->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3]; + r700->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0]; + r700->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1]; + r700->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2]; + r700->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3]; } } else r700->ps.num_consts = 0; + COMPILED_SUB * pCompiledSub; + GLuint uj; + GLuint unConstOffset = r700->ps.num_consts; + for(ui=0; ui<pAsm->unNumPresub; ui++) + { + pCompiledSub = pAsm->presubs[ui].pCompiledSub; + + r700->ps.num_consts += pCompiledSub->NumParameters; + + for(uj=0; uj<pCompiledSub->NumParameters; uj++) + { + r700->ps.consts[uj + 
unConstOffset][0].f32All = pCompiledSub->ParameterValues[uj][0]; + r700->ps.consts[uj + unConstOffset][1].f32All = pCompiledSub->ParameterValues[uj][1]; + r700->ps.consts[uj + unConstOffset][2].f32All = pCompiledSub->ParameterValues[uj][2]; + r700->ps.consts[uj + unConstOffset][3].f32All = pCompiledSub->ParameterValues[uj][3]; + } + unConstOffset += pCompiledSub->NumParameters; + } + return GL_TRUE; } diff --git a/r600/r700_fragprog.h b/r600/r700_fragprog.h index cbb108d..39c59c9 100644 --- a/r600/r700_fragprog.h +++ b/r600/r700_fragprog.h @@ -48,13 +48,17 @@ struct r700_fragment_program }; /* Internal */ +void insert_wpos_code(GLcontext *ctx, struct gl_fragment_program *fprog); + void Map_Fragment_Program(r700_AssemblerBase *pAsm, - struct gl_fragment_program *mesa_fp); + struct gl_fragment_program *mesa_fp, + GLcontext *ctx); GLboolean Find_Instruction_Dependencies_fp(struct r700_fragment_program *fp, struct gl_fragment_program *mesa_fp); GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp, - struct gl_fragment_program *mesa_vp); + struct gl_fragment_program *mesa_vp, + GLcontext *ctx); /* Interface */ extern void r700SelectFragmentShader(GLcontext *ctx); diff --git a/r600/r700_ioctl.c b/r600/r700_ioctl.c index 72a8978..3bc422f 100644 --- a/r600/r700_ioctl.c +++ b/r600/r700_ioctl.c @@ -32,10 +32,8 @@ #include "main/macros.h" #include "main/context.h" #include "main/simple_list.h" -#include "swrast/swrast.h" #include "radeon_common.h" -#include "radeon_lock.h" #include "r600_context.h" #include "r700_ioctl.h" diff --git a/r600/r700_oglprog.c b/r600/r700_oglprog.c index 0d476fc..b7124e6 100644 --- a/r600/r700_oglprog.c +++ b/r600/r700_oglprog.c @@ -53,7 +53,7 @@ static void freeVertProgCache(GLcontext *ctx, struct r700_vertex_program_cont *c Clean_Up_Shader(&(vp->r700Shader)); _mesa_reference_vertprog(ctx, &vp->mesa_program, NULL); - _mesa_free(vp); + free(vp); vp = tmp; } } @@ -132,7 +132,7 @@ static void r700DeleteProgram(GLcontext * ctx, 
struct gl_program *prog) _mesa_delete_program(ctx, prog); } -static void +static GLboolean r700ProgramStringNotify(GLcontext * ctx, GLenum target, struct gl_program *prog) { struct r700_vertex_program_cont *vpc = (struct r700_vertex_program_cont *)prog; @@ -153,6 +153,8 @@ r700ProgramStringNotify(GLcontext * ctx, GLenum target, struct gl_program *prog) break; } + /* XXX check if program is legal, within limits */ + return GL_TRUE; } static GLboolean r700IsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog) diff --git a/r600/r700_render.c b/r600/r700_render.c index 47f89c9..1929b7c 100644 --- a/r600/r700_render.c +++ b/r600/r700_render.c @@ -42,7 +42,6 @@ #include "tnl/t_vp_build.h" #include "tnl/t_context.h" #include "tnl/t_vertex.h" -#include "tnl/t_pipeline.h" #include "vbo/vbo_context.h" #include "r600_context.h" @@ -116,8 +115,6 @@ void r700Start3D(context_t *context) END_BATCH(); COMMIT_BATCH(); - - r700WaitForIdleClean(context); } GLboolean r700SyncSurf(context_t *context, @@ -422,7 +419,7 @@ static void r700RunRenderPrimitiveImmediate(GLcontext * ctx, int start, int end, } /* start 3d, idle, cb/db flush */ -#define PRE_EMIT_STATE_BUFSZ 10 + 5 + 14 +#define PRE_EMIT_STATE_BUFSZ 5 + 5 + 14 static GLuint r700PredictRenderSize(GLcontext* ctx, const struct _mesa_prim *prim, @@ -526,6 +523,9 @@ static void r700ConvertAttrib(GLcontext *ctx, int count, radeonAllocDmaRegion(&context->radeon, &attr->bo, &attr->bo_offset, sizeof(GLfloat) * input->Size * count, 32); + + radeon_bo_map(attr->bo, 1); + dst_ptr = (GLfloat *)ADD_POINTERS(attr->bo->ptr, attr->bo_offset); assert(src_ptr != NULL); @@ -559,6 +559,8 @@ static void r700ConvertAttrib(GLcontext *ctx, int count, break; } + radeon_bo_unmap(attr->bo); + if (mapped_named_bo) { ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj); @@ -577,6 +579,8 @@ static void r700AlignDataToDword(GLcontext *ctx, radeonAllocDmaRegion(&context->radeon, &attr->bo, &attr->bo_offset, size, 32); + 
radeon_bo_map(attr->bo, 1); + if (!input->BufferObj->Pointer) { ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj); @@ -590,12 +594,13 @@ static void r700AlignDataToDword(GLcontext *ctx, for (i = 0; i < count; ++i) { - _mesa_memcpy(dst_ptr, src_ptr, input->StrideB); + memcpy(dst_ptr, src_ptr, input->StrideB); src_ptr += input->StrideB; dst_ptr += dst_stride; } } + radeon_bo_unmap(attr->bo); if (mapped_named_bo) { ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj); @@ -664,14 +669,18 @@ static void r700SetupStreams(GLcontext *ctx, const struct gl_client_array *input radeonAllocDmaRegion(&context->radeon, &context->stream_desc[index].bo, &context->stream_desc[index].bo_offset, size, 32); + + radeon_bo_map(context->stream_desc[index].bo, 1); assert(context->stream_desc[index].bo->ptr != NULL); + + dst = (uint32_t *)ADD_POINTERS(context->stream_desc[index].bo->ptr, context->stream_desc[index].bo_offset); switch (context->stream_desc[index].dwords) { case 1: - radeonEmitVec4(dst, input[i]->Ptr, input[i]->StrideB, local_count); + radeonEmitVec4(dst, input[i]->Ptr, input[i]->StrideB, local_count); break; case 2: radeonEmitVec8(dst, input[i]->Ptr, input[i]->StrideB, local_count); @@ -686,6 +695,7 @@ static void r700SetupStreams(GLcontext *ctx, const struct gl_client_array *input assert(0); break; } + radeon_bo_unmap(context->stream_desc[index].bo); } } @@ -757,6 +767,7 @@ static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo, &context->ind_buf.bo_offset, size, 4); + radeon_bo_map(context->ind_buf.bo, 1); assert(context->ind_buf.bo->ptr != NULL); out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); @@ -770,6 +781,7 @@ static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *out++ = in[i]; } + radeon_bo_unmap(context->ind_buf.bo); #if MESA_BIG_ENDIAN } else @@ -780,6 +792,7 @@ static void 
r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo, &context->ind_buf.bo_offset, size, 4); + radeon_bo_map(context->ind_buf.bo, 1); assert(context->ind_buf.bo->ptr != NULL); out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); @@ -792,6 +805,7 @@ static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer { *out++ = in[i]; } + radeon_bo_unmap(context->ind_buf.bo); #endif } @@ -815,11 +829,10 @@ static void r700SetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer #if MESA_BIG_ENDIAN if (mesa_ind_buf->type == GL_UNSIGNED_INT) - { #else if (mesa_ind_buf->type != GL_UNSIGNED_BYTE) - { #endif + { const GLvoid *src_ptr; GLvoid *dst_ptr; GLboolean mapped_named_bo = GL_FALSE; @@ -837,11 +850,13 @@ static void r700SetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo, &context->ind_buf.bo_offset, size, 4); + radeon_bo_map(context->ind_buf.bo, 1); assert(context->ind_buf.bo->ptr != NULL); dst_ptr = ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); - _mesa_memcpy(dst_ptr, src_ptr, size); + memcpy(dst_ptr, src_ptr, size); + radeon_bo_unmap(context->ind_buf.bo); context->ind_buf.is_32bit = (mesa_ind_buf->type == GL_UNSIGNED_INT); context->ind_buf.count = mesa_ind_buf->count; @@ -856,6 +871,14 @@ static void r700SetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer } } +static GLboolean check_fallbacks(GLcontext *ctx) +{ + if (ctx->RenderMode != GL_RENDER) + return GL_TRUE; + + return GL_FALSE; +} + static GLboolean r700TryDrawPrims(GLcontext *ctx, const struct gl_client_array *arrays[], const struct _mesa_prim *prim, @@ -872,6 +895,9 @@ static GLboolean r700TryDrawPrims(GLcontext *ctx, if (ctx->NewState) _mesa_update_state( ctx ); + if (check_fallbacks(ctx)) + return GL_FALSE; + _tnl_UpdateFixedFunctionProgram(ctx); r700SetVertexFormat(ctx, 
arrays, max_index + 1); /* shaders need to be updated before buffers are validated */ @@ -916,6 +942,7 @@ static GLboolean r700TryDrawPrims(GLcontext *ctx, radeon_debug_remove_indent(); /* Flush render op cached for last several quads. */ + /* XXX drm should handle this in fence submit */ r700WaitForIdleClean(context); rrb = radeon_get_colorbuffer(&context->radeon); @@ -966,8 +993,10 @@ static void r700DrawPrims(GLcontext *ctx, retval = r700TryDrawPrims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); /* If failed run tnl pipeline - it should take care of fallbacks */ - if (!retval) + if (!retval) { + _swsetup_Wakeup(ctx); _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); + } } void r700InitDraw(GLcontext *ctx) diff --git a/r600/r700_shader.c b/r600/r700_shader.c index 955ea4e..67b0d40 100644 --- a/r600/r700_shader.c +++ b/r600/r700_shader.c @@ -35,7 +35,6 @@ #include "main/glheader.h" #include "r600_context.h" -#include "r700_debug.h" #include "r700_shader.h" @@ -159,13 +158,18 @@ void Init_R700_Shader(R700_Shader * pShader) pShader->lstVTXInstructions.uNumOfNode=0; } +void SetActiveCFlist(R700_Shader *pShader, TypedShaderList * plstCF) +{ + pShader->plstCFInstructions_active = plstCF; +} + void AddCFInstruction(R700_Shader *pShader, R700ControlFlowInstruction *pCFInst) { R700ControlFlowSXClause* pSXClause; R700ControlFlowSMXClause* pSMXClause; - pCFInst->m_uIndex = pShader->lstCFInstructions.uNumOfNode; - AddInstToList(&(pShader->lstCFInstructions), + pCFInst->m_uIndex = pShader->plstCFInstructions_active->uNumOfNode; + AddInstToList(pShader->plstCFInstructions_active, (R700ShaderInstruction*)pCFInst); pShader->uShaderBinaryDWORDSize += GetInstructionSize(pCFInst->m_ShaderInstType); diff --git a/r600/r700_shader.h b/r600/r700_shader.h index c6a0586..0599ffd 100644 --- a/r600/r700_shader.h +++ b/r600/r700_shader.h @@ -109,6 +109,7 @@ typedef struct R700_Shader GLuint uStackSize; GLuint uMaxCallDepth; + TypedShaderList * 
plstCFInstructions_active; TypedShaderList lstCFInstructions; TypedShaderList lstALUInstructions; TypedShaderList lstTEXInstructions; @@ -132,13 +133,13 @@ void TakeInstOutFromList(TypedShaderList * plstCFInstructions, R700ShaderInstruc void ResolveLinks(R700_Shader *pShader); void Assemble(R700_Shader *pShader); - //Interface void Init_R700_Shader(R700_Shader * pShader); void AddCFInstruction(R700_Shader *pShader, R700ControlFlowInstruction *pCFInst); void AddVTXInstruction(R700_Shader *pShader, R700VertexInstruction *pVTXInst); void AddTEXInstruction(R700_Shader *pShader, R700TextureInstruction *pTEXInst); void AddALUInstruction(R700_Shader *pShader, R700ALUInstruction *pALUInst); +void SetActiveCFlist(R700_Shader *pShader, TypedShaderList * plstCF); void LoadProgram(R700_Shader *pShader); void UpdateShaderRegisters(R700_Shader *pShader); diff --git a/r600/r700_shaderinst.h b/r600/r700_shaderinst.h index 2829cca..cdb9a57 100644 --- a/r600/r700_shaderinst.h +++ b/r600/r700_shaderinst.h @@ -42,6 +42,13 @@ #define SQ_FETCH_RESOURCE_VS_OFFSET 0x000000a0 #define SQ_FETCH_RESOURCE_VS_COUNT 0x000000b0 +//richard dec.10 glsl +#define SQ_TEX_SAMPLER_PS_OFFSET 0x00000000 +#define SQ_TEX_SAMPLER_PS_COUNT 0x00000012 +#define SQ_TEX_SAMPLER_VS_OFFSET 0x00000012 +#define SQ_TEX_SAMPLER_VS_COUNT 0x00000012 +//------------------- + #define SHADERINST_TYPEMASK_CF 0x10 #define SHADERINST_TYPEMASK_ALU 0x20 #define SHADERINST_TYPEMASK_TEX 0x40 diff --git a/r600/r700_state.c b/r600/r700_state.c index 16b05d5..6f156b5 100644 --- a/r600/r700_state.c +++ b/r600/r700_state.c @@ -26,7 +26,6 @@ #include "main/glheader.h" #include "main/mtypes.h" -#include "main/state.h" #include "main/imports.h" #include "main/enums.h" #include "main/macros.h" @@ -36,11 +35,9 @@ #include "tnl/tnl.h" #include "tnl/t_pipeline.h" -#include "tnl/t_vp_build.h" #include "swrast/swrast.h" #include "swrast_setup/swrast_setup.h" #include "main/api_arrayelt.h" -#include "main/state.h" #include "main/framebuffer.h" 
#include "shader/prog_parameter.h" @@ -59,6 +56,7 @@ static void r700SetClipPlaneState(GLcontext * ctx, GLenum cap, GLboolean state); static void r700UpdatePolygonMode(GLcontext * ctx); static void r700SetPolygonOffsetState(GLcontext * ctx, GLboolean state); static void r700SetStencilState(GLcontext * ctx, GLboolean state); +static void r700UpdateWindow(GLcontext * ctx, int id); void r700UpdateShaders(GLcontext * ctx) { @@ -67,7 +65,7 @@ void r700UpdateShaders(GLcontext * ctx) /* should only happenen once, just after context is created */ /* TODO: shouldn't we fallback to sw here? */ if (!ctx->FragmentProgram._Current) { - _mesa_fprintf(stderr, "No ctx->FragmentProgram._Current!!\n"); + fprintf(stderr, "No ctx->FragmentProgram._Current!!\n"); return; } @@ -85,7 +83,7 @@ void r700UpdateViewportOffset(GLcontext * ctx) //------------------ { context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); - __DRIdrawablePrivate *dPriv = radeon_get_drawable(&context->radeon); + __DRIdrawable *dPriv = radeon_get_drawable(&context->radeon); GLfloat xoffset = (GLfloat) dPriv->x; GLfloat yoffset = (GLfloat) dPriv->y + dPriv->h; const GLfloat *v = ctx->Viewport._WindowMap.m; @@ -780,6 +778,9 @@ static void r700Enable(GLcontext * ctx, GLenum cap, GLboolean state) //--------- case GL_LINE_STIPPLE: r700UpdateLineStipple(ctx); break; + case GL_DEPTH_CLAMP: + r700UpdateWindow(ctx, 0); + break; default: break; } @@ -910,10 +911,12 @@ static void r700PointParameter(GLcontext * ctx, GLenum pname, const GLfloat * pa case GL_POINT_SIZE_MIN: SETfield(r700->PA_SU_POINT_MINMAX.u32All, (int)(ctx->Point.MinSize * 8.0), MIN_SIZE_shift, MIN_SIZE_mask); + r700PointSize(ctx, ctx->Point.Size); break; case GL_POINT_SIZE_MAX: SETfield(r700->PA_SU_POINT_MINMAX.u32All, (int)(ctx->Point.MaxSize * 8.0), MAX_SIZE_shift, MAX_SIZE_mask); + r700PointSize(ctx, ctx->Point.Size); break; case GL_POINT_DISTANCE_ATTENUATION: break; @@ -1071,7 +1074,7 @@ static void 
r700UpdateWindow(GLcontext * ctx, int id) //-------------------- { context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); - __DRIdrawablePrivate *dPriv = radeon_get_drawable(&context->radeon); + __DRIdrawable *dPriv = radeon_get_drawable(&context->radeon); GLfloat xoffset = dPriv ? (GLfloat) dPriv->x : 0; GLfloat yoffset = dPriv ? (GLfloat) dPriv->y + dPriv->h : 0; const GLfloat *v = ctx->Viewport._WindowMap.m; @@ -1576,9 +1579,9 @@ static void r700InitSQConfig(GLcontext * ctx) SETbit(r700->sq_config.SQ_CONFIG.u32All, DX9_CONSTS_bit); SETbit(r700->sq_config.SQ_CONFIG.u32All, ALU_INST_PREFER_VECTOR_bit); SETfield(r700->sq_config.SQ_CONFIG.u32All, ps_prio, PS_PRIO_shift, PS_PRIO_mask); - SETfield(r700->sq_config.SQ_CONFIG.u32All, ps_prio, VS_PRIO_shift, VS_PRIO_mask); - SETfield(r700->sq_config.SQ_CONFIG.u32All, ps_prio, GS_PRIO_shift, GS_PRIO_mask); - SETfield(r700->sq_config.SQ_CONFIG.u32All, ps_prio, ES_PRIO_shift, ES_PRIO_mask); + SETfield(r700->sq_config.SQ_CONFIG.u32All, vs_prio, VS_PRIO_shift, VS_PRIO_mask); + SETfield(r700->sq_config.SQ_CONFIG.u32All, gs_prio, GS_PRIO_shift, GS_PRIO_mask); + SETfield(r700->sq_config.SQ_CONFIG.u32All, es_prio, ES_PRIO_shift, ES_PRIO_mask); r700->sq_config.SQ_GPR_RESOURCE_MGMT_1.u32All = 0; SETfield(r700->sq_config.SQ_GPR_RESOURCE_MGMT_1.u32All, num_ps_gprs, NUM_PS_GPRS_shift, NUM_PS_GPRS_mask); @@ -1625,8 +1628,6 @@ void r700InitState(GLcontext * ctx) //------------------- R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); int id = 0; - radeon_firevertices(&context->radeon); - r700->TA_CNTL_AUX.u32All = 0; SETfield(r700->TA_CNTL_AUX.u32All, 28, TD_FIFO_CREDIT_shift, TD_FIFO_CREDIT_mask); r700->VC_ENHANCE.u32All = 0; @@ -1724,10 +1725,10 @@ void r700InitState(GLcontext * ctx) //------------------- r700InitSQConfig(ctx); r700ColorMask(ctx, - ctx->Color.ColorMask[RCOMP], - ctx->Color.ColorMask[GCOMP], - ctx->Color.ColorMask[BCOMP], - ctx->Color.ColorMask[ACOMP]); + 
ctx->Color.ColorMask[0][RCOMP], + ctx->Color.ColorMask[0][GCOMP], + ctx->Color.ColorMask[0][BCOMP], + ctx->Color.ColorMask[0][ACOMP]); r700Enable(ctx, GL_DEPTH_TEST, ctx->Depth.Test); r700DepthMask(ctx, ctx->Depth.Mask); diff --git a/r600/r700_vertprog.c b/r600/r700_vertprog.c index 6986eb0..05c6516 100644 --- a/r600/r700_vertprog.c +++ b/r600/r700_vertprog.c @@ -111,6 +111,15 @@ unsigned int Map_Vertex_Output(r700_AssemblerBase *pAsm, } } + for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++) + { + unBit = 1 << i; + if(mesa_vp->Base.OutputsWritten & unBit) + { + pAsm->ucVP_OutputMap[i] = unTotal++; + } + } + return (unTotal - unStart); } @@ -179,7 +188,8 @@ GLboolean Process_Vertex_Program_Vfetch_Instructions2( context->stream_desc[i].size, context->stream_desc[i].element, context->stream_desc[i]._signed, - context->stream_desc[i].normalize, + context->stream_desc[i].normalize, + context->stream_desc[i].format, &vtxFetchMethod); } @@ -235,6 +245,8 @@ void Map_Vertex_Program(GLcontext *ctx, pAsm->number_used_registers += mesa_vp->Base.NumTemporaries; } + pAsm->flag_reg_index = pAsm->number_used_registers++; + pAsm->uFirstHelpReg = pAsm->number_used_registers; } @@ -295,8 +307,8 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, struct r700_vertex_program *vp; unsigned int i; - vp = _mesa_calloc(sizeof(*vp)); - vp->mesa_program = (struct gl_vertex_program *)_mesa_clone_program(ctx, &mesa_vp->Base); + vp = calloc(1, sizeof(*vp)); + vp->mesa_program = _mesa_clone_vertex_program(ctx, mesa_vp); if (mesa_vp->IsPositionInvariant) { @@ -308,6 +320,7 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, vp->aos_desc[i].size = context->stream_desc[i].size; vp->aos_desc[i].stride = context->stream_desc[i].stride; vp->aos_desc[i].type = context->stream_desc[i].type; + vp->aos_desc[i].format = context->stream_desc[i].format; } if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) @@ -324,7 +337,18 @@ struct r700_vertex_program* 
r700TranslateVertexShader(GLcontext *ctx, return NULL; } - if(GL_FALSE == AssembleInstr(vp->mesa_program->Base.NumInstructions, + InitShaderProgram(&(vp->r700AsmCode)); + + for(i=0; i < MAX_SAMPLERS; i++) + { + vp->r700AsmCode.SamplerUnits[i] = vp->mesa_program->Base.SamplerUnits[i]; + } + + vp->r700AsmCode.unCurNumILInsts = vp->mesa_program->Base.NumInstructions; + + if(GL_FALSE == AssembleInstr(0, + 0, + vp->mesa_program->Base.NumInstructions, &(vp->mesa_program->Base.Instructions[0]), &(vp->r700AsmCode)) ) { @@ -336,6 +360,11 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, return NULL; } + if( GL_FALSE == RelocProgram(&(vp->r700AsmCode), &(vp->mesa_program->Base)) ) + { + return GL_FALSE; + } + vp->r700Shader.nRegs = (vp->r700AsmCode.number_used_registers == 0) ? 0 : (vp->r700AsmCode.number_used_registers - 1); @@ -368,7 +397,8 @@ void r700SelectVertexShader(GLcontext *ctx) match = GL_TRUE; for(i=0; i<context->nNumActiveAos; i++) { - if (vp->aos_desc[i].size != context->stream_desc[i].size) + if (vp->aos_desc[i].size != context->stream_desc[i].size || + vp->aos_desc[i].format != context->stream_desc[i].format) { match = GL_FALSE; break; @@ -471,6 +501,7 @@ static void r700TranslateAttrib(GLcontext *ctx, GLuint unLoc, int count, const s pStreamDesc->size = input->Size; pStreamDesc->dst_loc = context->nNumActiveAos; pStreamDesc->element = unLoc; + pStreamDesc->format = input->Format; switch (pStreamDesc->type) { //GetSurfaceFormat @@ -612,6 +643,12 @@ GLboolean r700SetupVertexProgram(GLcontext * ctx) paramList = vp->mesa_program->Base.Parameters; if(NULL != paramList) { + /* vp->mesa_program was cloned, not updated by glsl shader api. 
*/ + /* _mesa_reference_program has already checked glsl shProg is ok and set ctx->VertexProgem._Current */ + /* so, use ctx->VertexProgem._Current */ + struct gl_program_parameter_list *paramListOrginal = + ctx->VertexProgram._Current->Base.Parameters; + _mesa_load_state_parameters(ctx, paramList); if (paramList->NumParameters > R700_MAX_DX9_CONSTS) @@ -624,13 +661,42 @@ GLboolean r700SetupVertexProgram(GLcontext * ctx) unNumParamData = paramList->NumParameters; for(ui=0; ui<unNumParamData; ui++) { - r700->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0]; - r700->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1]; - r700->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2]; - r700->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3]; + if(paramList->Parameters[ui].Type == PROGRAM_UNIFORM) + { + r700->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0]; + r700->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1]; + r700->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2]; + r700->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3]; + } + else + { + r700->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0]; + r700->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1]; + r700->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2]; + r700->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3]; + } } } else r700->vs.num_consts = 0; + COMPILED_SUB * pCompiledSub; + GLuint uj; + GLuint unConstOffset = r700->vs.num_consts; + for(ui=0; ui<vp->r700AsmCode.unNumPresub; ui++) + { + pCompiledSub = vp->r700AsmCode.presubs[ui].pCompiledSub; + + r700->vs.num_consts += pCompiledSub->NumParameters; + + for(uj=0; uj<pCompiledSub->NumParameters; uj++) + { + r700->vs.consts[uj + unConstOffset][0].f32All = pCompiledSub->ParameterValues[uj][0]; + r700->vs.consts[uj + unConstOffset][1].f32All = pCompiledSub->ParameterValues[uj][1]; + r700->vs.consts[uj + 
unConstOffset][2].f32All = pCompiledSub->ParameterValues[uj][2]; + r700->vs.consts[uj + unConstOffset][3].f32All = pCompiledSub->ParameterValues[uj][3]; + } + unConstOffset += pCompiledSub->NumParameters; + } + return GL_TRUE; } diff --git a/r600/r700_vertprog.h b/r600/r700_vertprog.h index 00824c2..645c9ac 100644 --- a/r600/r700_vertprog.h +++ b/r600/r700_vertprog.h @@ -39,6 +39,7 @@ typedef struct ArrayDesc //TEMP GLint size; //number of data element GLenum type; //data element type GLsizei stride; + GLenum format; //GL_RGBA or GL_BGRA } ArrayDesc; struct r700_vertex_program diff --git a/radeon/Makefile.am b/radeon/Makefile.am index 0ff149d..7a954ac 100644 --- a/radeon/Makefile.am +++ b/radeon/Makefile.am @@ -20,6 +20,7 @@ radeon_dri_la_SOURCES = \ radeon_queryobj.c \ radeon_span.c \ radeon_texture.c \ + radeon_tex_copy.c \ radeon_context.c \ radeon_ioctl.c \ radeon_screen.c \ @@ -30,7 +31,8 @@ radeon_dri_la_SOURCES = \ radeon_tcl.c \ radeon_swtcl.c \ radeon_maos.c \ - radeon_sanity.c + radeon_sanity.c \ + radeon_blit.c if HAVE_LIBDRM_RADEON radeon_dri_la_CFLAGS += -DHAVE_LIBDRM_RADEON=1 $(LIBDRM_RADEON_CFLAGS) diff --git a/radeon/radeon_blit.c b/radeon/radeon_blit.c new file mode 100644 index 0000000..e1e1f21 --- /dev/null +++ b/radeon/radeon_blit.c @@ -0,0 +1,404 @@ +/* + * Copyright (C) 2010 Advanced Micro Devices, Inc. + * + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_common.h" +#include "radeon_context.h" +#include "radeon_blit.h" + +static inline uint32_t cmdpacket0(struct radeon_screen *rscrn, + int reg, int count) +{ + if (count) + return CP_PACKET0(reg, count - 1); + return CP_PACKET2; +} + +/* common formats supported as both textures and render targets */ +unsigned r100_check_blit(gl_format mesa_format) +{ + /* XXX others? BE/LE? */ + switch (mesa_format) { + case MESA_FORMAT_ARGB8888: + case MESA_FORMAT_XRGB8888: + case MESA_FORMAT_RGB565: + case MESA_FORMAT_ARGB4444: + case MESA_FORMAT_ARGB1555: + case MESA_FORMAT_A8: + break; + default: + return 0; + } + + /* ??? 
*/ + if (_mesa_get_format_bits(mesa_format, GL_DEPTH_BITS) > 0) + return 0; + + return 1; +} + +static inline void emit_vtx_state(struct r100_context *r100) +{ + BATCH_LOCALS(&r100->radeon); + + BEGIN_BATCH(8); + if (r100->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) { + OUT_BATCH_REGVAL(RADEON_SE_CNTL_STATUS, 0); + } else { + OUT_BATCH_REGVAL(RADEON_SE_CNTL_STATUS, RADEON_TCL_BYPASS); + + } + OUT_BATCH_REGVAL(RADEON_SE_COORD_FMT, (RADEON_VTX_XY_PRE_MULT_1_OVER_W0 | + RADEON_TEX1_W_ROUTING_USE_W0)); + OUT_BATCH_REGVAL(RADEON_SE_VTX_FMT, RADEON_SE_VTX_FMT_XY | RADEON_SE_VTX_FMT_ST0); + OUT_BATCH_REGVAL(RADEON_SE_CNTL, (RADEON_DIFFUSE_SHADE_GOURAUD | + RADEON_BFACE_SOLID | + RADEON_FFACE_SOLID | + RADEON_VTX_PIX_CENTER_OGL | + RADEON_ROUND_MODE_ROUND | + RADEON_ROUND_PREC_4TH_PIX)); + END_BATCH(); +} + +static void inline emit_tx_setup(struct r100_context *r100, + gl_format mesa_format, + struct radeon_bo *bo, + intptr_t offset, + unsigned width, + unsigned height, + unsigned pitch) +{ + uint32_t txformat = RADEON_TXFORMAT_NON_POWER2; + BATCH_LOCALS(&r100->radeon); + + assert(width <= 2047); + assert(height <= 2047); + assert(offset % 32 == 0); + + /* XXX others? BE/LE? 
*/ + switch (mesa_format) { + case MESA_FORMAT_ARGB8888: + txformat |= RADEON_TXFORMAT_ARGB8888 | RADEON_TXFORMAT_ALPHA_IN_MAP; + break; + case MESA_FORMAT_XRGB8888: + txformat |= RADEON_TXFORMAT_ARGB8888; + break; + case MESA_FORMAT_RGB565: + txformat |= RADEON_TXFORMAT_RGB565; + break; + case MESA_FORMAT_ARGB4444: + txformat |= RADEON_TXFORMAT_ARGB4444 | RADEON_TXFORMAT_ALPHA_IN_MAP; + break; + case MESA_FORMAT_ARGB1555: + txformat |= RADEON_TXFORMAT_ARGB1555 | RADEON_TXFORMAT_ALPHA_IN_MAP; + break; + case MESA_FORMAT_A8: + txformat |= RADEON_TXFORMAT_I8 | RADEON_TXFORMAT_ALPHA_IN_MAP; + break; + default: + break; + } + + BEGIN_BATCH(18); + OUT_BATCH_REGVAL(RADEON_PP_CNTL, RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE); + OUT_BATCH_REGVAL(RADEON_PP_TXCBLEND_0, (RADEON_COLOR_ARG_A_ZERO | + RADEON_COLOR_ARG_B_ZERO | + RADEON_COLOR_ARG_C_T0_COLOR | + RADEON_BLEND_CTL_ADD | + RADEON_CLAMP_TX)); + OUT_BATCH_REGVAL(RADEON_PP_TXABLEND_0, (RADEON_ALPHA_ARG_A_ZERO | + RADEON_ALPHA_ARG_B_ZERO | + RADEON_ALPHA_ARG_C_T0_ALPHA | + RADEON_BLEND_CTL_ADD | + RADEON_CLAMP_TX)); + OUT_BATCH_REGVAL(RADEON_PP_TXFILTER_0, (RADEON_CLAMP_S_CLAMP_LAST | + RADEON_CLAMP_T_CLAMP_LAST | + RADEON_MAG_FILTER_NEAREST | + RADEON_MIN_FILTER_NEAREST)); + OUT_BATCH_REGVAL(RADEON_PP_TXFORMAT_0, txformat); + OUT_BATCH_REGVAL(RADEON_PP_TEX_SIZE_0, ((width - 1) | + ((height - 1) << RADEON_TEX_VSIZE_SHIFT))); + OUT_BATCH_REGVAL(RADEON_PP_TEX_PITCH_0, pitch * _mesa_get_format_bytes(mesa_format) - 32); + + OUT_BATCH_REGSEQ(RADEON_PP_TXOFFSET_0, 1); + OUT_BATCH_RELOC(0, bo, 0, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); + + END_BATCH(); +} + +static inline void emit_cb_setup(struct r100_context *r100, + struct radeon_bo *bo, + intptr_t offset, + gl_format mesa_format, + unsigned pitch, + unsigned width, + unsigned height) +{ + uint32_t dst_pitch = pitch; + uint32_t dst_format = 0; + BATCH_LOCALS(&r100->radeon); + + /* XXX others? BE/LE? 
*/ + switch (mesa_format) { + case MESA_FORMAT_ARGB8888: + case MESA_FORMAT_XRGB8888: + dst_format = RADEON_COLOR_FORMAT_ARGB8888; + break; + case MESA_FORMAT_RGB565: + dst_format = RADEON_COLOR_FORMAT_RGB565; + break; + case MESA_FORMAT_ARGB4444: + dst_format = RADEON_COLOR_FORMAT_ARGB4444; + break; + case MESA_FORMAT_ARGB1555: + dst_format = RADEON_COLOR_FORMAT_ARGB1555; + break; + case MESA_FORMAT_A8: + dst_format = RADEON_COLOR_FORMAT_RGB8; + break; + default: + break; + } + + BEGIN_BATCH_NO_AUTOSTATE(18); + OUT_BATCH_REGVAL(RADEON_RE_TOP_LEFT, 0); + OUT_BATCH_REGVAL(RADEON_RE_WIDTH_HEIGHT, ((width << RADEON_RE_WIDTH_SHIFT) | + (height << RADEON_RE_HEIGHT_SHIFT))); + OUT_BATCH_REGVAL(RADEON_RB3D_PLANEMASK, 0xffffffff); + OUT_BATCH_REGVAL(RADEON_RB3D_BLENDCNTL, RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO); + OUT_BATCH_REGVAL(RADEON_RB3D_CNTL, dst_format); + + OUT_BATCH_REGSEQ(RADEON_RB3D_COLOROFFSET, 1); + OUT_BATCH_RELOC(0, bo, 0, 0, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0); + OUT_BATCH_REGSEQ(RADEON_RB3D_COLORPITCH, 1); + OUT_BATCH_RELOC(dst_pitch, bo, dst_pitch, 0, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0); + + END_BATCH(); +} + +static GLboolean validate_buffers(struct r100_context *r100, + struct radeon_bo *src_bo, + struct radeon_bo *dst_bo) +{ + int ret; + + radeon_cs_space_reset_bos(r100->radeon.cmdbuf.cs); + + ret = radeon_cs_space_check_with_bo(r100->radeon.cmdbuf.cs, + src_bo, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT, 0); + if (ret) + return GL_FALSE; + + ret = radeon_cs_space_check_with_bo(r100->radeon.cmdbuf.cs, + dst_bo, 0, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT); + if (ret) + return GL_FALSE; + + return GL_TRUE; +} + +/** + * Calculate texcoords for given image region. 
+ * Output values are [minx, maxx, miny, maxy] + */ +static inline void calc_tex_coords(float img_width, float img_height, + float x, float y, + float reg_width, float reg_height, + unsigned flip_y, float *buf) +{ + buf[0] = x / img_width; + buf[1] = buf[0] + reg_width / img_width; + buf[2] = y / img_height; + buf[3] = buf[2] + reg_height / img_height; + if (flip_y) + { + buf[2] = 1.0 - buf[2]; + buf[3] = 1.0 - buf[3]; + } +} + +static inline void emit_draw_packet(struct r100_context *r100, + unsigned src_width, unsigned src_height, + unsigned src_x_offset, unsigned src_y_offset, + unsigned dst_x_offset, unsigned dst_y_offset, + unsigned reg_width, unsigned reg_height, + unsigned flip_y) +{ + float texcoords[4]; + float verts[12]; + BATCH_LOCALS(&r100->radeon); + + calc_tex_coords(src_width, src_height, + src_x_offset, src_y_offset, + reg_width, reg_height, + flip_y, texcoords); + + verts[0] = dst_x_offset; + verts[1] = dst_y_offset + reg_height; + verts[2] = texcoords[0]; + verts[3] = texcoords[3]; + + verts[4] = dst_x_offset + reg_width; + verts[5] = dst_y_offset + reg_height; + verts[6] = texcoords[1]; + verts[7] = texcoords[3]; + + verts[8] = dst_x_offset + reg_width; + verts[9] = dst_y_offset; + verts[10] = texcoords[1]; + verts[11] = texcoords[2]; + + BEGIN_BATCH(15); + OUT_BATCH(RADEON_CP_PACKET3_3D_DRAW_IMMD | (13 << 16)); + OUT_BATCH(RADEON_CP_VC_FRMT_XY | RADEON_CP_VC_FRMT_ST0); + OUT_BATCH(RADEON_CP_VC_CNTL_PRIM_WALK_RING | + RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST | + RADEON_CP_VC_CNTL_MAOS_ENABLE | + RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE | + (3 << 16)); + OUT_BATCH_TABLE(verts, 12); + END_BATCH(); +} + +/** + * Copy a region of [@a width x @a height] pixels from source buffer + * to destination buffer. 
+ * @param[in] r100 r100 context + * @param[in] src_bo source radeon buffer object + * @param[in] src_offset offset of the source image in the @a src_bo + * @param[in] src_mesaformat source image format + * @param[in] src_pitch aligned source image width + * @param[in] src_width source image width + * @param[in] src_height source image height + * @param[in] src_x_offset x offset in the source image + * @param[in] src_y_offset y offset in the source image + * @param[in] dst_bo destination radeon buffer object + * @param[in] dst_offset offset of the destination image in the @a dst_bo + * @param[in] dst_mesaformat destination image format + * @param[in] dst_pitch aligned destination image width + * @param[in] dst_width destination image width + * @param[in] dst_height destination image height + * @param[in] dst_x_offset x offset in the destination image + * @param[in] dst_y_offset y offset in the destination image + * @param[in] width region width + * @param[in] height region height + * @param[in] flip_y set if y coords of the source image need to be flipped + */ +unsigned r100_blit(GLcontext *ctx, + struct radeon_bo *src_bo, + intptr_t src_offset, + gl_format src_mesaformat, + unsigned src_pitch, + unsigned src_width, + unsigned src_height, + unsigned src_x_offset, + unsigned src_y_offset, + struct radeon_bo *dst_bo, + intptr_t dst_offset, + gl_format dst_mesaformat, + unsigned dst_pitch, + unsigned dst_width, + unsigned dst_height, + unsigned dst_x_offset, + unsigned dst_y_offset, + unsigned reg_width, + unsigned reg_height, + unsigned flip_y) +{ + struct r100_context *r100 = R100_CONTEXT(ctx); + + if (!r100_check_blit(dst_mesaformat)) + return GL_FALSE; + + /* Make sure that colorbuffer has even width - hw limitation */ + if (dst_pitch % 2 > 0) + ++dst_pitch; + + /* Rendering to small buffer doesn't work. + * Looks like a hw limitation. 
+ */ + if (dst_pitch < 32) + return GL_FALSE; + + /* Need to clamp the region size to make sure + * we don't read outside of the source buffer + * or write outside of the destination buffer. + */ + if (reg_width + src_x_offset > src_width) + reg_width = src_width - src_x_offset; + if (reg_height + src_y_offset > src_height) + reg_height = src_height - src_y_offset; + if (reg_width + dst_x_offset > dst_width) + reg_width = dst_width - dst_x_offset; + if (reg_height + dst_y_offset > dst_height) + reg_height = dst_height - dst_y_offset; + + if (src_bo == dst_bo) { + return GL_FALSE; + } + + if (src_offset % 32 || dst_offset % 32) { + return GL_FALSE; + } + + if (0) { + fprintf(stderr, "src: size [%d x %d], pitch %d, " + "offset [%d x %d], format %s, bo %p\n", + src_width, src_height, src_pitch, + src_x_offset, src_y_offset, + _mesa_get_format_name(src_mesaformat), + src_bo); + fprintf(stderr, "dst: pitch %d, offset[%d x %d], format %s, bo %p\n", + dst_pitch, dst_x_offset, dst_y_offset, + _mesa_get_format_name(dst_mesaformat), dst_bo); + fprintf(stderr, "region: %d x %d\n", reg_width, reg_height); + } + + /* Flush is needed to make sure that source buffer has correct data */ + radeonFlush(ctx); + + rcommonEnsureCmdBufSpace(&r100->radeon, 59, __FUNCTION__); + + if (!validate_buffers(r100, src_bo, dst_bo)) + return GL_FALSE; + + /* 8 */ + emit_vtx_state(r100); + /* 18 */ + emit_tx_setup(r100, src_mesaformat, src_bo, src_offset, src_width, src_height, src_pitch); + /* 18 */ + emit_cb_setup(r100, dst_bo, dst_offset, dst_mesaformat, dst_pitch, dst_width, dst_height); + /* 15 */ + emit_draw_packet(r100, src_width, src_height, + src_x_offset, src_y_offset, + dst_x_offset, dst_y_offset, + reg_width, reg_height, + flip_y); + + radeonFlush(ctx); + + return GL_TRUE; +} diff --git a/radeon/radeon_blit.h b/radeon/radeon_blit.h new file mode 100644 index 0000000..d7d0b55 --- /dev/null +++ b/radeon/radeon_blit.h @@ -0,0 +1,56 @@ +/* + * Copyright (C) 2010 Advanced Micro Devices, Inc. 
+ * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef RADEON_BLIT_H +#define RADEON_BLIT_H + +void r100_blit_init(struct r100_context *r100); + +unsigned r100_check_blit(gl_format mesa_format); + +unsigned r100_blit(GLcontext *ctx, + struct radeon_bo *src_bo, + intptr_t src_offset, + gl_format src_mesaformat, + unsigned src_pitch, + unsigned src_width, + unsigned src_height, + unsigned src_x_offset, + unsigned src_y_offset, + struct radeon_bo *dst_bo, + intptr_t dst_offset, + gl_format dst_mesaformat, + unsigned dst_pitch, + unsigned dst_width, + unsigned dst_height, + unsigned dst_x_offset, + unsigned dst_y_offset, + unsigned width, + unsigned height, + unsigned flip_y); + +#endif // RADEON_BLIT_H diff --git a/radeon/radeon_buffer_objects.c b/radeon/radeon_buffer_objects.c index 99d3ec7..0897daf 100644 --- a/radeon/radeon_buffer_objects.c +++ b/radeon/radeon_buffer_objects.c @@ -70,7 +70,7 @@ radeonDeleteBufferObject(GLcontext * ctx, radeon_bo_unref(radeon_obj->bo); } - _mesa_free(radeon_obj); + free(radeon_obj); } @@ -114,7 +114,7 @@ radeonBufferData(GLcontext * ctx, if (data != NULL) { radeon_bo_map(radeon_obj->bo, GL_TRUE); - _mesa_memcpy(radeon_obj->bo->ptr, data, size); + memcpy(radeon_obj->bo->ptr, data, size); radeon_bo_unmap(radeon_obj->bo); } @@ -145,7 +145,7 @@ radeonBufferSubData(GLcontext * ctx, radeon_bo_map(radeon_obj->bo, GL_TRUE); - _mesa_memcpy(radeon_obj->bo->ptr + offset, data, size); + memcpy(radeon_obj->bo->ptr + offset, data, size); radeon_bo_unmap(radeon_obj->bo); } @@ -165,7 +165,7 @@ radeonGetBufferSubData(GLcontext * ctx, radeon_bo_map(radeon_obj->bo, GL_FALSE); - _mesa_memcpy(data, radeon_obj->bo->ptr + offset, size); + memcpy(data, radeon_obj->bo->ptr + offset, size); radeon_bo_unmap(radeon_obj->bo); } diff --git a/radeon/radeon_chipset.h b/radeon/radeon_chipset.h index 46a9cd5..f17a305 100644 --- a/radeon/radeon_chipset.h +++ b/radeon/radeon_chipset.h @@ -340,6 +340,7 @@ #define PCI_CHIP_RS880_9712 0x9712 #define PCI_CHIP_RS880_9713 0x9713 #define PCI_CHIP_RS880_9714 
0x9714 +#define PCI_CHIP_RS880_9715 0x9715 #define PCI_CHIP_RV770_9440 0x9440 #define PCI_CHIP_RV770_9441 0x9441 diff --git a/radeon/radeon_common.c b/radeon/radeon_common.c index 9b64c21..13f1f06 100644 --- a/radeon/radeon_common.c +++ b/radeon/radeon_common.c @@ -137,7 +137,7 @@ void radeon_get_cliprects(radeonContextPtr radeon, unsigned int *num_cliprects, int *x_off, int *y_off) { - __DRIdrawablePrivate *dPriv = radeon_get_drawable(radeon); + __DRIdrawable *dPriv = radeon_get_drawable(radeon); struct radeon_framebuffer *rfb = dPriv->driverPrivate; if (radeon->constant_cliprect) { @@ -169,8 +169,8 @@ void radeon_get_cliprects(radeonContextPtr radeon, */ void radeonSetCliprects(radeonContextPtr radeon) { - __DRIdrawablePrivate *const drawable = radeon_get_drawable(radeon); - __DRIdrawablePrivate *const readable = radeon_get_readable(radeon); + __DRIdrawable *const drawable = radeon_get_drawable(radeon); + __DRIdrawable *const readable = radeon_get_readable(radeon); struct radeon_framebuffer *const draw_rfb = drawable->driverPrivate; struct radeon_framebuffer *const read_rfb = readable->driverPrivate; int x_off, y_off; @@ -229,7 +229,7 @@ void radeonUpdateScissor( GLcontext *ctx ) } if (!rmesa->radeonScreen->kernel_mm) { /* Fix scissors for dri 1 */ - __DRIdrawablePrivate *dPriv = radeon_get_drawable(rmesa); + __DRIdrawable *dPriv = radeon_get_drawable(rmesa); x1 += dPriv->x; x2 += dPriv->x + 1; min_x += dPriv->x; @@ -428,7 +428,7 @@ static void radeon_flip_renderbuffers(struct radeon_framebuffer *rfb) /* Copy the back color buffer to the front color buffer. 
*/ -void radeonCopyBuffer( __DRIdrawablePrivate *dPriv, +void radeonCopyBuffer( __DRIdrawable *dPriv, const drm_clip_rect_t *rect) { radeonContextPtr rmesa; @@ -496,7 +496,7 @@ void radeonCopyBuffer( __DRIdrawablePrivate *dPriv, UNLOCK_HARDWARE( rmesa ); } -static int radeonScheduleSwap(__DRIdrawablePrivate *dPriv, GLboolean *missed_target) +static int radeonScheduleSwap(__DRIdrawable *dPriv, GLboolean *missed_target) { radeonContextPtr rmesa; @@ -519,11 +519,11 @@ static int radeonScheduleSwap(__DRIdrawablePrivate *dPriv, GLboolean *missed_tar return 0; } -static GLboolean radeonPageFlip( __DRIdrawablePrivate *dPriv ) +static GLboolean radeonPageFlip( __DRIdrawable *dPriv ) { radeonContextPtr radeon; GLint ret; - __DRIscreenPrivate *psp; + __DRIscreen *psp; struct radeon_renderbuffer *rrb; struct radeon_framebuffer *rfb; @@ -571,10 +571,10 @@ static GLboolean radeonPageFlip( __DRIdrawablePrivate *dPriv ) /** * Swap front and back buffer. */ -void radeonSwapBuffers(__DRIdrawablePrivate * dPriv) +void radeonSwapBuffers(__DRIdrawable * dPriv) { int64_t ust; - __DRIscreenPrivate *psp; + __DRIscreen *psp; if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) { radeonContextPtr radeon; @@ -615,7 +615,7 @@ void radeonSwapBuffers(__DRIdrawablePrivate * dPriv) } } -void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv, +void radeonCopySubBuffer(__DRIdrawable * dPriv, int x, int y, int w, int h ) { if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) { @@ -641,6 +641,27 @@ void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv, } } +/** + * Check if we're about to draw into the front color buffer. + * If so, set the radeon->front_buffer_dirty field to true. 
+ */ +void +radeon_check_front_buffer_rendering(GLcontext *ctx) +{ + radeonContextPtr radeon = RADEON_CONTEXT(ctx); + const struct gl_framebuffer *fb = ctx->DrawBuffer; + + if (fb->Name == 0) { + /* drawing to window system buffer */ + if (fb->_NumColorDrawBuffers > 0) { + if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) { + radeon->front_buffer_dirty = GL_TRUE; + } + } + } +} + + void radeon_draw_buffer(GLcontext *ctx, struct gl_framebuffer *fb) { radeonContextPtr radeon = RADEON_CONTEXT(ctx); @@ -817,7 +838,7 @@ void radeonDrawBuffer( GLcontext *ctx, GLenum mode ) */ if (!was_front_buffer_rendering && radeon->is_front_buffer_rendering) { radeon_update_renderbuffers(radeon->dri.context, - radeon->dri.context->driDrawablePriv); + radeon->dri.context->driDrawablePriv, GL_FALSE); } } @@ -834,7 +855,7 @@ void radeonReadBuffer( GLcontext *ctx, GLenum mode ) if (!was_front_buffer_reading && rmesa->is_front_buffer_reading) { radeon_update_renderbuffers(rmesa->dri.context, - rmesa->dri.context->driReadablePriv); + rmesa->dri.context->driReadablePriv, GL_FALSE); } } /* nothing, until we implement h/w glRead/CopyPixels or CopyTexImage */ @@ -885,9 +906,9 @@ void radeon_viewport(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei he if (radeon->is_front_buffer_rendering) { ctx->Driver.Flush(ctx); } - radeon_update_renderbuffers(driContext, driContext->driDrawablePriv); + radeon_update_renderbuffers(driContext, driContext->driDrawablePriv, GL_FALSE); if (driContext->driDrawablePriv != driContext->driReadablePriv) - radeon_update_renderbuffers(driContext, driContext->driReadablePriv); + radeon_update_renderbuffers(driContext, driContext->driReadablePriv, GL_FALSE); } old_viewport = ctx->Driver.Viewport; @@ -1015,10 +1036,11 @@ static INLINE void radeon_emit_atom(radeonContextPtr radeon, struct radeon_state OUT_BATCH_TABLE(atom->cmd, dwords); END_BATCH(); } + atom->dirty = GL_FALSE; + } else { radeon_print(RADEON_STATE, RADEON_VERBOSE, " skip state %s\n", 
atom->name); } - atom->dirty = GL_FALSE; } @@ -1095,7 +1117,7 @@ void radeonFlush(GLcontext *ctx) then no point flushing anything at all. */ if (!radeon->dma.flush && !radeon->cmdbuf.cs->cdw && is_empty_list(&radeon->dma.reserved)) - return; + goto flush_front; if (radeon->dma.flush) radeon->dma.flush( ctx ); @@ -1103,12 +1125,13 @@ void radeonFlush(GLcontext *ctx) if (radeon->cmdbuf.cs->cdw) rcommonFlushCmdBuf(radeon, __FUNCTION__); +flush_front: if ((ctx->DrawBuffer->Name == 0) && radeon->front_buffer_dirty) { __DRIscreen *const screen = radeon->radeonScreen->driScreen; if (screen->dri2.loader && (screen->dri2.loader->base.version >= 2) && (screen->dri2.loader->flushFrontBuffer != NULL)) { - __DRIdrawablePrivate * drawable = radeon_get_drawable(radeon); + __DRIdrawable * drawable = radeon_get_drawable(radeon); (*screen->dri2.loader->flushFrontBuffer)(drawable, drawable->loaderPrivate); /* Only clear the dirty bit if front-buffer rendering is no longer @@ -1208,7 +1231,7 @@ int rcommonFlushCmdBuf(radeonContextPtr rmesa, const char *caller) fprintf(stderr, "drmRadeonCmdBuffer: %d. Kernel failed to " "parse or rejected command stream. 
See dmesg " "for more info.\n", ret); - _mesa_exit(ret); + exit(ret); } return ret; @@ -1302,11 +1325,6 @@ void rcommonBeginBatch(radeonContextPtr rmesa, int n, const char *function, int line) { - if (!rmesa->cmdbuf.cs->cdw && dostate) { - radeon_print(RADEON_STATE, RADEON_NORMAL, - "Reemit state after flush (from %s)\n", function); - radeonEmitState(rmesa); - } radeon_cs_begin(rmesa->cmdbuf.cs, n, file, function, line); radeon_print(RADEON_CS, RADEON_VERBOSE, "BEGIN_BATCH(%d) at %d, from %s:%i\n", diff --git a/radeon/radeon_common.h b/radeon/radeon_common.h index 0608fe2..cd01c99 100644 --- a/radeon/radeon_common.h +++ b/radeon/radeon_common.h @@ -13,10 +13,10 @@ void radeonScissor(GLcontext* ctx, GLint x, GLint y, GLsizei w, GLsizei h); void radeonWaitForIdleLocked(radeonContextPtr radeon); extern uint32_t radeonGetAge(radeonContextPtr radeon); -void radeonCopyBuffer( __DRIdrawablePrivate *dPriv, +void radeonCopyBuffer( __DRIdrawable *dPriv, const drm_clip_rect_t *rect); -void radeonSwapBuffers(__DRIdrawablePrivate * dPriv); -void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv, +void radeonSwapBuffers(__DRIdrawable * dPriv); +void radeonCopySubBuffer(__DRIdrawable * dPriv, int x, int y, int w, int h ); void radeonUpdatePageFlipping(radeonContextPtr rmesa); @@ -42,10 +42,15 @@ void radeon_renderbuffer_set_bo(struct radeon_renderbuffer *rb, struct radeon_bo *bo); struct radeon_renderbuffer * -radeon_create_renderbuffer(gl_format format, __DRIdrawablePrivate *driDrawPriv); +radeon_create_renderbuffer(gl_format format, __DRIdrawable *driDrawPriv); + +void radeon_check_front_buffer_rendering(GLcontext *ctx); static inline struct radeon_renderbuffer *radeon_renderbuffer(struct gl_renderbuffer *rb) { struct radeon_renderbuffer *rrb = (struct radeon_renderbuffer *)rb; + radeon_print(RADEON_MEMORY, RADEON_TRACE, + "%s(rb %p)\n", + __func__, rb); if (rrb && rrb->base.ClassID == RADEON_RB_CLASS) return rrb; else @@ -54,6 +59,10 @@ static inline struct radeon_renderbuffer 
*radeon_renderbuffer(struct gl_renderbu static inline struct radeon_renderbuffer *radeon_get_renderbuffer(struct gl_framebuffer *fb, int att_index) { + radeon_print(RADEON_MEMORY, RADEON_TRACE, + "%s(fb %p, index %d)\n", + __func__, fb, att_index); + if (att_index >= 0) return radeon_renderbuffer(fb->Attachment[att_index].Renderbuffer); else diff --git a/radeon/radeon_common_context.c b/radeon/radeon_common_context.c index 71f70d7..94f4766 100644 --- a/radeon/radeon_common_context.c +++ b/radeon/radeon_common_context.c @@ -39,7 +39,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "drirenderbuffer.h" #include "drivers/common/meta.h" #include "main/context.h" -#include "main/framebuffer.h" #include "main/renderbuffer.h" #include "main/state.h" #include "main/simple_list.h" @@ -47,10 +46,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "swrast_setup/swrast_setup.h" #include "tnl/tnl.h" -#if defined(RADEON_R600) -#include "r600_context.h" -#endif - #define DRIVER_DATE "20090101" #ifndef RADEON_DEBUG @@ -181,10 +176,10 @@ static void radeonInitDriverFuncs(struct dd_function_table *functions) GLboolean radeonInitContext(radeonContextPtr radeon, struct dd_function_table* functions, const __GLcontextModes * glVisual, - __DRIcontextPrivate * driContextPriv, + __DRIcontext * driContextPriv, void *sharedContextPrivate) { - __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv; + __DRIscreen *sPriv = driContextPriv->driScreenPriv; radeonScreenPtr screen = (radeonScreenPtr) (sPriv->private); GLcontext* ctx; GLcontext* shareCtx; @@ -291,7 +286,7 @@ static void radeon_destroy_atom_list(radeonContextPtr radeon) * Cleanup common context fields. 
* Called by r200DestroyContext/r300DestroyContext */ -void radeonDestroyContext(__DRIcontextPrivate *driContextPriv ) +void radeonDestroyContext(__DRIcontext *driContextPriv ) { #ifdef RADEON_BO_TRACK FILE *track; @@ -355,7 +350,7 @@ void radeonDestroyContext(__DRIcontextPrivate *driContextPriv ) /* Force the context `c' to be unbound from its buffer. */ -GLboolean radeonUnbindContext(__DRIcontextPrivate * driContextPriv) +GLboolean radeonUnbindContext(__DRIcontext * driContextPriv) { radeonContextPtr radeon = (radeonContextPtr) driContextPriv->driverPrivate; @@ -499,7 +494,8 @@ radeon_bits_per_pixel(const struct radeon_renderbuffer *rb) } void -radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) +radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable, + GLboolean front_only) { unsigned int attachments[10]; __DRIbuffer *buffers = NULL; @@ -525,7 +521,7 @@ radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) struct radeon_renderbuffer *stencil_rb; i = 0; - if ((radeon->is_front_buffer_rendering || + if ((front_only || radeon->is_front_buffer_rendering || radeon->is_front_buffer_reading || !draw->color_rb[1]) && draw->color_rb[0]) { @@ -533,23 +529,25 @@ radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) attachments[i++] = radeon_bits_per_pixel(draw->color_rb[0]); } - if (draw->color_rb[1]) { - attachments[i++] = __DRI_BUFFER_BACK_LEFT; - attachments[i++] = radeon_bits_per_pixel(draw->color_rb[1]); - } + if (!front_only) { + if (draw->color_rb[1]) { + attachments[i++] = __DRI_BUFFER_BACK_LEFT; + attachments[i++] = radeon_bits_per_pixel(draw->color_rb[1]); + } - depth_rb = radeon_get_renderbuffer(&draw->base, BUFFER_DEPTH); - stencil_rb = radeon_get_renderbuffer(&draw->base, BUFFER_STENCIL); - - if ((depth_rb != NULL) && (stencil_rb != NULL)) { - attachments[i++] = __DRI_BUFFER_DEPTH_STENCIL; - attachments[i++] = radeon_bits_per_pixel(depth_rb); - } else if (depth_rb != NULL) { 
- attachments[i++] = __DRI_BUFFER_DEPTH; - attachments[i++] = radeon_bits_per_pixel(depth_rb); - } else if (stencil_rb != NULL) { - attachments[i++] = __DRI_BUFFER_STENCIL; - attachments[i++] = radeon_bits_per_pixel(stencil_rb); + depth_rb = radeon_get_renderbuffer(&draw->base, BUFFER_DEPTH); + stencil_rb = radeon_get_renderbuffer(&draw->base, BUFFER_STENCIL); + + if ((depth_rb != NULL) && (stencil_rb != NULL)) { + attachments[i++] = __DRI_BUFFER_DEPTH_STENCIL; + attachments[i++] = radeon_bits_per_pixel(depth_rb); + } else if (depth_rb != NULL) { + attachments[i++] = __DRI_BUFFER_DEPTH; + attachments[i++] = radeon_bits_per_pixel(depth_rb); + } else if (stencil_rb != NULL) { + attachments[i++] = __DRI_BUFFER_STENCIL; + attachments[i++] = radeon_bits_per_pixel(stencil_rb); + } } buffers = (*screen->dri2.loader->getBuffersWithFormat)(drawable, @@ -562,12 +560,14 @@ radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) i = 0; if (draw->color_rb[0]) attachments[i++] = __DRI_BUFFER_FRONT_LEFT; - if (draw->color_rb[1]) - attachments[i++] = __DRI_BUFFER_BACK_LEFT; - if (radeon_get_renderbuffer(&draw->base, BUFFER_DEPTH)) - attachments[i++] = __DRI_BUFFER_DEPTH; - if (radeon_get_renderbuffer(&draw->base, BUFFER_STENCIL)) - attachments[i++] = __DRI_BUFFER_STENCIL; + if (!front_only) { + if (draw->color_rb[1]) + attachments[i++] = __DRI_BUFFER_BACK_LEFT; + if (radeon_get_renderbuffer(&draw->base, BUFFER_DEPTH)) + attachments[i++] = __DRI_BUFFER_DEPTH; + if (radeon_get_renderbuffer(&draw->base, BUFFER_STENCIL)) + attachments[i++] = __DRI_BUFFER_STENCIL; + } buffers = (*screen->dri2.loader->getBuffers)(drawable, &drawable->w, @@ -715,9 +715,9 @@ radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) /* Force the context `c' to be the current context and associate with it * buffer `b'. 
*/ -GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv, - __DRIdrawablePrivate * driDrawPriv, - __DRIdrawablePrivate * driReadPriv) +GLboolean radeonMakeCurrent(__DRIcontext * driContextPriv, + __DRIdrawable * driDrawPriv, + __DRIdrawable * driReadPriv) { radeonContextPtr radeon; struct radeon_framebuffer *drfb; @@ -735,9 +735,9 @@ GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv, readfb = driReadPriv->driverPrivate; if (driContextPriv->driScreenPriv->dri2.enabled) { - radeon_update_renderbuffers(driContextPriv, driDrawPriv); + radeon_update_renderbuffers(driContextPriv, driDrawPriv, GL_FALSE); if (driDrawPriv != driReadPriv) - radeon_update_renderbuffers(driContextPriv, driReadPriv); + radeon_update_renderbuffers(driContextPriv, driReadPriv, GL_FALSE); _mesa_reference_renderbuffer(&radeon->state.color.rb, &(radeon_get_renderbuffer(&drfb->base, BUFFER_BACK_LEFT)->base)); _mesa_reference_renderbuffer(&radeon->state.depth.rb, diff --git a/radeon/radeon_common_context.h b/radeon/radeon_common_context.h index 6298748..d1a24e2 100644 --- a/radeon/radeon_common_context.h +++ b/radeon/radeon_common_context.h @@ -92,7 +92,7 @@ struct radeon_renderbuffer GLuint pf_pending; /**< sequence number of pending flip */ GLuint vbl_pending; /**< vblank sequence number of pending flip */ - __DRIdrawablePrivate *dPriv; + __DRIdrawable *dPriv; }; struct radeon_framebuffer @@ -328,6 +328,7 @@ struct radeon_swtcl_info { GLuint vertex_attr_count; GLuint emit_prediction; + struct radeon_bo *bo; }; #define RADEON_MAX_AOS_ARRAYS 16 @@ -380,8 +381,8 @@ struct radeon_store { }; struct radeon_dri_mirror { - __DRIcontextPrivate *context; /* DRI context */ - __DRIscreenPrivate *screen; /* DRI screen */ + __DRIcontext *context; /* DRI context */ + __DRIscreen *screen; /* DRI screen */ drm_context_t hwContext; drm_hw_lock_t *hwLock; @@ -517,17 +518,38 @@ struct radeon_context { void (*free_context)(GLcontext *ctx); void (*emit_query_finish)(radeonContextPtr radeon); 
void (*update_scissor)(GLcontext *ctx); + unsigned (*check_blit)(gl_format mesa_format); + unsigned (*blit)(GLcontext *ctx, + struct radeon_bo *src_bo, + intptr_t src_offset, + gl_format src_mesaformat, + unsigned src_pitch, + unsigned src_width, + unsigned src_height, + unsigned src_x_offset, + unsigned src_y_offset, + struct radeon_bo *dst_bo, + intptr_t dst_offset, + gl_format dst_mesaformat, + unsigned dst_pitch, + unsigned dst_width, + unsigned dst_height, + unsigned dst_x_offset, + unsigned dst_y_offset, + unsigned reg_width, + unsigned reg_height, + unsigned flip_y); } vtbl; }; #define RADEON_CONTEXT(glctx) ((radeonContextPtr)(ctx->DriverCtx)) -static inline __DRIdrawablePrivate* radeon_get_drawable(radeonContextPtr radeon) +static inline __DRIdrawable* radeon_get_drawable(radeonContextPtr radeon) { return radeon->dri.context->driDrawablePriv; } -static inline __DRIdrawablePrivate* radeon_get_readable(radeonContextPtr radeon) +static inline __DRIdrawable* radeon_get_readable(radeonContextPtr radeon) { return radeon->dri.context->driReadablePriv; } @@ -580,15 +602,16 @@ static INLINE uint32_t radeonPackFloat24(float f) GLboolean radeonInitContext(radeonContextPtr radeon, struct dd_function_table* functions, const __GLcontextModes * glVisual, - __DRIcontextPrivate * driContextPriv, + __DRIcontext * driContextPriv, void *sharedContextPrivate); void radeonCleanupContext(radeonContextPtr radeon); -GLboolean radeonUnbindContext(__DRIcontextPrivate * driContextPriv); -void radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable); -GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv, - __DRIdrawablePrivate * driDrawPriv, - __DRIdrawablePrivate * driReadPriv); -extern void radeonDestroyContext(__DRIcontextPrivate * driContextPriv); +GLboolean radeonUnbindContext(__DRIcontext * driContextPriv); +void radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable, + GLboolean front_only); +GLboolean 
radeonMakeCurrent(__DRIcontext * driContextPriv, + __DRIdrawable * driDrawPriv, + __DRIdrawable * driReadPriv); +extern void radeonDestroyContext(__DRIcontext * driContextPriv); #endif diff --git a/radeon/radeon_context.c b/radeon/radeon_context.c index 5e700be..878a453 100644 --- a/radeon/radeon_context.c +++ b/radeon/radeon_context.c @@ -39,10 +39,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/context.h" #include "main/simple_list.h" #include "main/imports.h" -#include "main/matrix.h" #include "main/extensions.h" -#include "main/framebuffer.h" -#include "main/state.h" #include "swrast/swrast.h" #include "swrast_setup/swrast_setup.h" @@ -61,8 +58,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "radeon_tex.h" #include "radeon_swtcl.h" #include "radeon_tcl.h" -#include "radeon_maos.h" #include "radeon_queryobj.h" +#include "radeon_blit.h" #define need_GL_ARB_occlusion_query #define need_GL_EXT_blend_minmax @@ -73,7 +70,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define DRIVER_DATE "20061018" -#include "vblank.h" #include "utils.h" #include "xmlpool.h" /* for symbolic values of enum-type options */ @@ -202,16 +198,18 @@ static void r100_init_vtbl(radeonContextPtr radeon) radeon->vtbl.fallback = radeonFallback; radeon->vtbl.free_context = r100_vtbl_free_context; radeon->vtbl.emit_query_finish = r100_emit_query_finish; + radeon->vtbl.check_blit = r100_check_blit; + radeon->vtbl.blit = r100_blit; } /* Create the device specific context. 
*/ GLboolean r100CreateContext( const __GLcontextModes *glVisual, - __DRIcontextPrivate *driContextPriv, + __DRIcontext *driContextPriv, void *sharedContextPrivate) { - __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv; + __DRIscreen *sPriv = driContextPriv->driScreenPriv; radeonScreenPtr screen = (radeonScreenPtr)(sPriv->private); struct dd_function_table functions; r100ContextPtr rmesa; @@ -228,6 +226,7 @@ r100CreateContext( const __GLcontextModes *glVisual, if ( !rmesa ) return GL_FALSE; + rmesa->radeon.radeonScreen = screen; r100_init_vtbl(&rmesa->radeon); /* init exp fog table data */ @@ -257,7 +256,7 @@ r100CreateContext( const __GLcontextModes *glVisual, * (the texture functions are especially important) */ _mesa_init_driver_functions( &functions ); - radeonInitTextureFuncs( &functions ); + radeonInitTextureFuncs( &rmesa->radeon, &functions ); radeonInitQueryObjFunctions(&functions); if (!radeonInitContext(&rmesa->radeon, &functions, @@ -281,6 +280,7 @@ r100CreateContext( const __GLcontextModes *glVisual, "texture_units"); ctx->Const.MaxTextureImageUnits = ctx->Const.MaxTextureUnits; ctx->Const.MaxTextureCoordUnits = ctx->Const.MaxTextureUnits; + ctx->Const.MaxCombinedTextureImageUnits = ctx->Const.MaxTextureUnits; i = driQueryOptioni( &rmesa->radeon.optionCache, "allow_large_textures"); @@ -318,6 +318,8 @@ r100CreateContext( const __GLcontextModes *glVisual, rmesa->boxes = 0; ctx->Const.MaxDrawBuffers = 1; + ctx->Const.MaxColorAttachments = 1; + ctx->Const.MaxRenderbufferSize = 2048; _mesa_set_mvp_with_dp4( ctx, GL_TRUE ); diff --git a/radeon/radeon_context.h b/radeon/radeon_context.h index 12ab33a..d84760b 100644 --- a/radeon/radeon_context.h +++ b/radeon/radeon_context.h @@ -451,9 +451,8 @@ struct r100_context { #define RADEON_OLD_PACKETS 1 extern GLboolean r100CreateContext( const __GLcontextModes *glVisual, - __DRIcontextPrivate *driContextPriv, + __DRIcontext *driContextPriv, void *sharedContextPrivate); - #endif /* __RADEON_CONTEXT_H__ */ diff 
--git a/radeon/radeon_cs_legacy.c b/radeon/radeon_cs_legacy.c index 45b608a..cc951a1 100644 --- a/radeon/radeon_cs_legacy.c +++ b/radeon/radeon_cs_legacy.c @@ -180,9 +180,8 @@ static int cs_begin(struct radeon_cs_int *cs, if (cs->cdw + ndw > cs->ndw) { uint32_t tmp, *ptr; - int num = (ndw > 0x3FF) ? ndw : 0x3FF; - tmp = (cs->cdw + 1 + num) & (~num); + tmp = (cs->cdw + ndw + 0x3ff) & (~0x3ff); ptr = (uint32_t*)realloc(cs->packets, 4 * tmp); if (ptr == NULL) { return -ENOMEM; diff --git a/radeon/radeon_debug.h b/radeon/radeon_debug.h index 26da31c..ef8b967 100644 --- a/radeon/radeon_debug.h +++ b/radeon/radeon_debug.h @@ -47,7 +47,11 @@ typedef enum radeon_debug_levels { * errors. */ #ifndef RADEON_DEBUG_LEVEL -#define RADEON_DEBUG_LEVEL RADEON_VERBOSE +# ifdef DEBUG +# define RADEON_DEBUG_LEVEL RADEON_TRACE +# else +# define RADEON_DEBUG_LEVEL RADEON_VERBOSE +# endif #endif typedef enum radeon_debug_types { diff --git a/radeon/radeon_dma.c b/radeon/radeon_dma.c index 232972d..22499bc 100644 --- a/radeon/radeon_dma.c +++ b/radeon/radeon_dma.c @@ -151,6 +151,7 @@ void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos, aos->components = size; aos->count = count; + radeon_bo_map(aos->bo, 1); out = (uint32_t*)((char*)aos->bo->ptr + aos->offset); switch (size) { case 1: radeonEmitVec4(out, data, stride, count); break; @@ -161,6 +162,7 @@ void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos, assert(0); break; } + radeon_bo_unmap(aos->bo); } void radeon_init_dma(radeonContextPtr rmesa) @@ -182,8 +184,6 @@ void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size) radeon_print(RADEON_DMA, RADEON_NORMAL, "%s size %d minimum_size %d\n", __FUNCTION__, size, rmesa->dma.minimum_size); - - /* unmap old reserved bo */ if (!is_empty_list(&rmesa->dma.reserved)) radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo); @@ -213,7 +213,7 @@ again_alloc: rmesa->dma.current_used = 0; rmesa->dma.current_vertexptr = 0; - + if 
(radeon_cs_space_check_with_bo(rmesa->cmdbuf.cs, first_elem(&rmesa->dma.reserved)->bo, RADEON_GEM_DOMAIN_GTT, 0)) @@ -223,7 +223,6 @@ again_alloc: /* Cmd buff have been flushed in radeon_revalidate_bos */ goto again_alloc; } - radeon_bo_map(first_elem(&rmesa->dma.reserved)->bo, 1); } @@ -281,7 +280,6 @@ void radeonFreeDmaRegions(radeonContextPtr rmesa) foreach_s(dma_bo, temp, &rmesa->dma.reserved) { remove_from_list(dma_bo); - radeon_bo_unmap(dma_bo->bo); radeon_bo_unref(dma_bo->bo); FREE(dma_bo); } @@ -337,6 +335,10 @@ void radeonReleaseDmaRegions(radeonContextPtr rmesa) /* request updated cs processing information from kernel */ legacy_track_pending(rmesa->radeonScreen->bom, 0); } + + if (!is_empty_list(&rmesa->dma.reserved)) + radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo); + /* move waiting bos to free list. wait list provides gpu time to handle data before reuse */ foreach_s(dma_bo, temp, &rmesa->dma.wait) { @@ -354,16 +356,16 @@ void radeonReleaseDmaRegions(radeonContextPtr rmesa) FREE(dma_bo); continue; } - if (!radeon_bo_is_idle(dma_bo->bo)) + if (!radeon_bo_is_idle(dma_bo->bo)) { + if (rmesa->radeonScreen->driScreen->dri2.enabled) + break; continue; + } remove_from_list(dma_bo); dma_bo->expire_counter = expire_at; insert_at_tail(&rmesa->dma.free, dma_bo); } - /* unmap the last dma region */ - if (!is_empty_list(&rmesa->dma.reserved)) - radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo); /* move reserved to wait list */ foreach_s(dma_bo, temp, &rmesa->dma.reserved) { /* free objects that are too small to be used because of large request */ @@ -396,12 +398,13 @@ void rcommon_flush_last_swtcl_prim( GLcontext *ctx ) { radeonContextPtr rmesa = RADEON_CONTEXT(ctx); struct radeon_dma *dma = &rmesa->dma; - if (RADEON_DEBUG & RADEON_IOCTL) fprintf(stderr, "%s\n", __FUNCTION__); dma->flush = NULL; + radeon_bo_unmap(rmesa->swtcl.bo); + if (!is_empty_list(&dma->reserved)) { GLuint current_offset = dma->current_used; @@ -416,6 +419,8 @@ void 
rcommon_flush_last_swtcl_prim( GLcontext *ctx ) } rmesa->swtcl.numverts = 0; } + radeon_bo_unref(rmesa->swtcl.bo); + rmesa->swtcl.bo = NULL; } /* Alloc space in the current dma region. */ @@ -426,6 +431,7 @@ rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize ) void *head; if (RADEON_DEBUG & RADEON_IOCTL) fprintf(stderr, "%s\n", __FUNCTION__); + if(is_empty_list(&rmesa->dma.reserved) ||rmesa->dma.current_vertexptr + bytes > first_elem(&rmesa->dma.reserved)->bo->size) { if (rmesa->dma.flush) { @@ -449,7 +455,13 @@ rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize ) rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 == rmesa->dma.current_vertexptr ); - head = (first_elem(&rmesa->dma.reserved)->bo->ptr + rmesa->dma.current_vertexptr); + if (!rmesa->swtcl.bo) { + rmesa->swtcl.bo = first_elem(&rmesa->dma.reserved)->bo; + radeon_bo_ref(rmesa->swtcl.bo); + radeon_bo_map(rmesa->swtcl.bo, 1); + } + + head = (rmesa->swtcl.bo->ptr + rmesa->dma.current_vertexptr); rmesa->dma.current_vertexptr += bytes; rmesa->swtcl.numverts += nverts; return head; diff --git a/radeon/radeon_fbo.c b/radeon/radeon_fbo.c index fc21069..46664a1 100644 --- a/radeon/radeon_fbo.c +++ b/radeon/radeon_fbo.c @@ -29,6 +29,7 @@ #include "main/imports.h" #include "main/macros.h" #include "main/mtypes.h" +#include "main/enums.h" #include "main/fbobject.h" #include "main/framebuffer.h" #include "main/renderbuffer.h" @@ -42,7 +43,7 @@ #define FILE_DEBUG_FLAG RADEON_TEXTURE #define DBG(...) 
do { \ if (RADEON_DEBUG & FILE_DEBUG_FLAG) \ - _mesa_printf(__VA_ARGS__); \ + printf(__VA_ARGS__); \ } while(0) static struct gl_framebuffer * @@ -56,18 +57,26 @@ radeon_delete_renderbuffer(struct gl_renderbuffer *rb) { struct radeon_renderbuffer *rrb = radeon_renderbuffer(rb); + radeon_print(RADEON_TEXTURE, RADEON_TRACE, + "%s(rb %p, rrb %p) \n", + __func__, rb, rrb); + ASSERT(rrb); if (rrb && rrb->bo) { radeon_bo_unref(rrb->bo); } - _mesa_free(rrb); + free(rrb); } static void * radeon_get_pointer(GLcontext *ctx, struct gl_renderbuffer *rb, GLint x, GLint y) { + radeon_print(RADEON_TEXTURE, RADEON_TRACE, + "%s(%p, rb %p) \n", + __func__, ctx, rb); + return NULL; } @@ -85,6 +94,10 @@ radeon_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, GLboolean software_buffer = GL_FALSE; int cpp; + radeon_print(RADEON_TEXTURE, RADEON_TRACE, + "%s(%p, rb %p) \n", + __func__, ctx, rb); + ASSERT(rb->Name != 0); switch (internalFormat) { case GL_R3_G3_B2: @@ -166,8 +179,9 @@ radeon_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, uint32_t size; uint32_t pitch = ((cpp * width + 63) & ~63) / cpp; - fprintf(stderr,"Allocating %d x %d radeon RBO (pitch %d)\n", width, - height, pitch); + if (RADEON_DEBUG & RADEON_MEMORY) + fprintf(stderr,"Allocating %d x %d radeon RBO (pitch %d)\n", width, + height, pitch); size = pitch * height * cpp; rrb->pitch = pitch * cpp; @@ -199,6 +213,10 @@ radeon_alloc_window_storage(GLcontext * ctx, struct gl_renderbuffer *rb, rb->Width = width; rb->Height = height; rb->InternalFormat = internalFormat; + radeon_print(RADEON_TEXTURE, RADEON_TRACE, + "%s(%p, rb %p) \n", + __func__, ctx, rb); + return GL_TRUE; } @@ -211,6 +229,10 @@ radeon_resize_buffers(GLcontext *ctx, struct gl_framebuffer *fb, struct radeon_framebuffer *radeon_fb = (struct radeon_framebuffer*)fb; int i; + radeon_print(RADEON_TEXTURE, RADEON_TRACE, + "%s(%p, fb %p) \n", + __func__, ctx, fb); + _mesa_resize_framebuffer(ctx, fb, width, height); 
fb->Initialized = GL_TRUE; /* XXX remove someday */ @@ -246,11 +268,16 @@ radeon_nop_alloc_storage(GLcontext * ctx, struct gl_renderbuffer *rb, * Not used for user-created renderbuffers. */ struct radeon_renderbuffer * -radeon_create_renderbuffer(gl_format format, __DRIdrawablePrivate *driDrawPriv) +radeon_create_renderbuffer(gl_format format, __DRIdrawable *driDrawPriv) { struct radeon_renderbuffer *rrb; rrb = CALLOC_STRUCT(radeon_renderbuffer); + + radeon_print(RADEON_TEXTURE, RADEON_TRACE, + "%s( rrb %p ) \n", + __func__, rrb); + if (!rrb) return NULL; @@ -330,6 +357,11 @@ radeon_new_renderbuffer(GLcontext * ctx, GLuint name) struct radeon_renderbuffer *rrb; rrb = CALLOC_STRUCT(radeon_renderbuffer); + + radeon_print(RADEON_TEXTURE, RADEON_TRACE, + "%s(%p, rrb %p) \n", + __func__, ctx, rrb); + if (!rrb) return NULL; @@ -347,6 +379,11 @@ static void radeon_bind_framebuffer(GLcontext * ctx, GLenum target, struct gl_framebuffer *fb, struct gl_framebuffer *fbread) { + radeon_print(RADEON_TEXTURE, RADEON_TRACE, + "%s(%p, fb %p, target %s) \n", + __func__, ctx, fb, + _mesa_lookup_enum_by_nr(target)); + if (target == GL_FRAMEBUFFER_EXT || target == GL_DRAW_FRAMEBUFFER_EXT) { radeon_draw_buffer(ctx, fb); } @@ -364,6 +401,10 @@ radeon_framebuffer_renderbuffer(GLcontext * ctx, if (ctx->Driver.Flush) ctx->Driver.Flush(ctx); /* +r6/r7 */ + radeon_print(RADEON_TEXTURE, RADEON_TRACE, + "%s(%p, fb %p, rb %p) \n", + __func__, ctx, fb, rb); + _mesa_framebuffer_renderbuffer(ctx, fb, attachment, rb); radeon_draw_buffer(ctx, fb); } @@ -382,6 +423,10 @@ radeon_update_wrapper(GLcontext *ctx, struct radeon_renderbuffer *rrb, int retry = 0; gl_format texFormat; + radeon_print(RADEON_TEXTURE, RADEON_TRACE, + "%s(%p, rrb %p, texImage %p) \n", + __func__, ctx, rrb, texImage); + restart: if (texImage->TexFormat == _dri_texformat_argb8888) { rrb->base.DataType = GL_UNSIGNED_BYTE; @@ -452,6 +497,11 @@ radeon_wrap_texture(GLcontext * ctx, struct gl_texture_image *texImage) /* make an 
radeon_renderbuffer to wrap the texture image */ rrb = CALLOC_STRUCT(radeon_renderbuffer); + + radeon_print(RADEON_TEXTURE, RADEON_TRACE, + "%s(%p, rrb %p, texImage %p) \n", + __func__, ctx, rrb, texImage); + if (!rrb) { _mesa_error(ctx, GL_OUT_OF_MEMORY, "glFramebufferTexture"); return NULL; @@ -461,7 +511,7 @@ radeon_wrap_texture(GLcontext * ctx, struct gl_texture_image *texImage) rrb->base.ClassID = RADEON_RB_CLASS; if (!radeon_update_wrapper(ctx, rrb, texImage)) { - _mesa_free(rrb); + free(rrb); return NULL; } @@ -479,6 +529,10 @@ radeon_render_texture(GLcontext * ctx, radeon_texture_image *radeon_image; GLuint imageOffset; + radeon_print(RADEON_TEXTURE, RADEON_TRACE, + "%s(%p, fb %p, rrb %p, att %p)\n", + __func__, ctx, fb, rrb, att); + (void) fb; ASSERT(newImage); @@ -510,7 +564,7 @@ radeon_render_texture(GLcontext * ctx, return; } - DBG("Begin render texture tid %x tex=%u w=%d h=%d refcount=%d\n", + DBG("Begin render texture tid %lx tex=%u w=%d h=%d refcount=%d\n", _glthread_GetID(), att->Texture->Name, newImage->Width, newImage->Height, rrb->base.RefCount); @@ -530,10 +584,9 @@ radeon_render_texture(GLcontext * ctx, att->TextureLevel); if (att->Texture->Target == GL_TEXTURE_3D) { - GLuint offsets[6]; - radeon_miptree_depth_offsets(radeon_image->mt, att->TextureLevel, - offsets); - imageOffset += offsets[att->Zoffset]; + imageOffset += radeon_image->mt->levels[att->TextureLevel].rowstride * + radeon_image->mt->levels[att->TextureLevel].height * + att->Zoffset; } /* store that offset in the region, along with the correct pitch for diff --git a/radeon/radeon_ioctl.c b/radeon/radeon_ioctl.c index a0106d0..5ac526c 100644 --- a/radeon/radeon_ioctl.c +++ b/radeon/radeon_ioctl.c @@ -38,18 +38,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include <errno.h> #include "main/attrib.h" -#include "main/enable.h" -#include "main/blend.h" #include "main/bufferobj.h" -#include "main/buffers.h" -#include "main/depth.h" -#include "main/shaders.h" -#include "main/texstate.h" -#include "main/varray.h" -#include "glapi/dispatch.h" #include "swrast/swrast.h" -#include "main/stencil.h" -#include "main/matrix.h" #include "main/glheader.h" #include "main/imports.h" @@ -58,15 +48,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "radeon_context.h" #include "radeon_common.h" -#include "radeon_state.h" #include "radeon_ioctl.h" -#include "radeon_tcl.h" -#include "radeon_sanity.h" #define STANDALONE_MMIO -#include "radeon_macros.h" /* for INREG() */ -#include "drirenderbuffer.h" #include "vblank.h" #define RADEON_TIMEOUT 512 @@ -107,6 +92,8 @@ void radeonSetUpAtomList( r100ContextPtr rmesa ) insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.lit[i]); for (i = 0; i < 6; ++i) insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.ucp[i]); + if (rmesa->radeon.radeonScreen->kernel_mm) + insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.stp); insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.eye); insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.grd); insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.fog); @@ -449,12 +436,14 @@ void radeonEmitAOS( r100ContextPtr rmesa, static void radeonKernelClear(GLcontext *ctx, GLuint flags) { r100ContextPtr rmesa = R100_CONTEXT(ctx); - __DRIdrawablePrivate *dPriv = radeon_get_drawable(&rmesa->radeon); + __DRIdrawable *dPriv = radeon_get_drawable(&rmesa->radeon); drm_radeon_sarea_t *sarea = rmesa->radeon.sarea; uint32_t clear; GLint ret, i; GLint cx, cy, cw, ch; + radeonEmitState(&rmesa->radeon); + LOCK_HARDWARE( &rmesa->radeon ); /* compute region after locking: */ @@ -570,11 +559,15 @@ static void radeonKernelClear(GLcontext *ctx, GLuint flags) static void radeonClear( GLcontext *ctx, GLbitfield mask ) { r100ContextPtr rmesa = 
R100_CONTEXT(ctx); - __DRIdrawablePrivate *dPriv = radeon_get_drawable(&rmesa->radeon); + __DRIdrawable *dPriv = radeon_get_drawable(&rmesa->radeon); GLuint flags = 0; GLuint color_mask = 0; GLuint orig_mask = mask; + if (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_FRONT_RIGHT)) { + rmesa->radeon.front_buffer_dirty = GL_TRUE; + } + if ( RADEON_DEBUG & RADEON_IOCTL ) { fprintf( stderr, "radeonClear\n"); } diff --git a/radeon/radeon_lighting.c b/radeon/radeon_lighting.c deleted file mode 100644 index ba444f2..0000000 --- a/radeon/radeon_lighting.c +++ /dev/null @@ -1,681 +0,0 @@ -/* - * Copyright 2000, 2001 VA Linux Systems Inc., Fremont, California. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: - * Gareth Hughes <gareth@valinux.com> - * Keith Whitwell <keith@tungstengraphics.com> - */ - -#include "main/glheader.h" -#include "main/imports.h" -#include "api_arrayelt.h" -/* #include "mmath.h" */ -#include "main/enums.h" -#include "colormac.h" - - -#include "radeon_context.h" -#include "radeon_ioctl.h" -#include "radeon_state.h" -#include "radeon_tcl.h" -#include "radeon_tex.h" -#include "radeon_vtxfmt.h" - - - -/* ============================================================= - * Materials - */ - - -/* Update on colormaterial, material emmissive/ambient, - * lightmodel.globalambient - */ -void update_global_ambient( GLcontext *ctx ) -{ - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); - float *fcmd = (float *)RADEON_DB_STATE( glt ); - - /* Need to do more if both emmissive & ambient are PREMULT: - */ - if ((rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] & - ((3 << RADEON_EMISSIVE_SOURCE_SHIFT) | - (3 << RADEON_AMBIENT_SOURCE_SHIFT))) == 0) - { - COPY_3V( &fcmd[GLT_RED], - ctx->Light.Material[0].Emission); - ACC_SCALE_3V( &fcmd[GLT_RED], - ctx->Light.Model.Ambient, - ctx->Light.Material[0].Ambient); - } - else - { - COPY_3V( &fcmd[GLT_RED], ctx->Light.Model.Ambient ); - } - - RADEON_DB_STATECHANGE(rmesa, &rmesa->hw.glt); -} - -/* Update on change to - * - light[p].colors - * - light[p].enabled - * - material, - * - colormaterial enabled - * - colormaterial bitmask - */ -void update_light_colors( GLcontext *ctx, GLuint p ) -{ - struct gl_light *l = &ctx->Light.Light[p]; - -/* fprintf(stderr, "%s\n", __FUNCTION__); */ - - if (l->Enabled) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); - float *fcmd = (float *)RADEON_DB_STATE( lit[p] ); - GLuint bitmask = ctx->Light.ColorMaterialBitmask; - struct gl_material *mat = &ctx->Light.Material[0]; - - COPY_4V( &fcmd[LIT_AMBIENT_RED], l->Ambient ); - COPY_4V( &fcmd[LIT_DIFFUSE_RED], l->Diffuse ); - COPY_4V( &fcmd[LIT_SPECULAR_RED], l->Specular ); - - if (!ctx->Light.ColorMaterialEnabled) - bitmask = 0; - - if 
((bitmask & FRONT_AMBIENT_BIT) == 0) - SELF_SCALE_3V( &fcmd[LIT_AMBIENT_RED], mat->Ambient ); - - if ((bitmask & FRONT_DIFFUSE_BIT) == 0) - SELF_SCALE_3V( &fcmd[LIT_DIFFUSE_RED], mat->Diffuse ); - - if ((bitmask & FRONT_SPECULAR_BIT) == 0) - SELF_SCALE_3V( &fcmd[LIT_SPECULAR_RED], mat->Specular ); - - RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.lit[p] ); - } -} - -/* Also fallback for asym colormaterial mode in twoside lighting... - */ -void check_twoside_fallback( GLcontext *ctx ) -{ - GLboolean fallback = GL_FALSE; - - if (ctx->Light.Enabled && ctx->Light.Model.TwoSide) { - if (memcmp( &ctx->Light.Material[0], - &ctx->Light.Material[1], - sizeof(struct gl_material)) != 0) - fallback = GL_TRUE; - else if (ctx->Light.ColorMaterialEnabled && - (ctx->Light.ColorMaterialBitmask & BACK_MATERIAL_BITS) != - ((ctx->Light.ColorMaterialBitmask & FRONT_MATERIAL_BITS)<<1)) - fallback = GL_TRUE; - } - - TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_LIGHT_TWOSIDE, fallback ); -} - -void radeonColorMaterial( GLcontext *ctx, GLenum face, GLenum mode ) -{ - if (ctx->Light.ColorMaterialEnabled) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); - GLuint light_model_ctl = rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL]; - GLuint mask = ctx->Light.ColorMaterialBitmask; - - /* Default to PREMULT: - */ - light_model_ctl &= ~((3 << RADEON_EMISSIVE_SOURCE_SHIFT) | - (3 << RADEON_AMBIENT_SOURCE_SHIFT) | - (3 << RADEON_DIFFUSE_SOURCE_SHIFT) | - (3 << RADEON_SPECULAR_SOURCE_SHIFT)); - - if (mask & FRONT_EMISSION_BIT) { - light_model_ctl |= (RADEON_LM_SOURCE_VERTEX_DIFFUSE << - RADEON_EMISSIVE_SOURCE_SHIFT); - } - - if (mask & FRONT_AMBIENT_BIT) { - light_model_ctl |= (RADEON_LM_SOURCE_VERTEX_DIFFUSE << - RADEON_AMBIENT_SOURCE_SHIFT); - } - - if (mask & FRONT_DIFFUSE_BIT) { - light_model_ctl |= (RADEON_LM_SOURCE_VERTEX_DIFFUSE << - RADEON_DIFFUSE_SOURCE_SHIFT); - } - - if (mask & FRONT_SPECULAR_BIT) { - light_model_ctl |= (RADEON_LM_SOURCE_VERTEX_DIFFUSE << - RADEON_SPECULAR_SOURCE_SHIFT); - } - - if 
(light_model_ctl != rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL]) { - GLuint p; - - RADEON_STATECHANGE( rmesa, tcl ); - rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] = light_model_ctl; - - for (p = 0 ; p < MAX_LIGHTS; p++) - update_light_colors( ctx, p ); - update_global_ambient( ctx ); - } - } - - check_twoside_fallback( ctx ); -} - -void radeonUpdateMaterial( GLcontext *ctx ) -{ - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); - GLfloat *fcmd = (GLfloat *)RADEON_DB_STATE( mtl ); - GLuint p; - GLuint mask = ~0; - - if (ctx->Light.ColorMaterialEnabled) - mask &= ~ctx->Light.ColorMaterialBitmask; - - if (RADEON_DEBUG & RADEON_STATE) - fprintf(stderr, "%s\n", __FUNCTION__); - - - if (mask & FRONT_EMISSION_BIT) { - fcmd[MTL_EMMISSIVE_RED] = ctx->Light.Material[0].Emission[0]; - fcmd[MTL_EMMISSIVE_GREEN] = ctx->Light.Material[0].Emission[1]; - fcmd[MTL_EMMISSIVE_BLUE] = ctx->Light.Material[0].Emission[2]; - fcmd[MTL_EMMISSIVE_ALPHA] = ctx->Light.Material[0].Emission[3]; - } - if (mask & FRONT_AMBIENT_BIT) { - fcmd[MTL_AMBIENT_RED] = ctx->Light.Material[0].Ambient[0]; - fcmd[MTL_AMBIENT_GREEN] = ctx->Light.Material[0].Ambient[1]; - fcmd[MTL_AMBIENT_BLUE] = ctx->Light.Material[0].Ambient[2]; - fcmd[MTL_AMBIENT_ALPHA] = ctx->Light.Material[0].Ambient[3]; - } - if (mask & FRONT_DIFFUSE_BIT) { - fcmd[MTL_DIFFUSE_RED] = ctx->Light.Material[0].Diffuse[0]; - fcmd[MTL_DIFFUSE_GREEN] = ctx->Light.Material[0].Diffuse[1]; - fcmd[MTL_DIFFUSE_BLUE] = ctx->Light.Material[0].Diffuse[2]; - fcmd[MTL_DIFFUSE_ALPHA] = ctx->Light.Material[0].Diffuse[3]; - } - if (mask & FRONT_SPECULAR_BIT) { - fcmd[MTL_SPECULAR_RED] = ctx->Light.Material[0].Specular[0]; - fcmd[MTL_SPECULAR_GREEN] = ctx->Light.Material[0].Specular[1]; - fcmd[MTL_SPECULAR_BLUE] = ctx->Light.Material[0].Specular[2]; - fcmd[MTL_SPECULAR_ALPHA] = ctx->Light.Material[0].Specular[3]; - } - if (mask & FRONT_SHININESS_BIT) { - fcmd[MTL_SHININESS] = ctx->Light.Material[0].Shininess; - } - - if (RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.mtl )) { - 
for (p = 0 ; p < MAX_LIGHTS; p++) - update_light_colors( ctx, p ); - - check_twoside_fallback( ctx ); - update_global_ambient( ctx ); - } - else if (RADEON_DEBUG & (RADEON_PRIMS|DEBUG_STATE)) - fprintf(stderr, "%s: Elided noop material call\n", __FUNCTION__); -} - -/* _NEW_LIGHT - * _NEW_MODELVIEW - * _MESA_NEW_NEED_EYE_COORDS - * - * Uses derived state from mesa: - * _VP_inf_norm - * _h_inf_norm - * _Position - * _NormSpotDirection - * _ModelViewInvScale - * _NeedEyeCoords - * _EyeZDir - * - * which are calculated in light.c and are correct for the current - * lighting space (model or eye), hence dependencies on _NEW_MODELVIEW - * and _MESA_NEW_NEED_EYE_COORDS. - */ -void radeonUpdateLighting( GLcontext *ctx ) -{ - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); - - /* Have to check these, or have an automatic shortcircuit mechanism - * to remove noop statechanges. (Or just do a better job on the - * front end). - */ - { - GLuint tmp = rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL]; - - if (ctx->_NeedEyeCoords) - tmp &= ~RADEON_LIGHT_IN_MODELSPACE; - else - tmp |= RADEON_LIGHT_IN_MODELSPACE; - - - /* Leave this test disabled: (unexplained q3 lockup) (even with - new packets) - */ - if (tmp != rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL]) - { - RADEON_STATECHANGE( rmesa, tcl ); - rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] = tmp; - } - } - - { - GLfloat *fcmd = (GLfloat *)RADEON_DB_STATE( eye ); - fcmd[EYE_X] = ctx->_EyeZDir[0]; - fcmd[EYE_Y] = ctx->_EyeZDir[1]; - fcmd[EYE_Z] = - ctx->_EyeZDir[2]; - fcmd[EYE_RESCALE_FACTOR] = ctx->_ModelViewInvScale; - RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.eye ); - } - - -/* RADEON_STATECHANGE( rmesa, glt ); */ - - if (ctx->Light.Enabled) { - GLint p; - for (p = 0 ; p < MAX_LIGHTS; p++) { - if (ctx->Light.Light[p].Enabled) { - struct gl_light *l = &ctx->Light.Light[p]; - GLfloat *fcmd = (GLfloat *)RADEON_DB_STATE( lit[p] ); - - if (l->EyePosition[3] == 0.0) { - COPY_3FV( &fcmd[LIT_POSITION_X], l->_VP_inf_norm ); - COPY_3FV( &fcmd[LIT_DIRECTION_X], 
l->_h_inf_norm ); - fcmd[LIT_POSITION_W] = 0; - fcmd[LIT_DIRECTION_W] = 0; - } else { - COPY_4V( &fcmd[LIT_POSITION_X], l->_Position ); - fcmd[LIT_DIRECTION_X] = -l->_NormSpotDirection[0]; - fcmd[LIT_DIRECTION_Y] = -l->_NormSpotDirection[1]; - fcmd[LIT_DIRECTION_Z] = -l->_NormSpotDirection[2]; - fcmd[LIT_DIRECTION_W] = 0; - } - - RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.lit[p] ); - } - } - } -} - - -void radeonLightfv( GLcontext *ctx, GLenum light, - GLenum pname, const GLfloat *params ) -{ - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); - GLint p = light - GL_LIGHT0; - struct gl_light *l = &ctx->Light.Light[p]; - GLfloat *fcmd = (GLfloat *)rmesa->hw.lit[p].cmd; - - - switch (pname) { - case GL_AMBIENT: - case GL_DIFFUSE: - case GL_SPECULAR: - update_light_colors( ctx, p ); - break; - - case GL_SPOT_DIRECTION: - /* picked up in update_light */ - break; - - case GL_POSITION: { - /* positions picked up in update_light, but can do flag here */ - GLuint flag = (p&1)? RADEON_LIGHT_1_IS_LOCAL : RADEON_LIGHT_0_IS_LOCAL; - GLuint idx = TCL_PER_LIGHT_CTL_0 + p/2; - - RADEON_STATECHANGE(rmesa, tcl); - if (l->EyePosition[3] != 0.0F) - rmesa->hw.tcl.cmd[idx] |= flag; - else - rmesa->hw.tcl.cmd[idx] &= ~flag; - break; - } - - case GL_SPOT_EXPONENT: - RADEON_STATECHANGE(rmesa, lit[p]); - fcmd[LIT_SPOT_EXPONENT] = params[0]; - break; - - case GL_SPOT_CUTOFF: { - GLuint flag = (p&1) ? 
RADEON_LIGHT_1_IS_SPOT : RADEON_LIGHT_0_IS_SPOT; - GLuint idx = TCL_PER_LIGHT_CTL_0 + p/2; - - RADEON_STATECHANGE(rmesa, lit[p]); - fcmd[LIT_SPOT_CUTOFF] = l->_CosCutoff; - - RADEON_STATECHANGE(rmesa, tcl); - if (l->SpotCutoff != 180.0F) - rmesa->hw.tcl.cmd[idx] |= flag; - else - rmesa->hw.tcl.cmd[idx] &= ~flag; - break; - } - - case GL_CONSTANT_ATTENUATION: - RADEON_STATECHANGE(rmesa, lit[p]); - fcmd[LIT_ATTEN_CONST] = params[0]; - break; - case GL_LINEAR_ATTENUATION: - RADEON_STATECHANGE(rmesa, lit[p]); - fcmd[LIT_ATTEN_LINEAR] = params[0]; - break; - case GL_QUADRATIC_ATTENUATION: - RADEON_STATECHANGE(rmesa, lit[p]); - fcmd[LIT_ATTEN_QUADRATIC] = params[0]; - break; - default: - return; - } - -} - - - - -void radeonLightModelfv( GLcontext *ctx, GLenum pname, - const GLfloat *param ) -{ - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); - - switch (pname) { - case GL_LIGHT_MODEL_AMBIENT: - update_global_ambient( ctx ); - break; - - case GL_LIGHT_MODEL_LOCAL_VIEWER: - RADEON_STATECHANGE( rmesa, tcl ); - if (ctx->Light.Model.LocalViewer) - rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_LOCAL_VIEWER; - else - rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= ~RADEON_LOCAL_VIEWER; - break; - - case GL_LIGHT_MODEL_TWO_SIDE: - RADEON_STATECHANGE( rmesa, tcl ); - if (ctx->Light.Model.TwoSide) - rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= RADEON_LIGHT_TWOSIDE; - else - rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~RADEON_LIGHT_TWOSIDE; - - check_twoside_fallback( ctx ); - -#if _HAVE_SWTNL - if (rmesa->TclFallback) { - radeonChooseRenderState( ctx ); - radeonChooseVertexState( ctx ); - } -#endif - break; - - case GL_LIGHT_MODEL_COLOR_CONTROL: - radeonUpdateSpecular(ctx); - - RADEON_STATECHANGE( rmesa, tcl ); - if (ctx->Light.Model.ColorControl == GL_SEPARATE_SPECULAR_COLOR) - rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= - ~RADEON_DIFFUSE_SPECULAR_COMBINE; - else - rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= - RADEON_DIFFUSE_SPECULAR_COMBINE; - break; - - default: - break; - } -} - - 
-/* ============================================================= - * Fog - */ - - -static void radeonFogfv( GLcontext *ctx, GLenum pname, const GLfloat *param ) -{ - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); - union { int i; float f; } c, d; - GLchan col[4]; - - c.i = rmesa->hw.fog.cmd[FOG_C]; - d.i = rmesa->hw.fog.cmd[FOG_D]; - - switch (pname) { - case GL_FOG_MODE: - if (!ctx->Fog.Enabled) - return; - RADEON_STATECHANGE(rmesa, tcl); - rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~RADEON_TCL_FOG_MASK; - switch (ctx->Fog.Mode) { - case GL_LINEAR: - rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= RADEON_TCL_FOG_LINEAR; - if (ctx->Fog.Start == ctx->Fog.End) { - c.f = 1.0F; - d.f = 1.0F; - } - else { - c.f = ctx->Fog.End/(ctx->Fog.End-ctx->Fog.Start); - d.f = 1.0/(ctx->Fog.End-ctx->Fog.Start); - } - break; - case GL_EXP: - rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= RADEON_TCL_FOG_EXP; - c.f = 0.0; - d.f = ctx->Fog.Density; - break; - case GL_EXP2: - rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= RADEON_TCL_FOG_EXP2; - c.f = 0.0; - d.f = -(ctx->Fog.Density * ctx->Fog.Density); - break; - default: - return; - } - break; - case GL_FOG_DENSITY: - switch (ctx->Fog.Mode) { - case GL_EXP: - c.f = 0.0; - d.f = ctx->Fog.Density; - break; - case GL_EXP2: - c.f = 0.0; - d.f = -(ctx->Fog.Density * ctx->Fog.Density); - break; - default: - break; - } - break; - case GL_FOG_START: - case GL_FOG_END: - if (ctx->Fog.Mode == GL_LINEAR) { - if (ctx->Fog.Start == ctx->Fog.End) { - c.f = 1.0F; - d.f = 1.0F; - } else { - c.f = ctx->Fog.End/(ctx->Fog.End-ctx->Fog.Start); - d.f = 1.0/(ctx->Fog.End-ctx->Fog.Start); - } - } - break; - case GL_FOG_COLOR: - RADEON_STATECHANGE( rmesa, ctx ); - UNCLAMPED_FLOAT_TO_RGB_CHAN( col, ctx->Fog.Color ); - rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] = - radeonPackColor( 4, col[0], col[1], col[2], 0 ); - break; - case GL_FOG_COORDINATE_SOURCE_EXT: - /* What to do? 
- */ - break; - default: - return; - } - - if (c.i != rmesa->hw.fog.cmd[FOG_C] || d.i != rmesa->hw.fog.cmd[FOG_D]) { - RADEON_STATECHANGE( rmesa, fog ); - rmesa->hw.fog.cmd[FOG_C] = c.i; - rmesa->hw.fog.cmd[FOG_D] = d.i; - } -} - -/* Examine lighting and texture state to determine if separate specular - * should be enabled. - */ -void radeonUpdateSpecular( GLcontext *ctx ) -{ - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); - GLuint p = rmesa->hw.ctx.cmd[CTX_PP_CNTL]; - - if (NEED_SECONDARY_COLOR(ctx)) { - p |= RADEON_SPECULAR_ENABLE; - } else { - p &= ~RADEON_SPECULAR_ENABLE; - } - - if ( rmesa->hw.ctx.cmd[CTX_PP_CNTL] != p ) { - RADEON_STATECHANGE( rmesa, ctx ); - rmesa->hw.ctx.cmd[CTX_PP_CNTL] = p; - } - - /* Bizzare: have to leave lighting enabled to get fog. - */ - RADEON_STATECHANGE( rmesa, tcl ); - if ((ctx->Light.Enabled && - ctx->Light.Model.ColorControl == GL_SEPARATE_SPECULAR_COLOR)) { - rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] |= RADEON_TCL_COMPUTE_SPECULAR; - rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] |= RADEON_TCL_COMPUTE_DIFFUSE; - rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_SPEC; - rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_DIFFUSE; - rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_LIGHTING_ENABLE; - } - else if (ctx->Fog.Enabled) { - if (ctx->Light.Enabled) { - rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] |= RADEON_TCL_COMPUTE_SPECULAR; - rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] |= RADEON_TCL_COMPUTE_DIFFUSE; - rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_SPEC; - rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_DIFFUSE; - rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_LIGHTING_ENABLE; - } else { - rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] |= RADEON_TCL_COMPUTE_SPECULAR; - rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] &= ~RADEON_TCL_COMPUTE_DIFFUSE; - rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_SPEC; - rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_DIFFUSE; - rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= 
RADEON_LIGHTING_ENABLE; - } - } - else if (ctx->Light.Enabled) { - rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] &= ~RADEON_TCL_COMPUTE_SPECULAR; - rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] |= RADEON_TCL_COMPUTE_DIFFUSE; - rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &= ~RADEON_TCL_VTX_PK_SPEC; - rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_DIFFUSE; - rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_LIGHTING_ENABLE; - } else if (ctx->Fog.ColorSumEnabled ) { - rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] &= ~RADEON_TCL_COMPUTE_SPECULAR; - rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] &= ~RADEON_TCL_COMPUTE_DIFFUSE; - rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_SPEC; - rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_DIFFUSE; - rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= ~RADEON_LIGHTING_ENABLE; - } else { - rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] &= ~RADEON_TCL_COMPUTE_SPECULAR; - rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] &= ~RADEON_TCL_COMPUTE_DIFFUSE; - rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &= ~RADEON_TCL_VTX_PK_SPEC; - rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_DIFFUSE; - rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= ~RADEON_LIGHTING_ENABLE; - } - -#if _HAVE_SWTNL - /* Update vertex/render formats - */ - if (rmesa->TclFallback) { - radeonChooseRenderState( ctx ); - radeonChooseVertexState( ctx ); - } -#endif -} - - - -static void radeonLightingSpaceChange( GLcontext *ctx ) -{ - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); - GLboolean tmp; - RADEON_STATECHANGE( rmesa, tcl ); - - if (RADEON_DEBUG & RADEON_STATE) - fprintf(stderr, "%s %d\n", __FUNCTION__, ctx->_NeedEyeCoords); - - if (ctx->_NeedEyeCoords) - tmp = ctx->Transform.RescaleNormals; - else - tmp = !ctx->Transform.RescaleNormals; - - if ( tmp ) { - rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_RESCALE_NORMALS; - } else { - rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= ~RADEON_RESCALE_NORMALS; - } -} - -void radeonInitLightStateFuncs( GLcontext *ctx ) -{ - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); - int i; 
- - ctx->Driver.LightModelfv = radeonLightModelfv; - ctx->Driver.Lightfv = radeonLightfv; - ctx->Driver.Fogfv = radeonFogfv; - ctx->Driver.LightingSpaceChange = radeonLightingSpaceChange; - - for (i = 0 ; i < 8; i++) { - struct gl_light *l = &ctx->Light.Light[i]; - GLenum p = GL_LIGHT0 + i; - *(float *)&(rmesa->hw.lit[i].cmd[LIT_RANGE_CUTOFF]) = FLT_MAX; - - ctx->Driver.Lightfv( ctx, p, GL_AMBIENT, l->Ambient ); - ctx->Driver.Lightfv( ctx, p, GL_DIFFUSE, l->Diffuse ); - ctx->Driver.Lightfv( ctx, p, GL_SPECULAR, l->Specular ); - ctx->Driver.Lightfv( ctx, p, GL_POSITION, 0 ); - ctx->Driver.Lightfv( ctx, p, GL_SPOT_DIRECTION, 0 ); - ctx->Driver.Lightfv( ctx, p, GL_SPOT_EXPONENT, &l->SpotExponent ); - ctx->Driver.Lightfv( ctx, p, GL_SPOT_CUTOFF, &l->SpotCutoff ); - ctx->Driver.Lightfv( ctx, p, GL_CONSTANT_ATTENUATION, - &l->ConstantAttenuation ); - ctx->Driver.Lightfv( ctx, p, GL_LINEAR_ATTENUATION, - &l->LinearAttenuation ); - ctx->Driver.Lightfv( ctx, p, GL_QUADRATIC_ATTENUATION, - &l->QuadraticAttenuation ); - } - - ctx->Driver.LightModelfv( ctx, GL_LIGHT_MODEL_AMBIENT, - ctx->Light.Model.Ambient ); - - ctx->Driver.Fogfv( ctx, GL_FOG_MODE, 0 ); - ctx->Driver.Fogfv( ctx, GL_FOG_DENSITY, &ctx->Fog.Density ); - ctx->Driver.Fogfv( ctx, GL_FOG_START, &ctx->Fog.Start ); - ctx->Driver.Fogfv( ctx, GL_FOG_END, &ctx->Fog.End ); - ctx->Driver.Fogfv( ctx, GL_FOG_COLOR, ctx->Fog.Color ); - ctx->Driver.Fogfv( ctx, GL_FOG_COORDINATE_SOURCE_EXT, 0 ); -} diff --git a/radeon/radeon_lock.c b/radeon/radeon_lock.c index 7ad781b..7b6bd36 100644 --- a/radeon/radeon_lock.c +++ b/radeon/radeon_lock.c @@ -46,7 +46,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "radeon_screen.h" #include "radeon_common.h" #include "radeon_lock.h" -#include "drirenderbuffer.h" /* Update the hardware state. This is called if another context has * grabbed the hardware lock, which includes the X server. 
This @@ -58,9 +57,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ void radeonGetLock(radeonContextPtr rmesa, GLuint flags) { - __DRIdrawablePrivate *const drawable = radeon_get_drawable(rmesa); - __DRIdrawablePrivate *const readable = radeon_get_readable(rmesa); - __DRIscreenPrivate *sPriv = rmesa->dri.screen; + __DRIdrawable *const drawable = radeon_get_drawable(rmesa); + __DRIdrawable *const readable = radeon_get_readable(rmesa); + __DRIscreen *sPriv = rmesa->dri.screen; drmGetLock(rmesa->dri.fd, rmesa->dri.hwContext, flags); diff --git a/radeon/radeon_maos_arrays.c b/radeon/radeon_maos_arrays.c index 08e1c5d..d810e60 100644 --- a/radeon/radeon_maos_arrays.c +++ b/radeon/radeon_maos_arrays.c @@ -76,12 +76,14 @@ static void emit_vecfog(GLcontext *ctx, struct radeon_aos *aos, /* Emit the data */ + radeon_bo_map(aos->bo, 1); out = (uint32_t*)((char*)aos->bo->ptr + aos->offset); for (i = 0; i < count; i++) { out[0] = radeonComputeFogBlendFactor( ctx, *(GLfloat *)data ); out++; data += stride; } + radeon_bo_unmap(aos->bo); } static void emit_s0_vec(uint32_t *out, GLvoid *data, int stride, int count) @@ -151,6 +153,7 @@ static void emit_tex_vector(GLcontext *ctx, struct radeon_aos *aos, /* Emit the data */ + radeon_bo_map(aos->bo, 1); out = (uint32_t*)((char*)aos->bo->ptr + aos->offset); switch (size) { case 1: @@ -170,6 +173,7 @@ static void emit_tex_vector(GLcontext *ctx, struct radeon_aos *aos, exit(1); break; } + radeon_bo_unmap(aos->bo); } @@ -196,12 +200,12 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs ) if (!rmesa->tcl.obj.buf) rcommon_emit_vector( ctx, &(rmesa->tcl.aos[nr]), - (char *)VB->ObjPtr->data, - VB->ObjPtr->size, - VB->ObjPtr->stride, + (char *)VB->AttribPtr[_TNL_ATTRIB_POS]->data, + VB->AttribPtr[_TNL_ATTRIB_POS]->size, + VB->AttribPtr[_TNL_ATTRIB_POS]->stride, count); - switch( VB->ObjPtr->size ) { + switch( VB->AttribPtr[_TNL_ATTRIB_POS]->size ) { case 4: vfmt |= RADEON_CP_VC_FRMT_W0; case 3: vfmt |= 
RADEON_CP_VC_FRMT_Z; case 2: vfmt |= RADEON_CP_VC_FRMT_XY; @@ -216,9 +220,9 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs ) if (!rmesa->tcl.norm.buf) rcommon_emit_vector( ctx, &(rmesa->tcl.aos[nr]), - (char *)VB->NormalPtr->data, + (char *)VB->AttribPtr[_TNL_ATTRIB_NORMAL]->data, 3, - VB->NormalPtr->stride, + VB->AttribPtr[_TNL_ATTRIB_NORMAL]->stride, count); vfmt |= RADEON_CP_VC_FRMT_N0; @@ -227,9 +231,9 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs ) if (inputs & VERT_BIT_COLOR0) { int emitsize; - if (VB->ColorPtr[0]->size == 4 && - (VB->ColorPtr[0]->stride != 0 || - VB->ColorPtr[0]->data[0][3] != 1.0)) { + if (VB->AttribPtr[_TNL_ATTRIB_COLOR0]->size == 4 && + (VB->AttribPtr[_TNL_ATTRIB_COLOR0]->stride != 0 || + VB->AttribPtr[_TNL_ATTRIB_COLOR0]->data[0][3] != 1.0)) { vfmt |= RADEON_CP_VC_FRMT_FPCOLOR | RADEON_CP_VC_FRMT_FPALPHA; emitsize = 4; } @@ -242,9 +246,9 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs ) if (!rmesa->tcl.rgba.buf) rcommon_emit_vector( ctx, &(rmesa->tcl.aos[nr]), - (char *)VB->ColorPtr[0]->data, + (char *)VB->AttribPtr[_TNL_ATTRIB_COLOR0]->data, emitsize, - VB->ColorPtr[0]->stride, + VB->AttribPtr[_TNL_ATTRIB_COLOR0]->stride, count); nr++; @@ -256,9 +260,9 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs ) rcommon_emit_vector( ctx, &(rmesa->tcl.aos[nr]), - (char *)VB->SecondaryColorPtr[0]->data, + (char *)VB->AttribPtr[_TNL_ATTRIB_COLOR1]->data, 3, - VB->SecondaryColorPtr[0]->stride, + VB->AttribPtr[_TNL_ATTRIB_COLOR1]->stride, count); } @@ -273,8 +277,8 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs ) if (!rmesa->tcl.fog.buf) emit_vecfog( ctx, &(rmesa->tcl.aos[nr]), - (char *)VB->FogCoordPtr->data, - VB->FogCoordPtr->stride, + (char *)VB->AttribPtr[_TNL_ATTRIB_FOG]->data, + VB->AttribPtr[_TNL_ATTRIB_FOG]->stride, count); vfmt |= RADEON_CP_VC_FRMT_FPFOG; @@ -290,24 +294,24 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs ) if (!rmesa->tcl.tex[unit].buf) emit_tex_vector( ctx, 
&(rmesa->tcl.aos[nr]), - (char *)VB->TexCoordPtr[unit]->data, - VB->TexCoordPtr[unit]->size, - VB->TexCoordPtr[unit]->stride, + (char *)VB->AttribPtr[_TNL_ATTRIB_TEX0 + unit]->data, + VB->AttribPtr[_TNL_ATTRIB_TEX0 + unit]->size, + VB->AttribPtr[_TNL_ATTRIB_TEX0 + unit]->stride, count ); nr++; vfmt |= RADEON_ST_BIT(unit); /* assume we need the 3rd coord if texgen is active for r/q OR at least 3 coords are submitted. This may not be 100% correct */ - if (VB->TexCoordPtr[unit]->size >= 3) { + if (VB->AttribPtr[_TNL_ATTRIB_TEX0 + unit]->size >= 3) { vtx |= RADEON_Q_BIT(unit); vfmt |= RADEON_Q_BIT(unit); } if ( (ctx->Texture.Unit[unit].TexGenEnabled & (R_BIT | Q_BIT)) ) vtx |= RADEON_Q_BIT(unit); - else if ((VB->TexCoordPtr[unit]->size >= 3) && + else if ((VB->AttribPtr[_TNL_ATTRIB_TEX0 + unit]->size >= 3) && ((ctx->Texture.Unit[unit]._ReallyEnabled & (TEXTURE_CUBE_BIT)) == 0)) { - GLuint swaptexmatcol = (VB->TexCoordPtr[unit]->size - 3); + GLuint swaptexmatcol = (VB->AttribPtr[_TNL_ATTRIB_TEX0 + unit]->size - 3); if (((rmesa->NeedTexMatrix >> unit) & 1) && (swaptexmatcol != ((rmesa->TexMatColSwap >> unit) & 1))) radeonUploadTexMatrix( rmesa, unit, swaptexmatcol ) ; diff --git a/radeon/radeon_maos_vbtmp.h b/radeon/radeon_maos_vbtmp.h index 5157831..d764ccb 100644 --- a/radeon/radeon_maos_vbtmp.h +++ b/radeon/radeon_maos_vbtmp.h @@ -56,18 +56,18 @@ static void TAG(emit)( GLcontext *ctx, radeon_print(RADEON_SWRENDER, RADEON_VERBOSE, "%s\n", __FUNCTION__); - coord = (GLuint (*)[4])VB->ObjPtr->data; - coord_stride = VB->ObjPtr->stride; + coord = (GLuint (*)[4])VB->AttribPtr[_TNL_ATTRIB_POS]->data; + coord_stride = VB->AttribPtr[_TNL_ATTRIB_POS]->stride; if (DO_TEX2) { - if (VB->TexCoordPtr[2]) { + if (VB->AttribPtr[_TNL_ATTRIB_TEX2]) { const GLuint t2 = GET_TEXSOURCE(2); - tc2 = (GLuint (*)[4])VB->TexCoordPtr[t2]->data; - tc2_stride = VB->TexCoordPtr[t2]->stride; - if (DO_PTEX && VB->TexCoordPtr[t2]->size < 3) { + tc2 = (GLuint (*)[4])VB->AttribPtr[_TNL_ATTRIB_TEX0 + 
t2]->data; + tc2_stride = VB->AttribPtr[_TNL_ATTRIB_TEX0 + t2]->stride; + if (DO_PTEX && VB->AttribPtr[_TNL_ATTRIB_TEX0 + t2]->size < 3) { fill_tex |= (1<<2); } - else if (DO_PTEX && VB->TexCoordPtr[t2]->size < 4) { + else if (DO_PTEX && VB->AttribPtr[_TNL_ATTRIB_TEX0 + t2]->size < 4) { rqcoordsnoswap |= (1<<2); } } else { @@ -77,14 +77,14 @@ static void TAG(emit)( GLcontext *ctx, } if (DO_TEX1) { - if (VB->TexCoordPtr[1]) { + if (VB->AttribPtr[_TNL_ATTRIB_TEX1]) { const GLuint t1 = GET_TEXSOURCE(1); - tc1 = (GLuint (*)[4])VB->TexCoordPtr[t1]->data; - tc1_stride = VB->TexCoordPtr[t1]->stride; - if (DO_PTEX && VB->TexCoordPtr[t1]->size < 3) { + tc1 = (GLuint (*)[4])VB->AttribPtr[_TNL_ATTRIB_TEX0 + t1]->data; + tc1_stride = VB->AttribPtr[_TNL_ATTRIB_TEX0 + t1]->stride; + if (DO_PTEX && VB->AttribPtr[_TNL_ATTRIB_TEX0 + t1]->size < 3) { fill_tex |= (1<<1); } - else if (DO_PTEX && VB->TexCoordPtr[t1]->size < 4) { + else if (DO_PTEX && VB->AttribPtr[_TNL_ATTRIB_TEX0 + t1]->size < 4) { rqcoordsnoswap |= (1<<1); } } else { @@ -94,14 +94,14 @@ static void TAG(emit)( GLcontext *ctx, } if (DO_TEX0) { - if (VB->TexCoordPtr[0]) { + if (VB->AttribPtr[_TNL_ATTRIB_TEX0]) { const GLuint t0 = GET_TEXSOURCE(0); - tc0_stride = VB->TexCoordPtr[t0]->stride; - tc0 = (GLuint (*)[4])VB->TexCoordPtr[t0]->data; - if (DO_PTEX && VB->TexCoordPtr[t0]->size < 3) { + tc0_stride = VB->AttribPtr[_TNL_ATTRIB_TEX0 + t0]->stride; + tc0 = (GLuint (*)[4])VB->AttribPtr[_TNL_ATTRIB_TEX0 + t0]->data; + if (DO_PTEX && VB->AttribPtr[_TNL_ATTRIB_TEX0 + t0]->size < 3) { fill_tex |= (1<<0); } - else if (DO_PTEX && VB->TexCoordPtr[t0]->size < 4) { + else if (DO_PTEX && VB->AttribPtr[_TNL_ATTRIB_TEX0 + t0]->size < 4) { rqcoordsnoswap |= (1<<0); } } else { @@ -112,9 +112,9 @@ static void TAG(emit)( GLcontext *ctx, } if (DO_NORM) { - if (VB->NormalPtr) { - norm_stride = VB->NormalPtr->stride; - norm = (GLuint (*)[4])VB->NormalPtr->data; + if (VB->AttribPtr[_TNL_ATTRIB_NORMAL]) { + norm_stride = 
VB->AttribPtr[_TNL_ATTRIB_NORMAL]->stride; + norm = (GLuint (*)[4])VB->AttribPtr[_TNL_ATTRIB_NORMAL]->data; } else { norm_stride = 0; norm = (GLuint (*)[4])&ctx->Current.Attrib[VERT_ATTRIB_NORMAL]; @@ -122,9 +122,9 @@ static void TAG(emit)( GLcontext *ctx, } if (DO_RGBA) { - if (VB->ColorPtr[0]) { - col = VB->ColorPtr[0]->data; - col_stride = VB->ColorPtr[0]->stride; + if (VB->AttribPtr[_TNL_ATTRIB_COLOR0]) { + col = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->data; + col_stride = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->stride; } else { col = (GLfloat (*)[4])ctx->Current.Attrib[VERT_ATTRIB_COLOR0]; col_stride = 0; @@ -132,9 +132,9 @@ static void TAG(emit)( GLcontext *ctx, } if (DO_SPEC_OR_FOG) { - if (VB->SecondaryColorPtr[0]) { - spec = VB->SecondaryColorPtr[0]->data; - spec_stride = VB->SecondaryColorPtr[0]->stride; + if (VB->AttribPtr[_TNL_ATTRIB_COLOR1]) { + spec = VB->AttribPtr[_TNL_ATTRIB_COLOR1]->data; + spec_stride = VB->AttribPtr[_TNL_ATTRIB_COLOR1]->stride; } else { spec = (GLfloat (*)[4])ctx->Current.Attrib[VERT_ATTRIB_COLOR1]; spec_stride = 0; @@ -142,9 +142,9 @@ static void TAG(emit)( GLcontext *ctx, } if (DO_SPEC_OR_FOG) { - if (VB->FogCoordPtr) { - fog = VB->FogCoordPtr->data; - fog_stride = VB->FogCoordPtr->stride; + if (VB->AttribPtr[_TNL_ATTRIB_FOG]) { + fog = VB->AttribPtr[_TNL_ATTRIB_FOG]->data; + fog_stride = VB->AttribPtr[_TNL_ATTRIB_FOG]->stride; } else { fog = (GLfloat (*)[4])ctx->Current.Attrib[VERT_ATTRIB_FOG]; fog_stride = 0; diff --git a/radeon/radeon_maos_verts.c b/radeon/radeon_maos_verts.c index 78ec119..98f96ff 100644 --- a/radeon/radeon_maos_verts.c +++ b/radeon/radeon_maos_verts.c @@ -326,7 +326,7 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs ) if (1) { req |= RADEON_CP_VC_FRMT_Z; - if (VB->ObjPtr->size == 4) { + if (VB->AttribPtr[_TNL_ATTRIB_POS]->size == 4) { req |= RADEON_CP_VC_FRMT_W0; } } @@ -348,15 +348,15 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs ) req |= RADEON_ST_BIT(unit); /* assume we need the 3rd coord if 
texgen is active for r/q OR at least 3 coords are submitted. This may not be 100% correct */ - if (VB->TexCoordPtr[unit]->size >= 3) { + if (VB->AttribPtr[_TNL_ATTRIB_TEX0 + unit]->size >= 3) { req |= RADEON_Q_BIT(unit); vtx |= RADEON_Q_BIT(unit); } if ( (ctx->Texture.Unit[unit].TexGenEnabled & (R_BIT | Q_BIT)) ) vtx |= RADEON_Q_BIT(unit); - else if ((VB->TexCoordPtr[unit]->size >= 3) && + else if ((VB->AttribPtr[_TNL_ATTRIB_TEX0 + unit]->size >= 3) && ((ctx->Texture.Unit[unit]._ReallyEnabled & (TEXTURE_CUBE_BIT)) == 0)) { - GLuint swaptexmatcol = (VB->TexCoordPtr[unit]->size - 3); + GLuint swaptexmatcol = (VB->AttribPtr[_TNL_ATTRIB_TEX0 + unit]->size - 3); if (((rmesa->NeedTexMatrix >> unit) & 1) && (swaptexmatcol != ((rmesa->TexMatColSwap >> unit) & 1))) radeonUploadTexMatrix( rmesa, unit, swaptexmatcol ) ; @@ -390,19 +390,19 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs ) * this, add more vertex code (for obj-2, obj-3) or preferably move * to maos. */ - if (VB->ObjPtr->size < 3 || - (VB->ObjPtr->size == 3 && + if (VB->AttribPtr[_TNL_ATTRIB_POS]->size < 3 || + (VB->AttribPtr[_TNL_ATTRIB_POS]->size == 3 && (setup_tab[i].vertex_format & RADEON_CP_VC_FRMT_W0))) { _math_trans_4f( rmesa->tcl.ObjClean.data, - VB->ObjPtr->data, - VB->ObjPtr->stride, + VB->AttribPtr[_TNL_ATTRIB_POS]->data, + VB->AttribPtr[_TNL_ATTRIB_POS]->stride, GL_FLOAT, - VB->ObjPtr->size, + VB->AttribPtr[_TNL_ATTRIB_POS]->size, 0, VB->Count ); - switch (VB->ObjPtr->size) { + switch (VB->AttribPtr[_TNL_ATTRIB_POS]->size) { case 1: _mesa_vector4f_clean_elem(&rmesa->tcl.ObjClean, VB->Count, 1); case 2: @@ -416,14 +416,14 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs ) break; } - VB->ObjPtr = &rmesa->tcl.ObjClean; + VB->AttribPtr[_TNL_ATTRIB_POS] = &rmesa->tcl.ObjClean; } - + radeon_bo_map(rmesa->radeon.tcl.aos[0].bo, 1); setup_tab[i].emit( ctx, 0, VB->Count, rmesa->radeon.tcl.aos[0].bo->ptr + rmesa->radeon.tcl.aos[0].offset); - + radeon_bo_unmap(rmesa->radeon.tcl.aos[0].bo); // 
rmesa->radeon.tcl.aos[0].size = setup_tab[i].vertex_size; rmesa->radeon.tcl.aos[0].stride = setup_tab[i].vertex_size; rmesa->tcl.vertex_format = setup_tab[i].vertex_format; diff --git a/radeon/radeon_mipmap_tree.c b/radeon/radeon_mipmap_tree.c index 5a346c5..e0e271b 100644 --- a/radeon/radeon_mipmap_tree.c +++ b/radeon/radeon_mipmap_tree.c @@ -32,9 +32,9 @@ #include <unistd.h> #include "main/simple_list.h" -#include "main/texcompress.h" #include "main/teximage.h" #include "main/texobj.h" +#include "main/enums.h" #include "radeon_texture.h" static unsigned get_aligned_compressed_row_stride( @@ -42,18 +42,31 @@ static unsigned get_aligned_compressed_row_stride( unsigned width, unsigned minStride) { - const unsigned blockSize = _mesa_get_format_bytes(format); - unsigned blockWidth, blockHeight, numXBlocks; + const unsigned blockBytes = _mesa_get_format_bytes(format); + unsigned blockWidth, blockHeight; + unsigned stride; _mesa_get_format_block_size(format, &blockWidth, &blockHeight); - numXBlocks = (width + blockWidth - 1) / blockWidth; - - while (numXBlocks * blockSize < minStride) - { - ++numXBlocks; - } - return numXBlocks * blockSize; + /* Count number of blocks required to store the given width. + * And then multiple it with bytes required to store a block. + */ + stride = (width + blockWidth - 1) / blockWidth * blockBytes; + + /* Round the given minimum stride to the next full blocksize. 
+ * (minStride + blockBytes - 1) / blockBytes * blockBytes + */ + if ( stride < minStride ) + stride = (minStride + blockBytes - 1) / blockBytes * blockBytes; + + radeon_print(RADEON_TEXTURE, RADEON_TRACE, + "%s width %u, minStride %u, block(bytes %u, width %u):" + "stride %u\n", + __func__, width, minStride, + blockBytes, blockWidth, + stride); + + return stride; } static unsigned get_compressed_image_size( @@ -80,25 +93,28 @@ static void compute_tex_image_offset(radeonContextPtr rmesa, radeon_mipmap_tree { radeon_mipmap_level *lvl = &mt->levels[level]; uint32_t row_align; + GLuint height; + + height = _mesa_next_pow_two_32(lvl->height); /* Find image size in bytes */ if (_mesa_is_format_compressed(mt->mesaFormat)) { lvl->rowstride = get_aligned_compressed_row_stride(mt->mesaFormat, lvl->width, rmesa->texture_compressed_row_align); - lvl->size = get_compressed_image_size(mt->mesaFormat, lvl->rowstride, lvl->height); + lvl->size = get_compressed_image_size(mt->mesaFormat, lvl->rowstride, height); } else if (mt->target == GL_TEXTURE_RECTANGLE_NV) { row_align = rmesa->texture_rect_row_align - 1; lvl->rowstride = (_mesa_format_row_stride(mt->mesaFormat, lvl->width) + row_align) & ~row_align; - lvl->size = lvl->rowstride * lvl->height; + lvl->size = lvl->rowstride * height; } else if (mt->tilebits & RADEON_TXO_MICRO_TILE) { /* tile pattern is 16 bytes x2. 
mipmaps stay 32 byte aligned, * though the actual offset may be different (if texture is less than * 32 bytes width) to the untiled case */ lvl->rowstride = (_mesa_format_row_stride(mt->mesaFormat, lvl->width) * 2 + 31) & ~31; - lvl->size = lvl->rowstride * ((lvl->height + 1) / 2) * lvl->depth; + lvl->size = lvl->rowstride * ((height + 1) / 2) * lvl->depth; } else { row_align = rmesa->texture_row_align - 1; lvl->rowstride = (_mesa_format_row_stride(mt->mesaFormat, lvl->width) + row_align) & ~row_align; - lvl->size = lvl->rowstride * lvl->height * lvl->depth; + lvl->size = lvl->rowstride * height * lvl->depth; } assert(lvl->size > 0); @@ -107,10 +123,11 @@ static void compute_tex_image_offset(radeonContextPtr rmesa, radeon_mipmap_tree lvl->faces[face].offset = *curOffset; *curOffset += lvl->size; - if (RADEON_DEBUG & RADEON_TEXTURE) - fprintf(stderr, - "level %d, face %d: rs:%d %dx%d at %d\n", - level, face, lvl->rowstride, lvl->width, lvl->height, lvl->faces[face].offset); + radeon_print(RADEON_TEXTURE, RADEON_TRACE, + "%s(%p) level %d, face %d: rs:%d %dx%d at %d\n", + __func__, rmesa, + level, face, + lvl->rowstride, lvl->width, height, lvl->faces[face].offset); } static GLuint minify(GLuint size, GLuint levels) @@ -142,6 +159,10 @@ static void calculate_miptree_layout_r100(radeonContextPtr rmesa, radeon_mipmap_ /* Note the required size in memory */ mt->totalsize = (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK; + + radeon_print(RADEON_TEXTURE, RADEON_TRACE, + "%s(%p, %p) total size %d\n", + __func__, rmesa, mt, mt->totalsize); } static void calculate_miptree_layout_r300(radeonContextPtr rmesa, radeon_mipmap_tree *mt) @@ -161,10 +182,20 @@ static void calculate_miptree_layout_r300(radeonContextPtr rmesa, radeon_mipmap_ for(face = 0; face < mt->faces; face++) compute_tex_image_offset(rmesa, mt, face, level, &curOffset); + /* r600 cube levels seems to be aligned to 8 faces but + * we have separate register for 1'st level offset so add + * 2 image alignment 
after 1'st mip level */ + if(rmesa->radeonScreen->chip_family >= CHIP_FAMILY_R600 && + mt->target == GL_TEXTURE_CUBE_MAP && level >= 1) + curOffset += 2 * mt->levels[level].size; } /* Note the required size in memory */ mt->totalsize = (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK; + + radeon_print(RADEON_TEXTURE, RADEON_TRACE, + "%s(%p, %p) total size %d\n", + __func__, rmesa, mt, mt->totalsize); } /** @@ -176,6 +207,10 @@ static radeon_mipmap_tree* radeon_miptree_create(radeonContextPtr rmesa, { radeon_mipmap_tree *mt = CALLOC_STRUCT(_radeon_mipmap_tree); + radeon_print(RADEON_TEXTURE, RADEON_NORMAL, + "%s(%p) new tree is %p.\n", + __func__, rmesa, mt); + mt->mesaFormat = mesaFormat; mt->refcount = 1; mt->target = target; @@ -266,6 +301,12 @@ static void calculate_min_max_lod(struct gl_texture_object *tObj, return; } + radeon_print(RADEON_TEXTURE, RADEON_TRACE, + "%s(%p) target %s, min %d, max %d.\n", + __func__, tObj, + _mesa_lookup_enum_by_nr(tObj->Target), + minLod, maxLod); + /* save these values */ *pminLod = minLod; *pmaxLod = maxLod; @@ -312,7 +353,7 @@ static GLboolean radeon_miptree_matches_texture(radeon_mipmap_tree *mt, struct g firstImage = texObj->Image[0][texObj->BaseLevel]; numLevels = MIN2(texObj->MaxLevel - texObj->BaseLevel + 1, firstImage->MaxLog2 + 1); - if (RADEON_DEBUG & RADEON_TEXTURE) { + if (radeon_is_debug_enabled(RADEON_TEXTURE,RADEON_TRACE)) { fprintf(stderr, "Checking if miptree %p matches texObj %p\n", mt, texObj); fprintf(stderr, "target %d vs %d\n", mt->target, texObj->Target); fprintf(stderr, "format %d vs %d\n", mt->mesaFormat, firstImage->TexFormat); @@ -353,8 +394,12 @@ void radeon_try_alloc_miptree(radeonContextPtr rmesa, radeonTexObj *t) assert(!t->mt); - if (!texImg) + if (!texImg) { + radeon_warning("%s(%p) No image in given texture object(%p).\n", + __func__, rmesa, t); return; + } + numLevels = MIN2(texObj->MaxLevel - texObj->BaseLevel + 1, texImg->MaxLog2 + 1); @@ -364,25 +409,6 @@ void 
radeon_try_alloc_miptree(radeonContextPtr rmesa, radeonTexObj *t) texImg->Depth, t->tile_bits); } -/* Although we use the image_offset[] array to store relative offsets - * to cube faces, Mesa doesn't know anything about this and expects - * each cube face to be treated as a separate image. - * - * These functions present that view to mesa: - */ -void -radeon_miptree_depth_offsets(radeon_mipmap_tree *mt, GLuint level, GLuint *offsets) -{ - if (mt->target != GL_TEXTURE_3D || mt->faces == 1) { - offsets[0] = 0; - } else { - int i; - for (i = 0; i < 6; i++) { - offsets[i] = mt->levels[level].faces[i].offset; - } - } -} - GLuint radeon_miptree_image_offset(radeon_mipmap_tree *mt, GLuint face, GLuint level) @@ -409,6 +435,10 @@ static void migrate_image_to_miptree(radeon_mipmap_tree *mt, assert(dstlvl->height == image->base.Height); assert(dstlvl->depth == image->base.Depth); + radeon_print(RADEON_TEXTURE, RADEON_VERBOSE, + "%s miptree %p, image %p, face %d, level %d.\n", + __func__, mt, image, face, level); + radeon_bo_map(mt->bo, GL_TRUE); dest = mt->bo->ptr + dstlvl->faces[face].offset; @@ -440,6 +470,9 @@ static void migrate_image_to_miptree(radeon_mipmap_tree *mt, /* This condition should be removed, it's here to workaround * a segfault when mapping textures during software fallbacks. 
*/ + radeon_print(RADEON_FALLBACKS, RADEON_IMPORTANT, + "%s Trying to map texture in sowftware fallback.\n", + __func__); const uint32_t srcrowstride = _mesa_format_row_stride(image->base.TexFormat, image->base.Width); uint32_t rows = image->base.Height * image->base.Depth; @@ -479,10 +512,8 @@ static radeon_mipmap_tree * get_biggest_matching_miptree(radeonTexObj *texObj, unsigned mtCount = 0; unsigned maxMtIndex = 0; radeon_mipmap_tree *tmp; - unsigned level; - int i; - for (level = firstLevel; level <= lastLevel; ++level) { + for (unsigned level = firstLevel; level <= lastLevel; ++level) { radeon_texture_image *img = get_radeon_texture_image(texObj->base.Image[0][level]); unsigned found = 0; // TODO: why this hack?? @@ -492,7 +523,7 @@ static radeon_mipmap_tree * get_biggest_matching_miptree(radeonTexObj *texObj, if (!img->mt) continue; - for (i = 0; i < mtCount; ++i) { + for (int i = 0; i < mtCount; ++i) { if (mts[i] == img->mt) { found = 1; mtSizes[i] += img->mt->levels[img->mtlevel].size; @@ -508,10 +539,12 @@ static radeon_mipmap_tree * get_biggest_matching_miptree(radeonTexObj *texObj, } if (mtCount == 0) { + free(mtSizes); + free(mts); return NULL; } - for (i = 1; i < mtCount; ++i) { + for (int i = 1; i < mtCount; ++i) { if (mtSizes[i] > mtSizes[maxMtIndex]) { maxMtIndex = i; } @@ -548,9 +581,9 @@ int radeon_validate_texture_miptree(GLcontext * ctx, struct gl_texture_object *t calculate_min_max_lod(&t->base, &t->minLod, &t->maxLod); - if (RADEON_DEBUG & RADEON_TEXTURE) - fprintf(stderr, "%s: Validating texture %p now, minLod = %d, maxLod = %d\n", - __FUNCTION__, texObj ,t->minLod, t->maxLod); + radeon_print(RADEON_TEXTURE, RADEON_NORMAL, + "%s: Validating texture %p now, minLod = %d, maxLod = %d\n", + __FUNCTION__, texObj ,t->minLod, t->maxLod); radeon_mipmap_tree *dst_miptree; dst_miptree = get_biggest_matching_miptree(t, t->minLod, t->maxLod); @@ -559,11 +592,13 @@ int radeon_validate_texture_miptree(GLcontext * ctx, struct gl_texture_object *t 
radeon_miptree_unreference(&t->mt); radeon_try_alloc_miptree(rmesa, t); dst_miptree = t->mt; - if (RADEON_DEBUG & RADEON_TEXTURE) { - fprintf(stderr, "%s: No matching miptree found, allocated new one %p\n", __FUNCTION__, t->mt); - } - } else if (RADEON_DEBUG & RADEON_TEXTURE) { - fprintf(stderr, "%s: Using miptree %p\n", __FUNCTION__, t->mt); + radeon_print(RADEON_TEXTURE, RADEON_NORMAL, + "%s: No matching miptree found, allocated new one %p\n", + __FUNCTION__, t->mt); + + } else { + radeon_print(RADEON_TEXTURE, RADEON_NORMAL, + "%s: Using miptree %p\n", __FUNCTION__, t->mt); } const unsigned faces = texObj->Target == GL_TEXTURE_CUBE_MAP ? 6 : 1; @@ -574,22 +609,21 @@ int radeon_validate_texture_miptree(GLcontext * ctx, struct gl_texture_object *t for (level = t->minLod; level <= t->maxLod; ++level) { img = get_radeon_texture_image(texObj->Image[face][level]); - if (RADEON_DEBUG & RADEON_TEXTURE) { - fprintf(stderr, "Checking image level %d, face %d, mt %p ... ", level, face, img->mt); - } + radeon_print(RADEON_TEXTURE, RADEON_TRACE, + "Checking image level %d, face %d, mt %p ... ", + level, face, img->mt); if (img->mt != dst_miptree) { - if (RADEON_DEBUG & RADEON_TEXTURE) { - fprintf(stderr, "MIGRATING\n"); - } + radeon_print(RADEON_TEXTURE, RADEON_TRACE, + "MIGRATING\n"); + struct radeon_bo *src_bo = (img->mt) ? 
img->mt->bo : img->bo; if (src_bo && radeon_bo_is_referenced_by_cs(src_bo, rmesa->cmdbuf.cs)) { radeon_firevertices(rmesa); } migrate_image_to_miptree(dst_miptree, img, face, level); - } else if (RADEON_DEBUG & RADEON_TEXTURE) { - fprintf(stderr, "OK\n"); - } + } else + radeon_print(RADEON_TEXTURE, RADEON_TRACE, "OK\n"); } } diff --git a/radeon/radeon_mipmap_tree.h b/radeon/radeon_mipmap_tree.h index a10649b..c911688 100644 --- a/radeon/radeon_mipmap_tree.h +++ b/radeon/radeon_mipmap_tree.h @@ -88,7 +88,5 @@ GLboolean radeon_miptree_matches_image(radeon_mipmap_tree *mt, void radeon_try_alloc_miptree(radeonContextPtr rmesa, radeonTexObj *t); GLuint radeon_miptree_image_offset(radeon_mipmap_tree *mt, GLuint face, GLuint level); -void radeon_miptree_depth_offsets(radeon_mipmap_tree *mt, GLuint level, GLuint *offsets); - uint32_t get_base_teximage_offset(radeonTexObj *texObj); #endif /* __RADEON_MIPMAP_TREE_H_ */ diff --git a/radeon/radeon_queryobj.c b/radeon/radeon_queryobj.c index 98117cd..04ce124 100644 --- a/radeon/radeon_queryobj.c +++ b/radeon/radeon_queryobj.c @@ -65,7 +65,7 @@ static void radeonQueryGetResult(GLcontext *ctx, struct gl_query_object *q) } radeon_print(RADEON_STATE, RADEON_TRACE, - "%d start: %lx, end: %lx %ld\n", i, start, end, end - start); + "%d start: %llx, end: %llx %lld\n", i, start, end, end - start); } } else { for (i = 0; i < query->curr_offset/sizeof(uint32_t); ++i) { @@ -81,7 +81,7 @@ static struct gl_query_object * radeonNewQueryObject(GLcontext *ctx, GLuint id) { struct radeon_query_object *query; - query = _mesa_calloc(sizeof(struct radeon_query_object)); + query = calloc(1, sizeof(struct radeon_query_object)); query->Base.Id = id; query->Base.Result = 0; @@ -103,7 +103,7 @@ static void radeonDeleteQuery(GLcontext *ctx, struct gl_query_object *q) radeon_bo_unref(query->bo); } - _mesa_free(query); + free(query); } static void radeonWaitQuery(GLcontext *ctx, struct gl_query_object *q) diff --git a/radeon/radeon_sanity.c 
b/radeon/radeon_sanity.c index 1ab570f..3e64be8 100644 --- a/radeon/radeon_sanity.c +++ b/radeon/radeon_sanity.c @@ -36,7 +36,6 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/glheader.h" #include "radeon_context.h" -#include "radeon_ioctl.h" #include "radeon_sanity.h" /* Set this '1' to get more verbiage. diff --git a/radeon/radeon_screen.c b/radeon/radeon_screen.c index be2d836..6415ec1 100644 --- a/radeon/radeon_screen.c +++ b/radeon/radeon_screen.c @@ -47,7 +47,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "radeon_macros.h" #include "radeon_screen.h" #include "radeon_common.h" -#include "radeon_span.h" #if defined(RADEON_R100) #include "radeon_context.h" #include "radeon_tex.h" @@ -66,7 +65,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "utils.h" #include "vblank.h" -#include "drirenderbuffer.h" #include "radeon_bocs_wrapper.h" @@ -214,10 +212,10 @@ static const GLuint __driNConfigOptions = 17; #endif -static int getSwapInfo( __DRIdrawablePrivate *dPriv, __DRIswapInfo * sInfo ); +static int getSwapInfo( __DRIdrawable *dPriv, __DRIswapInfo * sInfo ); static int -radeonGetParam(__DRIscreenPrivate *sPriv, int param, void *value) +radeonGetParam(__DRIscreen *sPriv, int param, void *value) { int ret; drm_radeon_getparam_t gp = { 0 }; @@ -249,7 +247,7 @@ radeonGetParam(__DRIscreenPrivate *sPriv, int param, void *value) } static const __DRIconfig ** -radeonFillInModes( __DRIscreenPrivate *psp, +radeonFillInModes( __DRIscreen *psp, unsigned pixel_bits, unsigned depth_bits, unsigned stencil_bits, GLboolean have_back_buffer ) { @@ -295,18 +293,18 @@ radeonFillInModes( __DRIscreenPrivate *psp, depth_bits_array, stencil_bits_array, depth_buffer_factor, back_buffer_modes, back_buffer_factor, msaa_samples_array, - 1); + 1, GL_TRUE); configs_a8r8g8b8 = driCreateConfigs(GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, depth_bits_array, stencil_bits_array, 1, back_buffer_modes, 1, - msaa_samples_array, 1); + 
msaa_samples_array, 1, GL_TRUE); configs = driConcatConfigs(configs_r5g6b5, configs_a8r8g8b8); } else configs = driCreateConfigs(GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, depth_bits_array, stencil_bits_array, depth_buffer_factor, back_buffer_modes, back_buffer_factor, - msaa_samples_array, 1); + msaa_samples_array, 1, GL_TRUE); if (configs == NULL) { fprintf( stderr, "[%s:%u] Error creating FBConfig!\n", @@ -828,6 +826,7 @@ static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id) case PCI_CHIP_RS880_9712: case PCI_CHIP_RS880_9713: case PCI_CHIP_RS880_9714: + case PCI_CHIP_RS880_9715: screen->chip_family = CHIP_FAMILY_RS880; screen->chip_flags = RADEON_CHIPSET_TCL; break; @@ -911,7 +910,7 @@ static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id) /* Create the device specific screen private data struct. */ static radeonScreenPtr -radeonCreateScreen( __DRIscreenPrivate *sPriv ) +radeonCreateScreen( __DRIscreen *sPriv ) { radeonScreenPtr screen; RADEONDRIPtr dri_priv = (RADEONDRIPtr)sPriv->pDevPriv; @@ -1250,7 +1249,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) } static radeonScreenPtr -radeonCreateScreen2(__DRIscreenPrivate *sPriv) +radeonCreateScreen2(__DRIscreen *sPriv) { radeonScreenPtr screen; int i; @@ -1401,7 +1400,7 @@ radeonCreateScreen2(__DRIscreenPrivate *sPriv) /* Destroy the device specific screen private data struct. */ static void -radeonDestroyScreen( __DRIscreenPrivate *sPriv ) +radeonDestroyScreen( __DRIscreen *sPriv ) { radeonScreenPtr screen = (radeonScreenPtr)sPriv->private; @@ -1435,7 +1434,7 @@ radeonDestroyScreen( __DRIscreenPrivate *sPriv ) /* Initialize the driver specific screen private data. */ static GLboolean -radeonInitDriver( __DRIscreenPrivate *sPriv ) +radeonInitDriver( __DRIscreen *sPriv ) { if (sPriv->dri2.enabled) { sPriv->private = (void *) radeonCreateScreen2( sPriv ); @@ -1459,8 +1458,8 @@ radeonInitDriver( __DRIscreenPrivate *sPriv ) * pbuffers. 
*/ static GLboolean -radeonCreateBuffer( __DRIscreenPrivate *driScrnPriv, - __DRIdrawablePrivate *driDrawPriv, +radeonCreateBuffer( __DRIscreen *driScrnPriv, + __DRIdrawable *driDrawPriv, const __GLcontextModes *mesaVis, GLboolean isPixmap ) { @@ -1481,7 +1480,7 @@ radeonCreateBuffer( __DRIscreenPrivate *driScrnPriv, if (!rfb) return GL_FALSE; - _mesa_initialize_framebuffer(&rfb->base, mesaVis); + _mesa_initialize_window_framebuffer(&rfb->base, mesaVis); if (mesaVis->redBits == 5) rgbFormat = _mesa_little_endian() ? MESA_FORMAT_RGB565 : MESA_FORMAT_RGB565_REV; @@ -1559,7 +1558,7 @@ static void radeon_cleanup_renderbuffers(struct radeon_framebuffer *rfb) } void -radeonDestroyBuffer(__DRIdrawablePrivate *driDrawPriv) +radeonDestroyBuffer(__DRIdrawable *driDrawPriv) { struct radeon_framebuffer *rfb; if (!driDrawPriv) @@ -1581,7 +1580,7 @@ radeonDestroyBuffer(__DRIdrawablePrivate *driDrawPriv) * \return the __GLcontextModes supported by this driver */ static const __DRIconfig ** -radeonInitScreen(__DRIscreenPrivate *psp) +radeonInitScreen(__DRIscreen *psp) { #if defined(RADEON_R100) static const char *driver_name = "Radeon"; @@ -1631,7 +1630,7 @@ radeonInitScreen(__DRIscreenPrivate *psp) * \return the __GLcontextModes supported by this driver */ static const -__DRIconfig **radeonInitScreen2(__DRIscreenPrivate *psp) +__DRIconfig **radeonInitScreen2(__DRIscreen *psp) { GLenum fb_format[3]; GLenum fb_type[3]; @@ -1678,7 +1677,8 @@ __DRIconfig **radeonInitScreen2(__DRIscreenPrivate *psp) back_buffer_modes, ARRAY_SIZE(back_buffer_modes), msaa_samples_array, - ARRAY_SIZE(msaa_samples_array)); + ARRAY_SIZE(msaa_samples_array), + GL_TRUE); if (configs == NULL) configs = new_configs; else @@ -1698,7 +1698,7 @@ __DRIconfig **radeonInitScreen2(__DRIscreenPrivate *psp) * Get information about previous buffer swaps. 
*/ static int -getSwapInfo( __DRIdrawablePrivate *dPriv, __DRIswapInfo * sInfo ) +getSwapInfo( __DRIdrawable *dPriv, __DRIswapInfo * sInfo ) { struct radeon_framebuffer *rfb; @@ -1751,3 +1751,10 @@ const struct __DriverAPIRec driDriverAPI = { .InitScreen2 = radeonInitScreen2, }; +/* This is the table of extensions that the loader will dlsym() for. */ +PUBLIC const __DRIextension *__driDriverExtensions[] = { + &driCoreExtension.base, + &driLegacyExtension.base, + &driDRI2Extension.base, + NULL +}; diff --git a/radeon/radeon_screen.h b/radeon/radeon_screen.h index 15744e8..5e6d432 100644 --- a/radeon/radeon_screen.h +++ b/radeon/radeon_screen.h @@ -86,7 +86,7 @@ typedef struct radeon_screen { __volatile__ uint32_t *scratch; - __DRIscreenPrivate *driScreen; + __DRIscreen *driScreen; unsigned int sarea_priv_offset; unsigned int gart_buffer_offset; /* offset in card memory space */ unsigned int gart_texture_offset; /* offset in card memory space */ @@ -123,5 +123,5 @@ typedef struct radeon_screen { #define IS_R600_CLASS(screen) \ ((screen->chip_flags & RADEON_CLASS_MASK) == RADEON_CLASS_R600) -extern void radeonDestroyBuffer(__DRIdrawablePrivate *driDrawPriv); +extern void radeonDestroyBuffer(__DRIdrawable *driDrawPriv); #endif /* __RADEON_SCREEN_H__ */ diff --git a/radeon/radeon_span.c b/radeon/radeon_span.c index 665f2b6..1adb609 100644 --- a/radeon/radeon_span.c +++ b/radeon/radeon_span.c @@ -810,6 +810,10 @@ static void map_unmap_rb(struct gl_renderbuffer *rb, int flag) if (rrb == NULL || !rrb->bo) return; + radeon_print(RADEON_MEMORY, RADEON_TRACE, + "%s( rb %p, flag %s )\n", + __func__, rb, flag ? 
"true":"false"); + if (flag) { radeon_bo_wait(rrb->bo); r = radeon_bo_map(rrb->bo, 1); @@ -827,18 +831,25 @@ static void map_unmap_rb(struct gl_renderbuffer *rb, int flag) } static void -radeon_map_unmap_buffers(GLcontext *ctx, GLboolean map) +radeon_map_unmap_framebuffer(GLcontext *ctx, struct gl_framebuffer *fb, + GLboolean map) { GLuint i, j; + radeon_print(RADEON_MEMORY, RADEON_TRACE, + "%s( %p , fb %p, map %s )\n", + __func__, ctx, fb, map ? "true":"false"); + /* color draw buffers */ for (j = 0; j < ctx->DrawBuffer->_NumColorDrawBuffers; j++) - map_unmap_rb(ctx->DrawBuffer->_ColorDrawBuffers[j], map); + map_unmap_rb(fb->_ColorDrawBuffers[j], map); + + map_unmap_rb(fb->_ColorReadBuffer, map); /* check for render to textures */ for (i = 0; i < BUFFER_COUNT; i++) { struct gl_renderbuffer_attachment *att = - ctx->DrawBuffer->Attachment + i; + fb->Attachment + i; struct gl_texture_object *tex = att->Texture; if (tex) { /* Render to texture. Note that a mipmapped texture need not @@ -854,15 +865,15 @@ radeon_map_unmap_buffers(GLcontext *ctx, GLboolean map) radeon_teximage_unmap(image); } } - - map_unmap_rb(ctx->ReadBuffer->_ColorReadBuffer, map); - + /* depth buffer (Note wrapper!) 
*/ - if (ctx->DrawBuffer->_DepthBuffer) - map_unmap_rb(ctx->DrawBuffer->_DepthBuffer->Wrapped, map); + if (fb->_DepthBuffer) + map_unmap_rb(fb->_DepthBuffer->Wrapped, map); + + if (fb->_StencilBuffer) + map_unmap_rb(fb->_StencilBuffer->Wrapped, map); - if (ctx->DrawBuffer->_StencilBuffer) - map_unmap_rb(ctx->DrawBuffer->_StencilBuffer->Wrapped, map); + radeon_check_front_buffer_rendering(ctx); } static void radeonSpanRenderStart(GLcontext * ctx) @@ -887,23 +898,30 @@ static void radeonSpanRenderStart(GLcontext * ctx) ctx->Driver.MapTexture(ctx, ctx->Texture.Unit[i]._Current); } - radeon_map_unmap_buffers(ctx, 1); + radeon_map_unmap_framebuffer(ctx, ctx->DrawBuffer, GL_TRUE); + if (ctx->ReadBuffer != ctx->DrawBuffer) + radeon_map_unmap_framebuffer(ctx, ctx->ReadBuffer, GL_TRUE); } static void radeonSpanRenderFinish(GLcontext * ctx) { radeonContextPtr rmesa = RADEON_CONTEXT(ctx); int i; + _swrast_flush(ctx); - if (!rmesa->radeonScreen->driScreen->dri2.enabled) { - UNLOCK_HARDWARE(rmesa); - } + for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { if (ctx->Texture.Unit[i]._ReallyEnabled) ctx->Driver.UnmapTexture(ctx, ctx->Texture.Unit[i]._Current); } - radeon_map_unmap_buffers(ctx, 0); + radeon_map_unmap_framebuffer(ctx, ctx->DrawBuffer, GL_FALSE); + if (ctx->ReadBuffer != ctx->DrawBuffer) + radeon_map_unmap_framebuffer(ctx, ctx->ReadBuffer, GL_FALSE); + + if (!rmesa->radeonScreen->driScreen->dri2.enabled) { + UNLOCK_HARDWARE(rmesa); + } } void radeonInitSpanFuncs(GLcontext * ctx) diff --git a/radeon/radeon_state.c b/radeon/radeon_state.c index f6c733a..583751d 100644 --- a/radeon/radeon_state.c +++ b/radeon/radeon_state.c @@ -37,7 +37,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "main/api_arrayelt.h" #include "main/enums.h" #include "main/light.h" -#include "main/state.h" #include "main/context.h" #include "main/framebuffer.h" #include "main/simple_list.h" @@ -54,7 +53,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "radeon_tcl.h" #include "radeon_tex.h" #include "radeon_swtcl.h" -#include "drirenderbuffer.h" static void radeonUpdateSpecular( GLcontext *ctx ); @@ -521,10 +519,10 @@ static void radeonColorMask( GLcontext *ctx, return; mask = radeonPackColor( rrb->cpp, - ctx->Color.ColorMask[RCOMP], - ctx->Color.ColorMask[GCOMP], - ctx->Color.ColorMask[BCOMP], - ctx->Color.ColorMask[ACOMP] ); + ctx->Color.ColorMask[0][RCOMP], + ctx->Color.ColorMask[0][GCOMP], + ctx->Color.ColorMask[0][BCOMP], + ctx->Color.ColorMask[0][ACOMP] ); if ( rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK] != mask ) { RADEON_STATECHANGE( rmesa, msk ); @@ -1400,7 +1398,7 @@ static void radeonClearStencil( GLcontext *ctx, GLint s ) void radeonUpdateWindow( GLcontext *ctx ) { r100ContextPtr rmesa = R100_CONTEXT(ctx); - __DRIdrawablePrivate *dPriv = radeon_get_drawable(&rmesa->radeon); + __DRIdrawable *dPriv = radeon_get_drawable(&rmesa->radeon); GLfloat xoffset = dPriv ? (GLfloat) dPriv->x : 0; GLfloat yoffset = dPriv ? (GLfloat) dPriv->y + dPriv->h : 0; const GLfloat *v = ctx->Viewport._WindowMap.m; @@ -1455,7 +1453,7 @@ static void radeonDepthRange( GLcontext *ctx, GLclampd nearval, void radeonUpdateViewportOffset( GLcontext *ctx ) { r100ContextPtr rmesa = R100_CONTEXT(ctx); - __DRIdrawablePrivate *dPriv = radeon_get_drawable(&rmesa->radeon); + __DRIdrawable *dPriv = radeon_get_drawable(&rmesa->radeon); GLfloat xoffset = (GLfloat)dPriv->x; GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h; const GLfloat *v = ctx->Viewport._WindowMap.m; @@ -1902,7 +1900,7 @@ void radeonUploadTexMatrix( r100ContextPtr rmesa, So: if we need the q coord in the end (solely determined by the texture target, i.e. 
2d / 1d / texrect targets) we swap the third and 4th row. Additionally, if we don't have texgen but 4 tex coords submitted, we swap - column 3 and 4 (for the 2d / 1d / texrect targets) since the the q coord + column 3 and 4 (for the 2d / 1d / texrect targets) since the q coord will get submitted in the "wrong", i.e. 3rd, slot. If an app submits 3 coords for 2d targets, we assume it is saving on vertex size and using the texture matrix to swap the r and q coords around (ut2k3 @@ -2256,7 +2254,6 @@ void radeonInitStateFuncs( GLcontext *ctx , GLboolean dri2 ) ctx->Driver.BlendFuncSeparate = radeonBlendFuncSeparate; ctx->Driver.ClearColor = radeonClearColor; ctx->Driver.ClearDepth = radeonClearDepth; - ctx->Driver.ClearIndex = NULL; ctx->Driver.ClearStencil = radeonClearStencil; ctx->Driver.ClipPlane = radeonClipPlane; ctx->Driver.ColorMask = radeonColorMask; @@ -2268,7 +2265,6 @@ void radeonInitStateFuncs( GLcontext *ctx , GLboolean dri2 ) ctx->Driver.Fogfv = radeonFogfv; ctx->Driver.FrontFace = radeonFrontFace; ctx->Driver.Hint = NULL; - ctx->Driver.IndexMask = NULL; ctx->Driver.LightModelfv = radeonLightModelfv; ctx->Driver.Lightfv = radeonLightfv; ctx->Driver.LineStipple = radeonLineStipple; diff --git a/radeon/radeon_state_init.c b/radeon/radeon_state_init.c index dd82888..91718a4 100644 --- a/radeon/radeon_state_init.c +++ b/radeon/radeon_state_init.c @@ -33,7 +33,6 @@ #include "swrast/swrast.h" #include "vbo/vbo.h" -#include "tnl/tnl.h" #include "tnl/t_pipeline.h" #include "swrast_setup/swrast_setup.h" @@ -41,9 +40,6 @@ #include "radeon_mipmap_tree.h" #include "radeon_ioctl.h" #include "radeon_state.h" -#include "radeon_tcl.h" -#include "radeon_tex.h" -#include "radeon_swtcl.h" #include "radeon_queryobj.h" #include "../r200/r200_reg.h" diff --git a/radeon/radeon_swtcl.c b/radeon/radeon_swtcl.c index e61f59e..f2fcb46 100644 --- a/radeon/radeon_swtcl.c +++ b/radeon/radeon_swtcl.c @@ -41,7 +41,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "main/simple_list.h" #include "swrast_setup/swrast_setup.h" -#include "math/m_translate.h" #include "tnl/tnl.h" #include "tnl/t_context.h" #include "tnl/t_pipeline.h" @@ -179,7 +178,7 @@ static void radeonSetVertexFormat( GLcontext *ctx ) for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX(i) )) { - GLuint sz = VB->TexCoordPtr[i]->size; + GLuint sz = VB->AttribPtr[_TNL_ATTRIB_TEX0 + i]->size; switch (sz) { case 1: @@ -309,7 +308,7 @@ void r100_swtcl_flush(GLcontext *ctx, uint32_t current_offset) radeonEmitState(&rmesa->radeon); radeonEmitVertexAOS( rmesa, rmesa->radeon.swtcl.vertex_size, - first_elem(&rmesa->radeon.dma.reserved)->bo, + rmesa->radeon.swtcl.bo, current_offset); @@ -525,7 +524,6 @@ static struct { #define DO_POINTS 1 #define DO_FULL_QUAD 1 -#define HAVE_RGBA 1 #define HAVE_SPEC 1 #define HAVE_BACK_COLORS 0 #define HAVE_HW_FLATSHADE 1 diff --git a/radeon/radeon_tcl.c b/radeon/radeon_tcl.c index b334ea0..ea796e1 100644 --- a/radeon/radeon_tcl.c +++ b/radeon/radeon_tcl.c @@ -46,7 +46,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "radeon_context.h" #include "radeon_state.h" #include "radeon_ioctl.h" -#include "radeon_tex.h" #include "radeon_tcl.h" #include "radeon_swtcl.h" #include "radeon_maos.h" @@ -412,6 +411,7 @@ static GLuint radeonEnsureEmitSize( GLcontext * ctx , GLuint inputs ) space_required += vbuf; else space_required += index + elts; + space_required += VB->Primitive[i].count * 3; space_required += AOS_BUFSZ(nr_aos); } space_required += SCISSOR_BUFSZ; diff --git a/radeon/radeon_tex.c b/radeon/radeon_tex.c index 749ab75..c66e5d1 100644 --- a/radeon/radeon_tex.c +++ b/radeon/radeon_tex.c @@ -44,9 +44,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "radeon_context.h" #include "radeon_mipmap_tree.h" -#include "radeon_state.h" #include "radeon_ioctl.h" -#include "radeon_swtcl.h" #include "radeon_tex.h" #include "xmlpool.h" @@ -341,7 +339,7 @@ static void radeonTexParameter( GLcontext *ctx, GLenum target, break; case GL_TEXTURE_BORDER_COLOR: - radeonSetTexBorderColor( t, texObj->BorderColor ); + radeonSetTexBorderColor( t, texObj->BorderColor.f ); break; case GL_TEXTURE_BASE_LEVEL: @@ -428,13 +426,13 @@ radeonNewTextureObject( GLcontext *ctx, GLuint name, GLenum target ) radeonSetTexWrap( t, t->base.WrapS, t->base.WrapT ); radeonSetTexMaxAnisotropy( t, t->base.MaxAnisotropy ); radeonSetTexFilter( t, t->base.MinFilter, t->base.MagFilter ); - radeonSetTexBorderColor( t, t->base.BorderColor ); + radeonSetTexBorderColor( t, t->base.BorderColor.f ); return &t->base; } -void radeonInitTextureFuncs( struct dd_function_table *functions ) +void radeonInitTextureFuncs( radeonContextPtr radeon, struct dd_function_table *functions ) { functions->ChooseTextureFormat = radeonChooseTextureFormat_mesa; functions->TexImage1D = radeonTexImage1D; @@ -455,6 +453,11 @@ void radeonInitTextureFuncs( struct dd_function_table *functions ) functions->CompressedTexImage2D = radeonCompressedTexImage2D; functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; + if (radeon->radeonScreen->kernel_mm) { + functions->CopyTexImage2D = radeonCopyTexImage2D; + functions->CopyTexSubImage2D = radeonCopyTexSubImage2D; + } + functions->GenerateMipmap = radeonGenerateMipmap; functions->NewTextureImage = radeonNewTextureImage; diff --git a/radeon/radeon_tex.h b/radeon/radeon_tex.h index a4aaddc..0113ffd 100644 --- a/radeon/radeon_tex.h +++ b/radeon/radeon_tex.h @@ -52,6 +52,6 @@ extern int radeonUploadTexImages( r100ContextPtr rmesa, radeonTexObjPtr t, extern void radeonDestroyTexObj( r100ContextPtr rmesa, radeonTexObjPtr t ); -extern void radeonInitTextureFuncs( struct dd_function_table *functions ); +extern void 
radeonInitTextureFuncs( radeonContextPtr radeon, struct dd_function_table *functions ); #endif /* __RADEON_TEX_H__ */ diff --git a/radeon/radeon_tex_copy.c b/radeon/radeon_tex_copy.c new file mode 100644 index 0000000..5cfad6f --- /dev/null +++ b/radeon/radeon_tex_copy.c @@ -0,0 +1,197 @@ +/* + * Copyright (C) 2009 Maciej Cencora <m.cencora@gmail.com> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include "radeon_common.h" +#include "radeon_texture.h" + +#include "main/image.h" +#include "main/teximage.h" +#include "main/texstate.h" +#include "drivers/common/meta.h" + +#include "radeon_mipmap_tree.h" + +static GLboolean +do_copy_texsubimage(GLcontext *ctx, + GLenum target, GLint level, + struct radeon_tex_obj *tobj, + radeon_texture_image *timg, + GLint dstx, GLint dsty, + GLint x, GLint y, + GLsizei width, GLsizei height) +{ + radeonContextPtr radeon = RADEON_CONTEXT(ctx); + struct radeon_renderbuffer *rrb; + unsigned src_bpp; + unsigned dst_bpp; + gl_format src_mesaformat; + gl_format dst_mesaformat; + unsigned src_width; + unsigned dst_width; + + if (!radeon->vtbl.blit) { + return GL_FALSE; + } + + if (_mesa_get_format_bits(timg->base.TexFormat, GL_DEPTH_BITS) > 0) { + rrb = radeon_get_depthbuffer(radeon); + } else { + rrb = radeon_get_colorbuffer(radeon); + } + + if (!timg->mt) { + radeon_validate_texture_miptree(ctx, &tobj->base); + } + + assert(rrb && rrb->bo); + assert(timg->mt); + assert(timg->mt->bo); + assert(timg->base.Width >= dstx + width); + assert(timg->base.Height >= dsty + height); + + intptr_t src_offset = rrb->draw_offset; + intptr_t dst_offset = radeon_miptree_image_offset(timg->mt, _mesa_tex_target_to_face(target), level); + + if (0) { + fprintf(stderr, "%s: copying to face %d, level %d\n", + __FUNCTION__, _mesa_tex_target_to_face(target), level); + fprintf(stderr, "to: x %d, y %d, offset %d\n", dstx, dsty, (uint32_t) dst_offset); + fprintf(stderr, "from (%dx%d) width %d, height %d, offset %d, pitch %d\n", + x, y, rrb->base.Width, rrb->base.Height, (uint32_t) src_offset, rrb->pitch/rrb->cpp); + fprintf(stderr, "src size %d, dst size %d\n", rrb->bo->size, timg->mt->bo->size); + + } + + src_mesaformat = rrb->base.Format; + dst_mesaformat = timg->base.TexFormat; + src_width = rrb->base.Width; + dst_width = timg->base.Width; + src_bpp = _mesa_get_format_bytes(src_mesaformat); + dst_bpp = _mesa_get_format_bytes(dst_mesaformat); + 
if (!radeon->vtbl.check_blit(dst_mesaformat)) { + /* depth formats tend to be special */ + if (_mesa_get_format_bits(dst_mesaformat, GL_DEPTH_BITS) > 0) + return GL_FALSE; + + if (src_bpp != dst_bpp) + return GL_FALSE; + + switch (dst_bpp) { + case 2: + src_mesaformat = MESA_FORMAT_RGB565; + dst_mesaformat = MESA_FORMAT_RGB565; + break; + case 4: + src_mesaformat = MESA_FORMAT_ARGB8888; + dst_mesaformat = MESA_FORMAT_ARGB8888; + break; + case 1: + src_mesaformat = MESA_FORMAT_A8; + dst_mesaformat = MESA_FORMAT_A8; + break; + default: + return GL_FALSE; + } + } + + /* blit from src buffer to texture */ + return radeon->vtbl.blit(ctx, rrb->bo, src_offset, src_mesaformat, rrb->pitch/rrb->cpp, + src_width, rrb->base.Height, x, y, + timg->mt->bo, dst_offset, dst_mesaformat, + timg->mt->levels[level].rowstride / dst_bpp, + dst_width, timg->base.Height, + dstx, dsty, width, height, 1); +} + +void +radeonCopyTexImage2D(GLcontext *ctx, GLenum target, GLint level, + GLenum internalFormat, + GLint x, GLint y, GLsizei width, GLsizei height, + GLint border) +{ + struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx); + struct gl_texture_object *texObj = + _mesa_select_tex_object(ctx, texUnit, target); + struct gl_texture_image *texImage = + _mesa_select_tex_image(ctx, texObj, target, level); + int srcx, srcy, dstx, dsty; + + if (border) + goto fail; + + /* Setup or redefine the texture object, mipmap tree and texture + * image. Don't populate yet. 
+ */ + ctx->Driver.TexImage2D(ctx, target, level, internalFormat, + width, height, border, + GL_RGBA, GL_UNSIGNED_BYTE, NULL, + &ctx->DefaultPacking, texObj, texImage); + + srcx = x; + srcy = y; + dstx = 0; + dsty = 0; + if (!_mesa_clip_copytexsubimage(ctx, + &dstx, &dsty, + &srcx, &srcy, + &width, &height)) { + return; + } + + if (!do_copy_texsubimage(ctx, target, level, + radeon_tex_obj(texObj), (radeon_texture_image *)texImage, + 0, 0, x, y, width, height)) { + goto fail; + } + + return; + +fail: + _mesa_meta_CopyTexImage2D(ctx, target, level, internalFormat, x, y, + width, height, border); +} + +void +radeonCopyTexSubImage2D(GLcontext *ctx, GLenum target, GLint level, + GLint xoffset, GLint yoffset, + GLint x, GLint y, + GLsizei width, GLsizei height) +{ + struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx); + struct gl_texture_object *texObj = _mesa_select_tex_object(ctx, texUnit, target); + struct gl_texture_image *texImage = _mesa_select_tex_image(ctx, texObj, target, level); + + if (!do_copy_texsubimage(ctx, target, level, + radeon_tex_obj(texObj), (radeon_texture_image *)texImage, + xoffset, yoffset, x, y, width, height)) { + + //DEBUG_FALLBACKS + + _mesa_meta_CopyTexSubImage2D(ctx, target, level, + xoffset, yoffset, x, y, width, height); + } +} diff --git a/radeon/radeon_texstate.c b/radeon/radeon_texstate.c index 3cbe3b4..f852116 100644 --- a/radeon/radeon_texstate.c +++ b/radeon/radeon_texstate.c @@ -639,7 +639,7 @@ void radeonSetTexOffset(__DRIcontext * pDRICtx, GLint texname, } } -void radeonSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_format, +void radeonSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_format, __DRIdrawable *dPriv) { struct gl_texture_unit *texUnit; @@ -656,7 +656,7 @@ void radeonSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_ type = GL_BGRA; format = GL_UNSIGNED_BYTE; - internalFormat = (glx_texture_format == GLX_TEXTURE_FORMAT_RGB_EXT ? 
3 : 4); + internalFormat = (texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4); radeon = pDRICtx->driverPrivate; rmesa = pDRICtx->driverPrivate; @@ -672,24 +672,13 @@ void radeonSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_ return; } - radeon_update_renderbuffers(pDRICtx, dPriv); - /* back & depth buffer are useless free them right away */ - rb = (void*)rfb->base.Attachment[BUFFER_DEPTH].Renderbuffer; - if (rb && rb->bo) { - radeon_bo_unref(rb->bo); - rb->bo = NULL; - } - rb = (void*)rfb->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer; - if (rb && rb->bo) { - radeon_bo_unref(rb->bo); - rb->bo = NULL; - } + radeon_update_renderbuffers(pDRICtx, dPriv, GL_TRUE); rb = rfb->color_rb[0]; if (rb->bo == NULL) { /* Failed to BO for the buffer */ return; } - + _mesa_lock_texture(radeon->glCtx, texObj); if (t->bo) { radeon_bo_unref(t->bo); @@ -716,7 +705,7 @@ void radeonSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_ t->override_offset = 0; switch (rb->cpp) { case 4: - if (glx_texture_format == GLX_TEXTURE_FORMAT_RGB_EXT) + if (texture_format == __DRI_TEXTURE_FORMAT_RGB) t->pp_txformat = tx_table[MESA_FORMAT_RGB888].format; else t->pp_txformat = tx_table[MESA_FORMAT_ARGB8888].format; @@ -751,7 +740,7 @@ void radeonSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_ void radeonSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv) { - radeonSetTexBuffer2(pDRICtx, target, GLX_TEXTURE_FORMAT_RGBA_EXT, dPriv); + radeonSetTexBuffer2(pDRICtx, target, __DRI_TEXTURE_FORMAT_RGBA, dPriv); } diff --git a/radeon/radeon_texture.c b/radeon/radeon_texture.c index 0317811..ff37fd3 100644 --- a/radeon/radeon_texture.c +++ b/radeon/radeon_texture.c @@ -33,6 +33,7 @@ #include "main/imports.h" #include "main/context.h" #include "main/convolve.h" +#include "main/enums.h" #include "main/mipmap.h" #include "main/texcompress.h" #include "main/texstore.h" @@ -53,6 +54,13 @@ void copy_rows(void* dst, GLuint dststride, 
const void* src, GLuint srcstride, assert(rowsize <= dststride); assert(rowsize <= srcstride); + radeon_print(RADEON_TEXTURE, RADEON_TRACE, + "%s dst %p, stride %u, src %p, stride %u, " + "numrows %u, rowsize %u.\n", + __func__, dst, dststride, + src, srcstride, + numrows, rowsize); + if (rowsize == srcstride && rowsize == dststride) { memcpy(dst, src, numrows*rowsize); } else { @@ -102,8 +110,12 @@ static void teximage_set_map_data(radeon_texture_image *image) { radeon_mipmap_level *lvl; - if (!image->mt) + if (!image->mt) { + radeon_warning("%s(%p) Trying to set map data without miptree.\n", + __func__, image); + return; + } lvl = &image->mt->levels[image->mtlevel]; @@ -117,6 +129,10 @@ static void teximage_set_map_data(radeon_texture_image *image) */ void radeon_teximage_map(radeon_texture_image *image, GLboolean write_enable) { + radeon_print(RADEON_TEXTURE, RADEON_VERBOSE, + "%s(img %p), write_enable %s.\n", + __func__, image, + write_enable ? "true": "false"); if (image->mt) { assert(!image->base.Data); @@ -128,6 +144,9 @@ void radeon_teximage_map(radeon_texture_image *image, GLboolean write_enable) void radeon_teximage_unmap(radeon_texture_image *image) { + radeon_print(RADEON_TEXTURE, RADEON_VERBOSE, + "%s(img %p)\n", + __func__, image); if (image->mt) { assert(image->base.Data); @@ -162,15 +181,31 @@ void radeonMapTexture(GLcontext *ctx, struct gl_texture_object *texObj) radeonTexObj* t = radeon_tex_obj(texObj); int face, level; - if (!radeon_validate_texture_miptree(ctx, texObj)) - return; + radeon_print(RADEON_TEXTURE, RADEON_VERBOSE, + "%s(%p, tex %p)\n", + __func__, ctx, texObj); + + if (!radeon_validate_texture_miptree(ctx, texObj)) { + radeon_error("%s(%p, tex %p) Failed to validate miptree for " + "sw fallback.\n", + __func__, ctx, texObj); + return; + } + + if (t->image_override && t->bo) { + radeon_print(RADEON_TEXTURE, RADEON_VERBOSE, + "%s(%p, tex %p) Work around for missing miptree in r100.\n", + __func__, ctx, texObj); - /* for r100 3D sw 
fallbacks don't have mt */ - if (t->image_override && t->bo) map_override(ctx, t); + } - if (!t->mt) + /* for r100 3D sw fallbacks don't have mt */ + if (!t->mt) { + radeon_warning("%s(%p, tex %p) No miptree in texture.\n", + __func__, ctx, texObj); return; + } radeon_bo_map(t->mt->bo, GL_FALSE); for(face = 0; face < t->mt->faces; ++face) { @@ -184,6 +219,10 @@ void radeonUnmapTexture(GLcontext *ctx, struct gl_texture_object *texObj) radeonTexObj* t = radeon_tex_obj(texObj); int face, level; + radeon_print(RADEON_TEXTURE, RADEON_VERBOSE, + "%s(%p, tex %p)\n", + __func__, ctx, texObj); + if (t->image_override && t->bo) unmap_override(ctx, t); /* for r100 3D sw fallbacks don't have mt */ @@ -197,21 +236,6 @@ void radeonUnmapTexture(GLcontext *ctx, struct gl_texture_object *texObj) radeon_bo_unmap(t->mt->bo); } -GLuint radeon_face_for_target(GLenum target) -{ - switch (target) { - case GL_TEXTURE_CUBE_MAP_POSITIVE_X: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: - return (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X; - default: - return 0; - } -} - /** * Wraps Mesa's implementation to ensure that the base level image is mapped. * @@ -225,6 +249,10 @@ static void radeon_generate_mipmap(GLcontext *ctx, GLenum target, GLuint nr_faces = (t->base.Target == GL_TEXTURE_CUBE_MAP) ? 
6 : 1; int i, face; + radeon_print(RADEON_TEXTURE, RADEON_VERBOSE, + "%s(%p, tex %p) Target type %s.\n", + __func__, ctx, texObj, + _mesa_lookup_enum_by_nr(target)); _mesa_generate_mipmap(ctx, target, texObj); @@ -248,8 +276,24 @@ static void radeon_generate_mipmap(GLcontext *ctx, GLenum target, void radeonGenerateMipmap(GLcontext* ctx, GLenum target, struct gl_texture_object *texObj) { - GLuint face = radeon_face_for_target(target); + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + struct radeon_bo *bo; + GLuint face = _mesa_tex_target_to_face(target); radeon_texture_image *baseimage = get_radeon_texture_image(texObj->Image[face][texObj->BaseLevel]); + bo = !baseimage->mt ? baseimage->bo : baseimage->mt->bo; + + radeon_print(RADEON_TEXTURE, RADEON_TRACE, + "%s(%p, target %s, tex %p)\n", + __func__, ctx, _mesa_lookup_enum_by_nr(target), + texObj); + + if (bo && radeon_bo_is_referenced_by_cs(bo, rmesa->cmdbuf.cs)) { + radeon_print(RADEON_TEXTURE, RADEON_NORMAL, + "%s(%p, tex %p) Trying to generate mipmap for texture " + "in processing by GPU.\n", + __func__, ctx, texObj); + radeon_firevertices(rmesa); + } radeon_teximage_map(baseimage, GL_FALSE); radeon_generate_mipmap(ctx, target, texObj); @@ -312,12 +356,14 @@ gl_format radeonChooseTextureFormat(GLcontext * ctx, (rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FORCE_16); (void)format; -#if 0 - fprintf(stderr, "InternalFormat=%s(%d) type=%s format=%s\n", + radeon_print(RADEON_TEXTURE, RADEON_TRACE, + "%s InternalFormat=%s(%d) type=%s format=%s\n", + __func__, _mesa_lookup_enum_by_nr(internalFormat), internalFormat, _mesa_lookup_enum_by_nr(type), _mesa_lookup_enum_by_nr(format)); - fprintf(stderr, "do32bpt=%d force16bpt=%d\n", do32bpt, force16bpt); -#endif + radeon_print(RADEON_TEXTURE, RADEON_TRACE, + "%s do32bpt=%d force16bpt=%d\n", + __func__, do32bpt, force16bpt); switch (internalFormat) { case 4: @@ -566,11 +612,10 @@ static void teximage_assign_miptree(radeonContextPtr rmesa, if (!t->mt || 
!radeon_miptree_matches_image(t->mt, texImage, face, level)) { radeon_miptree_unreference(&t->mt); radeon_try_alloc_miptree(rmesa, t); - if (RADEON_DEBUG & RADEON_TEXTURE) { - fprintf(stderr, "%s: texObj %p, texImage %p, face %d, level %d, " + radeon_print(RADEON_TEXTURE, RADEON_NORMAL, + "%s: texObj %p, texImage %p, face %d, level %d, " "texObj miptree doesn't match, allocated new miptree %p\n", __FUNCTION__, texObj, texImage, face, level, t->mt); - } } /* Miptree alocation may have failed, @@ -579,7 +624,9 @@ static void teximage_assign_miptree(radeonContextPtr rmesa, image->mtface = face; image->mtlevel = level; radeon_miptree_reference(t->mt, &image->mt); - } + } else + radeon_print(RADEON_TEXTURE, RADEON_VERBOSE, + "%s Failed to allocate miptree.\n", __func__); } static GLuint * allocate_image_offsets(GLcontext *ctx, @@ -590,7 +637,7 @@ static GLuint * allocate_image_offsets(GLcontext *ctx, int i; GLuint *offsets; - offsets = _mesa_malloc(depth * sizeof(GLuint)) ; + offsets = malloc(depth * sizeof(GLuint)) ; if (!offsets) { _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTex[Sub]Image"); return NULL; @@ -623,6 +670,10 @@ static void radeon_store_teximage(GLcontext* ctx, int dims, GLuint dstRowStride; GLuint *dstImageOffsets; + radeon_print(RADEON_TEXTURE, RADEON_TRACE, + "%s(%p, tex %p, image %p) compressed %d\n", + __func__, ctx, texObj, texImage, compressed); + if (image->mt) { dstRowStride = image->mt->levels[image->mtlevel].rowstride; } else if (t->bo) { @@ -639,6 +690,7 @@ static void radeon_store_teximage(GLcontext* ctx, int dims, unsigned alignedWidth = dstRowStride/_mesa_get_format_bytes(texImage->TexFormat); dstImageOffsets = allocate_image_offsets(ctx, alignedWidth, texImage->Height, texImage->Depth); if (!dstImageOffsets) { + radeon_warning("%s Failed to allocate dstImaeOffset.\n", __func__); return; } } else { @@ -684,7 +736,7 @@ static void radeon_store_teximage(GLcontext* ctx, int dims, } if (dims == 3) { - _mesa_free(dstImageOffsets); + 
free(dstImageOffsets); } radeon_teximage_unmap(image); @@ -710,20 +762,23 @@ static void radeon_teximage( radeon_texture_image* image = get_radeon_texture_image(texImage); GLint postConvWidth = width; GLint postConvHeight = height; - GLuint face = radeon_face_for_target(target); + GLuint face = _mesa_tex_target_to_face(target); + radeon_print(RADEON_TEXTURE, RADEON_NORMAL, + "%s %dd: texObj %p, texImage %p, face %d, level %d\n", + __func__, dims, texObj, texImage, face, level); { struct radeon_bo *bo; bo = !image->mt ? image->bo : image->mt->bo; if (bo && radeon_bo_is_referenced_by_cs(bo, rmesa->cmdbuf.cs)) { + radeon_print(RADEON_TEXTURE, RADEON_VERBOSE, + "%s Calling teximage for texture that is " + "queued for GPU processing.\n", + __func__); radeon_firevertices(rmesa); } } - if (RADEON_DEBUG & RADEON_TEXTURE) { - fprintf(stderr, "radeon_teximage%dd: texObj %p, texImage %p, face %d, level %d\n", - dims, texObj, texImage, face, level); - } t->validated = GL_FALSE; @@ -755,11 +810,10 @@ static void radeon_teximage( texImage->Height, texImage->Depth); texImage->Data = _mesa_alloc_texmemory(size); - if (RADEON_DEBUG & RADEON_TEXTURE) { - fprintf(stderr, "radeon_teximage%dd: texObj %p, texImage %p, " + radeon_print(RADEON_TEXTURE, RADEON_VERBOSE, + "%s %dd: texObj %p, texImage %p, " " no miptree assigned, using local memory %p\n", - dims, texObj, texImage, texImage->Data); - } + __func__, dims, texObj, texImage, texImage->Data); } } @@ -853,18 +907,22 @@ static void radeon_texsubimage(GLcontext* ctx, int dims, GLenum target, int leve radeonTexObj* t = radeon_tex_obj(texObj); radeon_texture_image* image = get_radeon_texture_image(texImage); + radeon_print(RADEON_TEXTURE, RADEON_NORMAL, + "%s %dd: texObj %p, texImage %p, face %d, level %d\n", + __func__, dims, texObj, texImage, + _mesa_tex_target_to_face(target), level); { struct radeon_bo *bo; bo = !image->mt ? 
image->bo : image->mt->bo; if (bo && radeon_bo_is_referenced_by_cs(bo, rmesa->cmdbuf.cs)) { + radeon_print(RADEON_TEXTURE, RADEON_VERBOSE, + "%s Calling texsubimage for texture that is " + "queued for GPU processing.\n", + __func__); radeon_firevertices(rmesa); } } - if (RADEON_DEBUG & RADEON_TEXTURE) { - fprintf(stderr, "radeon_texsubimage%dd: texObj %p, texImage %p, face %d, level %d\n", - dims, texObj, texImage, radeon_face_for_target(target), level); - } t->validated = GL_FALSE; if (compressed) { @@ -953,6 +1011,10 @@ radeon_get_tex_image(GLcontext * ctx, GLenum target, GLint level, { radeon_texture_image *image = get_radeon_texture_image(texImage); + radeon_print(RADEON_TEXTURE, RADEON_NORMAL, + "%s(%p, tex %p, image %p) compressed %d.\n", + __func__, ctx, texObj, image, compressed); + if (image->mt) { /* Map the texture image read-only */ radeon_teximage_map(image, GL_FALSE); diff --git a/radeon/radeon_texture.h b/radeon/radeon_texture.h index 906daf1..f09dd65 100644 --- a/radeon/radeon_texture.h +++ b/radeon/radeon_texture.h @@ -44,7 +44,6 @@ void radeonMapTexture(GLcontext *ctx, struct gl_texture_object *texObj); void radeonUnmapTexture(GLcontext *ctx, struct gl_texture_object *texObj); void radeonGenerateMipmap(GLcontext* ctx, GLenum target, struct gl_texture_object *texObj); int radeon_validate_texture_miptree(GLcontext * ctx, struct gl_texture_object *texObj); -GLuint radeon_face_for_target(GLenum target); gl_format radeonChooseTextureFormat_mesa(GLcontext * ctx, GLint internalFormat, @@ -126,4 +125,14 @@ void radeonGetCompressedTexImage(GLcontext *ctx, GLenum target, GLint level, struct gl_texture_object *texObj, struct gl_texture_image *texImage); +void radeonCopyTexImage2D(GLcontext *ctx, GLenum target, GLint level, + GLenum internalFormat, + GLint x, GLint y, GLsizei width, GLsizei height, + GLint border); + +void radeonCopyTexSubImage2D(GLcontext *ctx, GLenum target, GLint level, + GLint xoffset, GLint yoffset, + GLint x, GLint y, + GLsizei width, 
GLsizei height); + #endif diff --git a/radeon/server/radeon_egl.c b/radeon/server/radeon_egl.c deleted file mode 100644 index c16d66e..0000000 --- a/radeon/server/radeon_egl.c +++ /dev/null @@ -1,1088 +0,0 @@ -/* - * EGL driver for radeon_dri.so - */ -#include <assert.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <dirent.h> -#include <errno.h> -#include <fcntl.h> -#include <unistd.h> -#include <sys/ioctl.h> -#include <sys/mman.h> - -#include "eglconfig.h" -#include "eglcontext.h" -#include "egldisplay.h" -#include "egldriver.h" -#include "eglglobals.h" -#include "egllog.h" -#include "eglmode.h" -#include "eglscreen.h" -#include "eglsurface.h" -#include "egldri.h" - -#include "mtypes.h" -#include "memops.h" -#include "drm.h" -#include "drm_sarea.h" -#include "radeon_drm.h" -#include "radeon_dri.h" -#include "radeon.h" - -static size_t radeon_drm_page_size; - -/** - * radeon driver-specific driver class derived from _EGLDriver - */ -typedef struct radeon_driver -{ - _EGLDriver Base; /* base class/object */ - GLuint radeonStuff; -} radeonDriver; - -static int -RADEONSetParam(driDisplay *disp, int param, int value) -{ - drm_radeon_setparam_t sp; - int ret; - - memset(&sp, 0, sizeof(sp)); - sp.param = param; - sp.value = value; - - if ((ret=drmCommandWrite(disp->drmFD, DRM_RADEON_SETPARAM, &sp, sizeof(sp)))) { - fprintf(stderr,"Set param failed\n", ret); - return -1; - } - - return 0; -} - -static int -RADEONCheckDRMVersion(driDisplay *disp, RADEONInfoPtr info) -{ - drmVersionPtr version; - - version = drmGetVersion(disp->drmFD); - if (version) { - int req_minor, req_patch; - - /* Need 1.21.x for card type detection getparam - */ - req_minor = 21; - req_patch = 0; - - if (version->version_major != 1 || - version->version_minor < req_minor || - (version->version_minor == req_minor && - version->version_patchlevel < req_patch)) { - /* Incompatible drm version */ - fprintf(stderr, - "[dri] RADEONDRIScreenInit failed because of a version " - 
"mismatch.\n" - "[dri] radeon.o kernel module version is %d.%d.%d " - "but version 1.%d.%d or newer is needed.\n" - "[dri] Disabling DRI.\n", - version->version_major, - version->version_minor, - version->version_patchlevel, - req_minor, - req_patch); - drmFreeVersion(version); - return 0; - } - - info->drmMinor = version->version_minor; - drmFreeVersion(version); - } - - return 1; -} - - -/** - * \brief Compute base 2 logarithm. - * - * \param val value. - * - * \return base 2 logarithm of \p val. - */ -static int RADEONMinBits(int val) -{ - int bits; - - if (!val) return 1; - for (bits = 0; val; val >>= 1, ++bits); - return bits; -} - - -/* Initialize the PCI GART state. Request memory for use in PCI space, - * and initialize the Radeon registers to point to that memory. - */ -static int RADEONDRIPciInit(driDisplay *disp, RADEONInfoPtr info) -{ - int ret; - int flags = DRM_READ_ONLY | DRM_LOCKED | DRM_KERNEL; - int s, l; - - ret = drmScatterGatherAlloc(disp->drmFD, info->gartSize*1024*1024, - &info->gartMemHandle); - if (ret < 0) { - fprintf(stderr, "[pci] Out of memory (%d)\n", ret); - return 0; - } - fprintf(stderr, - "[pci] %d kB allocated with handle 0x%04lx\n", - info->gartSize*1024, (long) info->gartMemHandle); - - info->gartOffset = 0; - - /* Initialize the CP ring buffer data */ - info->ringStart = info->gartOffset; - info->ringMapSize = info->ringSize*1024*1024 + radeon_drm_page_size; - - info->ringReadOffset = info->ringStart + info->ringMapSize; - info->ringReadMapSize = radeon_drm_page_size; - - /* Reserve space for vertex/indirect buffers */ - info->bufStart = info->ringReadOffset + info->ringReadMapSize; - info->bufMapSize = info->bufSize*1024*1024; - - /* Reserve the rest for AGP textures */ - info->gartTexStart = info->bufStart + info->bufMapSize; - s = (info->gartSize*1024*1024 - info->gartTexStart); - l = RADEONMinBits((s-1) / RADEON_NR_TEX_REGIONS); - if (l < RADEON_LOG_TEX_GRANULARITY) l = RADEON_LOG_TEX_GRANULARITY; - info->gartTexMapSize = 
(s >> l) << l; - info->log2GARTTexGran = l; - - if (drmAddMap(disp->drmFD, info->ringStart, info->ringMapSize, - DRM_SCATTER_GATHER, flags, &info->ringHandle) < 0) { - fprintf(stderr, - "[pci] Could not add ring mapping\n"); - return 0; - } - fprintf(stderr, - "[pci] ring handle = 0x%08lx\n", info->ringHandle); - - if (drmAddMap(disp->drmFD, info->ringReadOffset, info->ringReadMapSize, - DRM_SCATTER_GATHER, flags, &info->ringReadPtrHandle) < 0) { - fprintf(stderr, - "[pci] Could not add ring read ptr mapping\n"); - return 0; - } - fprintf(stderr, - "[pci] ring read ptr handle = 0x%08lx\n", - info->ringReadPtrHandle); - - if (drmAddMap(disp->drmFD, info->bufStart, info->bufMapSize, - DRM_SCATTER_GATHER, 0, &info->bufHandle) < 0) { - fprintf(stderr, - "[pci] Could not add vertex/indirect buffers mapping\n"); - return 0; - } - fprintf(stderr, - "[pci] vertex/indirect buffers handle = 0x%08lx\n", - info->bufHandle); - - if (drmAddMap(disp->drmFD, info->gartTexStart, info->gartTexMapSize, - DRM_SCATTER_GATHER, 0, &info->gartTexHandle) < 0) { - fprintf(stderr, - "[pci] Could not add GART texture map mapping\n"); - return 0; - } - fprintf(stderr, - "[pci] GART texture map handle = 0x%08lx\n", - info->gartTexHandle); - - return 1; -} - - -/** - * \brief Initialize the AGP state - * - * \param ctx display handle. - * \param info driver private data. - * - * \return one on success, or zero on failure. - * - * Acquires and enables the AGP device. Reserves memory in the AGP space for - * the ring buffer, vertex buffers and textures. Initialize the Radeon - * registers to point to that memory and add client mappings. - */ -static int RADEONDRIAgpInit( driDisplay *disp, RADEONInfoPtr info) -{ - int mode, ret; - int s, l; - int agpmode = 1; - - if (drmAgpAcquire(disp->drmFD) < 0) { - fprintf(stderr, "[gart] AGP not available\n"); - return 0; - } - - mode = drmAgpGetMode(disp->drmFD); /* Default mode */ - /* Disable fast write entirely - too many lockups. 
- */ - mode &= ~RADEON_AGP_MODE_MASK; - switch (agpmode) { - case 4: mode |= RADEON_AGP_4X_MODE; - case 2: mode |= RADEON_AGP_2X_MODE; - case 1: default: mode |= RADEON_AGP_1X_MODE; - } - - if (drmAgpEnable(disp->drmFD, mode) < 0) { - fprintf(stderr, "[gart] AGP not enabled\n"); - drmAgpRelease(disp->drmFD); - return 0; - } - -#if 0 - /* Workaround for some hardware bugs */ - if (info->ChipFamily < CHIP_FAMILY_R200) - OUTREG(RADEON_AGP_CNTL, INREG(RADEON_AGP_CNTL) | 0x000e0000); -#endif - info->gartOffset = 0; - - if ((ret = drmAgpAlloc(disp->drmFD, info->gartSize*1024*1024, 0, NULL, - &info->gartMemHandle)) < 0) { - fprintf(stderr, "[gart] Out of memory (%d)\n", ret); - drmAgpRelease(disp->drmFD); - return 0; - } - fprintf(stderr, - "[gart] %d kB allocated with handle 0x%08x\n", - info->gartSize*1024, (unsigned)info->gartMemHandle); - - if (drmAgpBind(disp->drmFD, - info->gartMemHandle, info->gartOffset) < 0) { - fprintf(stderr, "[gart] Could not bind\n"); - drmAgpFree(disp->drmFD, info->gartMemHandle); - drmAgpRelease(disp->drmFD); - return 0; - } - - /* Initialize the CP ring buffer data */ - info->ringStart = info->gartOffset; - info->ringMapSize = info->ringSize*1024*1024 + radeon_drm_page_size; - - info->ringReadOffset = info->ringStart + info->ringMapSize; - info->ringReadMapSize = radeon_drm_page_size; - - /* Reserve space for vertex/indirect buffers */ - info->bufStart = info->ringReadOffset + info->ringReadMapSize; - info->bufMapSize = info->bufSize*1024*1024; - - /* Reserve the rest for AGP textures */ - info->gartTexStart = info->bufStart + info->bufMapSize; - s = (info->gartSize*1024*1024 - info->gartTexStart); - l = RADEONMinBits((s-1) / RADEON_NR_TEX_REGIONS); - if (l < RADEON_LOG_TEX_GRANULARITY) l = RADEON_LOG_TEX_GRANULARITY; - info->gartTexMapSize = (s >> l) << l; - info->log2GARTTexGran = l; - - if (drmAddMap(disp->drmFD, info->ringStart, info->ringMapSize, - DRM_AGP, DRM_READ_ONLY, &info->ringHandle) < 0) { - fprintf(stderr, "[gart] Could not 
add ring mapping\n"); - return 0; - } - fprintf(stderr, "[gart] ring handle = 0x%08lx\n", info->ringHandle); - - - if (drmAddMap(disp->drmFD, info->ringReadOffset, info->ringReadMapSize, - DRM_AGP, DRM_READ_ONLY, &info->ringReadPtrHandle) < 0) { - fprintf(stderr, - "[gart] Could not add ring read ptr mapping\n"); - return 0; - } - - fprintf(stderr, - "[gart] ring read ptr handle = 0x%08lx\n", - info->ringReadPtrHandle); - - if (drmAddMap(disp->drmFD, info->bufStart, info->bufMapSize, - DRM_AGP, 0, &info->bufHandle) < 0) { - fprintf(stderr, - "[gart] Could not add vertex/indirect buffers mapping\n"); - return 0; - } - fprintf(stderr, - "[gart] vertex/indirect buffers handle = 0x%08lx\n", - info->bufHandle); - - if (drmAddMap(disp->drmFD, info->gartTexStart, info->gartTexMapSize, - DRM_AGP, 0, &info->gartTexHandle) < 0) { - fprintf(stderr, - "[gart] Could not add AGP texture map mapping\n"); - return 0; - } - fprintf(stderr, - "[gart] AGP texture map handle = 0x%08lx\n", - info->gartTexHandle); - - return 1; -} - - -/** - * Initialize all the memory-related fields of the RADEONInfo object. - * This includes the various 'offset' and 'size' fields. - */ -static int -RADEONMemoryInit(driDisplay *disp, RADEONInfoPtr info) -{ - int width_bytes = disp->virtualWidth * disp->cpp; - int cpp = disp->cpp; - int bufferSize = ((disp->virtualHeight * width_bytes - + RADEON_BUFFER_ALIGN) - & ~RADEON_BUFFER_ALIGN); - int depthSize = ((((disp->virtualHeight+15) & ~15) * width_bytes - + RADEON_BUFFER_ALIGN) - & ~RADEON_BUFFER_ALIGN); - int l; - int pcie_gart_table_size = 0; - - info->frontOffset = 0; - info->frontPitch = disp->virtualWidth; - - if (disp->card_type==RADEON_CARD_PCIE) - pcie_gart_table_size = RADEON_PCIGART_TABLE_SIZE; - - /* Front, back and depth buffers - everything else texture?? 
- */ - info->textureSize = disp->fbSize - pcie_gart_table_size - 2 * bufferSize - depthSize; - - if (info->textureSize < 0) - return 0; - - l = RADEONMinBits((info->textureSize-1) / RADEON_NR_TEX_REGIONS); - if (l < RADEON_LOG_TEX_GRANULARITY) l = RADEON_LOG_TEX_GRANULARITY; - - /* Round the texture size up to the nearest whole number of - * texture regions. Again, be greedy about this, don't - * round down. - */ - info->log2TexGran = l; - info->textureSize = (info->textureSize >> l) << l; - - /* Set a minimum usable local texture heap size. This will fit - * two 256x256x32bpp textures. - */ - if (info->textureSize < 512 * 1024) { - info->textureOffset = 0; - info->textureSize = 0; - } - - /* Reserve space for textures */ - info->textureOffset = ((disp->fbSize - pcie_gart_table_size - info->textureSize + - RADEON_BUFFER_ALIGN) & - ~RADEON_BUFFER_ALIGN); - - /* Reserve space for the shared depth - * buffer. - */ - info->depthOffset = ((info->textureOffset - depthSize + - RADEON_BUFFER_ALIGN) & - ~RADEON_BUFFER_ALIGN); - info->depthPitch = disp->virtualWidth; - - info->backOffset = ((info->depthOffset - bufferSize + - RADEON_BUFFER_ALIGN) & - ~RADEON_BUFFER_ALIGN); - info->backPitch = disp->virtualWidth; - - if (pcie_gart_table_size) - info->pcieGartTableOffset = disp->fbSize - pcie_gart_table_size; - - fprintf(stderr, - "Will use back buffer at offset 0x%x, pitch %d\n", - info->backOffset, info->backPitch); - fprintf(stderr, - "Will use depth buffer at offset 0x%x, pitch %d\n", - info->depthOffset, info->depthPitch); - fprintf(stderr, - "Will use %d kb for textures at offset 0x%x\n", - info->textureSize/1024, info->textureOffset); - if (pcie_gart_table_size) - { - fprintf(stderr, - "Will use %d kb for PCIE GART Table at offset 0x%x\n", - pcie_gart_table_size/1024, info->pcieGartTableOffset); - } - - /* XXX I don't think these are needed. 
*/ -#if 0 - info->frontPitchOffset = (((info->frontPitch * cpp / 64) << 22) | - (info->frontOffset >> 10)); - - info->backPitchOffset = (((info->backPitch * cpp / 64) << 22) | - (info->backOffset >> 10)); - - info->depthPitchOffset = (((info->depthPitch * cpp / 64) << 22) | - (info->depthOffset >> 10)); -#endif - - if (pcie_gart_table_size) - RADEONSetParam(disp, RADEON_SETPARAM_PCIGART_LOCATION, info->pcieGartTableOffset); - - return 1; -} - - -/** - * \brief Initialize the kernel data structures and enable the CP engine. - * - * \param ctx display handle. - * \param info driver private data. - * - * \return non-zero on success, or zero on failure. - * - * This function is a wrapper around the DRM_RADEON_CP_INIT command, passing - * all the parameters in a drm_radeon_init_t structure. - */ -static int RADEONDRIKernelInit( driDisplay *disp, - RADEONInfoPtr info) -{ - int cpp = disp->bpp / 8; - drm_radeon_init_t drmInfo; - int ret; - - memset(&drmInfo, 0, sizeof(drmInfo)); - - if ( (info->ChipFamily >= CHIP_FAMILY_R300) ) - drmInfo.func = RADEON_INIT_R300_CP; - else if ( (info->ChipFamily == CHIP_FAMILY_R200) || - (info->ChipFamily == CHIP_FAMILY_RV250) || - (info->ChipFamily == CHIP_FAMILY_M9) || - (info->ChipFamily == CHIP_FAMILY_RV280) ) - drmInfo.func = RADEON_INIT_R200_CP; - else - drmInfo.func = RADEON_INIT_CP; - - /* This is the struct passed to the kernel module for its initialization */ - /* XXX problem here: - * The front/back/depth_offset/pitch fields may change depending upon - * which drawing surface we're using!!! They can't be set just once - * during initialization. - * Looks like we'll need a new ioctl to update these fields for drawing - * to other surfaces... 
- */ - drmInfo.sarea_priv_offset = sizeof(drm_sarea_t); - drmInfo.cp_mode = RADEON_DEFAULT_CP_BM_MODE; - drmInfo.gart_size = info->gartSize*1024*1024; - drmInfo.ring_size = info->ringSize*1024*1024; - drmInfo.usec_timeout = 1000; - drmInfo.fb_bpp = disp->bpp; - drmInfo.depth_bpp = disp->bpp; - drmInfo.front_offset = info->frontOffset; - drmInfo.front_pitch = info->frontPitch * cpp; - drmInfo.back_offset = info->backOffset; - drmInfo.back_pitch = info->backPitch * cpp; - drmInfo.depth_offset = info->depthOffset; - drmInfo.depth_pitch = info->depthPitch * cpp; - drmInfo.ring_offset = info->ringHandle; - drmInfo.ring_rptr_offset = info->ringReadPtrHandle; - drmInfo.buffers_offset = info->bufHandle; - drmInfo.gart_textures_offset = info->gartTexHandle; - - ret = drmCommandWrite(disp->drmFD, DRM_RADEON_CP_INIT, &drmInfo, - sizeof(drm_radeon_init_t)); - - return ret >= 0; -} - - -/** - * \brief Add a map for the vertex buffers that will be accessed by any - * DRI-based clients. - * - * \param ctx display handle. - * \param info driver private data. - * - * \return one on success, or zero on failure. - * - * Calls drmAddBufs() with the previously allocated vertex buffers. - */ -static int RADEONDRIBufInit( driDisplay *disp, RADEONInfoPtr info ) -{ - /* Initialize vertex buffers */ - info->bufNumBufs = drmAddBufs(disp->drmFD, - info->bufMapSize / RADEON_BUFFER_SIZE, - RADEON_BUFFER_SIZE, - (disp->card_type!=RADEON_CARD_AGP) ? DRM_SG_BUFFER : DRM_AGP_BUFFER, - info->bufStart); - - if (info->bufNumBufs <= 0) { - fprintf(stderr, - "[drm] Could not create vertex/indirect buffers list\n"); - return 0; - } - fprintf(stderr, - "[drm] Added %d %d byte vertex/indirect buffers\n", - info->bufNumBufs, RADEON_BUFFER_SIZE); - - return 1; -} - - -/** - * \brief Install an IRQ handler. - * - * \param disp display handle. - * \param info driver private data. - * - * Attempts to install an IRQ handler via drmCtlInstHandler(), falling back to - * IRQ-free operation on failure. 
- */ -static void RADEONDRIIrqInit(driDisplay *disp, RADEONInfoPtr info) -{ - if ((drmCtlInstHandler(disp->drmFD, 0)) != 0) - fprintf(stderr, "[drm] failure adding irq handler, " - "there is a device already using that irq\n" - "[drm] falling back to irq-free operation\n"); -} - - -/** - * \brief Initialize the AGP heap. - * - * \param disp display handle. - * \param info driver private data. - * - * This function is a wrapper around the DRM_RADEON_INIT_HEAP command, passing - * all the parameters in a drm_radeon_mem_init_heap structure. - */ -static void RADEONDRIAgpHeapInit(driDisplay *disp, - RADEONInfoPtr info) -{ - drm_radeon_mem_init_heap_t drmHeap; - - /* Start up the simple memory manager for gart space */ - drmHeap.region = RADEON_MEM_REGION_GART; - drmHeap.start = 0; - drmHeap.size = info->gartTexMapSize; - - if (drmCommandWrite(disp->drmFD, DRM_RADEON_INIT_HEAP, - &drmHeap, sizeof(drmHeap))) { - fprintf(stderr, - "[drm] Failed to initialized gart heap manager\n"); - } else { - fprintf(stderr, - "[drm] Initialized kernel gart heap manager, %d\n", - info->gartTexMapSize); - } -} - -static int RADEONGetCardType(driDisplay *disp, RADEONInfoPtr info) -{ - drm_radeon_getparam_t gp; - int ret; - - gp.param = RADEON_PARAM_CARD_TYPE; - gp.value = &disp->card_type; - - ret=drmCommandWriteRead(disp->drmFD, DRM_RADEON_GETPARAM, &gp, sizeof(gp)); - if (ret) { - fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_CARD_TYPE) : %d\n", ret); - return -1; - } - - return disp->card_type; -} - -/** - * Called at the start of each server generation. - * - * \param disp display handle. - * \param info driver private data. - * - * \return non-zero on success, or zero on failure. - * - * Performs static frame buffer allocation. Opens the DRM device and add maps - * to the SAREA, framebuffer and MMIO regions. Fills in \p info with more - * information. 
Creates a \e server context to grab the lock for the - * initialization ioctls and calls the other initilization functions in this - * file. Starts the CP engine via the DRM_RADEON_CP_START command. - * - * Setups a RADEONDRIRec structure to be passed to radeon_dri.so for its - * initialization. - */ -static int -RADEONScreenInit( driDisplay *disp, RADEONInfoPtr info, - RADEONDRIPtr pRADEONDRI) -{ - int i, err; - - /* XXX this probably isn't needed here */ - { - int width_bytes = (disp->virtualWidth * disp->cpp); - int maxy = disp->fbSize / width_bytes; - - if (maxy <= disp->virtualHeight * 3) { - _eglLog(_EGL_WARNING, - "Static buffer allocation failed -- " - "need at least %d kB video memory (have %d kB)\n", - (disp->virtualWidth * disp->virtualHeight * - disp->cpp * 3 + 1023) / 1024, - disp->fbSize / 1024); - return 0; - } - } - - /* Memory manager setup */ - if (!RADEONMemoryInit(disp, info)) { - return 0; - } - - /* Create a 'server' context so we can grab the lock for - * initialization ioctls. 
- */ - if ((err = drmCreateContext(disp->drmFD, &disp->serverContext)) != 0) { - _eglLog(_EGL_WARNING, "%s: drmCreateContext failed %d\n", - __FUNCTION__, err); - return 0; - } - - DRM_LOCK(disp->drmFD, disp->pSAREA, disp->serverContext, 0); - - /* Initialize the kernel data structures */ - if (!RADEONDRIKernelInit(disp, info)) { - _eglLog(_EGL_WARNING, "RADEONDRIKernelInit failed\n"); - DRM_UNLOCK(disp->drmFD, disp->pSAREA, disp->serverContext); - return 0; - } - - /* Initialize the vertex buffers list */ - if (!RADEONDRIBufInit(disp, info)) { - fprintf(stderr, "RADEONDRIBufInit failed\n"); - DRM_UNLOCK(disp->drmFD, disp->pSAREA, disp->serverContext); - return 0; - } - - /* Initialize IRQ */ - RADEONDRIIrqInit(disp, info); - - /* Initialize kernel gart memory manager */ - RADEONDRIAgpHeapInit(disp, info); - - /* Initialize the SAREA private data structure */ - { - drm_radeon_sarea_t *pSAREAPriv; - pSAREAPriv = (drm_radeon_sarea_t *)(((char*)disp->pSAREA) + - sizeof(drm_sarea_t)); - memset(pSAREAPriv, 0, sizeof(*pSAREAPriv)); - pSAREAPriv->pfState = info->page_flip_enable; - } - - for ( i = 0;; i++ ) { - drmMapType type; - drmMapFlags flags; - drm_handle_t handle, offset; - drmSize size; - int rc, mtrr; - - if ( ( rc = drmGetMap( disp->drmFD, i, &offset, &size, &type, &flags, &handle, &mtrr ) ) != 0 ) - break; - if ( type == DRM_REGISTERS ) { - pRADEONDRI->registerHandle = offset; - pRADEONDRI->registerSize = size; - break; - } - } - /* Quick hack to clear the front & back buffers. Could also use - * the clear ioctl to do this, but would need to setup hw state - * first. 
- */ - drimemsetio((char *)disp->pFB + info->frontOffset, - 0xEE, - info->frontPitch * disp->cpp * disp->virtualHeight ); - - drimemsetio((char *)disp->pFB + info->backOffset, - 0x30, - info->backPitch * disp->cpp * disp->virtualHeight ); - - - /* This is the struct passed to radeon_dri.so for its initialization */ - pRADEONDRI->deviceID = info->Chipset; - pRADEONDRI->width = disp->virtualWidth; - pRADEONDRI->height = disp->virtualHeight; - pRADEONDRI->depth = disp->bpp; /* XXX: depth */ - pRADEONDRI->bpp = disp->bpp; - pRADEONDRI->IsPCI = (disp->card_type != RADEON_CARD_AGP);; - pRADEONDRI->frontOffset = info->frontOffset; - pRADEONDRI->frontPitch = info->frontPitch; - pRADEONDRI->backOffset = info->backOffset; - pRADEONDRI->backPitch = info->backPitch; - pRADEONDRI->depthOffset = info->depthOffset; - pRADEONDRI->depthPitch = info->depthPitch; - pRADEONDRI->textureOffset = info->textureOffset; - pRADEONDRI->textureSize = info->textureSize; - pRADEONDRI->log2TexGran = info->log2TexGran; - pRADEONDRI->statusHandle = info->ringReadPtrHandle; - pRADEONDRI->statusSize = info->ringReadMapSize; - pRADEONDRI->gartTexHandle = info->gartTexHandle; - pRADEONDRI->gartTexMapSize = info->gartTexMapSize; - pRADEONDRI->log2GARTTexGran = info->log2GARTTexGran; - pRADEONDRI->gartTexOffset = info->gartTexStart; - pRADEONDRI->sarea_priv_offset = sizeof(drm_sarea_t); - - /* Don't release the lock now - let the VT switch handler do it. */ - - return 1; -} - - -/** - * \brief Get Radeon chip family from chipset number. - * - * \param info driver private data. - * - * \return non-zero on success, or zero on failure. - * - * Called by radeonInitFBDev() to set RADEONInfoRec::ChipFamily - * according to the value of RADEONInfoRec::Chipset. 
Fails if the - * chipset is unrecognized or not appropriate for this driver (i.e., not - * an r100 style radeon) - */ -static int get_chipfamily_from_chipset( RADEONInfoPtr info ) -{ - switch (info->Chipset) { - case PCI_CHIP_RADEON_LY: - case PCI_CHIP_RADEON_LZ: - info->ChipFamily = CHIP_FAMILY_M6; - break; - - case PCI_CHIP_RADEON_QY: - case PCI_CHIP_RADEON_QZ: - info->ChipFamily = CHIP_FAMILY_VE; - break; - - case PCI_CHIP_R200_QL: - case PCI_CHIP_R200_QN: - case PCI_CHIP_R200_QO: - case PCI_CHIP_R200_Ql: - case PCI_CHIP_R200_BB: - info->ChipFamily = CHIP_FAMILY_R200; - break; - - case PCI_CHIP_RV200_QW: /* RV200 desktop */ - case PCI_CHIP_RV200_QX: - info->ChipFamily = CHIP_FAMILY_RV200; - break; - - case PCI_CHIP_RADEON_LW: - case PCI_CHIP_RADEON_LX: - info->ChipFamily = CHIP_FAMILY_M7; - break; - - case PCI_CHIP_RV250_Id: - case PCI_CHIP_RV250_Ie: - case PCI_CHIP_RV250_If: - case PCI_CHIP_RV250_Ig: - info->ChipFamily = CHIP_FAMILY_RV250; - break; - - case PCI_CHIP_RV250_Ld: - case PCI_CHIP_RV250_Le: - case PCI_CHIP_RV250_Lf: - case PCI_CHIP_RV250_Lg: - info->ChipFamily = CHIP_FAMILY_M9; - break; - - case PCI_CHIP_RV280_Y_: - case PCI_CHIP_RV280_Ya: - case PCI_CHIP_RV280_Yb: - case PCI_CHIP_RV280_Yc: - info->ChipFamily = CHIP_FAMILY_RV280; - break; - - case PCI_CHIP_R300_ND: - case PCI_CHIP_R300_NE: - case PCI_CHIP_R300_NF: - case PCI_CHIP_R300_NG: - info->ChipFamily = CHIP_FAMILY_R300; - break; - - case PCI_CHIP_RV370_5460: - info->ChipFamily = CHIP_FAMILY_RV380; - break; - - default: - /* Original Radeon/7200 */ - info->ChipFamily = CHIP_FAMILY_RADEON; - } - - return 1; -} - - -/** - * \brief Initialize the framebuffer device mode - * - * \param disp display handle. - * - * \return one on success, or zero on failure. - * - * Fills in \p info with some default values and some information from \p disp - * and then calls RADEONScreenInit() for the screen initialization. - * - * Before exiting clears the framebuffer memory accessing it directly. 
- */ -static int radeonInitFBDev( driDisplay *disp, RADEONDRIPtr pRADEONDRI ) -{ - int err; - RADEONInfoPtr info = calloc(1, sizeof(*info)); - - disp->driverPrivate = (void *)info; - - info->gartFastWrite = RADEON_DEFAULT_AGP_FAST_WRITE; - info->gartSize = RADEON_DEFAULT_AGP_SIZE; - info->gartTexSize = RADEON_DEFAULT_AGP_TEX_SIZE; - info->bufSize = RADEON_DEFAULT_BUFFER_SIZE; - info->ringSize = RADEON_DEFAULT_RING_SIZE; - info->page_flip_enable = RADEON_DEFAULT_PAGE_FLIP; - - fprintf(stderr, - "Using %d MB AGP aperture\n", info->gartSize); - fprintf(stderr, - "Using %d MB for the ring buffer\n", info->ringSize); - fprintf(stderr, - "Using %d MB for vertex/indirect buffers\n", info->bufSize); - fprintf(stderr, - "Using %d MB for AGP textures\n", info->gartTexSize); - fprintf(stderr, - "page flipping %sabled\n", info->page_flip_enable?"en":"dis"); - - info->Chipset = disp->chipset; - - if (!get_chipfamily_from_chipset( info )) { - fprintf(stderr, "Unknown or non-radeon chipset -- cannot continue\n"); - fprintf(stderr, "==> Verify PCI BusID is correct in miniglx.conf\n"); - return 0; - } -#if 0 - if (info->ChipFamily >= CHIP_FAMILY_R300) { - fprintf(stderr, - "Direct rendering not yet supported on " - "Radeon 9700 and newer cards\n"); - return 0; - } -#endif - -#if 00 - /* don't seem to need this here */ - info->frontPitch = disp->virtualWidth; -#endif - - /* Check the radeon DRM version */ - if (!RADEONCheckDRMVersion(disp, info)) { - return 0; - } - - if (RADEONGetCardType(disp, info)<0) - return 0; - - if (disp->card_type!=RADEON_CARD_AGP) { - /* Initialize PCI */ - if (!RADEONDRIPciInit(disp, info)) - return 0; - } - else { - /* Initialize AGP */ - if (!RADEONDRIAgpInit(disp, info)) - return 0; - } - - if (!RADEONScreenInit( disp, info, pRADEONDRI)) - return 0; - - /* Initialize and start the CP if required */ - if ((err = drmCommandNone(disp->drmFD, DRM_RADEON_CP_START)) != 0) { - fprintf(stderr, "%s: CP start %d\n", __FUNCTION__, err); - return 0; - } - - return 
1; -} - - -/** - * Create list of all supported surface configs, attach list to the display. - */ -static EGLBoolean -radeonFillInConfigs(_EGLDisplay *disp, unsigned pixel_bits, - unsigned depth_bits, - unsigned stencil_bits, GLboolean have_back_buffer) -{ - _EGLConfig *configs; - _EGLConfig *c; - unsigned int i, num_configs; - unsigned int depth_buffer_factor; - unsigned int back_buffer_factor; - GLenum fb_format; - GLenum fb_type; - - /* Right now GLX_SWAP_COPY_OML isn't supported, but it would be easy - * enough to add support. Basically, if a context is created with an - * fbconfig where the swap method is GLX_SWAP_COPY_OML, pageflipping - * will never be used. - */ - static const GLenum back_buffer_modes[] = { - GLX_NONE, GLX_SWAP_UNDEFINED_OML /*, GLX_SWAP_COPY_OML */ - }; - - uint8_t depth_bits_array[2]; - uint8_t stencil_bits_array[2]; - - depth_bits_array[0] = depth_bits; - depth_bits_array[1] = depth_bits; - - /* Just like with the accumulation buffer, always provide some modes - * with a stencil buffer. It will be a sw fallback, but some apps won't - * care about that. - */ - stencil_bits_array[0] = 0; - stencil_bits_array[1] = (stencil_bits == 0) ? 8 : stencil_bits; - - depth_buffer_factor = ((depth_bits != 0) || (stencil_bits != 0)) ? 2 : 1; - back_buffer_factor = (have_back_buffer) ? 2 : 1; - - num_configs = depth_buffer_factor * back_buffer_factor * 2; - - if (pixel_bits == 16) { - fb_format = GL_RGB; - fb_type = GL_UNSIGNED_SHORT_5_6_5; - } else { - fb_format = GL_RGBA; - fb_type = GL_UNSIGNED_INT_8_8_8_8_REV; - } - - configs = calloc(sizeof(*configs), num_configs); - c = configs; - if (!_eglFillInConfigs(c, fb_format, fb_type, - depth_bits_array, stencil_bits_array, - depth_buffer_factor, - back_buffer_modes, back_buffer_factor, - GLX_TRUE_COLOR)) { - fprintf(stderr, "[%s:%u] Error creating FBConfig!\n", - __func__, __LINE__); - return EGL_FALSE; - } - - /* Mark the visual as slow if there are "fake" stencil bits. 
- */ - for (i = 0, c = configs; i < num_configs; i++, c++) { - int stencil = GET_CONFIG_ATTRIB(c, EGL_STENCIL_SIZE); - if ((stencil != 0) && (stencil != stencil_bits)) { - SET_CONFIG_ATTRIB(c, EGL_CONFIG_CAVEAT, EGL_SLOW_CONFIG); - } - } - - for (i = 0, c = configs; i < num_configs; i++, c++) - _eglAddConfig(disp, c); - - free(configs); - - return EGL_TRUE; -} - - -/** - * Show the given surface on the named screen. - * If surface is EGL_NO_SURFACE, disable the screen's output. - */ -static EGLBoolean -radeonShowScreenSurfaceMESA(_EGLDriver *drv, EGLDisplay dpy, EGLScreenMESA screen, - EGLSurface surface, EGLModeMESA m) -{ - EGLBoolean b = _eglDRIShowScreenSurfaceMESA(drv, dpy, screen, surface, m); - return b; -} - - -/** - * Called via eglInitialize() by user. - */ -static EGLBoolean -radeonInitialize(_EGLDriver *drv, EGLDisplay dpy, EGLint *major, EGLint *minor) -{ - __DRIframebuffer framebuffer; - driDisplay *display; - - /* one-time init */ - radeon_drm_page_size = getpagesize(); - - if (!_eglDRIInitialize(drv, dpy, major, minor)) - return EGL_FALSE; - - display = Lookup_driDisplay(dpy); - - framebuffer.dev_priv_size = sizeof(RADEONDRIRec); - framebuffer.dev_priv = malloc(sizeof(RADEONDRIRec)); - - /* XXX we shouldn't hard-code values here! */ - /* we won't know the screen surface size until the user calls - * eglCreateScreenSurfaceMESA(). 
- */ -#if 0 - display->virtualWidth = 1024; - display->virtualHeight = 768; -#else - display->virtualWidth = 1280; - display->virtualHeight = 1024; -#endif - display->bpp = 32; - display->cpp = 4; - - if (!_eglDRIGetDisplayInfo(display)) - return EGL_FALSE; - - framebuffer.base = display->pFB; - framebuffer.width = display->virtualWidth; - framebuffer.height = display->virtualHeight; - framebuffer.stride = display->virtualWidth; - framebuffer.size = display->fbSize; - radeonInitFBDev( display, framebuffer.dev_priv ); - - if (!_eglDRICreateDisplay(display, &framebuffer)) - return EGL_FALSE; - - if (!_eglDRICreateScreens(display)) - return EGL_FALSE; - - /* create a variety of both 32 and 16-bit configurations */ - radeonFillInConfigs(&display->Base, 32, 24, 8, GL_TRUE); - radeonFillInConfigs(&display->Base, 16, 16, 0, GL_TRUE); - - drv->Initialized = EGL_TRUE; - return EGL_TRUE; -} - - -/** - * The bootstrap function. Return a new radeonDriver object and - * plug in API functions. - */ -_EGLDriver * -_eglMain(_EGLDisplay *dpy) -{ - radeonDriver *radeon; - - radeon = (radeonDriver *) calloc(1, sizeof(*radeon)); - if (!radeon) { - return NULL; - } - - /* First fill in the dispatch table with defaults */ - _eglDRIInitDriverFallbacks(&radeon->Base); - - /* then plug in our radeon-specific functions */ - radeon->Base.API.Initialize = radeonInitialize; - radeon->Base.API.ShowScreenSurfaceMESA = radeonShowScreenSurfaceMESA; - - return &radeon->Base; -} diff --git a/radeon/server/radeon_reg.h b/radeon/server/radeon_reg.h index e81d7fd..1b33de1 100644 --- a/radeon/server/radeon_reg.h +++ b/radeon/server/radeon_reg.h @@ -1959,7 +1959,30 @@ #define RADEON_SE_ZBIAS_FACTOR 0x1db0 #define RADEON_SE_ZBIAS_CONSTANT 0x1db4 - +#define RADEON_SE_VTX_FMT 0x2080 +# define RADEON_SE_VTX_FMT_XY 0x00000000 +# define RADEON_SE_VTX_FMT_W0 0x00000001 +# define RADEON_SE_VTX_FMT_FPCOLOR 0x00000002 +# define RADEON_SE_VTX_FMT_FPALPHA 0x00000004 +# define RADEON_SE_VTX_FMT_PKCOLOR 0x00000008 +# 
define RADEON_SE_VTX_FMT_FPSPEC 0x00000010 +# define RADEON_SE_VTX_FMT_FPFOG 0x00000020 +# define RADEON_SE_VTX_FMT_PKSPEC 0x00000040 +# define RADEON_SE_VTX_FMT_ST0 0x00000080 +# define RADEON_SE_VTX_FMT_ST1 0x00000100 +# define RADEON_SE_VTX_FMT_Q1 0x00000200 +# define RADEON_SE_VTX_FMT_ST2 0x00000400 +# define RADEON_SE_VTX_FMT_Q2 0x00000800 +# define RADEON_SE_VTX_FMT_ST3 0x00001000 +# define RADEON_SE_VTX_FMT_Q3 0x00002000 +# define RADEON_SE_VTX_FMT_Q0 0x00004000 +# define RADEON_SE_VTX_FMT_BLND_WEIGHT_CNT_MASK 0x00038000 +# define RADEON_SE_VTX_FMT_N0 0x00040000 +# define RADEON_SE_VTX_FMT_XY1 0x08000000 +# define RADEON_SE_VTX_FMT_Z1 0x10000000 +# define RADEON_SE_VTX_FMT_W1 0x20000000 +# define RADEON_SE_VTX_FMT_N1 0x40000000 +# define RADEON_SE_VTX_FMT_Z 0x80000000 /* Registers for CP and Microcode Engine */ #define RADEON_CP_ME_RAM_ADDR 0x07d4 |