diff options
author | Richard Li <RichardZ.Li@amd.com> | 2009-04-17 14:30:13 -0400 |
---|---|---|
committer | Alex Deucher <alexdeucher@gmail.com> | 2009-04-17 14:30:13 -0400 |
commit | 042c0b2564fa13d7e44c43831ed3074a0d1dc73b (patch) | |
tree | 28f571ea5175fed52a0efc44a256b96c32f77600 | |
parent | becdd43ed9b0635b2a52f265c291bae043678b3d (diff) |
Initial 3D driver for R6xx/R7xx chips
This driver is not fully functional yet.
Authors:
Richard Li
Cooper Yuan
Matthias Hopf
50 files changed, 21663 insertions, 5 deletions
diff --git a/configure.ac b/configure.ac index cef21f9f5d..c0b7f1c5e8 100644 --- a/configure.ac +++ b/configure.ac @@ -672,7 +672,7 @@ if test "$mesa_driver" = dri; then # because there is no x86-64 system where they could *ever* # be used. if test "x$DRI_DIRS" = "xyes"; then - DRI_DIRS="i915 i965 mach64 mga r128 r200 r300 radeon \ + DRI_DIRS="i915 i965 mach64 mga r128 r200 r300 r600 radeon \ savage tdfx unichrome swrast" fi ;; @@ -680,13 +680,13 @@ if test "$mesa_driver" = dri; then # Build only the drivers for cards that exist on PowerPC. # At some point MGA will be added, but not yet. if test "x$DRI_DIRS" = "xyes"; then - DRI_DIRS="mach64 r128 r200 r300 radeon tdfx swrast" + DRI_DIRS="mach64 r128 r200 r300 r600 radeon tdfx swrast" fi ;; sparc*) # Build only the drivers for cards that exist on sparc` if test "x$DRI_DIRS" = "xyes"; then - DRI_DIRS="mach64 r128 r200 r300 radeon ffb swrast" + DRI_DIRS="mach64 r128 r200 r300 r600 radeon ffb swrast" fi ;; esac @@ -705,7 +705,7 @@ if test "$mesa_driver" = dri; then # ffb and gamma are missing because they have not been converted # to use the new interface. if test "x$DRI_DIRS" = "xyes"; then - DRI_DIRS="i810 i915 i965 mach64 mga r128 r200 r300 radeon tdfx \ + DRI_DIRS="i810 i915 i965 mach64 mga r128 r200 r300 r600 radeon tdfx \ unichrome savage sis swrast" fi ;; @@ -720,7 +720,7 @@ if test "$mesa_driver" = dri; then # default drivers if test "x$DRI_DIRS" = "xyes"; then - DRI_DIRS="i810 i915 i965 mach64 mga r128 r200 r300 radeon s3v \ + DRI_DIRS="i810 i915 i965 mach64 mga r128 r200 r300 r600 radeon s3v \ savage sis tdfx trident unichrome ffb swrast" fi diff --git a/src/mesa/drivers/dri/r600/Makefile b/src/mesa/drivers/dri/r600/Makefile new file mode 100644 index 0000000000..c59843fd45 --- /dev/null +++ b/src/mesa/drivers/dri/r600/Makefile @@ -0,0 +1,67 @@ +# src/mesa/drivers/dri/r600/Makefile + +TOP = ../../../../.. +include $(TOP)/configs/current + +LIBNAME = r600_dri.so + +#MINIGLX_SOURCES = server/radeon_dri.c + +#ifeq ($(USING_EGL), 1) +#EGL_SOURCES = server/radeon_egl.c +#endif + +COMMON_SOURCES = \ + ../../common/driverfuncs.c \ + ../common/mm.c \ + ../common/utils.c \ + ../common/texmem.c \ + ../common/vblank.c \ + ../common/xmlconfig.c \ + ../common/dri_util.c + +DRIVER_SOURCES = \ + r600_context.c \ + r600_id.c \ + r600_screen.c \ + r600_span.c \ + r600_cmdbuf.c \ + r600_mem.c \ + r600_emit.c \ + r700_assembler.c \ + r700_fragprog.c \ + r700_oglprog.c \ + r700_shader.c \ + r700_shaderinst.c \ + r700_vertprog.c \ + r700_chip.c \ + r700_state.c \ + r700_tex.c \ + r700_render.c \ + r700_clear.c \ + r700_debug.c \ + $(EGL_SOURCES) + +C_SOURCES = $(COMMON_SOURCES) $(DRIVER_SOURCES) + +DRIVER_DEFINES = + +SYMLINKS = \ + server/radeon_dri.h + +COMMON_SYMLINKS = + +##### TARGETS ##### + +include ../Makefile.template + +server: + mkdir -p server + +$(SYMLINKS): server + @[ -e $@ ] || ln -sf ../../radeon/$@ server/ + +$(COMMON_SYMLINKS): + @[ -e $@ ] || ln -sf ../radeon/$@ ./ + +symlinks: $(SYMLINKS) $(COMMON_SYMLINKS) diff --git a/src/mesa/drivers/dri/r600/defaultendian.h b/src/mesa/drivers/dri/r600/defaultendian.h new file mode 100644 index 0000000000..32caf32cd2 --- /dev/null +++ b/src/mesa/drivers/dri/r600/defaultendian.h @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Authors: + * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com> + */ + + +#ifndef _DEFINEENDIAN_H_ +#define _DEFINEENDIAN_H_ + +//We have to choose a reg bits orientation if there is no compile flag for it. +#if defined(LITTLEENDIAN_CPU) +#elif defined(BIGENDIAN_CPU) +#else +#define LITTLEENDIAN_CPU +#endif + +#endif //_DEFINEENDIAN_H_ diff --git a/src/mesa/drivers/dri/r600/r600_cmdbuf.c b/src/mesa/drivers/dri/r600/r600_cmdbuf.c new file mode 100644 index 0000000000..14b09d7bcc --- /dev/null +++ b/src/mesa/drivers/dri/r600/r600_cmdbuf.c @@ -0,0 +1,251 @@ +/* + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Authors: + * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com> + * CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com> + */ + +#include <errno.h> + +#include "mtypes.h" +#include "r600_common.h" +#include "r600_context.h" +#include "r600_lock.h" +#include "radeon_drm.h" +#include "imports.h" + +/* + * Allocate system memory for the command buffer and initialized the state + * this system memory will be filled with command and in r600FlushCmdBuffer + * it will be IOCTL to kernel module where kernel memory will be allocated + * and copy_from_user will be involked. + */ +void r600InitCmdBuf(context_t *context) +{ + int size; + /* Initialize command buffer */ + + size = 65536; /* hard code as (64*4)k, temp */ + context->cmdbuf.size = size; + context->cmdbuf.cmd_buf = (uint32_t *)CALLOC(size * 4); + context->cmdbuf.count_used = 0; + context->cmdbuf.count_reemit = 0; +} + +/* + * Make sure that enough space is available in the command buffer + * by flushing if necessary. + * + * \param dwords The number of dwords we need to be free on the command buffer + */ +static void r600EnsureCmdBufSpace(context_t *context, int dwords) +{ + assert(dwords < context->cmdbuf.size); + + if (context->cmdbuf.count_used + dwords > context->cmdbuf.size) + { + if(GL_TRUE == context->cmdbuf.bUseDMAasIndirect) + { + r600FlushIndirectBuffer(context); + } + else + { + r600FlushCmdBuffer(context); + } + } +} + +uint32_t *r600AllocCmdBuf(context_t *context, int dwords) +{ + uint32_t *ptr; + + r600EnsureCmdBufSpace(context, dwords); + + if (!context->cmdbuf.count_used) + { + fprintf(stderr, "Reemit state after flush %s\n", __FUNCTION__); + + /* TODO, if cmdbuf is not NULL, first emit old states */ + //r600EmitState(context); + } + + ptr = &context->cmdbuf.cmd_buf[context->cmdbuf.count_used]; + context->cmdbuf.count_used += dwords; + return ptr; +} + +void r600DestroyCmdBuf(context_t *context) +{ + FREE(context->cmdbuf.cmd_buf); +} + +/* + * flush command buffer to hw + */ +int r600FlushCmdBuffer(context_t *context) +{ + int ret; + drm_radeon_cmd_buffer_t cmd; + int start = 0; + + drmGetLock(context->fd, context->hwContext, DRM_LOCK_HELD); + + cmd.buf = (char *)(context->cmdbuf.cmd_buf + start); + cmd.bufsz = (context->cmdbuf.count_used - start) * 4; + cmd.nbox = 1; + /* + * TODO, temp allocate boxes here + */ + cmd.boxes = (struct drm_clip_rect *)CALLOC(sizeof(struct drm_clip_rect)); + cmd.boxes->x1 = 102; + cmd.boxes->y1 = 127; + cmd.boxes->x2 = 352; + cmd.boxes->y2 = 377; + + ret = drmCommandWrite(context->fd, DRM_RADEON_CMDBUF, &cmd, sizeof(cmd)); + + drmUnlock(context->fd, context->hwContext); + + context->cmdbuf.count_used = 0; + context->cmdbuf.count_reemit = 0; + + return ret; +} + +int r600InitIndirectBuffer(context_t *context) +{ + int i; + int iRet = 0; + drmDMAReq dma; + int nIndex; + int nSize; + + dma.context = context->hwContext; + dma.send_count = 0; + dma.request_count = 1; /* now we need one dma for indirect buffer */ + dma.request_size = 0x10000 * 4; /* 64k DWORDS*/ + dma.request_list = &nIndex; + dma.request_sizes = &nSize; + dma.flags = DRM_DMA_WAIT; + + for (i=0; i<2000000; i++) + { + drmGetLock(context->fd, context->hwContext, DRM_LOCK_READY); + + iRet = drmDMA(context->fd, &dma); + + if( iRet != -EBUSY ) + { + drmUnlock(context->fd,context); + + if(!iRet)break; + + return iRet; + } + + drmUnlock(context->fd, context->hwContext); + } + + if(2000000 == i) + { + /* drm always busy to map a buf entry for us */ + return -EBUSY; + } + + context->cmdbuf.size = (nSize & 0xFFFFFFC0)/4; + context->cmdbuf.cmd_buf = (unsigned int*)(context->screen->buffers->list[nIndex].address); + context->cmdbuf.count_used = 0; + context->cmdbuf.count_reemit = 0; + context->cmdbuf.nDMAindex = nIndex; + + return iRet; +} + +int r600FlushIndirectBuffer(context_t *context) +{ + int iRet; + unsigned int *dest; + drm_radeon_indirect_t ind; + + if(0 == context->cmdbuf.count_used) + { + return 0; + } + + if(context->cmdbuf.count_used < context->cmdbuf.size) + { /* not full cmd buf, need to check 16 DWORDs alignment. */ + dest = context->cmdbuf.cmd_buf + context->cmdbuf.count_used; + + while( (context->cmdbuf.count_used & 0xF) != 0 ) + { + *dest = 0x80000000; /* NOP */ + context->cmdbuf.count_used++; + }; + } + + ind.idx = context->cmdbuf.nDMAindex; + ind.start = 0; + ind.end = context->cmdbuf.count_used * 4; + ind.discard = 0; + + drmGetLock(context->fd, context->hwContext, DRM_LOCK_READY); + + iRet = drmCommandWriteRead(context->fd, + DRM_RADEON_INDIRECT, + &ind, + sizeof(drm_radeon_indirect_t)); + + drmUnlock(context->fd, context->hwContext); + + context->cmdbuf.count_used = 0; + context->cmdbuf.count_reemit = 0; + + return iRet; +} + +void r600Flush(GLcontext * ctx) +{ + int ret; + + R600_CONTEXT; + + if(GL_TRUE == context->cmdbuf.bUseDMAasIndirect) + { + ret = r600FlushIndirectBuffer(context); + } + else + { + ret = r600FlushCmdBuffer(context); + } + + if (ret) + { + fprintf(stderr, "drmRadeonCmdBuffer: %d\n", ret); + _mesa_exit(ret); + } +} + +void r600InitCmdBufferFuncs(struct dd_function_table *functions) +{ + functions->Flush = r600Flush; +} diff --git a/src/mesa/drivers/dri/r600/r600_cmdbuf.h b/src/mesa/drivers/dri/r600/r600_cmdbuf.h new file mode 100644 index 0000000000..6c7516e68e --- /dev/null +++ b/src/mesa/drivers/dri/r600/r600_cmdbuf.h @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Authors: + * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com> + * CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com> + */ + +#ifndef __R600_CMDBUF_H__ +#define __R600_CMDBUF_H__ + +#include "r600_common.h" +#include "mtypes.h" + +extern int r600InitIndirectBuffer(context_t *context); +extern int r600FlushIndirectBuffer(context_t *context); + +extern void r600Flush(GLcontext * ctx); +extern void r600InitCmdBufferFuncs(struct dd_function_table *functions); +extern void r600InitCmdBuf(context_t * context); +extern void r600DestroyCmdBuf(context_t *context); +extern int r600FlushCmdBuffer(context_t *context); +extern uint32_t *r600AllocCmdBuf(context_t *context, int dwords); + +#endif /* __R600_CMDBUF_H__ */ diff --git a/src/mesa/drivers/dri/r600/r600_common.h b/src/mesa/drivers/dri/r600/r600_common.h new file mode 100644 index 0000000000..e5d17b585e --- /dev/null +++ b/src/mesa/drivers/dri/r600/r600_common.h @@ -0,0 +1,114 @@ +/* + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * Copyright (C) 2008-2009 Matthias Hopf + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _R600_COMMON_H +#define _R600_COMMON_H + +#include "glheader.h" + +#include <xf86drm.h> + + +extern int debug; + +#define DEBUG_FUNC do { if (debug >= 1) fprintf (stderr, "[r600] %s (%s:%d)\n", __func__, __FILE__, __LINE__); } while (0) +#define DEBUG_FUNCF(x...) do { if (debug >= 1) { fprintf (stderr, "[r600] %s (%s:%d) ", __func__, __FILE__, __LINE__); fprintf (stderr, x); } } while (0) +#define DEBUGF(x...) do { if (debug >= 1) { fprintf (stderr, "[r600] " x); } } while (0) +#define DEBUGP(x...) do { if (debug >= 1) { fprintf (stderr, "" x); } } while (0) + + +#if __SIZEOF_LONG__ == 8 +# define PRINTF_INT64 "%ld" +# define PRINTF_UINT64 "%lu" +# define PRINTF_UINT64_HEX "%010lx" /* Yes, 64bit need 16 characters, but typically 10 are enough (mostly addresses) */ +#else +# define PRINTF_INT64 "%lld" +# define PRINTF_UINT64 "%llu" +# define PRINTF_UINT64_HEX "%010llx" +#endif + +typedef int16_t bool_t; + +typedef struct screen_s screen_t; +typedef struct context_s context_t; + + +/* Preambles */ +#define R600_CONTEXT context_t *context = (context_t *) ctx->DriverCtx + +#define R700_CONTEXT(ctx) ((context_t *)(ctx->DriverCtx)) + +/* Reference to GPU address only (struct for easy upgrade to target_t/mem_t) */ +typedef struct { + uint64_t gpu; +} gpu_t; + +/* Memory area, addressable on both CPU and GPU */ +typedef struct { + uint64_t gpu; + void *cpu; + uint32_t size; +} mem_t; + +/* Render target */ +typedef struct { + uint64_t gpu; + void *cpu; + uint32_t size; /* total, in bytes */ + uint32_t pitch; /* in pixels */ +} target_t; + +/* Memory area, addressable on both CPU and GPU, mapped by DRM */ +typedef struct { + uint64_t gpu; + void *cpu; + drmSize size; + drm_handle_t handle; +} memmap_t; + +/* Buffer to be sent to DRM */ +typedef struct { + uint32_t *data; + int pos; /* Current writing pos in # of uint32_t */ + int size; /* Total size in # of uint32_t */ + uint32_t fence; /* !=0: wait for fence before reuse */ +} buffer_t; + +/* Color formats */ +typedef struct { + float a, r, g, b; +} color_f_t; + +typedef union { + struct { + uint8_t a, r, g, b; /* TODO: probably not endian aware */ + } u8; + uint32_t u32; +} color_i_t; + + +/* Emit commands to set according to state. Only if state is active, though. + * May verify for (partially) changed state unless force == 1 (context lost). */ +typedef void (*emit_t) (context_t *context, void *state, int force); + + +#endif /* _R600_COMMON_H */ diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c new file mode 100644 index 0000000000..f84b6d7fa7 --- /dev/null +++ b/src/mesa/drivers/dri/r600/r600_context.c @@ -0,0 +1,453 @@ +/* + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * Copyright (C) 2008-2009 Matthias Hopf + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Authors: + * Matthias Hopf + * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com> + * CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com> + */ + +#include "r600_context.h" +#include "r600_span.h" +#include "r600_mem.h" + +#include "utils.h" +#include "drivers/common/driverfuncs.h" +#include "drm_sarea.h" /* r600CopyBuffer */ + +#include "tnl/tnl.h" +#include "tnl/t_pipeline.h" +#include "swrast/swrast.h" +#include "swrast_setup/swrast_setup.h" +#include "api_arrayelt.h" +#include "extensions.h" +#include "state.h" + +#include "r700_interface.h" + +#include "simple_list.h" +#include "xmlpool.h" + +#define need_GL_EXT_stencil_two_side +#define need_GL_ARB_multisample +#define need_GL_ARB_point_parameters +#define need_GL_ARB_texture_compression +#define need_GL_ARB_vertex_buffer_object +#define need_GL_ARB_vertex_program +#define need_GL_EXT_blend_minmax +//#define need_GL_EXT_fog_coord +#define need_GL_EXT_multi_draw_arrays +#define need_GL_EXT_secondary_color +#define need_GL_EXT_blend_equation_separate +#define need_GL_EXT_blend_func_separate +#define need_GL_EXT_gpu_program_parameters +#define need_GL_NV_vertex_program +#include "extension_helper.h" + +/* richard TODO */ +extern void GLAPIENTRY +_mesa_ProgramStringARB(GLenum target, GLenum format, GLsizei len, + const GLvoid *string); +/*-----------------*/ + +// TODO : copied from r300_context.c, need verify for r6/r7 +const struct dri_extension r600_card_extensions[] = { + /* *INDENT-OFF* */ + {"GL_ARB_depth_texture", NULL}, + {"GL_ARB_fragment_program", NULL}, + {"GL_ARB_multisample", NULL}, + {"GL_ARB_multitexture", NULL}, + {"GL_ARB_point_parameters", NULL}, + {"GL_ARB_shadow", NULL}, + {"GL_ARB_shadow_ambient", NULL}, + {"GL_ARB_texture_border_clamp", NULL}, + {"GL_ARB_texture_compression", NULL}, + {"GL_ARB_texture_cube_map", NULL}, + {"GL_ARB_texture_env_add", NULL}, + {"GL_ARB_texture_env_combine", NULL}, + {"GL_ARB_texture_env_crossbar", NULL}, + {"GL_ARB_texture_env_dot3", NULL}, + {"GL_ARB_texture_mirrored_repeat", NULL}, + {"GL_ARB_vertex_buffer_object", NULL}, + {"GL_ARB_vertex_program", GL_ARB_vertex_program_functions}, + {"GL_EXT_blend_equation_separate", NULL}, + {"GL_EXT_blend_func_separate", NULL}, + {"GL_EXT_blend_minmax", NULL}, + {"GL_EXT_blend_subtract", NULL}, +// {"GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, + {"GL_EXT_multi_draw_arrays", NULL}, + {"GL_EXT_gpu_program_parameters", NULL}, + {"GL_EXT_secondary_color", NULL}, + {"GL_EXT_shadow_funcs", NULL}, + {"GL_EXT_stencil_two_side", NULL}, + {"GL_EXT_stencil_wrap", NULL}, + {"GL_EXT_texture_edge_clamp", NULL}, + {"GL_EXT_texture_env_combine", NULL}, + {"GL_EXT_texture_env_dot3", NULL}, + {"GL_EXT_texture_filter_anisotropic", NULL}, + {"GL_EXT_texture_lod_bias", NULL}, + {"GL_EXT_texture_mirror_clamp", NULL}, + {"GL_EXT_texture_rectangle", NULL}, + {"GL_ATI_texture_env_combine3", NULL}, + {"GL_ATI_texture_mirror_once", NULL}, + {"GL_MESA_pack_invert", NULL}, + {"GL_MESA_ycbcr_texture", NULL}, + {"GL_MESAX_texture_float", NULL}, + {"GL_NV_blend_square", NULL}, + {"GL_NV_vertex_program", NULL}, + {"GL_SGIS_generate_mipmap", NULL}, + {"GL_SGIX_depth_texture", NULL}, + {NULL, NULL} + /* *INDENT-ON* */ +}; + +extern void r600EmitShader( GLcontext * ctx, + struct r600_dma_region *rvb, + GLvoid * data, + int sizeinDWORD); +extern void r600FreeDmaRegion(context_t *context, + struct r600_dma_region *region); +extern void r600EmitVec(GLcontext * ctx, + struct r600_dma_region *rvb, + GLvoid * data, + int size, + int stride, + int count); +extern void r600ReleaseArrays(GLcontext * ctx); +extern int r600FlushCmdBuffer(context_t *context); +extern void r600MemUse(context_t *context, int id); + +extern int r600FlushIndirectBuffer(context_t *context); + +extern const struct tnl_pipeline_stage *r600_pipeline[]; + +/* glGetString() */ +static const GLubyte * +r600GetString (GLcontext *ctx, GLenum name) +{ + R600_CONTEXT; + + switch (name) { + case GL_VENDOR: + return (GLubyte *) "rhd DRI project"; + case GL_RENDERER: + switch (context->screen->chip.type & CHIP_TYPE_ARCH_MASK) { + case CHIP_TYPE_ARCH_R6xx: + return (GLubyte *) "R6xx"; + case CHIP_TYPE_ARCH_R7xx: + return (GLubyte *) "R7xx"; + default: + assert (0); + } + } + return NULL; +} + +/* dummy for now. TODO : tex lbj delete func. */ +void rDestroyTexObj(void * driverContext, driTextureObject * t) +{ + return; +} + +/* + * API + */ + +GLboolean +r600CreateContext (const __GLcontextModes *glVisual, + __DRIcontextPrivate *driContextPriv, + void *sharedContextPriv) +{ + __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv; + screen_t *screen = (screen_t *) sPriv->private; + context_t *context; + GLcontext *ctx, *sharedCtx; + struct dd_function_table functions; + + unsigned int ui; + + assert (glVisual); + assert (screen); + + /* Allocate context */ + if (! (context = CALLOC (sizeof(*context))) ) + return GL_FALSE; + + /* Init default driver functions */ + _mesa_init_driver_functions (&functions); + + context->cmdbuf.bUseDMAasIndirect = GL_FALSE; + + r700InitChipObject(context); /* let the eag... */ + + (context->chipobj.InitFuncs)(&functions); + + context->chipobj.EmitShader = r600EmitShader; + context->chipobj.FreeDmaRegion = r600FreeDmaRegion; + context->chipobj.EmitVec = r600EmitVec; + context->chipobj.ReleaseArrays = r600ReleaseArrays; + context->chipobj.LoadMemSurf = r600LoadMemSurf; + context->chipobj.AllocMemSurf = r600AllocMemSurf; + if(GL_TRUE == context->cmdbuf.bUseDMAasIndirect) + { + context->chipobj.FlushCmdBuffer = r600FlushIndirectBuffer; + } + else + { + context->chipobj.FlushCmdBuffer = r600FlushCmdBuffer; + } + context->chipobj.MemUse = r600MemUse; + + for(ui=0; ui<R600_MAX_AOS_ARRAYS; ui++) + { + context->aos[ui].buf = NULL; + } + + /* Fill in additional standard functions. */ + functions.GetString = r600GetString; + + r600MemInit(context); + + /* Allocate and initialize the Mesa context */ + if (sharedContextPriv) + sharedCtx = ((context_t *)sharedContextPriv)->ctx; + else + sharedCtx = NULL; + + if (! (ctx = _mesa_create_context (glVisual, sharedCtx, + &functions, context)) ) + return GL_FALSE; + + context->ctx = ctx; + context->screen = screen; + context->fd = sPriv->fd; + context->hwContext = driContextPriv->hHWContext; + context->hwLock = &sPriv->pSAREA->lock; + context->sarea = (drm_radeon_sarea_t *) ((GLubyte *) sPriv->pSAREA + + screen->sareaPrivOffset); + + if( (screen->chip.type & CHIP_TYPE_ARCH_MASK) == CHIP_TYPE_ARCH_R7xx) + { + /* use vram only for now */ + context->nr_heaps = 1; + (void)memset(context->texture_heaps, 0, sizeof(context->texture_heaps)); + make_empty_list(&context->swapped); + context->texture_heaps[RADEON_LOCAL_TEX_HEAP] = + driCreateTextureHeap(RADEON_LOCAL_TEX_HEAP, + (void*)context, + (unsigned)screen->texSize[RADEON_LOCAL_TEX_HEAP], + 8, /* 256 bytes aligned */ + RADEON_NR_TEX_REGIONS, /* 64 regions */ + (drmTextureRegionPtr)&context->sarea->tex_list[RADEON_LOCAL_TEX_HEAP], + (unsigned *)&context->sarea->tex_age[RADEON_LOCAL_TEX_HEAP], + (driTextureObject *)&context->swapped, + (unsigned)(context->chipobj.GetTexObjSize)(), + (destroy_texture_object_t *)rDestroyTexObj); + } + + ctx->Const.MinPointSize = 0x0001 / 8.0; /* PA_SU_POINT_SIZE: 12.4 bits radius */ + ctx->Const.MaxPointSize = 0xffff / 8.0; + ctx->Const.MinPointSizeAA = 0x0001 / 8.0; + ctx->Const.MaxPointSizeAA = 0xffff / 8.0; + ctx->Const.PointSizeGranularity = 0x0001 / 8.0; + ctx->Const.MinLineWidth = 0x0001 / 8.0; /* PA_SU_LINE_CNTL: 12.4 bits halfwidth */ + ctx->Const.MaxLineWidth = 0xffff / 8.0; + ctx->Const.MinLineWidthAA = 0x0001 / 8.0; + ctx->Const.MaxLineWidthAA = 0xffff / 8.0; + ctx->Const.LineWidthGranularity = 0x0001 / 8.0; + + /* TODO : driQueryOptioni, and check r6/r7 caps */ + context->texture_depth = DRI_CONF_TEXTURE_DEPTH_32; + ctx->Const.MaxTextureLevels = 12; /**< Maximum number of allowed mipmap levels. */ + ctx->Const.Max3DTextureLevels = 9; /**< Maximum number of allowed mipmap levels for 3D texture targets. */ + ctx->Const.MaxCubeTextureLevels = 12; /**< Maximum number of allowed mipmap levels for GL_ARB_texture_cube_map */ + ctx->Const.MaxArrayTextureLayers = 64; /**< Maximum number of layers in an array texture. */ + ctx->Const.MaxTextureRectSize = 2048; /* GL_NV_texture_rectangle */ + ctx->Const.MaxTextureCoordUnits = 8; + ctx->Const.MaxTextureImageUnits = 8; + ctx->Const.MaxTextureUnits = 8; /* = MIN(CoordUnits, ImageUnits) */ + ctx->Const.MaxTextureMaxAnisotropy = 16; /* GL_EXT_texture_filter_anisotropic */ + ctx->Const.MaxTextureLodBias = 0; /* GL_EXT_texture_lod_bias */ + ctx->Const.MaxTextureLevels = 12; /* Max width is 8192, but max pitch is 16k -> max 4096 */ + ctx->Const.MaxTextureRectSize = 2048; + +#if 0 + GLuint MaxClipPlanes; + GLuint MaxLights; + GLfloat MaxShininess; /* GL_NV_light_max_exponent */ + GLfloat MaxSpotExponent; /* GL_NV_light_max_exponent */ + struct gl_program_constants VertexProgram; /* GL_ARB_vertex_program */ + struct gl_program_constants FragmentProgram; /* GL_ARB_fragment_program */ + /* shared by vertex and fragment program: */ + GLuint MaxProgramMatrices; + GLuint MaxProgramMatrixStackDepth; + /* vertex array / buffer object bounds checking */ + GLboolean CheckArrayBounds; + /* GL_ARB_draw_buffers */ + GLuint MaxDrawBuffers; + /* GL_OES_read_format */ + GLenum ColorReadFormat; + GLenum ColorReadType; + /* GL_EXT_framebuffer_object */ + GLuint MaxColorAttachments; + GLuint MaxRenderbufferSize; + /* GL_ARB_vertex_shader */ + GLuint MaxVertexTextureImageUnits; + GLuint MaxVarying; +#endif + + /* Initialize the software rasterizer and helper modules */ + _swrast_CreateContext (ctx); + _vbo_CreateContext (ctx); + _tnl_CreateContext (ctx); + _swsetup_CreateContext (ctx); + _swsetup_Wakeup (ctx); + _ae_create_context (ctx); + + /* Install the customized pipeline */ + _tnl_destroy_pipeline (ctx); + _tnl_install_pipeline(ctx, (const struct tnl_pipeline_stage **)(context->chipobj.stages)); + + /* Configure swrast and TNL to match hardware characteristics */ + _swrast_allow_pixel_fog(ctx, GL_FALSE); + _swrast_allow_vertex_fog(ctx, GL_TRUE); + _tnl_allow_pixel_fog(ctx, GL_FALSE); + _tnl_allow_vertex_fog(ctx, GL_TRUE); + + /* TODO: vertex programs */ + ctx->Const.VertexProgram.MaxInstructions = 1024; + ctx->Const.VertexProgram.MaxNativeInstructions = 1024; + ctx->Const.VertexProgram.MaxNativeAttribs = 16; //For init run + ctx->Const.VertexProgram.MaxTemps = 223; //256 - 1(R0) - 16 ins -16 outs + ctx->Const.VertexProgram.MaxNativeTemps = 223; + ctx->Const.VertexProgram.MaxNativeParameters = 256; + ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; + + ctx->Const.FragmentProgram.MaxNativeTemps = 232; //256 - 9 outs - 15 ins + ctx->Const.FragmentProgram.MaxNativeAttribs = 11; /* why? copy i915... */ + ctx->Const.FragmentProgram.MaxNativeParameters = 256; + ctx->Const.FragmentProgram.MaxNativeAluInstructions = 1024; + ctx->Const.FragmentProgram.MaxNativeTexInstructions = 64; //ONLY limited by inst, almost no + ctx->Const.FragmentProgram.MaxNativeInstructions = 1024; + ctx->Const.FragmentProgram.MaxNativeTexIndirections = 4; //PFS_MAX_TEX_INDIRECT; //Need check + ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; /* and these are?? */ + + _tnl_ProgramCacheInit(ctx); + + ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE; + + driInitExtensions (ctx, r600_card_extensions, GL_FALSE); + + r600InitSpanFuncs(ctx); + + if(GL_TRUE == context->cmdbuf.bUseDMAasIndirect) + { + r600InitIndirectBuffer(context); + } + else + { + r600InitCmdBuf(context); + } +/* r600InitStateObjects (context); */ + + TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline; + + driContextPriv->driverPrivate = context; + + /* richard TODO */ + ctx->CurrentDispatch->ProgramStringARB = _mesa_ProgramStringARB; + /*------------- */ + + return GL_TRUE; +} + +void +r600DestroyContext (__DRIcontextPrivate * driContextPriv) +{ + GET_CURRENT_CONTEXT (ctx); + context_t *context = (context_t *) driContextPriv->driverPrivate; + + DEBUG_FUNC; + + if (ctx == context->ctx) { + // TODO: Flush + _mesa_make_current (NULL, NULL, NULL); + } + + r600DestroyCmdBuf(context); + /* TODO: texture release, gart allocs, cmd bufs, cliprects, option cache */ + r600MemDestroy(context); + + (context->chipobj.DestroyChipObj)(context->chipobj.pvChipObj); + + FREE (context); +} + +GLboolean +r600MakeCurrent (__DRIcontextPrivate *driContextPriv, + __DRIdrawablePrivate *driDrawPriv, + __DRIdrawablePrivate *driReadPriv) +{ + context_t *context = (context_t *) driContextPriv->driverPrivate; + + DEBUG_FUNC; + DEBUGF("Context %p Draw %p Read %p\n", driContextPriv, driDrawPriv, driReadPriv); + + if (! driContextPriv) { + _mesa_make_current (NULL, NULL, NULL); + return GL_TRUE; + } + + /* TODO: set VBlank */ + + if (context->currentDraw != driDrawPriv || + context->lastStamp != driDrawPriv->lastStamp) { +// context->currentDraw = driDrawPriv; +// r600SetCliprects (context); +// r600UpdateViewportOffset (context); /* TODO: */ + } + + context->currentDraw = driDrawPriv; + context->currentRead = driReadPriv; + + /* Got drawable, init chip context states. */ + (context->chipobj.InitState)(context->ctx); + + _mesa_make_current (context->ctx, + (GLframebuffer *) driDrawPriv->driverPrivate, + (GLframebuffer *) driReadPriv->driverPrivate); + + _mesa_update_state (context->ctx); + + return GL_TRUE; +} + +GLboolean +r600UnbindContext (__DRIcontextPrivate * driContextPriv) +{ + DEBUG_FUNC; + + return GL_TRUE; +} + + diff --git a/src/mesa/drivers/dri/r600/r600_context.h b/src/mesa/drivers/dri/r600/r600_context.h new file mode 100644 index 0000000000..445fa1f10d --- /dev/null +++ b/src/mesa/drivers/dri/r600/r600_context.h @@ -0,0 +1,411 @@ +/* + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * Copyright (C) 2008-2009 Matthias Hopf + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _R600_CONTEXT_H +#define _R600_CONTEXT_H + +#include "r600_common.h" +#include "r600_screen.h" + +#include "mtypes.h" +#include "radeon_drm.h" + +#include "texmem.h" + +#include "main/dd.h" +#include "tnl/t_pipeline.h" + + +/* From http://gcc.gnu.org/onlinedocs/gcc-3.2.3/gcc/Variadic-Macros.html . + I suppose we could inline this and use macro to fetch out __LINE__ and stuff in case we run into trouble + with other compilers ... GLUE! +*/ +#define WARN_ONCE(a, ...) \ +{ \ + static int warn##__LINE__=1; \ + if(warn##__LINE__) \ + { \ + fprintf(stderr, "**********WARN_ONCE******************\n"); \ + fprintf(stderr, "File %s function %s line %d\n", __FILE__, \ + __FUNCTION__, __LINE__); \ + fprintf(stderr, a, ## __VA_ARGS__); \ + fprintf(stderr, "*************************************\n"); \ + warn##__LINE__=0; \ + } \ +} + +#if defined(USE_X86_ASM) +#define COPY_DWORDS( dst, src, nr ) \ + do { \ + int __tmp; \ + __asm__ __volatile__( "rep ; movsl" \ + : "=%c" (__tmp), "=D" (dst), "=S" (__tmp) \ + : "0" (nr), \ + "D" ((long)dst), \ + "S" ((long)src) ); \ + } while (0) +#else +#define COPY_DWORDS( dst, src, nr ) \ + do { \ + int j; \ + for ( j = 0 ; j < nr ; j++ ) \ + dst[j] = ((int *)src)[j]; \ + dst += nr; \ + } while (0) +#endif + +#define GET_START(dr) \ + (r600GARTOffsetFromVirtual(context, (dr)->address + (dr)->start)) + +#define R600_FALLBACK_NONE 0 +#define R600_FALLBACK_TCL 1 +#define R600_FALLBACK_RAST 2 + +/* TODO: textures */ +/* Pretty evil + simple shortcut - max. numbers of textures used simultaneously */ +#define NUM_TEXTURES 16 + + + +/* + * State object. + * State is concentrated into these objects, and only submitted to the HW if necessary. + */ + +/* TODO: verify what is necessary, and what can be derived from GLcontext trivially */ +/* state: render targets */ +typedef struct { + target_t rt; + target_t depth; + color_f_t clear_col; + float clear_depth; +} state_target_t; + +/* state: viewport + scissors */ +typedef struct { +} state_viewport_t; + +/* state: modelview, projection, inverse modelview matrices */ +typedef struct { +} state_matrices_t; + +/* state: depth, alpha, stencil tests */ +typedef struct { + int depth_func; /* something's wrong here, still */ + int alpha_func; /* TODO: see XXX */ + int stencil_func; + float alpha_ref; + uint32_t stencil_ref; +} state_tests_t; + +/* state: blending mode */ +typedef struct { + int blend_mode; /* TODO: see XXX */ + color_f_t blend_col; +} state_blend_t; + +/* state: vertex submission format */ +/* doesn't emit any commands, but is updated by glColor() etc., + * cleared by glBegin(), and used by glEnd(). */ +typedef struct { + /* unless we know how to reliably submit integer coords, + * we are always float for coords, and possibly uint8_t for color */ + int vert_dim; /* dimensionality: 2, 3, or 4 */ + bool_t normals; /* always 3-dimensional */ + bool_t colors; /* always 4-dimensional */ + bool_t colors_float; + int tex_dim[NUM_TEXTURES]; /* dimensionality: 1, 2, 3, or 4 */ +} state_vtx_format_t; + +/* Vertex buffer / vtx resource */ +typedef struct +{ + int id; + uint32_t vb_addr; + uint32_t vtx_num_entries; + uint32_t vtx_size_dw; + int clamp_x; + int format; + int num_format_all; + int format_comp_all; + int srf_mode_all; + int endian; + int mem_req_size; +} vtx_resource_t; + +/* Shader */ +typedef struct +{ + uint32_t shader_addr; + int num_gprs; + int stack_size; + int dx10_clamp; + int fetch_cache_lines; + int clamp_consts; + int export_mode; + int uncached_first_inst; +} shader_config_t; + +/* Draw command */ +typedef struct +{ + uint32_t prim_type; + uint32_t vgt_draw_initiator; + uint32_t index_type; + uint32_t num_instances; + uint32_t num_indices; +} draw_config_t; + +/* Color buffer / render target */ +typedef struct +{ + int id; + int w; + int h; + uint32_t base; + int format; + int endian; + int array_mode; // tiling + int number_type; + int read_size; + int comp_swap; + int tile_mode; + int blend_clamp; + int clear_color; + int blend_bypass; + int blend_float32; + int simple_float; + int round_mode; + int tile_compact; + int source_format; +} cb_config_t; + +#if 0 /* TODO: textures */ +/* state: texture resource */ +typedef struct { + /* TODO: */ +} state_tex_resource_t; + +/* state: texture sampler */ +typedef struct { + /* TODO: */ +} state_tex_sampler_t; +#endif + +/* + * TODO: Missing: a lot, e.g. line, point modes, edges, stippling, shaders, vertex buffers, + * user clipplanes, fog, dithering, stencil ... + */ + +struct r600_cmdbuf +{ + int size; /* DWORDS allocated for buffer */ + uint32_t *cmd_buf; /* pointer to command buffer */ + int count_used; /* DWORDS filled so far */ + int count_reemit; /* size of re-emission batch */ + + int nDMAindex; + GLboolean bUseDMAasIndirect; +}; + +struct r600_dma_buffer +{ + int refcount; /* the number of retained regions in buf */ + drmBufPtr buf; + int id; +}; + +/* + * A retained region, eg vertices for indexed vertices. + */ +struct r600_dma_region +{ + struct r600_dma_buffer *buf; + char *address; /* == buf->address */ + int start; + int end; + int ptr; /* offsets from start of buf */ + + int aos_offset; /* address in GART memory */ + int aos_stride; /* distance between elements, in dwords */ + int aos_size; /* number of components (1-4) */ +}; + + +struct r600_dma +{ + /* + * Active dma region. Allocations for vertices and retained + * regions come from here. Also used for emitting random vertices, + * these may be flushed by calling flush_current(); + */ + struct r600_dma_region current; + + void (*flush) (context_t *context); + + char *buf0_address; /* start of buf[0], for index calcs */ + + /* + * Number of "in-flight" DMA buffers, i.e. the number of buffers + * for which a DISCARD command is currently queued in the command buffer. + */ + GLuint nr_released_bufs; +}; + +typedef struct chip_object +{ + void *pvChipObj; + + /* ------------ OUT ------------------- */ + GLboolean (*DestroyChipObj)(void* pvChipObj); + + void (*InitFuncs)(struct dd_function_table *functions); + + void (*InitState)(GLcontext * ctx); + + GLuint (*GetTexObjSize)(void); + + /* ------------ IN ------------------- */ + void (*EmitShader)( GLcontext * ctx, + struct r600_dma_region *rvb, + GLvoid * data, + int sizeinDWORD); + void (*FreeDmaRegion)( GLcontext * ctx, + struct r600_dma_region *region); + void (*EmitVec)(GLcontext * ctx, + struct r600_dma_region *rvb, + GLvoid * data, + int size, + int stride, + int count); + void (*MemUse)(struct context_s *context, int id); + void (*ReleaseArrays)(GLcontext * ctx); + int (*FlushCmdBuffer)(GLcontext * ctx); + GLboolean (*LoadMemSurf)(context_t *context, + GLuint dst_offset, /* gpu addr */ + GLuint dst_pitch_in_pixel, + GLuint src_width_in_pixel, + GLuint height, + GLuint byte_per_pixel, + unsigned char* pSrc); /* source data */ + GLboolean (*AllocMemSurf)(context_t *context, + void **ppmemBlock, + void **ppheap, + GLuint *prefered_heap, + GLuint totalSize); + + struct tnl_pipeline_stage **stages; +} chip_object; + +#define R600_MAX_AOS_ARRAYS 32 /* TODO : should be VERT_ATTRIB_MAX */ + +/* + * Context + */ +struct context_s { + GLcontext *ctx; /* Mesa context */ + screen_t *screen; + GLboolean lost_context; + + int fd; /* DRM's fd == context->screen->driScreen->fd */ + drm_context_t hwContext; + drm_hw_lock_t *hwLock; + + __DRIdrawablePrivate *currentDraw; + __DRIdrawablePrivate *currentRead; + unsigned int lastStamp; + + /* dma buffer */ + struct r600_dma dma; + + /* user space command buffer */ + struct r600_cmdbuf cmdbuf; + struct r600_mem_manager *memManager; + + /* Drawable, cliprect and scissor information */ + int numClipRects; /* Cliprects for the draw buffer */ + drm_clip_rect_t *pClipRects; + bool_t reloadContext; + drm_radeon_sarea_t *sarea; /* Private SAREA data */ + + /* State */ + state_target_t target; + state_viewport_t viewport; + state_matrices_t matrices; + state_tests_t tests; + state_blend_t blend; + +#if 0 /* TODO: textures */ + state_tex_resource_t tex_resource[NUM_TEXTURES]; /* up to SQ_TEX_RESOURCE_ps_num */ + state_tex_sampler_t tex_sampler[NUM_TEXTURES]; /* up to SQ_TEX_SAMPLER_WORD_ps_num */ +#endif + + /* For vertex shader */ + vtx_resource_t vtx_res[16]; + state_vtx_format_t vtx_format; + shader_config_t vs_config[8]; + shader_config_t ps_config[8]; + shader_config_t fs_config[8]; + + chip_object chipobj; + + GLuint NewGLState; + + struct r600_dma_region aos[R600_MAX_AOS_ARRAYS]; + int aos_count; + + GLboolean bEnablePerspective; + + GLint vport_x; + GLint vport_y; + GLsizei vport_width; + GLsizei vport_height; + + /* TEXTURE */ + int texture_depth; /* driQueryOption */ + driTexHeap *texture_heaps[RADEON_NR_TEX_HEAPS]; + driTextureObject swapped; + unsigned nr_heaps; +}; + + +/* + * Functions + */ + +GLboolean +r600CreateContext (const __GLcontextModes *glVisual, + __DRIcontextPrivate *driContextPriv, + void *sharedContextPriv); +void +r600DestroyContext (__DRIcontextPrivate * driContextPriv); + +GLboolean +r600MakeCurrent (__DRIcontextPrivate *driContextPriv, + __DRIdrawablePrivate *driDrawPriv, + __DRIdrawablePrivate *driReadPriv); + +GLboolean +r600UnbindContext (__DRIcontextPrivate * driContextPriv); + + +#endif /* _R600_SCREEN_H */ diff --git a/src/mesa/drivers/dri/r600/r600_emit.c b/src/mesa/drivers/dri/r600/r600_emit.c new file mode 100644 index 0000000000..265dcf6a74 --- /dev/null +++ b/src/mesa/drivers/dri/r600/r600_emit.c @@ -0,0 +1,220 @@ +/* + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Authors: + * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com> + * CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com> + */ + +#include "glheader.h" +#include "mtypes.h" + +#include "r600_context.h" +#include "r600_mem.h" + +static uint32_t r600EmitData(context_t *context, struct r600_dma_region *dmaRegion, + GLvoid *data, int size, int stride, int count) +{ + uint32_t addr; + int *out = (int *)(dmaRegion->address + dmaRegion->start); + addr = out; + + dmaRegion->aos_offset = GET_START(dmaRegion); + dmaRegion->aos_stride = size; + + COPY_DWORDS(out, data, count); + + addr = addr - (uint32_t)context->screen->gartTextures.cpu + (uint32_t)context->screen->gartTextures.gpu; + return addr; +} + +static void r600EmitVec4(GLcontext * ctx, + struct r600_dma_region *rvb, + GLvoid * data, + int stride, + int count) +{ + int i; + int *out = (int *)(rvb->address + rvb->start); + + if (stride == 4) + { + COPY_DWORDS(out, data, count); + } + else + { + for (i = 0; i < count; i++) + { + out[0] = *(int *)data; + out++; + data += stride; + } + } +} + +static void r600EmitVec8(GLcontext * ctx, + struct r600_dma_region *rvb, + GLvoid * data, + int stride, + int count) +{ + int i; + int *out = (int *)(rvb->address + rvb->start); + + if (stride == 8) + { + COPY_DWORDS(out, data, count * 2); + } + else + { + for (i = 0; i < count; i++) + { + out[0] = *(int *)data; + out[1] = *(int *)(data + 4); + out += 2; + data += stride; + } + } +} + +static void r600EmitVec12(GLcontext * ctx, + struct r600_dma_region *rvb, + GLvoid * data, + int stride, + int count) +{ + int i; + int *out = (int *)(rvb->address + rvb->start); + + if (stride == 12) + { + COPY_DWORDS(out, data, count * 3); + } + else + { + for (i = 0; i < count; i++) + { + out[0] = *(int *)data; + out[1] = *(int *)(data + 4); + out[2] = *(int *)(data + 8); + out += 3; + data += stride; + } + } +} + +static void r600EmitVec16(GLcontext * ctx, + struct r600_dma_region *rvb, + GLvoid * data, + int stride, + int count) +{ + int i; + int *out = (int *)(rvb->address + rvb->start); + + if (stride == 16) + { + COPY_DWORDS(out, data, count * 4); + } + else + { + for (i = 0; i < count; i++) + { + out[0] = *(int *)data; + out[1] = *(int *)(data + 4); + out[2] = *(int *)(data + 8); + out[3] = *(int *)(data + 12); + out += 4; + data += stride; + } + } +} + +void r600EmitShader(GLcontext * ctx, + struct r600_dma_region *rvb, + GLvoid * data, + int sizeinDWORD) +{ + R600_CONTEXT; + + r600AllocDmaRegion(context, rvb, sizeinDWORD * 4, 256); + rvb->aos_offset = GET_START(rvb); + rvb->aos_stride = 0; + + r600EmitVec4(ctx, rvb, data, 4, sizeinDWORD); +} + +void r600EmitVec(GLcontext * ctx, + struct r600_dma_region *rvb, + GLvoid * data, + int size, + int stride, + int count) +{ + R600_CONTEXT; + + if (stride == 0) + { + r600AllocDmaRegion(context, rvb, size * count * 4, 4); + + rvb->aos_offset = GET_START(rvb); + rvb->aos_stride = 0; + } + else + { + r600AllocDmaRegion(context, rvb, size * count * 4, 4); + rvb->aos_offset = GET_START(rvb); + rvb->aos_stride = size; + } + + switch (size) + { + case 1: + r600EmitVec4(ctx, rvb, data, stride, count); + break; + case 2: + r600EmitVec8(ctx, rvb, data, stride, count); + break; + case 3: + r600EmitVec12(ctx, rvb, data, stride, count); + break; + case 4: + r600EmitVec16(ctx, rvb, data, stride, count); + break; + default: + assert(0); + break; + } +} + +void r600ReleaseArrays(GLcontext * ctx) +{ + R600_CONTEXT; + + int i; + + for (i = 0; i < context->aos_count; i++) + { + r600FreeDmaRegion(context, &(context->aos[i])); + } +} + diff --git a/src/mesa/drivers/dri/r600/r600_id.c b/src/mesa/drivers/dri/r600/r600_id.c new file mode 100644 index 0000000000..4d07f70def --- /dev/null +++ b/src/mesa/drivers/dri/r600/r600_id.c @@ -0,0 +1,129 @@ +/* + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * Copyright (C) 2008-2009 Matthias Hopf + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "r600_id.h" + +chip_t r600_chips[] = { + { 0x9400, CHIP_TYPE_R600, "R600: Radeon HD 2900 XT", 0 }, + { 0x9401, CHIP_TYPE_R600, "R600: Radeon HD 2900 XT", 0 }, + { 0x9402, CHIP_TYPE_R600, "R600: Radeon HD 2900 XT", 0 }, + { 0x9403, CHIP_TYPE_R600, "R600: Radeon HD 2900 Pro", 0 }, + { 0x9405, CHIP_TYPE_R600, "R600: Radeon HD 2900 GT", 0 }, + { 0x940A, CHIP_TYPE_R600, "R600: FireGL V8650", 0 }, + { 0x940B, CHIP_TYPE_R600, "R600: FireGL V8600", 0 }, + { 0x940F, CHIP_TYPE_R600, "R600: FireGL V7600", 0 }, + { 0x9440, CHIP_TYPE_RV770, "RV770: Radeon 4800 Series", 0 }, + { 0x9441, CHIP_TYPE_RV770, "RV770: Radeon 4870 X2", 0 }, + { 0x9442, CHIP_TYPE_RV770, "RV770: Radeon 4800 Series", 0 }, + { 0x9443, CHIP_TYPE_R700, "R700: Radeon 4800 Series", 0 }, + { 0x9444, CHIP_TYPE_RV770, "RV770: Everest FirePro", 0 }, + { 0x9446, CHIP_TYPE_RV770, "RV770: K2 FirePro", 0 }, + { 0x9447, CHIP_TYPE_R700, "R700: K2 FirePro", 0 }, + { 0x944A, CHIP_TYPE_M98, "M98", CHIP_FLAG_MOBILITY }, + { 0x944B, CHIP_TYPE_M98, "M98", CHIP_FLAG_MOBILITY }, + { 0x944E, CHIP_TYPE_RV770, "RV770", 0 }, + { 0x944F, CHIP_TYPE_R700, "R700", 0 }, + { 0x9456, CHIP_TYPE_RV770, "RV770: Denali FirePro", 0 }, + { 0x945A, CHIP_TYPE_M98, "M98", CHIP_FLAG_MOBILITY }, + { 0x945B, CHIP_TYPE_M98, "M98", CHIP_FLAG_MOBILITY }, + { 0x946A, CHIP_TYPE_M98, "M98", CHIP_FLAG_MOBILITY }, + { 0x946B, CHIP_TYPE_M98, "M98", CHIP_FLAG_MOBILITY }, + { 0x947A, CHIP_TYPE_M98, "M98", CHIP_FLAG_MOBILITY }, + { 0x947B, CHIP_TYPE_M98, "M98", CHIP_FLAG_MOBILITY }, + { 0x9480, CHIP_TYPE_M96, "M96", CHIP_FLAG_MOBILITY }, + { 0x9487, CHIP_TYPE_RV730, "RV730", 0 }, + { 0x9488, CHIP_TYPE_M96, "M96", CHIP_FLAG_MOBILITY }, + { 0x9489, CHIP_TYPE_M96, "M96", CHIP_FLAG_MOBILITY }, + { 0x948F, CHIP_TYPE_RV730, "RV730", 0 }, + { 0x9487, CHIP_TYPE_RV730, "RV730", 0 }, + { 0x9490, CHIP_TYPE_RV730, "RV730", 0 }, + { 0x9498, CHIP_TYPE_RV730, "RV730", 0 }, + { 0x949E, CHIP_TYPE_RV730, "RV730", 0 }, + { 0x949F, CHIP_TYPE_RV730, "RV730", 0 }, + { 0x94C0, CHIP_TYPE_RV610, "RV610", 0 }, + { 0x94C1, CHIP_TYPE_RV610, "RV610: Radeon HD 2400 XT", 0 }, + { 0x94C3, CHIP_TYPE_RV610, "RV610: Radeon HD 2400 Pro", 0 }, + { 0x94C4, CHIP_TYPE_RV610, "RV610: Radeon HD 2400 PRO AGP", 0 }, + { 0x94C5, CHIP_TYPE_RV610, "RV610: FireGL V4000", 0 }, + { 0x94C6, CHIP_TYPE_RV610, "RV610", 0 }, + { 0x94C7, CHIP_TYPE_RV610, "RV610: Radeon HD 2350", 0 }, + { 0x94C8, CHIP_TYPE_M74, "M74: Mobility Radeon HD 2400 XT", CHIP_FLAG_MOBILITY }, + { 0x94C9, CHIP_TYPE_M72, "M72: Mobility Radeon HD 2400", CHIP_FLAG_MOBILITY }, + { 0x94CB, CHIP_TYPE_M72, "M72: Radeon E2400", CHIP_FLAG_MOBILITY }, + { 0x94CC, CHIP_TYPE_RV610, "RV610: Radeon HD 2400", 0 }, + { 0x9500, CHIP_TYPE_RV670, "RV670", 0 }, + { 0x9501, CHIP_TYPE_RV670, "RV670: Radeon HD3870", 0 }, + { 0x9504, CHIP_TYPE_M88, "M88: Mobility Radeon HD 3850", CHIP_FLAG_MOBILITY }, + { 0x9505, CHIP_TYPE_RV670, "RV670: Radeon HD3850", 0 }, + { 0x9506, CHIP_TYPE_M88, "M88: Mobility Radeon HD 3850 X2", CHIP_FLAG_MOBILITY }, + { 0x9507, CHIP_TYPE_RV670, "RV670", 0 }, + { 0x9508, CHIP_TYPE_M88, "M88: Mobility Radeon HD 3870", CHIP_FLAG_MOBILITY }, + { 0x9509, CHIP_TYPE_M88, "M88: Mobility Radeon HD 3870 X2", CHIP_FLAG_MOBILITY }, + { 0x950F, CHIP_TYPE_R680, "R680: Radeon HD3870 X2", 0 }, + { 0x9511, CHIP_TYPE_RV670, "RV670: FireGL V7700", 0 }, + { 0x9515, CHIP_TYPE_RV670, "RV670: Radeon HD 3850 AGP", 0 }, + { 0x9517, CHIP_TYPE_RV670, "RV670: Radeon HD 3960", 0 }, + { 0x9519, CHIP_TYPE_RV670, "RV670: FireStream 9170", 0 }, + { 0x9540, CHIP_TYPE_RV710, "RV710", 0 }, + { 0x9541, CHIP_TYPE_RV710, "RV710", 0 }, + { 0x9542, CHIP_TYPE_RV710, "RV710", 0 }, + { 0x954E, CHIP_TYPE_RV710, "RV710", 0 }, + { 0x954f, CHIP_TYPE_RV710, "RV710", 0 }, + { 0x9580, CHIP_TYPE_RV630, "RV630", 0 }, + { 0x9581, CHIP_TYPE_M76, "M76: Mobility Radeon HD 2600", CHIP_FLAG_MOBILITY }, + { 0x9583, CHIP_TYPE_M76, "M76: Mobility Radeon HD 2600 XT", CHIP_FLAG_MOBILITY }, + { 0x9586, CHIP_TYPE_RV630, "RV630: Radeon HD 2600 XT AGP", 0 }, + { 0x9587, CHIP_TYPE_RV630, "RV630: Radeon HD 2600 Pro AGP", 0 }, + { 0x9588, CHIP_TYPE_RV630, "RV630: Radeon HD 2600 XT", 0 }, + { 0x9589, CHIP_TYPE_RV630, "RV630: Radeon HD 2600 Pro", 0 }, + { 0x958A, CHIP_TYPE_RV630, "RV630: Gemini RV630", 0 }, + { 0x958B, CHIP_TYPE_M76, "M76: Gemini Mobility Radeon HD 2600 XT", CHIP_FLAG_MOBILITY }, + { 0x958C, CHIP_TYPE_RV630, "RV630: FireGL V5600", 0 }, + { 0x958D, CHIP_TYPE_RV630, "RV630: FireGL V3600", 0 }, + { 0x958E, CHIP_TYPE_RV630, "RV630: Radeon HD 2600 LE", 0 }, + { 0x958F, CHIP_TYPE_M76, "M76: Mobility FireGL", CHIP_FLAG_MOBILITY }, + { 0x9590, CHIP_TYPE_RV635, "RV635: ATI Radeon HD 3600 Series", 0 }, + { 0x9591, CHIP_TYPE_M86, "M86: Mobility Radeon HD 3650", CHIP_FLAG_MOBILITY }, + { 0x9593, CHIP_TYPE_M86, "M86: Mobility Radeon HD 3670", CHIP_FLAG_MOBILITY }, + { 0x9595, CHIP_TYPE_M86, "M86: Mobility FireGL V5700", CHIP_FLAG_MOBILITY }, + { 0x9596, CHIP_TYPE_RV635, "RV635: Radeon HD 3650 AGP", 0 }, + { 0x9597, CHIP_TYPE_RV635, "RV635: Radeon HD 3600 Series", 0 }, + { 0x9598, CHIP_TYPE_RV635, "RV635: Radeon HD 3670", 0 }, + { 0x9599, CHIP_TYPE_RV635, "RV635: Radeon HD 3600 Series", 0 }, + { 0x959B, CHIP_TYPE_M86, "M86: Mobility FireGL", CHIP_FLAG_MOBILITY }, + { 0x95C0, CHIP_TYPE_RV620, "RV620: Radeon HD 3470", 0 }, + { 0x95C2, CHIP_TYPE_M82, "M82: Mobility Radeon HD 3430", CHIP_FLAG_MOBILITY }, + { 0x95C4, CHIP_TYPE_M82, "M82: Mobility Radeon HD 3400 Series", CHIP_FLAG_MOBILITY }, + { 0x95C5, CHIP_TYPE_RV620, "RV620: Radeon HD 3450", 0 }, + { 0x95C6, CHIP_TYPE_RV620, "RV620: Radeon HD 3450", 0 }, + { 0x95C7, CHIP_TYPE_RV620, "RV620: Radeon HD 3430", 0 }, + { 0x95CC, CHIP_TYPE_RV620, "RV620: Fire PRO", 0 }, + { 0x95CD, CHIP_TYPE_RV620, "RV620: FireMV 2450", 0 }, + { 0x95CE, CHIP_TYPE_RV620, "RV620: FireMV 2260", 0 }, + { 0x95CF, CHIP_TYPE_RV620, "RV620: FireMV 2260", 0 }, + { 0x9610, CHIP_TYPE_RS780, "RS780: Radeon HD 3200", 0 }, + { 0x9611, CHIP_TYPE_RS780, "RS780: Radeon 3100", 0 }, + { 0x9612, CHIP_TYPE_RS780, "RS780: Radeon HD 3200", 0 }, + { 0x9613, CHIP_TYPE_RS780, "RS780: Radeon 3100", 0 }, + { 0x9614, CHIP_TYPE_RS780, "RS780: Radeon HD 3300", 0 }, + { 0, 0, NULL, 0 } +}; diff --git a/src/mesa/drivers/dri/r600/r600_id.h b/src/mesa/drivers/dri/r600/r600_id.h new file mode 100644 index 0000000000..0093fa0608 --- /dev/null +++ b/src/mesa/drivers/dri/r600/r600_id.h @@ -0,0 +1,70 @@ +/* + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * Copyright (C) 2008-2009 Matthias Hopf + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _R600_ID_H +#define _R600_ID_H + +#include <stdint.h> +#include <stdlib.h> + +enum chip_type_e { + CHIP_TYPE_ARCH_MASK = 0xf000, + CHIP_TYPE_ARCH_R6xx = 1 << 12, + CHIP_TYPE_ARCH_R7xx = 2 << 12, + + CHIP_TYPE_R600 = CHIP_TYPE_ARCH_R6xx + 1, + CHIP_TYPE_RV610, + CHIP_TYPE_RV630, + CHIP_TYPE_M72, + CHIP_TYPE_M74, + CHIP_TYPE_M76, + CHIP_TYPE_RV670, + CHIP_TYPE_M88, + CHIP_TYPE_R680, + CHIP_TYPE_RV620, + CHIP_TYPE_M82, + CHIP_TYPE_RV635, + CHIP_TYPE_M86, + CHIP_TYPE_RS780, + CHIP_TYPE_RV770 = CHIP_TYPE_ARCH_R7xx + 1, + CHIP_TYPE_R700, + CHIP_TYPE_M98, + CHIP_TYPE_RV730, + CHIP_TYPE_M96, + CHIP_TYPE_RV710, +}; + +enum chip_flag_e { + CHIP_FLAG_MOBILITY = 1 << 0, + CHIP_FLAG_VERIFIED = 1 << 16, +} ; + +typedef struct chip_s { + uint16_t id; + uint16_t type; + char *name; + uint32_t flags; +} chip_t; + +extern chip_t r600_chips[]; + +#endif /* _R600_ID_H */ diff --git a/src/mesa/drivers/dri/r600/r600_lock.h b/src/mesa/drivers/dri/r600/r600_lock.h new file mode 100644 index 0000000000..65b2e1a060 --- /dev/null +++ b/src/mesa/drivers/dri/r600/r600_lock.h @@ -0,0 +1,167 @@ +/* + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * Copyright (C) 2008-2009 Matthias Hopf + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Authors: + * Matthias Hopf + * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com> + * CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com> + */ + +#ifndef __R600_LOCK_H__ +#define __R600_LOCK_H__ + +#include "r600_context.h" + +/* + * Turn DEBUG_LOCKING on to find locking conflicts. + */ +#define DEBUG_LOCKING 0 + +#if DEBUG_LOCKING +extern char *prevLockFile; +extern int prevLockLine; + +#define DEBUG_LOCK() \ + do \ + { \ + prevLockFile = (__FILE__); \ + prevLockLine = (__LINE__); \ + } while (0) + +#define DEBUG_RESET() \ + do \ + { \ + prevLockFile = 0; \ + prevLockLine = 0; \ + } while (0) + +#define DEBUG_CHECK_LOCK() \ + do \ + { \ + if (prevLockFile) \ + { \ + fprintf(stderr, \ + "LOCK SET!\n\tPrevious %s:%d\n\tCurrent: %s:%d\n", \ + prevLockFile, prevLockLine, __FILE__, __LINE__); \ + exit(1); \ + } \ + } while (0) + +#else + +#define DEBUG_LOCK() +#define DEBUG_RESET() +#define DEBUG_CHECK_LOCK() + +#endif + +/* + * Update the hardware state. This is called if another context has + * grabbed the hardware lock, which includes the X server. This + * function also updates the driver's window state after the X server + * moves, resizes or restacks a window -- the change will be reflected + * in the drawable position and clip rects. Since the X server grabs + * the hardware lock when it changes the window state, this routine will + * automatically be called after such a change. + */ +static inline void r600GetLock(context_t *context, GLuint flags) +{ + __DRIdrawablePrivate *const drawable = context->currentDraw; + __DRIdrawablePrivate *const readable = context->currentRead; + __DRIscreenPrivate *sPriv = context->screen->driScreen; + drm_radeon_sarea_t *sarea = context->sarea; + + assert(drawable != NULL); + + drmGetLock(context->fd, context->hwContext, flags); + + /* + * The window might have moved, so we might need to get new clip + * rects. + * + * NOTE: This releases and regrabs the hw lock to allow the X server + * to respond to the DRI protocol request for new drawable info. + * Since the hardware state depends on having the latest drawable + * clip rects, all state checking must be done _after_ this call. + */ +#if 0 + DRI_VALIDATE_DRAWABLE_INFO(sPriv, drawable); + if (drawable != readable) + { + DRI_VALIDATE_DRAWABLE_INFO(sPriv, readable); + } + + if (context->lastStamp != drawable->lastStamp) + { + radeonUpdatePageFlipping(context); + radeonSetCliprects(context); + r300UpdateViewportOffset(context->glCtx); + driUpdateFramebufferSize(context->glCtx, drawable); + } + + if (sarea->ctx_owner != context->dri.hwContext) + { + int i; + + sarea->ctx_owner = context->dri.hwContext; + for (i = 0; i < r300->nr_heaps; i++) + { + DRI_AGE_TEXTURES(r300->texture_heaps[i]); + } + } + + context->lost_context = GL_TRUE; +#endif +} + + +/* + * !!! We may want to separate locks from locks with validation. This + * could be used to improve performance for those things commands that + * do not do any drawing !!! + */ + +/* Lock the hardware and validate our state. + */ +#define R600_LOCK_HARDWARE( context ) \ + do \ + { \ + char __ret = 0; \ + DEBUG_CHECK_LOCK(); \ + DRM_CAS((context)->hwLock, (context)->hwContext, \ + (DRM_LOCK_HELD | (context)->hwContext), __ret); \ + if (__ret) \ + r600GetLock(context,0); \ + DEBUG_LOCK(); \ + } while (0) + +#define R600_UNLOCK_HARDWARE( context ) \ + do \ + { \ + DRM_UNLOCK((context)->fd, \ + (context)->hwLock, \ + (context)->hwContext); \ + DEBUG_RESET(); \ + } while (0) + +#endif /* __R600_LOCK_H__ */ diff --git a/src/mesa/drivers/dri/r600/r600_mem.c b/src/mesa/drivers/dri/r600/r600_mem.c new file mode 100644 index 0000000000..374def6f40 --- /dev/null +++ b/src/mesa/drivers/dri/r600/r600_mem.c @@ -0,0 +1,537 @@ +/* + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Authors: + * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com> + * CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com> + */ + +#include "imports.h" +#include "macros.h" + +#include "r600_context.h" +#include "r600_cmdbuf.h" +#include "r600_mem.h" +#include "r600_lock.h" + +GLboolean r600AllocMemSurf(context_t *context, + void **ppmemBlock, + void **ppheap, + GLuint *prefered_heap, /* Now used RADEON_LOCAL_TEX_HEAP, return actual heap used. */ + GLuint totalSize) +{ + driTextureObject t; + + t.heap = (driTexHeap*)*ppheap; + t.memBlock = (struct mem_block*)*ppmemBlock; + t.totalSize = totalSize; + + *prefered_heap = driAllocateTexture(context->texture_heaps, context->nr_heaps, &t); + + if(-1 == *prefered_heap) + { + return GL_FALSE; + } + + *ppmemBlock = t.memBlock; + *ppheap = t.heap; + return GL_TRUE; +} + +GLboolean r600LoadMemSurf(context_t *context, + GLuint dst_offset, /* gpu addr */ + GLuint dst_pitch_in_pixel, + GLuint src_width_in_pixel, + GLuint height, + GLuint byte_per_pixel, + unsigned char* pSrc) /* source data */ +{ + /* TEST CODE ONLY */ + + unsigned char *pDst; + GLuint i; + + pDst = (unsigned char*)(dst_offset - context->screen->fb.gpu + context->screen->fb.cpu); + + for(i=0; i<height; i++) + { + memcpy(pDst, pSrc, src_width_in_pixel*byte_per_pixel); + pDst += dst_pitch_in_pixel * byte_per_pixel; + pSrc += src_width_in_pixel * byte_per_pixel; + } + + return GL_TRUE; +} + +static unsigned int r600GetAge(context_t *context) +{ + drm_radeon_getparam_t gp; + int ret; + uint32_t age; + + gp.param = RADEON_PARAM_LAST_CLEAR; + gp.value = (int *)&age; + ret = drmCommandWriteRead(context->fd, + DRM_RADEON_GETPARAM, + &gp, + sizeof(gp)); + if (ret) + { + fprintf(stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__, ret); + exit(1); + } + + return age; +} + +static void r600ResizeUList(context_t *context) +{ + void *temp; + int nsize; + + temp = context->memManager->u_list; + nsize = context->memManager->u_size * 2; + + context->memManager->u_list = + _mesa_malloc(nsize * sizeof(*context->memManager->u_list)); + _mesa_memset(context->memManager->u_list, 0, + nsize * sizeof(*context->memManager->u_list)); + + if (temp) + { + r600Flush(&(context->ctx)); + _mesa_memcpy(context->memManager->u_list, + temp, + context->memManager->u_size * + sizeof(*context->memManager->u_list)); + _mesa_free(temp); + } + + context->memManager->u_size = nsize; +} + +void r600MemInit(context_t *context) +{ + context->memManager = malloc(sizeof(struct r600_mem_manager)); + memset(context->memManager, 0, sizeof(struct r600_mem_manager)); + + context->memManager->u_size = 128; + r600ResizeUList(context); +} + +void r600MemDestroy(context_t *context) +{ + _mesa_free(context->memManager->u_list); + context->memManager->u_list = NULL; + + _mesa_free(context->memManager); + context->memManager = NULL; +} + +void *r600MemPtr(context_t *context, int id) +{ + assert(id <= context->memManager->u_last); + return context->memManager->u_list[id].ptr; +} + +int r600MemFind(context_t *context, void *ptr) +{ + int i; + + for (i = 1; i < context->memManager->u_size + 1; i++) + { + if (context->memManager->u_list[i].ptr && + ptr >= context->memManager->u_list[i].ptr && + ptr < context->memManager->u_list[i].ptr + + context->memManager->u_list[i].size) + break; + } + + if (i < context->memManager->u_size + 1) + return i; + + fprintf(stderr, "%p failed\n", ptr); + return 0; +} + +int r600MemAlloc(context_t *context, int alignment, int size) +{ + drm_radeon_mem_alloc_t alloc; + int offset = 0, ret; + int i, free = -1; + int done_age; + drm_radeon_mem_free_t memfree; + int tries = 0; + static int bytes_wasted = 0, allocated = 0; + + if (size < 4096) + bytes_wasted += 4096 - size; + + allocated += size; + memfree.region = RADEON_MEM_REGION_GART; + + while ((free == -1) && (tries < 100)) + { + done_age = r600GetAge(context); + + if (context->memManager->u_last + 1 >= context->memManager->u_size) + { + r600ResizeUList(context); + } + + for (i = context->memManager->u_last + 1; i > 0; i--) + { + if (context->memManager->u_list[i].ptr == NULL) + { + free = i; + continue; + } + + if (context->memManager->u_list[i].h_pending == 0 && + context->memManager->u_list[i].pending && + context->memManager->u_list[i].age <= done_age) + { + memfree.region_offset = + (char *)context->memManager->u_list[i].ptr - + (char *)context->screen->gartTextures.cpu; + + ret = drmCommandWrite(context->fd, + DRM_RADEON_FREE, + &memfree, + sizeof(memfree)); + + if (ret) + { + fprintf(stderr, "Failed to free at %p\n", + context->memManager->u_list[i].ptr); + fprintf(stderr, "ret = %s\n", strerror(-ret)); + exit(1); + } + else + { + fprintf(stderr, "really freed %d at age %x\n", i, + r600GetAge(context)); + if (i == context->memManager->u_last) + { + context->memManager->u_last--; + } + + if (context->memManager->u_list[i].size < 4096) + { + bytes_wasted -= 4096 - context->memManager->u_list[i].size; + } + + allocated -= context->memManager->u_list[i].size; + context->memManager->u_list[i].pending = 0; + context->memManager->u_list[i].ptr = NULL; + free = i; + } + } + } + + context->memManager->u_head = i; + tries ++; + } + + if (free == -1) + { + WARN_ONCE("Ran out of slots!\n"); + r600Flush(&(context->ctx)); + exit(1); + } + + alloc.region = RADEON_MEM_REGION_GART; + alloc.alignment = alignment; + alloc.size = size; + alloc.region_offset = &offset; + + /* DRM_RADEON_ALLOC will alloc a dma region for us */ + ret = drmCommandWriteRead(context->fd, + DRM_RADEON_ALLOC, + &alloc, + sizeof(alloc)); + if (ret) + { + WARN_ONCE ("Ran out of GART memory (for %d)!\n", size); + return 0; + } + + i = free; + + if (i > context->memManager->u_last) + context->memManager->u_last = i; + + context->memManager->u_list[i].ptr = + ((GLubyte *) context->screen->gartTextures.cpu) + offset; + + context->memManager->u_list[i].size = size; + context->memManager->u_list[i].age = 0; + + DEBUG_FUNCF ("allocated %d at age %x\n", i, r600GetAge(context)); + + return i; +} + +void r600MemFree(context_t *context, int id) +{ + fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id,r600GetAge(context)); + assert(id <= context->memManager->u_last); + + if (id == 0) + return; + + if (context->memManager->u_list[id].ptr == NULL) { + WARN_ONCE("Not allocated!\n"); + return; + } + + if (context->memManager->u_list[id].pending) { + WARN_ONCE("%p already pended!\n", context->memManager->u_list[id].ptr); + return; + } + + context->memManager->u_list[id].pending = 1; +} + +void r600MemUse(context_t *context, int id) +{ + uint64_t ull; + fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id, r600GetAge(context)); + drm_r300_cmd_header_t *cmd; + + assert(id <= context->memManager->u_last); + + if (id == 0) + return; + + cmd = (drm_r300_cmd_header_t *) r600AllocCmdBuf(context, 2 + sizeof(ull) / 4); + cmd[0].scratch.cmd_type = R300_CMD_SCRATCH; + cmd[0].scratch.reg = R600_MEM_SCRATCH; + cmd[0].scratch.n_bufs = 1; + cmd[0].scratch.flags = 0; + cmd++; + + ull = (uint64_t) (intptr_t) & context->memManager->u_list[id].age; + _mesa_memcpy(cmd, &ull, sizeof(ull)); + cmd += sizeof(ull) / 4; + + cmd[0].u = /*id */ 0; + + R600_LOCK_HARDWARE(context); /* Protect from DRM. */ + context->memManager->u_list[id].h_pending++; + R600_UNLOCK_HARDWARE(context); +} + +unsigned long r600MemOffset(context_t *context, int id) +{ + unsigned long offset; + assert(id <= context->memManager->u_last); + + offset = (char *)context->memManager->u_list[id].ptr - + (char *)context->screen->gartTextures.cpu; + offset += context->screen->gart_texture_offset; + + return offset; +} + +void *r600MemMap(context_t *context, int id, int access) +{ + void *ptr; + int tries = 0; + fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id, r600GetAge(context)); + + assert(id <= context->memManager->u_last); + + if (access == R600_MEM_R) + { + if (context->memManager->u_list[id].mapped == 1) + { + WARN_ONCE("buffer %d already mapped\n", id); + } + + context->memManager->u_list[id].mapped = 1; + ptr = r600MemPtr(context, id); + return ptr; + } + + if (context->memManager->u_list[id].h_pending) + { + r600Flush(&(context->ctx)); + } + + if (context->memManager->u_list[id].h_pending) + { + return NULL; + } + + while (context->memManager->u_list[id].age > r600GetAge(context) && + tries++ < 1000) + { + usleep(10); + } + + if (tries >= 1000) + { + fprintf(stderr, "Idling failed (%x vs %x)\n", + context->memManager->u_list[id].age, + r600GetAge(context)); + return NULL; + } + + if (context->memManager->u_list[id].mapped == 1) + { + WARN_ONCE("buffer %d already mapped\n", id); + } + + context->memManager->u_list[id].mapped = 1; + ptr = r600MemPtr(context, id); + + return ptr; +} + +void r600MemUnmap(context_t *context, int id) +{ + fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id, r600GetAge(context)); + + assert(id <= context->memManager->u_last); + + if (context->memManager->u_list[id].mapped == 0) + { + WARN_ONCE("buffer %d not mapped\n", id); + } + context->memManager->u_list[id].mapped = 0; +} + +static void r600RefillCurrentDmaRegion(context_t *context, int size) +{ + struct r600_dma_buffer *dmabuf; + size = MAX2(size, RADEON_BUFFER_SIZE * 16); + + if (context->dma.flush) + context->dma.flush(context); + + if (context->dma.nr_released_bufs > 4) + r600Flush(&(context->ctx)); + + dmabuf = CALLOC_STRUCT(r600_dma_buffer); + dmabuf->buf = (void *)1; /* hack */ + dmabuf->refcount = 1; + dmabuf->id = r600MemAlloc(context, 4, size); + + if (dmabuf->id == 0) + { + R600_LOCK_HARDWARE(context); /* no need to validate */ + + r600Flush(&(context->ctx)); + //radeonWaitForIdleLocked(context); + dmabuf->id = r600MemAlloc(context, 4, size); + + R600_UNLOCK_HARDWARE(context); + + if (dmabuf->id == 0) + { + fprintf(stderr, "Error: Could not get dma buffer... exiting\n"); + _mesa_exit(-1); + } + } + + context->dma.current.buf = dmabuf; + context->dma.current.address = r600MemPtr(context, dmabuf->id); + context->dma.current.end = size; + context->dma.current.start = 0; + context->dma.current.ptr = 0; +} + +void r600FreeDmaRegion(context_t *context, struct r600_dma_region *region) +{ + if (!region->buf) + return; + + if (context->dma.flush) + context->dma.flush(context); + + if (--region->buf->refcount == 0) + { + r600MemFree(context, region->buf->id); + FREE(region->buf); + context->dma.nr_released_bufs ++; + } + + region->buf = 0; + region->start = 0; +} + +/* + * Allocates a region from context->dma.current. If there isn't enough + * space in current, grab a new buffer (and discard what was left of current) + */ +void r600AllocDmaRegion(context_t *context, struct r600_dma_region *region, + int bytes, int alignment) +{ + if (context->dma.flush) + context->dma.flush(context); + + if (region->buf) + r600FreeDmaRegion(context, region); + + alignment--; + context->dma.current.ptr = (context->dma.current.ptr + alignment) & + ~alignment; + context->dma.current.start = context->dma.current.ptr; + + /* + * TODO, shader buffer is 256-bytes aligned, we should do it more clear. + */ + if (bytes < 0x1000) // 4096-bytes aligned + bytes = 0x1000; + + if (context->dma.current.ptr + bytes > context->dma.current.end) + { + r600RefillCurrentDmaRegion(context, (bytes + 0x7) & ~0x7); + //if (context->dma.current.buf) + // r600MemUse(context, context->dma.current.buf->id); + } + + region->start = context->dma.current.start; + region->ptr = context->dma.current.start; + region->end = context->dma.current.start + bytes; + region->address = context->dma.current.address; + region->buf = context->dma.current.buf; + region->buf->refcount++; + + context->dma.current.ptr += bytes; /* bug - if alignment > 7 */ + context->dma.current.ptr = (context->dma.current.ptr + 0x7) & ~0x7; + context->dma.current.start = context->dma.current.ptr; + + assert(context->dma.current.ptr <= context->dma.current.end); +} + +unsigned long r600GARTOffsetFromVirtual(context_t *context, GLvoid * pointer) +{ + int offset = (char *)pointer - + (char *)context->screen->gartTextures.cpu; + + if (offset < 0 || offset > context->screen->gartTextures.size) + return ~0; + else + return context->screen->gart_texture_offset + offset; +} diff --git a/src/mesa/drivers/dri/r600/r600_mem.h b/src/mesa/drivers/dri/r600/r600_mem.h new file mode 100644 index 0000000000..8a74e4b844 --- /dev/null +++ b/src/mesa/drivers/dri/r600/r600_mem.h @@ -0,0 +1,85 @@ +/* + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Authors: + * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com> + * CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com> + */ + +#ifndef __R600_MEM_H__ +#define __R600_MEM_H__ + +#define R600_MEM_UL 1 + +#define R600_MEM_R 1 +#define R600_MEM_W 2 +#define R600_MEM_RW (R300_MEM_R | R300_MEM_W) + +#define R600_MEM_SCRATCH 2 + +struct r600_mem_manager +{ + struct + { + void *ptr; + uint32_t size; + uint32_t age; + uint32_t h_pending; + int pending; + int mapped; + } *u_list; + int u_head, u_size, u_last; +}; + +extern void r600MemInit(context_t *context); +extern void r600MemDestroy(context_t *context); +extern void *r600MemPtr(context_t *context, int id); +extern int r600MemFind(context_t *context, void *ptr); +extern int r600MemAlloc(context_t *context, int alignment, int size); +extern void r600MemFree(context_t *context, int id); +extern void r600MemUse(context_t *context, int id); +extern unsigned long r600MemOffset(context_t *context, int id); +extern void *r600MemMap(context_t *context, int id, int access); +extern void r600MemUnmap(context_t *context, int id); + +extern void r600AllocDmaRegion(context_t *context, + struct r600_dma_region *region, + int bytes, + int alignment); +extern void r600FreeDmaRegion(context_t *context, + struct r600_dma_region *region); + +extern GLboolean r600LoadMemSurf(context_t *context, + GLuint dst_offset, /* gpu addr */ + GLuint dst_pitch_in_pixel, + GLuint src_width_in_pixel, + GLuint height, + GLuint byte_per_pixel, + unsigned char* pSrc); /* source data */ + +extern GLboolean r600AllocMemSurf(context_t *context, + void **ppmemBlock, + void **ppheap, + GLuint *prefered_heap, + GLuint totalSize); + +#endif diff --git a/src/mesa/drivers/dri/r600/r600_reg.h b/src/mesa/drivers/dri/r600/r600_reg.h new file mode 100644 index 0000000000..8c22b56def --- /dev/null +++ b/src/mesa/drivers/dri/r600/r600_reg.h @@ -0,0 +1,133 @@ +/* + * RadeonHD R6xx, R7xx Register documentation + * + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * Copyright (C) 2008-2009 Matthias Hopf + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _R600_REG_H_ +#define _R600_REG_H_ + +/* + * Register definitions + */ + +#include "r600_reg_auto_r6xx.h" +#include "r600_reg_r6xx.h" +#include "r600_reg_r7xx.h" + + +/* SET_*_REG offsets + ends */ +enum +{ + SET_CONFIG_REG_offset = 0x00008000, + SET_CONFIG_REG_end = 0x0000ac00, + SET_CONTEXT_REG_offset = 0x00028000, + SET_CONTEXT_REG_end = 0x00029000, + SET_ALU_CONST_offset = 0x00030000, + SET_ALU_CONST_end = 0x00032000, + SET_RESOURCE_offset = 0x00038000, + SET_RESOURCE_end = 0x0003c000, + SET_SAMPLER_offset = 0x0003c000, + SET_SAMPLER_end = 0x0003cff0, + SET_CTL_CONST_offset = 0x0003cff0, + SET_CTL_CONST_end = 0x0003e200, + SET_LOOP_CONST_offset = 0x0003e200, + SET_LOOP_CONST_end = 0x0003e380, + SET_BOOL_CONST_offset = 0x0003e380, + SET_BOOL_CONST_end = 0x00040000, +}; + +/* packet3 IT_SURFACE_BASE_UPDATE bits */ +enum +{ + DEPTH_BASE = (1 << 0), + COLOR0_BASE = (1 << 1), + COLOR1_BASE = (1 << 2), + COLOR2_BASE = (1 << 3), + COLOR3_BASE = (1 << 4), + COLOR4_BASE = (1 << 5), + COLOR5_BASE = (1 << 6), + COLOR6_BASE = (1 << 7), + COLOR7_BASE = (1 << 8), + STRMOUT_BASE0 = (1 << 9), + STRMOUT_BASE1 = (1 << 10), + STRMOUT_BASE2 = (1 << 11), + STRMOUT_BASE3 = (1 << 12), + COHER_BASE0 = (1 << 13), + COHER_BASE1 = (1 << 14), +}; + +/* CP packet types */ +enum +{ + RADEON_CP_PACKET0 = 0x00000000, + RADEON_CP_PACKET1 = 0x40000000, + RADEON_CP_PACKET2 = 0x80000000, + RADEON_CP_PACKET3 = 0xC0000000, +}; + +/* Packet3 commands */ +enum +{ + IT_NOP = 0x10, + IT_INDIRECT_BUFFER_END = 0x17, + IT_SET_PREDICATION = 0x20, + IT_REG_RMW = 0x21, + IT_COND_EXEC = 0x22, + IT_PRED_EXEC = 0x23, + IT_START_3D_CMDBUF = 0x24, + IT_DRAW_INDEX_2 = 0x27, + IT_CONTEXT_CONTROL = 0x28, + IT_DRAW_INDEX_IMMD_BE = 0x29, + IT_INDEX_TYPE = 0x2A, + IT_DRAW_INDEX = 0x2B, + IT_DRAW_INDEX_AUTO = 0x2D, + IT_DRAW_INDEX_IMMD = 0x2E, + IT_NUM_INSTANCES = 0x2F, + IT_STRMOUT_BUFFER_UPDATE = 0x34, + IT_INDIRECT_BUFFER_MP = 0x38, + IT_MEM_SEMAPHORE = 0x39, + IT_MPEG_INDEX = 0x3A, + IT_WAIT_REG_MEM = 0x3C, + IT_MEM_WRITE = 0x3D, + IT_INDIRECT_BUFFER = 0x32, + IT_CP_INTERRUPT = 0x40, + IT_SURFACE_SYNC = 0x43, + IT_ME_INITIALIZE = 0x44, + IT_COND_WRITE = 0x45, + IT_EVENT_WRITE = 0x46, + IT_EVENT_WRITE_EOP = 0x47, + IT_ONE_REG_WRITE = 0x57, + IT_SET_CONFIG_REG = 0x68, + IT_SET_CONTEXT_REG = 0x69, + IT_SET_ALU_CONST = 0x6A, + IT_SET_BOOL_CONST = 0x6B, + IT_SET_LOOP_CONST = 0x6C, + IT_SET_RESOURCE = 0x6D, + IT_SET_SAMPLER = 0x6E, + IT_SET_CTL_CONST = 0x6F, + IT_SURFACE_BASE_UPDATE = 0x73, +}; + +#define CP_PACKET0(reg, n) (RADEON_CP_PACKET0 | (((n)-1)<<16) | ((reg)>>2)) +#define CP_PACKET3(cmd, n) (RADEON_CP_PACKET3 | (((n)-1)<<16) | ((cmd)<<8)) + +#endif diff --git a/src/mesa/drivers/dri/r600/r600_reg_auto_r6xx.h b/src/mesa/drivers/dri/r600/r600_reg_auto_r6xx.h new file mode 100644 index 0000000000..9d5aa3c7e4 --- /dev/null +++ b/src/mesa/drivers/dri/r600/r600_reg_auto_r6xx.h @@ -0,0 +1,3087 @@ +/* + * RadeonHD R6xx, R7xx Register documentation + * + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * Copyright (C) 2008-2009 Matthias Hopf + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _AUTOREGS +#define _AUTOREGS + +enum { + + VGT_VTX_VECT_EJECT_REG = 0x000088b0, + PRIM_COUNT_mask = 0x3ff << 0, + PRIM_COUNT_shift = 0, + VGT_LAST_COPY_STATE = 0x000088c0, + SRC_STATE_ID_mask = 0x07 << 0, + SRC_STATE_ID_shift = 0, + DST_STATE_ID_mask = 0x07 << 16, + DST_STATE_ID_shift = 16, + VGT_CACHE_INVALIDATION = 0x000088c4, + CACHE_INVALIDATION_mask = 0x03 << 0, + CACHE_INVALIDATION_shift = 0, + VC_ONLY = 0x00, + TC_ONLY = 0x01, + VC_AND_TC = 0x02, + VS_NO_EXTRA_BUFFER_bit = 1 << 5, + VGT_GS_PER_ES = 0x000088c8, + VGT_ES_PER_GS = 0x000088cc, + VGT_GS_VERTEX_REUSE = 0x000088d4, + VERT_REUSE_mask = 0x1f << 0, + VERT_REUSE_shift = 0, + VGT_MC_LAT_CNTL = 0x000088d8, + MC_TIME_STAMP_RES_mask = 0x03 << 0, + MC_TIME_STAMP_RES_shift = 0, + X_0_992_MAX_LATENCY = 0x00, + X_0_496_MAX_LATENCY = 0x01, + X_0_248_MAX_LATENCY = 0x02, + X_0_124_MAX_LATENCY = 0x03, + VGT_GS_PER_VS = 0x000088e8, + GS_PER_VS_mask = 0x0f << 0, + GS_PER_VS_shift = 0, + VGT_CNTL_STATUS = 0x000088f0, + VGT_OUT_INDX_BUSY_bit = 1 << 0, + VGT_OUT_BUSY_bit = 1 << 1, + VGT_PT_BUSY_bit = 1 << 2, + VGT_TE_BUSY_bit = 1 << 3, + VGT_VR_BUSY_bit = 1 << 4, + VGT_GRP_BUSY_bit = 1 << 5, + VGT_DMA_REQ_BUSY_bit = 1 << 6, + VGT_DMA_BUSY_bit = 1 << 7, + VGT_GS_BUSY_bit = 1 << 8, + VGT_BUSY_bit = 1 << 9, + VGT_PRIMITIVE_TYPE = 0x00008958, + VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask = 0x3f << 0, + VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift = 0, + DI_PT_NONE = 0x00, + DI_PT_POINTLIST = 0x01, + DI_PT_LINELIST = 0x02, + DI_PT_LINESTRIP = 0x03, + DI_PT_TRILIST = 0x04, + DI_PT_TRIFAN = 0x05, + DI_PT_TRISTRIP = 0x06, + DI_PT_UNUSED_0 = 0x07, + DI_PT_UNUSED_1 = 0x08, + DI_PT_UNUSED_2 = 0x09, + DI_PT_LINELIST_ADJ = 0x0a, + DI_PT_LINESTRIP_ADJ = 0x0b, + DI_PT_TRILIST_ADJ = 0x0c, + DI_PT_TRISTRIP_ADJ = 0x0d, + DI_PT_UNUSED_3 = 0x0e, + DI_PT_UNUSED_4 = 0x0f, + DI_PT_TRI_WITH_WFLAGS = 0x10, + DI_PT_RECTLIST = 0x11, + DI_PT_LINELOOP = 0x12, + DI_PT_QUADLIST = 0x13, + DI_PT_QUADSTRIP = 0x14, + DI_PT_POLYGON = 0x15, + DI_PT_2D_COPY_RECT_LIST_V0 = 0x16, + DI_PT_2D_COPY_RECT_LIST_V1 = 0x17, + DI_PT_2D_COPY_RECT_LIST_V2 = 0x18, + DI_PT_2D_COPY_RECT_LIST_V3 = 0x19, + DI_PT_2D_FILL_RECT_LIST = 0x1a, + DI_PT_2D_LINE_STRIP = 0x1b, + DI_PT_2D_TRI_STRIP = 0x1c, + VGT_INDEX_TYPE = 0x0000895c, + INDEX_TYPE_mask = 0x03 << 0, + INDEX_TYPE_shift = 0, + DI_INDEX_SIZE_16_BIT = 0x00, + DI_INDEX_SIZE_32_BIT = 0x01, + VGT_STRMOUT_BUFFER_FILLED_SIZE_0 = 0x00008960, + VGT_STRMOUT_BUFFER_FILLED_SIZE_1 = 0x00008964, + VGT_STRMOUT_BUFFER_FILLED_SIZE_2 = 0x00008968, + VGT_STRMOUT_BUFFER_FILLED_SIZE_3 = 0x0000896c, + VGT_NUM_INDICES = 0x00008970, + VGT_NUM_INSTANCES = 0x00008974, + PA_CL_CNTL_STATUS = 0x00008a10, + CL_BUSY_bit = 1 << 31, + PA_CL_ENHANCE = 0x00008a14, + CLIP_VTX_REORDER_ENA_bit = 1 << 0, + NUM_CLIP_SEQ_mask = 0x03 << 1, + NUM_CLIP_SEQ_shift = 1, + CLIPPED_PRIM_SEQ_STALL_bit = 1 << 3, + VE_NAN_PROC_DISABLE_bit = 1 << 4, + PA_SU_CNTL_STATUS = 0x00008a50, + SU_BUSY_bit = 1 << 31, + PA_SC_LINE_STIPPLE_STATE = 0x00008b10, + CURRENT_PTR_mask = 0x0f << 0, + CURRENT_PTR_shift = 0, + CURRENT_COUNT_mask = 0xff << 8, + CURRENT_COUNT_shift = 8, + PA_SC_MULTI_CHIP_CNTL = 0x00008b20, + LOG2_NUM_CHIPS_mask = 0x07 << 0, + LOG2_NUM_CHIPS_shift = 0, + MULTI_CHIP_TILE_SIZE_mask = 0x03 << 3, + MULTI_CHIP_TILE_SIZE_shift = 3, + X_16_X_16_PIXEL_TILE_PER_CHIP = 0x00, + X_32_X_32_PIXEL_TILE_PER_CHIP = 0x01, + X_64_X_64_PIXEL_TILE_PER_CHIP = 0x02, + X_128X128_PIXEL_TILE_PER_CHIP = 0x03, + CHIP_TILE_X_LOC_mask = 0x07 << 5, + CHIP_TILE_X_LOC_shift = 5, + CHIP_TILE_Y_LOC_mask = 0x07 << 8, + CHIP_TILE_Y_LOC_shift = 8, + CHIP_SUPER_TILE_B_bit = 1 << 11, + PA_SC_AA_SAMPLE_LOCS_2S = 0x00008b40, + S0_X_mask = 0x0f << 0, + S0_X_shift = 0, + S0_Y_mask = 0x0f << 4, + S0_Y_shift = 4, + S1_X_mask = 0x0f << 8, + S1_X_shift = 8, + S1_Y_mask = 0x0f << 12, + S1_Y_shift = 12, + PA_SC_AA_SAMPLE_LOCS_4S = 0x00008b44, +/* S0_X_mask = 0x0f << 0, */ +/* S0_X_shift = 0, */ +/* S0_Y_mask = 0x0f << 4, */ +/* S0_Y_shift = 4, */ +/* S1_X_mask = 0x0f << 8, */ +/* S1_X_shift = 8, */ +/* S1_Y_mask = 0x0f << 12, */ +/* S1_Y_shift = 12, */ + S2_X_mask = 0x0f << 16, + S2_X_shift = 16, + S2_Y_mask = 0x0f << 20, + S2_Y_shift = 20, + S3_X_mask = 0x0f << 24, + S3_X_shift = 24, + S3_Y_mask = 0x0f << 28, + S3_Y_shift = 28, + PA_SC_AA_SAMPLE_LOCS_8S_WD0 = 0x00008b48, +/* S0_X_mask = 0x0f << 0, */ +/* S0_X_shift = 0, */ +/* S0_Y_mask = 0x0f << 4, */ +/* S0_Y_shift = 4, */ +/* S1_X_mask = 0x0f << 8, */ +/* S1_X_shift = 8, */ +/* S1_Y_mask = 0x0f << 12, */ +/* S1_Y_shift = 12, */ +/* S2_X_mask = 0x0f << 16, */ +/* S2_X_shift = 16, */ +/* S2_Y_mask = 0x0f << 20, */ +/* S2_Y_shift = 20, */ +/* S3_X_mask = 0x0f << 24, */ +/* S3_X_shift = 24, */ +/* S3_Y_mask = 0x0f << 28, */ +/* S3_Y_shift = 28, */ + PA_SC_AA_SAMPLE_LOCS_8S_WD1 = 0x00008b4c, + S4_X_mask = 0x0f << 0, + S4_X_shift = 0, + S4_Y_mask = 0x0f << 4, + S4_Y_shift = 4, + S5_X_mask = 0x0f << 8, + S5_X_shift = 8, + S5_Y_mask = 0x0f << 12, + S5_Y_shift = 12, + S6_X_mask = 0x0f << 16, + S6_X_shift = 16, + S6_Y_mask = 0x0f << 20, + S6_Y_shift = 20, + S7_X_mask = 0x0f << 24, + S7_X_shift = 24, + S7_Y_mask = 0x0f << 28, + S7_Y_shift = 28, + PA_SC_CNTL_STATUS = 0x00008be0, + MPASS_OVERFLOW_bit = 1 << 30, + PA_SC_ENHANCE = 0x00008bf0, + FORCE_EOV_MAX_CLK_CNT_mask = 0xfff << 0, + FORCE_EOV_MAX_CLK_CNT_shift = 0, + FORCE_EOV_MAX_TILE_CNT_mask = 0xfff << 12, + FORCE_EOV_MAX_TILE_CNT_shift = 12, + SQ_CONFIG = 0x00008c00, + VC_ENABLE_bit = 1 << 0, + EXPORT_SRC_C_bit = 1 << 1, + DX9_CONSTS_bit = 1 << 2, + ALU_INST_PREFER_VECTOR_bit = 1 << 3, + SQ_CONFIG__DX10_CLAMP_bit = 1 << 4, + ALU_PREFER_ONE_WATERFALL_bit = 1 << 5, + ALU_MAX_ONE_WATERFALL_bit = 1 << 6, + CLAUSE_SEQ_PRIO_mask = 0x03 << 8, + CLAUSE_SEQ_PRIO_shift = 8, + SQ_CL_PRIO_RND_ROBIN = 0x00, + SQ_CL_PRIO_MACRO_SEQ = 0x01, + SQ_CL_PRIO_NONE = 0x02, + PS_PRIO_mask = 0x03 << 24, + PS_PRIO_shift = 24, + VS_PRIO_mask = 0x03 << 26, + VS_PRIO_shift = 26, + GS_PRIO_mask = 0x03 << 28, + GS_PRIO_shift = 28, + ES_PRIO_mask = 0x03 << 30, + ES_PRIO_shift = 30, + SQ_GPR_RESOURCE_MGMT_1 = 0x00008c04, + NUM_PS_GPRS_mask = 0xff << 0, + NUM_PS_GPRS_shift = 0, + NUM_VS_GPRS_mask = 0xff << 16, + NUM_VS_GPRS_shift = 16, + NUM_CLAUSE_TEMP_GPRS_mask = 0x0f << 28, + NUM_CLAUSE_TEMP_GPRS_shift = 28, + SQ_GPR_RESOURCE_MGMT_2 = 0x00008c08, + NUM_GS_GPRS_mask = 0xff << 0, + NUM_GS_GPRS_shift = 0, + NUM_ES_GPRS_mask = 0xff << 16, + NUM_ES_GPRS_shift = 16, + SQ_THREAD_RESOURCE_MGMT = 0x00008c0c, + NUM_PS_THREADS_mask = 0xff << 0, + NUM_PS_THREADS_shift = 0, + NUM_VS_THREADS_mask = 0xff << 8, + NUM_VS_THREADS_shift = 8, + NUM_GS_THREADS_mask = 0xff << 16, + NUM_GS_THREADS_shift = 16, + NUM_ES_THREADS_mask = 0xff << 24, + NUM_ES_THREADS_shift = 24, + SQ_STACK_RESOURCE_MGMT_1 = 0x00008c10, + NUM_PS_STACK_ENTRIES_mask = 0xfff << 0, + NUM_PS_STACK_ENTRIES_shift = 0, + NUM_VS_STACK_ENTRIES_mask = 0xfff << 16, + NUM_VS_STACK_ENTRIES_shift = 16, + SQ_STACK_RESOURCE_MGMT_2 = 0x00008c14, + NUM_GS_STACK_ENTRIES_mask = 0xfff << 0, + NUM_GS_STACK_ENTRIES_shift = 0, + NUM_ES_STACK_ENTRIES_mask = 0xfff << 16, + NUM_ES_STACK_ENTRIES_shift = 16, + SQ_ESGS_RING_BASE = 0x00008c40, + SQ_ESGS_RING_SIZE = 0x00008c44, + SQ_GSVS_RING_BASE = 0x00008c48, + SQ_GSVS_RING_SIZE = 0x00008c4c, + SQ_ESTMP_RING_BASE = 0x00008c50, + SQ_ESTMP_RING_SIZE = 0x00008c54, + SQ_GSTMP_RING_BASE = 0x00008c58, + SQ_GSTMP_RING_SIZE = 0x00008c5c, + SQ_VSTMP_RING_BASE = 0x00008c60, + SQ_VSTMP_RING_SIZE = 0x00008c64, + SQ_PSTMP_RING_BASE = 0x00008c68, + SQ_PSTMP_RING_SIZE = 0x00008c6c, + SQ_FBUF_RING_BASE = 0x00008c70, + SQ_FBUF_RING_SIZE = 0x00008c74, + SQ_REDUC_RING_BASE = 0x00008c78, + SQ_REDUC_RING_SIZE = 0x00008c7c, + SQ_ALU_WORD1_OP3 = 0x00008dfc, + SRC2_SEL_mask = 0x1ff << 0, + SRC2_SEL_shift = 0, + SQ_ALU_SRC_0 = 0xf8, + SQ_ALU_SRC_1 = 0xf9, + SQ_ALU_SRC_1_INT = 0xfa, + SQ_ALU_SRC_M_1_INT = 0xfb, + SQ_ALU_SRC_0_5 = 0xfc, + SQ_ALU_SRC_LITERAL = 0xfd, + SQ_ALU_SRC_PV = 0xfe, + SQ_ALU_SRC_PS = 0xff, + SRC2_REL_bit = 1 << 9, + SRC2_CHAN_mask = 0x03 << 10, + SRC2_CHAN_shift = 10, + SQ_CHAN_X = 0x00, + SQ_CHAN_Y = 0x01, + SQ_CHAN_Z = 0x02, + SQ_CHAN_W = 0x03, + SRC2_NEG_bit = 1 << 12, + SQ_ALU_WORD1_OP3__ALU_INST_mask = 0x1f << 13, + SQ_ALU_WORD1_OP3__ALU_INST_shift = 13, + SQ_OP3_INST_MUL_LIT = 0x0c, + SQ_OP3_INST_MUL_LIT_M2 = 0x0d, + SQ_OP3_INST_MUL_LIT_M4 = 0x0e, + SQ_OP3_INST_MUL_LIT_D2 = 0x0f, + SQ_OP3_INST_MULADD = 0x10, + SQ_OP3_INST_MULADD_M2 = 0x11, + SQ_OP3_INST_MULADD_M4 = 0x12, + SQ_OP3_INST_MULADD_D2 = 0x13, + SQ_OP3_INST_MULADD_IEEE = 0x14, + SQ_OP3_INST_MULADD_IEEE_M2 = 0x15, + SQ_OP3_INST_MULADD_IEEE_M4 = 0x16, + SQ_OP3_INST_MULADD_IEEE_D2 = 0x17, + SQ_OP3_INST_CNDE = 0x18, + SQ_OP3_INST_CNDGT = 0x19, + SQ_OP3_INST_CNDGE = 0x1a, + SQ_OP3_INST_CNDE_INT = 0x1c, + SQ_OP3_INST_CNDGT_INT = 0x1d, + SQ_OP3_INST_CNDGE_INT = 0x1e, + SQ_TEX_WORD2 = 0x00008dfc, + OFFSET_X_mask = 0x1f << 0, + OFFSET_X_shift = 0, + OFFSET_Y_mask = 0x1f << 5, + OFFSET_Y_shift = 5, + OFFSET_Z_mask = 0x1f << 10, + OFFSET_Z_shift = 10, + SAMPLER_ID_mask = 0x1f << 15, + SAMPLER_ID_shift = 15, + SQ_TEX_WORD2__SRC_SEL_X_mask = 0x07 << 20, + SQ_TEX_WORD2__SRC_SEL_X_shift = 20, + SQ_SEL_X = 0x00, + SQ_SEL_Y = 0x01, + SQ_SEL_Z = 0x02, + SQ_SEL_W = 0x03, + SQ_SEL_0 = 0x04, + SQ_SEL_1 = 0x05, + SRC_SEL_Y_mask = 0x07 << 23, + SRC_SEL_Y_shift = 23, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ + SRC_SEL_Z_mask = 0x07 << 26, + SRC_SEL_Z_shift = 26, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ + SRC_SEL_W_mask = 0x07 << 29, + SRC_SEL_W_shift = 29, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ + SQ_CF_ALLOC_EXPORT_WORD1 = 0x00008dfc, + BURST_COUNT_mask = 0x0f << 17, + BURST_COUNT_shift = 17, + END_OF_PROGRAM_bit = 1 << 21, + VALID_PIXEL_MODE_bit = 1 << 22, + SQ_CF_ALLOC_EXPORT_WORD1__CF_INST_mask = 0x7f << 23, + SQ_CF_ALLOC_EXPORT_WORD1__CF_INST_shift = 23, + SQ_CF_INST_MEM_STREAM0 = 0x20, + SQ_CF_INST_MEM_STREAM1 = 0x21, + SQ_CF_INST_MEM_STREAM2 = 0x22, + SQ_CF_INST_MEM_STREAM3 = 0x23, + SQ_CF_INST_MEM_SCRATCH = 0x24, + SQ_CF_INST_MEM_REDUCTION = 0x25, + SQ_CF_INST_MEM_RING = 0x26, + SQ_CF_INST_EXPORT = 0x27, + SQ_CF_INST_EXPORT_DONE = 0x28, + WHOLE_QUAD_MODE_bit = 1 << 30, + BARRIER_bit = 1 << 31, + SQ_CF_ALU_WORD1 = 0x00008dfc, + KCACHE_MODE1_mask = 0x03 << 0, + KCACHE_MODE1_shift = 0, + SQ_CF_KCACHE_NOP = 0x00, + SQ_CF_KCACHE_LOCK_1 = 0x01, + SQ_CF_KCACHE_LOCK_2 = 0x02, + SQ_CF_KCACHE_LOCK_LOOP_INDEX = 0x03, + KCACHE_ADDR0_mask = 0xff << 2, + KCACHE_ADDR0_shift = 2, + KCACHE_ADDR1_mask = 0xff << 10, + KCACHE_ADDR1_shift = 10, + SQ_CF_ALU_WORD1__COUNT_mask = 0x7f << 18, + SQ_CF_ALU_WORD1__COUNT_shift = 18, + SQ_CF_ALU_WORD1__ALT_CONST_bit = 1 << 25, + SQ_CF_ALU_WORD1__CF_INST_mask = 0x0f << 26, + SQ_CF_ALU_WORD1__CF_INST_shift = 26, + SQ_CF_INST_ALU = 0x08, + SQ_CF_INST_ALU_PUSH_BEFORE = 0x09, + SQ_CF_INST_ALU_POP_AFTER = 0x0a, + SQ_CF_INST_ALU_POP2_AFTER = 0x0b, + SQ_CF_INST_ALU_CONTINUE = 0x0d, + SQ_CF_INST_ALU_BREAK = 0x0e, + SQ_CF_INST_ALU_ELSE_AFTER = 0x0f, +/* WHOLE_QUAD_MODE_bit = 1 << 30, */ +/* BARRIER_bit = 1 << 31, */ + SQ_TEX_WORD1 = 0x00008dfc, + SQ_TEX_WORD1__DST_GPR_mask = 0x7f << 0, + SQ_TEX_WORD1__DST_GPR_shift = 0, + SQ_TEX_WORD1__DST_REL_bit = 1 << 7, + SQ_TEX_WORD1__DST_SEL_X_mask = 0x07 << 9, + SQ_TEX_WORD1__DST_SEL_X_shift = 9, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ + SQ_SEL_MASK = 0x07, + SQ_TEX_WORD1__DST_SEL_Y_mask = 0x07 << 12, + SQ_TEX_WORD1__DST_SEL_Y_shift = 12, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SQ_TEX_WORD1__DST_SEL_Z_mask = 0x07 << 15, + SQ_TEX_WORD1__DST_SEL_Z_shift = 15, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SQ_TEX_WORD1__DST_SEL_W_mask = 0x07 << 18, + SQ_TEX_WORD1__DST_SEL_W_shift = 18, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SQ_TEX_WORD1__LOD_BIAS_mask = 0x7f << 21, + SQ_TEX_WORD1__LOD_BIAS_shift = 21, + COORD_TYPE_X_bit = 1 << 28, + COORD_TYPE_Y_bit = 1 << 29, + COORD_TYPE_Z_bit = 1 << 30, + COORD_TYPE_W_bit = 1 << 31, + SQ_VTX_WORD0 = 0x00008dfc, + VTX_INST_mask = 0x1f << 0, + VTX_INST_shift = 0, + SQ_VTX_INST_FETCH = 0x00, + SQ_VTX_INST_SEMANTIC = 0x01, + FETCH_TYPE_mask = 0x03 << 5, + FETCH_TYPE_shift = 5, + SQ_VTX_FETCH_VERTEX_DATA = 0x00, + SQ_VTX_FETCH_INSTANCE_DATA = 0x01, + SQ_VTX_FETCH_NO_INDEX_OFFSET = 0x02, + FETCH_WHOLE_QUAD_bit = 1 << 7, + BUFFER_ID_mask = 0xff << 8, + BUFFER_ID_shift = 8, + SRC_GPR_mask = 0x7f << 16, + SRC_GPR_shift = 16, + SRC_REL_bit = 1 << 23, + SQ_VTX_WORD0__SRC_SEL_X_mask = 0x03 << 24, + SQ_VTX_WORD0__SRC_SEL_X_shift = 24, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ + MEGA_FETCH_COUNT_mask = 0x3f << 26, + MEGA_FETCH_COUNT_shift = 26, + SQ_CF_ALLOC_EXPORT_WORD1_SWIZ = 0x00008dfc, + SEL_X_mask = 0x07 << 0, + SEL_X_shift = 0, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SEL_Y_mask = 0x07 << 3, + SEL_Y_shift = 3, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SEL_Z_mask = 0x07 << 6, + SEL_Z_shift = 6, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SEL_W_mask = 0x07 << 9, + SEL_W_shift = 9, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SQ_ALU_WORD1 = 0x00008dfc, + ENCODING_mask = 0x07 << 15, + ENCODING_shift = 15, + BANK_SWIZZLE_mask = 0x07 << 18, + BANK_SWIZZLE_shift = 18, + SQ_ALU_VEC_012 = 0x00, + SQ_ALU_VEC_021 = 0x01, + SQ_ALU_VEC_120 = 0x02, + SQ_ALU_VEC_102 = 0x03, + SQ_ALU_VEC_201 = 0x04, + SQ_ALU_VEC_210 = 0x05, + SQ_ALU_WORD1__DST_GPR_mask = 0x7f << 21, + SQ_ALU_WORD1__DST_GPR_shift = 21, + SQ_ALU_WORD1__DST_REL_bit = 1 << 28, + DST_CHAN_mask = 0x03 << 29, + DST_CHAN_shift = 29, + CHAN_X = 0x00, + CHAN_Y = 0x01, + CHAN_Z = 0x02, + CHAN_W = 0x03, + SQ_ALU_WORD1__CLAMP_bit = 1 << 31, + SQ_CF_ALU_WORD0 = 0x00008dfc, + SQ_CF_ALU_WORD0__ADDR_mask = 0x3fffff << 0, + SQ_CF_ALU_WORD0__ADDR_shift = 0, + KCACHE_BANK0_mask = 0x0f << 22, + KCACHE_BANK0_shift = 22, + KCACHE_BANK1_mask = 0x0f << 26, + KCACHE_BANK1_shift = 26, + KCACHE_MODE0_mask = 0x03 << 30, + KCACHE_MODE0_shift = 30, +/* SQ_CF_KCACHE_NOP = 0x00, */ +/* SQ_CF_KCACHE_LOCK_1 = 0x01, */ +/* SQ_CF_KCACHE_LOCK_2 = 0x02, */ +/* SQ_CF_KCACHE_LOCK_LOOP_INDEX = 0x03, */ + SQ_VTX_WORD2 = 0x00008dfc, + SQ_VTX_WORD2__OFFSET_mask = 0xffff << 0, + SQ_VTX_WORD2__OFFSET_shift = 0, + SQ_VTX_WORD2__ENDIAN_SWAP_mask = 0x03 << 16, + SQ_VTX_WORD2__ENDIAN_SWAP_shift = 16, + SQ_ENDIAN_NONE = 0x00, + SQ_ENDIAN_8IN16 = 0x01, + SQ_ENDIAN_8IN32 = 0x02, + CONST_BUF_NO_STRIDE_bit = 1 << 18, + MEGA_FETCH_bit = 1 << 19, + SQ_VTX_WORD2__ALT_CONST_bit = 1 << 20, + SQ_ALU_WORD1_OP2_V2 = 0x00008dfc, + SRC0_ABS_bit = 1 << 0, + SRC1_ABS_bit = 1 << 1, + UPDATE_EXECUTE_MASK_bit = 1 << 2, + UPDATE_PRED_bit = 1 << 3, + WRITE_MASK_bit = 1 << 4, + SQ_ALU_WORD1_OP2_V2__OMOD_mask = 0x03 << 5, + SQ_ALU_WORD1_OP2_V2__OMOD_shift = 5, + SQ_ALU_OMOD_OFF = 0x00, + SQ_ALU_OMOD_M2 = 0x01, + SQ_ALU_OMOD_M4 = 0x02, + SQ_ALU_OMOD_D2 = 0x03, + SQ_ALU_WORD1_OP2_V2__ALU_INST_mask = 0x7ff << 7, + SQ_ALU_WORD1_OP2_V2__ALU_INST_shift = 7, + SQ_OP2_INST_ADD = 0x00, + SQ_OP2_INST_MUL = 0x01, + SQ_OP2_INST_MUL_IEEE = 0x02, + SQ_OP2_INST_MAX = 0x03, + SQ_OP2_INST_MIN = 0x04, + SQ_OP2_INST_MAX_DX10 = 0x05, + SQ_OP2_INST_MIN_DX10 = 0x06, + SQ_OP2_INST_SETE = 0x08, + SQ_OP2_INST_SETGT = 0x09, + SQ_OP2_INST_SETGE = 0x0a, + SQ_OP2_INST_SETNE = 0x0b, + SQ_OP2_INST_SETE_DX10 = 0x0c, + SQ_OP2_INST_SETGT_DX10 = 0x0d, + SQ_OP2_INST_SETGE_DX10 = 0x0e, + SQ_OP2_INST_SETNE_DX10 = 0x0f, + SQ_OP2_INST_FRACT = 0x10, + SQ_OP2_INST_TRUNC = 0x11, + SQ_OP2_INST_CEIL = 0x12, + SQ_OP2_INST_RNDNE = 0x13, + SQ_OP2_INST_FLOOR = 0x14, + SQ_OP2_INST_MOVA = 0x15, + SQ_OP2_INST_MOVA_FLOOR = 0x16, + SQ_OP2_INST_MOVA_INT = 0x18, + SQ_OP2_INST_MOV = 0x19, + SQ_OP2_INST_NOP = 0x1a, + SQ_OP2_INST_PRED_SETGT_UINT = 0x1e, + SQ_OP2_INST_PRED_SETGE_UINT = 0x1f, + SQ_OP2_INST_PRED_SETE = 0x20, + SQ_OP2_INST_PRED_SETGT = 0x21, + SQ_OP2_INST_PRED_SETGE = 0x22, + SQ_OP2_INST_PRED_SETNE = 0x23, + SQ_OP2_INST_PRED_SET_INV = 0x24, + SQ_OP2_INST_PRED_SET_POP = 0x25, + SQ_OP2_INST_PRED_SET_CLR = 0x26, + SQ_OP2_INST_PRED_SET_RESTORE = 0x27, + SQ_OP2_INST_PRED_SETE_PUSH = 0x28, + SQ_OP2_INST_PRED_SETGT_PUSH = 0x29, + SQ_OP2_INST_PRED_SETGE_PUSH = 0x2a, + SQ_OP2_INST_PRED_SETNE_PUSH = 0x2b, + SQ_OP2_INST_KILLE = 0x2c, + SQ_OP2_INST_KILLGT = 0x2d, + SQ_OP2_INST_KILLGE = 0x2e, + SQ_OP2_INST_KILLNE = 0x2f, + SQ_OP2_INST_AND_INT = 0x30, + SQ_OP2_INST_OR_INT = 0x31, + SQ_OP2_INST_XOR_INT = 0x32, + SQ_OP2_INST_NOT_INT = 0x33, + SQ_OP2_INST_ADD_INT = 0x34, + SQ_OP2_INST_SUB_INT = 0x35, + SQ_OP2_INST_MAX_INT = 0x36, + SQ_OP2_INST_MIN_INT = 0x37, + SQ_OP2_INST_MAX_UINT = 0x38, + SQ_OP2_INST_MIN_UINT = 0x39, + SQ_OP2_INST_SETE_INT = 0x3a, + SQ_OP2_INST_SETGT_INT = 0x3b, + SQ_OP2_INST_SETGE_INT = 0x3c, + SQ_OP2_INST_SETNE_INT = 0x3d, + SQ_OP2_INST_SETGT_UINT = 0x3e, + SQ_OP2_INST_SETGE_UINT = 0x3f, + SQ_OP2_INST_KILLGT_UINT = 0x40, + SQ_OP2_INST_KILLGE_UINT = 0x41, + SQ_OP2_INST_PRED_SETE_INT = 0x42, + SQ_OP2_INST_PRED_SETGT_INT = 0x43, + SQ_OP2_INST_PRED_SETGE_INT = 0x44, + SQ_OP2_INST_PRED_SETNE_INT = 0x45, + SQ_OP2_INST_KILLE_INT = 0x46, + SQ_OP2_INST_KILLGT_INT = 0x47, + SQ_OP2_INST_KILLGE_INT = 0x48, + SQ_OP2_INST_KILLNE_INT = 0x49, + SQ_OP2_INST_PRED_SETE_PUSH_INT = 0x4a, + SQ_OP2_INST_PRED_SETGT_PUSH_INT = 0x4b, + SQ_OP2_INST_PRED_SETGE_PUSH_INT = 0x4c, + SQ_OP2_INST_PRED_SETNE_PUSH_INT = 0x4d, + SQ_OP2_INST_PRED_SETLT_PUSH_INT = 0x4e, + SQ_OP2_INST_PRED_SETLE_PUSH_INT = 0x4f, + SQ_OP2_INST_DOT4 = 0x50, + SQ_OP2_INST_DOT4_IEEE = 0x51, + SQ_OP2_INST_CUBE = 0x52, + SQ_OP2_INST_MAX4 = 0x53, + SQ_OP2_INST_MOVA_GPR_INT = 0x60, + SQ_OP2_INST_EXP_IEEE = 0x61, + SQ_OP2_INST_LOG_CLAMPED = 0x62, + SQ_OP2_INST_LOG_IEEE = 0x63, + SQ_OP2_INST_RECIP_CLAMPED = 0x64, + SQ_OP2_INST_RECIP_FF = 0x65, + SQ_OP2_INST_RECIP_IEEE = 0x66, + SQ_OP2_INST_RECIPSQRT_CLAMPED = 0x67, + SQ_OP2_INST_RECIPSQRT_FF = 0x68, + SQ_OP2_INST_RECIPSQRT_IEEE = 0x69, + SQ_OP2_INST_SQRT_IEEE = 0x6a, + SQ_OP2_INST_FLT_TO_INT = 0x6b, + SQ_OP2_INST_INT_TO_FLT = 0x6c, + SQ_OP2_INST_UINT_TO_FLT = 0x6d, + SQ_OP2_INST_SIN = 0x6e, + SQ_OP2_INST_COS = 0x6f, + SQ_OP2_INST_ASHR_INT = 0x70, + SQ_OP2_INST_LSHR_INT = 0x71, + SQ_OP2_INST_LSHL_INT = 0x72, + SQ_OP2_INST_MULLO_INT = 0x73, + SQ_OP2_INST_MULHI_INT = 0x74, + SQ_OP2_INST_MULLO_UINT = 0x75, + SQ_OP2_INST_MULHI_UINT = 0x76, + SQ_OP2_INST_RECIP_INT = 0x77, + SQ_OP2_INST_RECIP_UINT = 0x78, + SQ_OP2_INST_FLT_TO_UINT = 0x79, + SQ_CF_ALLOC_EXPORT_WORD1_BUF = 0x00008dfc, + ARRAY_SIZE_mask = 0xfff << 0, + ARRAY_SIZE_shift = 0, + COMP_MASK_mask = 0x0f << 12, + COMP_MASK_shift = 12, + SQ_CF_WORD0 = 0x00008dfc, + SQ_CF_ALLOC_EXPORT_WORD0 = 0x00008dfc, + ARRAY_BASE_mask = 0x1fff << 0, + ARRAY_BASE_shift = 0, + SQ_CF_ALLOC_EXPORT_WORD0__TYPE_mask = 0x03 << 13, + SQ_CF_ALLOC_EXPORT_WORD0__TYPE_shift = 13, + SQ_EXPORT_PIXEL = 0x00, + SQ_EXPORT_POS = 0x01, + SQ_EXPORT_PARAM = 0x02, + X_UNUSED_FOR_SX_EXPORTS = 0x03, + RW_GPR_mask = 0x7f << 15, + RW_GPR_shift = 15, + RW_REL_bit = 1 << 22, + INDEX_GPR_mask = 0x7f << 23, + INDEX_GPR_shift = 23, + ELEM_SIZE_mask = 0x03 << 30, + ELEM_SIZE_shift = 30, + SQ_VTX_WORD1 = 0x00008dfc, + SQ_VTX_WORD1__DST_SEL_X_mask = 0x07 << 9, + SQ_VTX_WORD1__DST_SEL_X_shift = 9, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SQ_VTX_WORD1__DST_SEL_Y_mask = 0x07 << 12, + SQ_VTX_WORD1__DST_SEL_Y_shift = 12, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SQ_VTX_WORD1__DST_SEL_Z_mask = 0x07 << 15, + SQ_VTX_WORD1__DST_SEL_Z_shift = 15, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SQ_VTX_WORD1__DST_SEL_W_mask = 0x07 << 18, + SQ_VTX_WORD1__DST_SEL_W_shift = 18, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + USE_CONST_FIELDS_bit = 1 << 21, + SQ_VTX_WORD1__DATA_FORMAT_mask = 0x3f << 22, + SQ_VTX_WORD1__DATA_FORMAT_shift = 22, + SQ_VTX_WORD1__NUM_FORMAT_ALL_mask = 0x03 << 28, + SQ_VTX_WORD1__NUM_FORMAT_ALL_shift = 28, + SQ_NUM_FORMAT_NORM = 0x00, + SQ_NUM_FORMAT_INT = 0x01, + SQ_NUM_FORMAT_SCALED = 0x02, + SQ_VTX_WORD1__FORMAT_COMP_ALL_bit = 1 << 30, + SQ_VTX_WORD1__SRF_MODE_ALL_bit = 1 << 31, + SQ_ALU_WORD1_OP2 = 0x00008dfc, +/* SRC0_ABS_bit = 1 << 0, */ +/* SRC1_ABS_bit = 1 << 1, */ +/* UPDATE_EXECUTE_MASK_bit = 1 << 2, */ +/* UPDATE_PRED_bit = 1 << 3, */ +/* WRITE_MASK_bit = 1 << 4, */ + FOG_MERGE_bit = 1 << 5, + SQ_ALU_WORD1_OP2__OMOD_mask = 0x03 << 6, + SQ_ALU_WORD1_OP2__OMOD_shift = 6, +/* SQ_ALU_OMOD_OFF = 0x00, */ +/* SQ_ALU_OMOD_M2 = 0x01, */ +/* SQ_ALU_OMOD_M4 = 0x02, */ +/* SQ_ALU_OMOD_D2 = 0x03, */ + SQ_ALU_WORD1_OP2__ALU_INST_mask = 0x3ff << 8, + SQ_ALU_WORD1_OP2__ALU_INST_shift = 8, +/* SQ_OP2_INST_ADD = 0x00, */ +/* SQ_OP2_INST_MUL = 0x01, */ +/* SQ_OP2_INST_MUL_IEEE = 0x02, */ +/* SQ_OP2_INST_MAX = 0x03, */ +/* SQ_OP2_INST_MIN = 0x04, */ +/* SQ_OP2_INST_MAX_DX10 = 0x05, */ +/* SQ_OP2_INST_MIN_DX10 = 0x06, */ +/* SQ_OP2_INST_SETE = 0x08, */ +/* SQ_OP2_INST_SETGT = 0x09, */ +/* SQ_OP2_INST_SETGE = 0x0a, */ +/* SQ_OP2_INST_SETNE = 0x0b, */ +/* SQ_OP2_INST_SETE_DX10 = 0x0c, */ +/* SQ_OP2_INST_SETGT_DX10 = 0x0d, */ +/* SQ_OP2_INST_SETGE_DX10 = 0x0e, */ +/* SQ_OP2_INST_SETNE_DX10 = 0x0f, */ +/* SQ_OP2_INST_FRACT = 0x10, */ +/* SQ_OP2_INST_TRUNC = 0x11, */ +/* SQ_OP2_INST_CEIL = 0x12, */ +/* SQ_OP2_INST_RNDNE = 0x13, */ +/* SQ_OP2_INST_FLOOR = 0x14, */ +/* SQ_OP2_INST_MOVA = 0x15, */ +/* SQ_OP2_INST_MOVA_FLOOR = 0x16, */ +/* SQ_OP2_INST_MOVA_INT = 0x18, */ +/* SQ_OP2_INST_MOV = 0x19, */ +/* SQ_OP2_INST_NOP = 0x1a, */ +/* SQ_OP2_INST_PRED_SETGT_UINT = 0x1e, */ +/* SQ_OP2_INST_PRED_SETGE_UINT = 0x1f, */ +/* SQ_OP2_INST_PRED_SETE = 0x20, */ +/* SQ_OP2_INST_PRED_SETGT = 0x21, */ +/* SQ_OP2_INST_PRED_SETGE = 0x22, */ +/* SQ_OP2_INST_PRED_SETNE = 0x23, */ +/* SQ_OP2_INST_PRED_SET_INV = 0x24, */ +/* SQ_OP2_INST_PRED_SET_POP = 0x25, */ +/* SQ_OP2_INST_PRED_SET_CLR = 0x26, */ +/* SQ_OP2_INST_PRED_SET_RESTORE = 0x27, */ +/* SQ_OP2_INST_PRED_SETE_PUSH = 0x28, */ +/* SQ_OP2_INST_PRED_SETGT_PUSH = 0x29, */ +/* SQ_OP2_INST_PRED_SETGE_PUSH = 0x2a, */ +/* SQ_OP2_INST_PRED_SETNE_PUSH = 0x2b, */ +/* SQ_OP2_INST_KILLE = 0x2c, */ +/* SQ_OP2_INST_KILLGT = 0x2d, */ +/* SQ_OP2_INST_KILLGE = 0x2e, */ +/* SQ_OP2_INST_KILLNE = 0x2f, */ +/* SQ_OP2_INST_AND_INT = 0x30, */ +/* SQ_OP2_INST_OR_INT = 0x31, */ +/* SQ_OP2_INST_XOR_INT = 0x32, */ +/* SQ_OP2_INST_NOT_INT = 0x33, */ +/* SQ_OP2_INST_ADD_INT = 0x34, */ +/* SQ_OP2_INST_SUB_INT = 0x35, */ +/* SQ_OP2_INST_MAX_INT = 0x36, */ +/* SQ_OP2_INST_MIN_INT = 0x37, */ +/* SQ_OP2_INST_MAX_UINT = 0x38, */ +/* SQ_OP2_INST_MIN_UINT = 0x39, */ +/* SQ_OP2_INST_SETE_INT = 0x3a, */ +/* SQ_OP2_INST_SETGT_INT = 0x3b, */ +/* SQ_OP2_INST_SETGE_INT = 0x3c, */ +/* SQ_OP2_INST_SETNE_INT = 0x3d, */ +/* SQ_OP2_INST_SETGT_UINT = 0x3e, */ +/* SQ_OP2_INST_SETGE_UINT = 0x3f, */ +/* SQ_OP2_INST_KILLGT_UINT = 0x40, */ +/* SQ_OP2_INST_KILLGE_UINT = 0x41, */ +/* SQ_OP2_INST_PRED_SETE_INT = 0x42, */ +/* SQ_OP2_INST_PRED_SETGT_INT = 0x43, */ +/* SQ_OP2_INST_PRED_SETGE_INT = 0x44, */ +/* SQ_OP2_INST_PRED_SETNE_INT = 0x45, */ +/* SQ_OP2_INST_KILLE_INT = 0x46, */ +/* SQ_OP2_INST_KILLGT_INT = 0x47, */ +/* SQ_OP2_INST_KILLGE_INT = 0x48, */ +/* SQ_OP2_INST_KILLNE_INT = 0x49, */ +/* SQ_OP2_INST_PRED_SETE_PUSH_INT = 0x4a, */ +/* SQ_OP2_INST_PRED_SETGT_PUSH_INT = 0x4b, */ +/* SQ_OP2_INST_PRED_SETGE_PUSH_INT = 0x4c, */ +/* SQ_OP2_INST_PRED_SETNE_PUSH_INT = 0x4d, */ +/* SQ_OP2_INST_PRED_SETLT_PUSH_INT = 0x4e, */ +/* SQ_OP2_INST_PRED_SETLE_PUSH_INT = 0x4f, */ +/* SQ_OP2_INST_DOT4 = 0x50, */ +/* SQ_OP2_INST_DOT4_IEEE = 0x51, */ +/* SQ_OP2_INST_CUBE = 0x52, */ +/* SQ_OP2_INST_MAX4 = 0x53, */ +/* SQ_OP2_INST_MOVA_GPR_INT = 0x60, */ +/* SQ_OP2_INST_EXP_IEEE = 0x61, */ +/* SQ_OP2_INST_LOG_CLAMPED = 0x62, */ +/* SQ_OP2_INST_LOG_IEEE = 0x63, */ +/* SQ_OP2_INST_RECIP_CLAMPED = 0x64, */ +/* SQ_OP2_INST_RECIP_FF = 0x65, */ +/* SQ_OP2_INST_RECIP_IEEE = 0x66, */ +/* SQ_OP2_INST_RECIPSQRT_CLAMPED = 0x67, */ +/* SQ_OP2_INST_RECIPSQRT_FF = 0x68, */ +/* SQ_OP2_INST_RECIPSQRT_IEEE = 0x69, */ +/* SQ_OP2_INST_SQRT_IEEE = 0x6a, */ +/* SQ_OP2_INST_FLT_TO_INT = 0x6b, */ +/* SQ_OP2_INST_INT_TO_FLT = 0x6c, */ +/* SQ_OP2_INST_UINT_TO_FLT = 0x6d, */ +/* SQ_OP2_INST_SIN = 0x6e, */ +/* SQ_OP2_INST_COS = 0x6f, */ +/* SQ_OP2_INST_ASHR_INT = 0x70, */ +/* SQ_OP2_INST_LSHR_INT = 0x71, */ +/* SQ_OP2_INST_LSHL_INT = 0x72, */ +/* SQ_OP2_INST_MULLO_INT = 0x73, */ +/* SQ_OP2_INST_MULHI_INT = 0x74, */ +/* SQ_OP2_INST_MULLO_UINT = 0x75, */ +/* SQ_OP2_INST_MULHI_UINT = 0x76, */ +/* SQ_OP2_INST_RECIP_INT = 0x77, */ +/* SQ_OP2_INST_RECIP_UINT = 0x78, */ +/* SQ_OP2_INST_FLT_TO_UINT = 0x79, */ + SQ_CF_WORD1 = 0x00008dfc, + POP_COUNT_mask = 0x07 << 0, + POP_COUNT_shift = 0, + CF_CONST_mask = 0x1f << 3, + CF_CONST_shift = 3, + COND_mask = 0x03 << 8, + COND_shift = 8, + SQ_CF_COND_ACTIVE = 0x00, + SQ_CF_COND_FALSE = 0x01, + SQ_CF_COND_BOOL = 0x02, + SQ_CF_COND_NOT_BOOL = 0x03, + SQ_CF_WORD1__COUNT_mask = 0x07 << 10, + SQ_CF_WORD1__COUNT_shift = 10, + CALL_COUNT_mask = 0x3f << 13, + CALL_COUNT_shift = 13, + COUNT_3_bit = 1 << 19, +/* END_OF_PROGRAM_bit = 1 << 21, */ +/* VALID_PIXEL_MODE_bit = 1 << 22, */ + SQ_CF_WORD1__CF_INST_mask = 0x7f << 23, + SQ_CF_WORD1__CF_INST_shift = 23, + SQ_CF_INST_NOP = 0x00, + SQ_CF_INST_TEX = 0x01, + SQ_CF_INST_VTX = 0x02, + SQ_CF_INST_VTX_TC = 0x03, + SQ_CF_INST_LOOP_START = 0x04, + SQ_CF_INST_LOOP_END = 0x05, + SQ_CF_INST_LOOP_START_DX10 = 0x06, + SQ_CF_INST_LOOP_START_NO_AL = 0x07, + SQ_CF_INST_LOOP_CONTINUE = 0x08, + SQ_CF_INST_LOOP_BREAK = 0x09, + SQ_CF_INST_JUMP = 0x0a, + SQ_CF_INST_PUSH = 0x0b, + SQ_CF_INST_PUSH_ELSE = 0x0c, + SQ_CF_INST_ELSE = 0x0d, + SQ_CF_INST_POP = 0x0e, + SQ_CF_INST_POP_JUMP = 0x0f, + SQ_CF_INST_POP_PUSH = 0x10, + SQ_CF_INST_POP_PUSH_ELSE = 0x11, + SQ_CF_INST_CALL = 0x12, + SQ_CF_INST_CALL_FS = 0x13, + SQ_CF_INST_RETURN = 0x14, + SQ_CF_INST_EMIT_VERTEX = 0x15, + SQ_CF_INST_EMIT_CUT_VERTEX = 0x16, + SQ_CF_INST_CUT_VERTEX = 0x17, + SQ_CF_INST_KILL = 0x18, +/* WHOLE_QUAD_MODE_bit = 1 << 30, */ +/* BARRIER_bit = 1 << 31, */ + SQ_VTX_WORD1_SEM = 0x00008dfc, + SEMANTIC_ID_mask = 0xff << 0, + SEMANTIC_ID_shift = 0, + SQ_TEX_WORD0 = 0x00008dfc, + TEX_INST_mask = 0x1f << 0, + TEX_INST_shift = 0, + SQ_TEX_INST_VTX_FETCH = 0x00, + SQ_TEX_INST_VTX_SEMANTIC = 0x01, + SQ_TEX_INST_LD = 0x03, + SQ_TEX_INST_GET_TEXTURE_RESINFO = 0x04, + SQ_TEX_INST_GET_NUMBER_OF_SAMPLES = 0x05, + SQ_TEX_INST_GET_LOD = 0x06, + SQ_TEX_INST_GET_GRADIENTS_H = 0x07, + SQ_TEX_INST_GET_GRADIENTS_V = 0x08, + SQ_TEX_INST_GET_LERP = 0x09, + SQ_TEX_INST_RESERVED_10 = 0x0a, + SQ_TEX_INST_SET_GRADIENTS_H = 0x0b, + SQ_TEX_INST_SET_GRADIENTS_V = 0x0c, + SQ_TEX_INST_PASS = 0x0d, + X_Z_SET_INDEX_FOR_ARRAY_OF_CUBEMAPS = 0x0e, + SQ_TEX_INST_SAMPLE = 0x10, + SQ_TEX_INST_SAMPLE_L = 0x11, + SQ_TEX_INST_SAMPLE_LB = 0x12, + SQ_TEX_INST_SAMPLE_LZ = 0x13, + SQ_TEX_INST_SAMPLE_G = 0x14, + SQ_TEX_INST_SAMPLE_G_L = 0x15, + SQ_TEX_INST_SAMPLE_G_LB = 0x16, + SQ_TEX_INST_SAMPLE_G_LZ = 0x17, + SQ_TEX_INST_SAMPLE_C = 0x18, + SQ_TEX_INST_SAMPLE_C_L = 0x19, + SQ_TEX_INST_SAMPLE_C_LB = 0x1a, + SQ_TEX_INST_SAMPLE_C_LZ = 0x1b, + SQ_TEX_INST_SAMPLE_C_G = 0x1c, + SQ_TEX_INST_SAMPLE_C_G_L = 0x1d, + SQ_TEX_INST_SAMPLE_C_G_LB = 0x1e, + SQ_TEX_INST_SAMPLE_C_G_LZ = 0x1f, + BC_FRAC_MODE_bit = 1 << 5, +/* FETCH_WHOLE_QUAD_bit = 1 << 7, */ + RESOURCE_ID_mask = 0xff << 8, + RESOURCE_ID_shift = 8, +/* SRC_GPR_mask = 0x7f << 16, */ +/* SRC_GPR_shift = 16, */ +/* SRC_REL_bit = 1 << 23, */ + SQ_TEX_WORD0__ALT_CONST_bit = 1 << 24, + SQ_VTX_WORD1_GPR = 0x00008dfc, + SQ_VTX_WORD1_GPR__DST_GPR_mask = 0x7f << 0, + SQ_VTX_WORD1_GPR__DST_GPR_shift = 0, + SQ_VTX_WORD1_GPR__DST_REL_bit = 1 << 7, + SQ_ALU_WORD0 = 0x00008dfc, + SRC0_SEL_mask = 0x1ff << 0, + SRC0_SEL_shift = 0, +/* SQ_ALU_SRC_0 = 0xf8, */ +/* SQ_ALU_SRC_1 = 0xf9, */ +/* SQ_ALU_SRC_1_INT = 0xfa, */ +/* SQ_ALU_SRC_M_1_INT = 0xfb, */ +/* SQ_ALU_SRC_0_5 = 0xfc, */ +/* SQ_ALU_SRC_LITERAL = 0xfd, */ +/* SQ_ALU_SRC_PV = 0xfe, */ +/* SQ_ALU_SRC_PS = 0xff, */ + SRC0_REL_bit = 1 << 9, + SRC0_CHAN_mask = 0x03 << 10, + SRC0_CHAN_shift = 10, +/* SQ_CHAN_X = 0x00, */ +/* SQ_CHAN_Y = 0x01, */ +/* SQ_CHAN_Z = 0x02, */ +/* SQ_CHAN_W = 0x03, */ + SRC0_NEG_bit = 1 << 12, + SRC1_SEL_mask = 0x1ff << 13, + SRC1_SEL_shift = 13, +/* SQ_ALU_SRC_0 = 0xf8, */ +/* SQ_ALU_SRC_1 = 0xf9, */ +/* SQ_ALU_SRC_1_INT = 0xfa, */ +/* SQ_ALU_SRC_M_1_INT = 0xfb, */ +/* SQ_ALU_SRC_0_5 = 0xfc, */ +/* SQ_ALU_SRC_LITERAL = 0xfd, */ +/* SQ_ALU_SRC_PV = 0xfe, */ +/* SQ_ALU_SRC_PS = 0xff, */ + SRC1_REL_bit = 1 << 22, + SRC1_CHAN_mask = 0x03 << 23, + SRC1_CHAN_shift = 23, +/* SQ_CHAN_X = 0x00, */ +/* SQ_CHAN_Y = 0x01, */ +/* SQ_CHAN_Z = 0x02, */ +/* SQ_CHAN_W = 0x03, */ + SRC1_NEG_bit = 1 << 25, + INDEX_MODE_mask = 0x07 << 26, + INDEX_MODE_shift = 26, + SQ_INDEX_AR_X = 0x00, + SQ_INDEX_AR_Y = 0x01, + SQ_INDEX_AR_Z = 0x02, + SQ_INDEX_AR_W = 0x03, + SQ_INDEX_LOOP = 0x04, + PRED_SEL_mask = 0x03 << 29, + PRED_SEL_shift = 29, + SQ_PRED_SEL_OFF = 0x00, + SQ_PRED_SEL_ZERO = 0x02, + SQ_PRED_SEL_ONE = 0x03, + LAST_bit = 1 << 31, + SX_EXPORT_BUFFER_SIZES = 0x0000900c, + COLOR_BUFFER_SIZE_mask = 0xff << 0, + COLOR_BUFFER_SIZE_shift = 0, + POSITION_BUFFER_SIZE_mask = 0xff << 8, + POSITION_BUFFER_SIZE_shift = 8, + SMX_BUFFER_SIZE_mask = 0xff << 16, + SMX_BUFFER_SIZE_shift = 16, + SX_MEMORY_EXPORT_BASE = 0x00009010, + SX_MEMORY_EXPORT_SIZE = 0x00009014, + SPI_CONFIG_CNTL = 0x00009100, + GPR_WRITE_PRIORITY_mask = 0x1f << 0, + GPR_WRITE_PRIORITY_shift = 0, + X_PRIORITY_ORDER = 0x00, + X_PRIORITY_ORDER_VS = 0x01, + DISABLE_INTERP_1_bit = 1 << 5, + DEBUG_THREAD_TYPE_SEL_mask = 0x03 << 6, + DEBUG_THREAD_TYPE_SEL_shift = 6, + DEBUG_GROUP_SEL_mask = 0x1f << 8, + DEBUG_GROUP_SEL_shift = 8, + DEBUG_GRBM_OVERRIDE_bit = 1 << 13, + SPI_CONFIG_CNTL_1 = 0x0000913c, + VTX_DONE_DELAY_mask = 0x0f << 0, + VTX_DONE_DELAY_shift = 0, + X_DELAY_10_CLKS = 0x00, + X_DELAY_11_CLKS = 0x01, + X_DELAY_12_CLKS = 0x02, + X_DELAY_13_CLKS = 0x03, + X_DELAY_14_CLKS = 0x04, + X_DELAY_15_CLKS = 0x05, + X_DELAY_16_CLKS = 0x06, + X_DELAY_17_CLKS = 0x07, + X_DELAY_2_CLKS = 0x08, + X_DELAY_3_CLKS = 0x09, + X_DELAY_4_CLKS = 0x0a, + X_DELAY_5_CLKS = 0x0b, + X_DELAY_6_CLKS = 0x0c, + X_DELAY_7_CLKS = 0x0d, + X_DELAY_8_CLKS = 0x0e, + X_DELAY_9_CLKS = 0x0f, + INTERP_ONE_PRIM_PER_ROW_bit = 1 << 4, + TD_FILTER4 = 0x00009400, + WEIGHT_1_mask = 0x7ff << 0, + WEIGHT_1_shift = 0, + WEIGHT_0_mask = 0x7ff << 11, + WEIGHT_0_shift = 11, + WEIGHT_PAIR_bit = 1 << 22, + PHASE_mask = 0x0f << 23, + PHASE_shift = 23, + DIRECTION_bit = 1 << 27, + TD_FILTER4_1 = 0x00009404, + TD_FILTER4_1_num = 35, +/* WEIGHT_1_mask = 0x7ff << 0, */ +/* WEIGHT_1_shift = 0, */ +/* WEIGHT_0_mask = 0x7ff << 11, */ +/* WEIGHT_0_shift = 11, */ + TD_CNTL = 0x00009490, + SYNC_PHASE_SH_mask = 0x03 << 0, + SYNC_PHASE_SH_shift = 0, + SYNC_PHASE_VC_SMX_mask = 0x03 << 4, + SYNC_PHASE_VC_SMX_shift = 4, + TD0_CNTL = 0x00009494, + TD0_CNTL_num = 4, + ID_OVERRIDE_mask = 0x03 << 28, + ID_OVERRIDE_shift = 28, + TD0_STATUS = 0x000094a4, + TD0_STATUS_num = 4, + BUSY_bit = 1 << 31, + TA_CNTL = 0x00009504, + GRADIENT_CREDIT_mask = 0x1f << 0, + GRADIENT_CREDIT_shift = 0, + WALKER_CREDIT_mask = 0x1f << 8, + WALKER_CREDIT_shift = 8, + ALIGNER_CREDIT_mask = 0x1f << 16, + ALIGNER_CREDIT_shift = 16, + TD_FIFO_CREDIT_mask = 0x3ff << 22, + TD_FIFO_CREDIT_shift = 22, + TA_CNTL_AUX = 0x00009508, + DISABLE_CUBE_WRAP_bit = 1 << 0, + SYNC_GRADIENT_bit = 1 << 24, + SYNC_WALKER_bit = 1 << 25, + SYNC_ALIGNER_bit = 1 << 26, + BILINEAR_PRECISION_bit = 1 << 31, + TA0_CNTL = 0x00009510, +/* ID_OVERRIDE_mask = 0x03 << 28, */ +/* ID_OVERRIDE_shift = 28, */ + TA1_CNTL = 0x00009514, +/* ID_OVERRIDE_mask = 0x03 << 28, */ +/* ID_OVERRIDE_shift = 28, */ + TA2_CNTL = 0x00009518, +/* ID_OVERRIDE_mask = 0x03 << 28, */ +/* ID_OVERRIDE_shift = 28, */ + TA3_CNTL = 0x0000951c, +/* ID_OVERRIDE_mask = 0x03 << 28, */ +/* ID_OVERRIDE_shift = 28, */ + TA0_STATUS = 0x00009520, + FG_PFIFO_EMPTYB_bit = 1 << 12, + FG_LFIFO_EMPTYB_bit = 1 << 13, + FG_SFIFO_EMPTYB_bit = 1 << 14, + FL_PFIFO_EMPTYB_bit = 1 << 16, + FL_LFIFO_EMPTYB_bit = 1 << 17, + FL_SFIFO_EMPTYB_bit = 1 << 18, + FA_PFIFO_EMPTYB_bit = 1 << 20, + FA_LFIFO_EMPTYB_bit = 1 << 21, + FA_SFIFO_EMPTYB_bit = 1 << 22, + IN_BUSY_bit = 1 << 24, + FG_BUSY_bit = 1 << 25, + FL_BUSY_bit = 1 << 27, + TA_BUSY_bit = 1 << 28, + FA_BUSY_bit = 1 << 29, + AL_BUSY_bit = 1 << 30, +/* BUSY_bit = 1 << 31, */ + TA1_STATUS = 0x00009524, +/* FG_PFIFO_EMPTYB_bit = 1 << 12, */ +/* FG_LFIFO_EMPTYB_bit = 1 << 13, */ +/* FG_SFIFO_EMPTYB_bit = 1 << 14, */ +/* FL_PFIFO_EMPTYB_bit = 1 << 16, */ +/* FL_LFIFO_EMPTYB_bit = 1 << 17, */ +/* FL_SFIFO_EMPTYB_bit = 1 << 18, */ +/* FA_PFIFO_EMPTYB_bit = 1 << 20, */ +/* FA_LFIFO_EMPTYB_bit = 1 << 21, */ +/* FA_SFIFO_EMPTYB_bit = 1 << 22, */ +/* IN_BUSY_bit = 1 << 24, */ +/* FG_BUSY_bit = 1 << 25, */ +/* FL_BUSY_bit = 1 << 27, */ +/* TA_BUSY_bit = 1 << 28, */ +/* FA_BUSY_bit = 1 << 29, */ +/* AL_BUSY_bit = 1 << 30, */ +/* BUSY_bit = 1 << 31, */ + TA2_STATUS = 0x00009528, +/* FG_PFIFO_EMPTYB_bit = 1 << 12, */ +/* FG_LFIFO_EMPTYB_bit = 1 << 13, */ +/* FG_SFIFO_EMPTYB_bit = 1 << 14, */ +/* FL_PFIFO_EMPTYB_bit = 1 << 16, */ +/* FL_LFIFO_EMPTYB_bit = 1 << 17, */ +/* FL_SFIFO_EMPTYB_bit = 1 << 18, */ +/* FA_PFIFO_EMPTYB_bit = 1 << 20, */ +/* FA_LFIFO_EMPTYB_bit = 1 << 21, */ +/* FA_SFIFO_EMPTYB_bit = 1 << 22, */ +/* IN_BUSY_bit = 1 << 24, */ +/* FG_BUSY_bit = 1 << 25, */ +/* FL_BUSY_bit = 1 << 27, */ +/* TA_BUSY_bit = 1 << 28, */ +/* FA_BUSY_bit = 1 << 29, */ +/* AL_BUSY_bit = 1 << 30, */ +/* BUSY_bit = 1 << 31, */ + TA3_STATUS = 0x0000952c, +/* FG_PFIFO_EMPTYB_bit = 1 << 12, */ +/* FG_LFIFO_EMPTYB_bit = 1 << 13, */ +/* FG_SFIFO_EMPTYB_bit = 1 << 14, */ +/* FL_PFIFO_EMPTYB_bit = 1 << 16, */ +/* FL_LFIFO_EMPTYB_bit = 1 << 17, */ +/* FL_SFIFO_EMPTYB_bit = 1 << 18, */ +/* FA_PFIFO_EMPTYB_bit = 1 << 20, */ +/* FA_LFIFO_EMPTYB_bit = 1 << 21, */ +/* FA_SFIFO_EMPTYB_bit = 1 << 22, */ +/* IN_BUSY_bit = 1 << 24, */ +/* FG_BUSY_bit = 1 << 25, */ +/* FL_BUSY_bit = 1 << 27, */ +/* TA_BUSY_bit = 1 << 28, */ +/* FA_BUSY_bit = 1 << 29, */ +/* AL_BUSY_bit = 1 << 30, */ +/* BUSY_bit = 1 << 31, */ + TC_STATUS = 0x00009600, + TC_BUSY_bit = 1 << 0, + TC_INVALIDATE = 0x00009604, + START_bit = 1 << 0, + TC_CNTL = 0x00009608, + FORCE_HIT_bit = 1 << 0, + FORCE_MISS_bit = 1 << 1, + L2_SIZE_mask = 0x0f << 5, + L2_SIZE_shift = 5, + _256K = 0x00, + _224K = 0x01, + _192K = 0x02, + _160K = 0x03, + _128K = 0x04, + _96K = 0x05, + _64K = 0x06, + _32K = 0x07, + L2_DISABLE_LATE_HIT_bit = 1 << 9, + DISABLE_VERT_PERF_bit = 1 << 10, + DISABLE_INVAL_BUSY_bit = 1 << 11, + DISABLE_INVAL_SAME_SURFACE_bit = 1 << 12, + PARTITION_MODE_mask = 0x03 << 13, + PARTITION_MODE_shift = 13, + X_VERTEX = 0x00, + MISS_ARB_MODE_bit = 1 << 15, + HIT_ARB_MODE_bit = 1 << 16, + DISABLE_WRITE_DELAY_bit = 1 << 17, + HIT_FIFO_DEPTH_bit = 1 << 18, + VC_CNTL = 0x00009700, + L2_INVALIDATE_bit = 1 << 0, + RESERVED_bit = 1 << 1, + CC_FORCE_MISS_bit = 1 << 2, + MI_CHAN_SEL_mask = 0x03 << 3, + MI_CHAN_SEL_shift = 3, + X_MC0_USES_CH_0_1 = 0x00, + X_MC0_USES_CH_0_3 = 0x01, + X_VC_MC0_IS_ACTIVE = 0x02, + X_VC_MC1_IS_DISABLED = 0x03, + MI_STEER_DISABLE_bit = 1 << 5, + MI_CREDIT_CTR_mask = 0x0f << 6, + MI_CREDIT_CTR_shift = 6, + MI_CREDIT_WE_bit = 1 << 10, + MI_REQ_STALL_THLD_mask = 0x07 << 11, + MI_REQ_STALL_THLD_shift = 11, + X_LATENCY_EXCEEDS_399_CLOCKS = 0x00, + X_LATENCY_EXCEEDS_415_CLOCKS = 0x01, + X_LATENCY_EXCEEDS_431_CLOCKS = 0x02, + X_LATENCY_EXCEEDS_447_CLOCKS = 0x03, + X_LATENCY_EXCEEDS_463_CLOCKS = 0x04, + X_LATENCY_EXCEEDS_479_CLOCKS = 0x05, + X_LATENCY_EXCEEDS_495_CLOCKS = 0x06, + X_LATENCY_EXCEEDS_511_CLOCKS = 0x07, + VC_CNTL__MI_TIMESTAMP_RES_mask = 0x1f << 14, + VC_CNTL__MI_TIMESTAMP_RES_shift = 14, + X_1X_SYSTEM_CLOCK = 0x00, + X_2X_SYSTEM_CLOCK = 0x01, + X_4X_SYSTEM_CLOCK = 0x02, + X_8X_SYSTEM_CLOCK = 0x03, + X_16X_SYSTEM_CLOCK = 0x04, + X_32X_SYSTEM_CLOCK = 0x05, + X_64X_SYSTEM_CLOCK = 0x06, + X_128X_SYSTEM_CLOCK = 0x07, + X_256X_SYSTEM_CLOCK = 0x08, + X_512X_SYSTEM_CLOCK = 0x09, + X_1024X_SYSTEM_CLOCK = 0x0a, + X_2048X_SYSTEM_CLOCK = 0x0b, + X_4092X_SYSTEM_CLOCK = 0x0c, + X_8192X_SYSTEM_CLOCK = 0x0d, + X_16384X_SYSTEM_CLOCK = 0x0e, + X_32768X_SYSTEM_CLOCK = 0x0f, + VC_CNTL_STATUS = 0x00009704, + RP_BUSY_bit = 1 << 0, + RG_BUSY_bit = 1 << 1, + VC_BUSY_bit = 1 << 2, + CLAMP_DETECT_bit = 1 << 3, + VC_CONFIG = 0x00009718, + WRITE_DIS_bit = 1 << 0, + GPR_DATA_PHASE_ADJ_mask = 0x07 << 1, + GPR_DATA_PHASE_ADJ_shift = 1, + X_LATENCY_BASE_0_CYCLES = 0x00, + X_LATENCY_BASE_1_CYCLES = 0x01, + X_LATENCY_BASE_2_CYCLES = 0x02, + X_LATENCY_BASE_3_CYCLES = 0x03, + TD_SIMD_SYNC_ADJ_mask = 0x07 << 4, + TD_SIMD_SYNC_ADJ_shift = 4, + X_0_CYCLES_DELAY = 0x00, + X_1_CYCLES_DELAY = 0x01, + X_2_CYCLES_DELAY = 0x02, + X_3_CYCLES_DELAY = 0x03, + X_4_CYCLES_DELAY = 0x04, + X_5_CYCLES_DELAY = 0x05, + X_6_CYCLES_DELAY = 0x06, + X_7_CYCLES_DELAY = 0x07, + SMX_DC_CTL0 = 0x0000a020, + WR_GATHER_STREAM0_bit = 1 << 0, + WR_GATHER_STREAM1_bit = 1 << 1, + WR_GATHER_STREAM2_bit = 1 << 2, + WR_GATHER_STREAM3_bit = 1 << 3, + WR_GATHER_SCRATCH_bit = 1 << 4, + WR_GATHER_REDUC_BUF_bit = 1 << 5, + WR_GATHER_RING_BUF_bit = 1 << 6, + WR_GATHER_F_BUF_bit = 1 << 7, + DISABLE_CACHES_bit = 1 << 8, + AUTO_FLUSH_INVAL_EN_bit = 1 << 10, + AUTO_FLUSH_EN_bit = 1 << 11, + AUTO_FLUSH_CNT_mask = 0xffff << 12, + AUTO_FLUSH_CNT_shift = 12, + MC_RD_STALL_FACTOR_mask = 0x03 << 28, + MC_RD_STALL_FACTOR_shift = 28, + MC_WR_STALL_FACTOR_mask = 0x03 << 30, + MC_WR_STALL_FACTOR_shift = 30, + SMX_DC_CTL1 = 0x0000a024, + OP_FIFO_SKID_mask = 0x7f << 0, + OP_FIFO_SKID_shift = 0, + CACHE_LINE_SIZE_bit = 1 << 8, + MULTI_FLUSH_MODE_bit = 1 << 9, + MULTI_FLUSH_REQ_ABORT_IDX_FIFO_SKID_mask = 0x0f << 10, + MULTI_FLUSH_REQ_ABORT_IDX_FIFO_SKID_shift = 10, + DISABLE_WR_GATHER_RD_HIT_FORCE_EVICT_bit = 1 << 16, + DISABLE_WR_GATHER_RD_HIT_COMP_VLDS_CHECK_bit = 1 << 17, + DISABLE_FLUSH_ES_ALSO_INVALS_bit = 1 << 18, + DISABLE_FLUSH_GS_ALSO_INVALS_bit = 1 << 19, + SMX_DC_CTL2 = 0x0000a028, + INVALIDATE_CACHES_bit = 1 << 0, + CACHES_INVALID_bit = 1 << 1, + CACHES_DIRTY_bit = 1 << 2, + FLUSH_ALL_bit = 1 << 4, + FLUSH_GS_THREADS_bit = 1 << 8, + FLUSH_ES_THREADS_bit = 1 << 9, + SMX_DC_MC_INTF_CTL = 0x0000a02c, + MC_RD_REQ_CRED_mask = 0xff << 0, + MC_RD_REQ_CRED_shift = 0, + MC_WR_REQ_CRED_mask = 0xff << 16, + MC_WR_REQ_CRED_shift = 16, + TD_PS_SAMPLER0_BORDER_RED = 0x0000a400, + TD_PS_SAMPLER0_BORDER_RED_num = 18, + TD_PS_SAMPLER0_BORDER_RED_offset = 16, + TD_PS_SAMPLER0_BORDER_GREEN = 0x0000a404, + TD_PS_SAMPLER0_BORDER_GREEN_num = 18, + TD_PS_SAMPLER0_BORDER_GREEN_offset = 16, + TD_PS_SAMPLER0_BORDER_BLUE = 0x0000a408, + TD_PS_SAMPLER0_BORDER_BLUE_num = 18, + TD_PS_SAMPLER0_BORDER_BLUE_offset = 16, + TD_PS_SAMPLER0_BORDER_ALPHA = 0x0000a40c, + TD_PS_SAMPLER0_BORDER_ALPHA_num = 18, + TD_PS_SAMPLER0_BORDER_ALPHA_offset = 16, + TD_VS_SAMPLER0_BORDER_RED = 0x0000a600, + TD_VS_SAMPLER0_BORDER_RED_num = 18, + TD_VS_SAMPLER0_BORDER_RED_offset = 16, + TD_VS_SAMPLER0_BORDER_GREEN = 0x0000a604, + TD_VS_SAMPLER0_BORDER_GREEN_num = 18, + TD_VS_SAMPLER0_BORDER_GREEN_offset = 16, + TD_VS_SAMPLER0_BORDER_BLUE = 0x0000a608, + TD_VS_SAMPLER0_BORDER_BLUE_num = 18, + TD_VS_SAMPLER0_BORDER_BLUE_offset = 16, + TD_VS_SAMPLER0_BORDER_ALPHA = 0x0000a60c, + TD_VS_SAMPLER0_BORDER_ALPHA_num = 18, + TD_VS_SAMPLER0_BORDER_ALPHA_offset = 16, + TD_GS_SAMPLER0_BORDER_RED = 0x0000a800, + TD_GS_SAMPLER0_BORDER_RED_num = 18, + TD_GS_SAMPLER0_BORDER_RED_offset = 16, + TD_GS_SAMPLER0_BORDER_GREEN = 0x0000a804, + TD_GS_SAMPLER0_BORDER_GREEN_num = 18, + TD_GS_SAMPLER0_BORDER_GREEN_offset = 16, + TD_GS_SAMPLER0_BORDER_BLUE = 0x0000a808, + TD_GS_SAMPLER0_BORDER_BLUE_num = 18, + TD_GS_SAMPLER0_BORDER_BLUE_offset = 16, + TD_GS_SAMPLER0_BORDER_ALPHA = 0x0000a80c, + TD_GS_SAMPLER0_BORDER_ALPHA_num = 18, + TD_GS_SAMPLER0_BORDER_ALPHA_offset = 16, + TD_PS_SAMPLER0_CLEARTYPE_KERNEL = 0x0000aa00, + TD_PS_SAMPLER0_CLEARTYPE_KERNEL_num = 18, + TD_PS_SAMPLER0_CLEARTYPE_KERNEL__WIDTH_mask = 0x07 << 0, + TD_PS_SAMPLER0_CLEARTYPE_KERNEL__WIDTH_shift = 0, + TD_PS_SAMPLER0_CLEARTYPE_KERNEL__HEIGHT_mask = 0x07 << 3, + TD_PS_SAMPLER0_CLEARTYPE_KERNEL__HEIGHT_shift = 3, + DB_DEPTH_SIZE = 0x00028000, + PITCH_TILE_MAX_mask = 0x3ff << 0, + PITCH_TILE_MAX_shift = 0, + SLICE_TILE_MAX_mask = 0xfffff << 10, + SLICE_TILE_MAX_shift = 10, + DB_DEPTH_VIEW = 0x00028004, + SLICE_START_mask = 0x7ff << 0, + SLICE_START_shift = 0, + SLICE_MAX_mask = 0x7ff << 13, + SLICE_MAX_shift = 13, + DB_DEPTH_BASE = 0x0002800c, + DB_DEPTH_INFO = 0x00028010, + DB_DEPTH_INFO__FORMAT_mask = 0x07 << 0, + DB_DEPTH_INFO__FORMAT_shift = 0, + DEPTH_INVALID = 0x00, + DEPTH_16 = 0x01, + DEPTH_X8_24 = 0x02, + DEPTH_8_24 = 0x03, + DEPTH_X8_24_FLOAT = 0x04, + DEPTH_8_24_FLOAT = 0x05, + DEPTH_32_FLOAT = 0x06, + DEPTH_X24_8_32_FLOAT = 0x07, + DB_DEPTH_INFO__READ_SIZE_bit = 1 << 3, + DB_DEPTH_INFO__ARRAY_MODE_mask = 0x0f << 15, + DB_DEPTH_INFO__ARRAY_MODE_shift = 15, + ARRAY_2D_TILED_THIN1 = 0x04, + TILE_SURFACE_ENABLE_bit = 1 << 25, + TILE_COMPACT_bit = 1 << 26, + ZRANGE_PRECISION_bit = 1 << 31, + DB_HTILE_DATA_BASE = 0x00028014, + DB_STENCIL_CLEAR = 0x00028028, + DB_STENCIL_CLEAR__CLEAR_mask = 0xff << 0, + DB_STENCIL_CLEAR__CLEAR_shift = 0, + MIN_mask = 0xff << 16, + MIN_shift = 16, + DB_DEPTH_CLEAR = 0x0002802c, + PA_SC_SCREEN_SCISSOR_TL = 0x00028030, + PA_SC_SCREEN_SCISSOR_TL__TL_X_mask = 0x7fff << 0, + PA_SC_SCREEN_SCISSOR_TL__TL_X_shift = 0, + PA_SC_SCREEN_SCISSOR_TL__TL_Y_mask = 0x7fff << 16, + PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift = 16, + PA_SC_SCREEN_SCISSOR_BR = 0x00028034, + PA_SC_SCREEN_SCISSOR_BR__BR_X_mask = 0x7fff << 0, + PA_SC_SCREEN_SCISSOR_BR__BR_X_shift = 0, + PA_SC_SCREEN_SCISSOR_BR__BR_Y_mask = 0x7fff << 16, + PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift = 16, + CB_COLOR0_BASE = 0x00028040, + CB_COLOR0_BASE_num = 8, + CB_COLOR0_SIZE = 0x00028060, + CB_COLOR0_SIZE_num = 8, +/* PITCH_TILE_MAX_mask = 0x3ff << 0, */ +/* PITCH_TILE_MAX_shift = 0, */ +/* SLICE_TILE_MAX_mask = 0xfffff << 10, */ +/* SLICE_TILE_MAX_shift = 10, */ + CB_COLOR0_VIEW = 0x00028080, + CB_COLOR0_VIEW_num = 8, +/* SLICE_START_mask = 0x7ff << 0, */ +/* SLICE_START_shift = 0, */ +/* SLICE_MAX_mask = 0x7ff << 13, */ +/* SLICE_MAX_shift = 13, */ + CB_COLOR0_INFO = 0x000280a0, + CB_COLOR0_INFO_num = 8, + ENDIAN_mask = 0x03 << 0, + ENDIAN_shift = 0, + ENDIAN_NONE = 0x00, + ENDIAN_8IN16 = 0x01, + ENDIAN_8IN32 = 0x02, + ENDIAN_8IN64 = 0x03, + CB_COLOR0_INFO__FORMAT_mask = 0x3f << 2, + CB_COLOR0_INFO__FORMAT_shift = 2, + COLOR_INVALID = 0x00, + COLOR_8 = 0x01, + COLOR_4_4 = 0x02, + COLOR_3_3_2 = 0x03, + COLOR_16 = 0x05, + COLOR_16_FLOAT = 0x06, + COLOR_8_8 = 0x07, + COLOR_5_6_5 = 0x08, + COLOR_6_5_5 = 0x09, + COLOR_1_5_5_5 = 0x0a, + COLOR_4_4_4_4 = 0x0b, + COLOR_5_5_5_1 = 0x0c, + COLOR_32 = 0x0d, + COLOR_32_FLOAT = 0x0e, + COLOR_16_16 = 0x0f, + COLOR_16_16_FLOAT = 0x10, + COLOR_8_24 = 0x11, + COLOR_8_24_FLOAT = 0x12, + COLOR_24_8 = 0x13, + COLOR_24_8_FLOAT = 0x14, + COLOR_10_11_11 = 0x15, + COLOR_10_11_11_FLOAT = 0x16, + COLOR_11_11_10 = 0x17, + COLOR_11_11_10_FLOAT = 0x18, + COLOR_2_10_10_10 = 0x19, + COLOR_8_8_8_8 = 0x1a, + COLOR_10_10_10_2 = 0x1b, + COLOR_X24_8_32_FLOAT = 0x1c, + COLOR_32_32 = 0x1d, + COLOR_32_32_FLOAT = 0x1e, + COLOR_16_16_16_16 = 0x1f, + COLOR_16_16_16_16_FLOAT = 0x20, + COLOR_32_32_32_32 = 0x22, + COLOR_32_32_32_32_FLOAT = 0x23, + CB_COLOR0_INFO__ARRAY_MODE_mask = 0x0f << 8, + CB_COLOR0_INFO__ARRAY_MODE_shift = 8, + ARRAY_LINEAR_GENERAL = 0x00, + ARRAY_LINEAR_ALIGNED = 0x01, +/* ARRAY_2D_TILED_THIN1 = 0x04, */ + NUMBER_TYPE_mask = 0x07 << 12, + NUMBER_TYPE_shift = 12, + NUMBER_UNORM = 0x00, + NUMBER_SNORM = 0x01, + NUMBER_USCALED = 0x02, + NUMBER_SSCALED = 0x03, + NUMBER_UINT = 0x04, + NUMBER_SINT = 0x05, + NUMBER_SRGB = 0x06, + NUMBER_FLOAT = 0x07, + CB_COLOR0_INFO__READ_SIZE_bit = 1 << 15, + COMP_SWAP_mask = 0x03 << 16, + COMP_SWAP_shift = 16, + SWAP_STD = 0x00, + SWAP_ALT = 0x01, + SWAP_STD_REV = 0x02, + SWAP_ALT_REV = 0x03, + CB_COLOR0_INFO__TILE_MODE_mask = 0x03 << 18, + CB_COLOR0_INFO__TILE_MODE_shift = 18, + TILE_DISABLE = 0x00, + TILE_CLEAR_ENABLE = 0x01, + TILE_FRAG_ENABLE = 0x02, + BLEND_CLAMP_bit = 1 << 20, + CLEAR_COLOR_bit = 1 << 21, + BLEND_BYPASS_bit = 1 << 22, + BLEND_FLOAT32_bit = 1 << 23, + SIMPLE_FLOAT_bit = 1 << 24, + CB_COLOR0_INFO__ROUND_MODE_bit = 1 << 25, +/* TILE_COMPACT_bit = 1 << 26, */ + SOURCE_FORMAT_bit = 1 << 27, + CB_COLOR0_TILE = 0x000280c0, + CB_COLOR0_TILE_num = 8, + CB_COLOR0_FRAG = 0x000280e0, + CB_COLOR0_FRAG_num = 8, + CB_COLOR0_MASK = 0x00028100, + CB_COLOR0_MASK_num = 8, + CMASK_BLOCK_MAX_mask = 0xfff << 0, + CMASK_BLOCK_MAX_shift = 0, + FMASK_TILE_MAX_mask = 0xfffff << 12, + FMASK_TILE_MAX_shift = 12, + CB_CLEAR_RED = 0x00028120, + CB_CLEAR_GREEN = 0x00028124, + CB_CLEAR_BLUE = 0x00028128, + CB_CLEAR_ALPHA = 0x0002812c, + SQ_ALU_CONST_BUFFER_SIZE_PS_0 = 0x00028140, + SQ_ALU_CONST_BUFFER_SIZE_PS_0_num = 16, + SQ_ALU_CONST_BUFFER_SIZE_PS_0__DATA_mask = 0x1ff << 0, + SQ_ALU_CONST_BUFFER_SIZE_PS_0__DATA_shift = 0, + SQ_ALU_CONST_BUFFER_SIZE_VS_0 = 0x00028180, + SQ_ALU_CONST_BUFFER_SIZE_VS_0_num = 16, + SQ_ALU_CONST_BUFFER_SIZE_VS_0__DATA_mask = 0x1ff << 0, + SQ_ALU_CONST_BUFFER_SIZE_VS_0__DATA_shift = 0, + SQ_ALU_CONST_BUFFER_SIZE_GS_0 = 0x000281c0, + SQ_ALU_CONST_BUFFER_SIZE_GS_0_num = 16, + SQ_ALU_CONST_BUFFER_SIZE_GS_0__DATA_mask = 0x1ff << 0, + SQ_ALU_CONST_BUFFER_SIZE_GS_0__DATA_shift = 0, + PA_SC_WINDOW_OFFSET = 0x00028200, + WINDOW_X_OFFSET_mask = 0x7fff << 0, + WINDOW_X_OFFSET_shift = 0, + WINDOW_Y_OFFSET_mask = 0x7fff << 16, + WINDOW_Y_OFFSET_shift = 16, + PA_SC_WINDOW_SCISSOR_TL = 0x00028204, + PA_SC_WINDOW_SCISSOR_TL__TL_X_mask = 0x3fff << 0, + PA_SC_WINDOW_SCISSOR_TL__TL_X_shift = 0, + PA_SC_WINDOW_SCISSOR_TL__TL_Y_mask = 0x3fff << 16, + PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift = 16, + WINDOW_OFFSET_DISABLE_bit = 1 << 31, + PA_SC_WINDOW_SCISSOR_BR = 0x00028208, + PA_SC_WINDOW_SCISSOR_BR__BR_X_mask = 0x3fff << 0, + PA_SC_WINDOW_SCISSOR_BR__BR_X_shift = 0, + PA_SC_WINDOW_SCISSOR_BR__BR_Y_mask = 0x3fff << 16, + PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift = 16, + PA_SC_CLIPRECT_RULE = 0x0002820c, + CLIP_RULE_mask = 0xffff << 0, + CLIP_RULE_shift = 0, + PA_SC_CLIPRECT_0_TL = 0x00028210, + PA_SC_CLIPRECT_0_TL_num = 4, + PA_SC_CLIPRECT_0_TL_offset = 8, + PA_SC_CLIPRECT_0_TL__TL_X_mask = 0x3fff << 0, + PA_SC_CLIPRECT_0_TL__TL_X_shift = 0, + PA_SC_CLIPRECT_0_TL__TL_Y_mask = 0x3fff << 16, + PA_SC_CLIPRECT_0_TL__TL_Y_shift = 16, + PA_SC_CLIPRECT_0_BR = 0x00028214, + PA_SC_CLIPRECT_0_BR_num = 4, + PA_SC_CLIPRECT_0_BR_offset = 8, + PA_SC_CLIPRECT_0_BR__BR_X_mask = 0x3fff << 0, + PA_SC_CLIPRECT_0_BR__BR_X_shift = 0, + PA_SC_CLIPRECT_0_BR__BR_Y_mask = 0x3fff << 16, + PA_SC_CLIPRECT_0_BR__BR_Y_shift = 16, + CB_TARGET_MASK = 0x00028238, + TARGET0_ENABLE_mask = 0x0f << 0, + TARGET0_ENABLE_shift = 0, + TARGET1_ENABLE_mask = 0x0f << 4, + TARGET1_ENABLE_shift = 4, + TARGET2_ENABLE_mask = 0x0f << 8, + TARGET2_ENABLE_shift = 8, + TARGET3_ENABLE_mask = 0x0f << 12, + TARGET3_ENABLE_shift = 12, + TARGET4_ENABLE_mask = 0x0f << 16, + TARGET4_ENABLE_shift = 16, + TARGET5_ENABLE_mask = 0x0f << 20, + TARGET5_ENABLE_shift = 20, + TARGET6_ENABLE_mask = 0x0f << 24, + TARGET6_ENABLE_shift = 24, + TARGET7_ENABLE_mask = 0x0f << 28, + TARGET7_ENABLE_shift = 28, + CB_SHADER_MASK = 0x0002823c, + OUTPUT0_ENABLE_mask = 0x0f << 0, + OUTPUT0_ENABLE_shift = 0, + OUTPUT1_ENABLE_mask = 0x0f << 4, + OUTPUT1_ENABLE_shift = 4, + OUTPUT2_ENABLE_mask = 0x0f << 8, + OUTPUT2_ENABLE_shift = 8, + OUTPUT3_ENABLE_mask = 0x0f << 12, + OUTPUT3_ENABLE_shift = 12, + OUTPUT4_ENABLE_mask = 0x0f << 16, + OUTPUT4_ENABLE_shift = 16, + OUTPUT5_ENABLE_mask = 0x0f << 20, + OUTPUT5_ENABLE_shift = 20, + OUTPUT6_ENABLE_mask = 0x0f << 24, + OUTPUT6_ENABLE_shift = 24, + OUTPUT7_ENABLE_mask = 0x0f << 28, + OUTPUT7_ENABLE_shift = 28, + PA_SC_GENERIC_SCISSOR_TL = 0x00028240, + PA_SC_GENERIC_SCISSOR_TL__TL_X_mask = 0x3fff << 0, + PA_SC_GENERIC_SCISSOR_TL__TL_X_shift = 0, + PA_SC_GENERIC_SCISSOR_TL__TL_Y_mask = 0x3fff << 16, + PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift = 16, +/* WINDOW_OFFSET_DISABLE_bit = 1 << 31, */ + PA_SC_GENERIC_SCISSOR_BR = 0x00028244, + PA_SC_GENERIC_SCISSOR_BR__BR_X_mask = 0x3fff << 0, + PA_SC_GENERIC_SCISSOR_BR__BR_X_shift = 0, + PA_SC_GENERIC_SCISSOR_BR__BR_Y_mask = 0x3fff << 16, + PA_SC_GENERIC_SCISSOR_BR__BR_Y_shift = 16, + PA_SC_VPORT_SCISSOR_0_TL = 0x00028250, + PA_SC_VPORT_SCISSOR_0_TL_num = 16, + PA_SC_VPORT_SCISSOR_0_TL_offset = 8, + PA_SC_VPORT_SCISSOR_0_TL__TL_X_mask = 0x3fff << 0, + PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift = 0, + PA_SC_VPORT_SCISSOR_0_TL__TL_Y_mask = 0x3fff << 16, + PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift = 16, +/* WINDOW_OFFSET_DISABLE_bit = 1 << 31, */ + PA_SC_VPORT_SCISSOR_0_BR = 0x00028254, + PA_SC_VPORT_SCISSOR_0_BR_num = 16, + PA_SC_VPORT_SCISSOR_0_BR_offset = 8, + PA_SC_VPORT_SCISSOR_0_BR__BR_X_mask = 0x3fff << 0, + PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift = 0, + PA_SC_VPORT_SCISSOR_0_BR__BR_Y_mask = 0x3fff << 16, + PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift = 16, + PA_SC_VPORT_ZMIN_0 = 0x000282d0, + PA_SC_VPORT_ZMIN_0_num = 16, + PA_SC_VPORT_ZMIN_0_offset = 8, + PA_SC_VPORT_ZMAX_0 = 0x000282d4, + PA_SC_VPORT_ZMAX_0_num = 16, + PA_SC_VPORT_ZMAX_0_offset = 8, + SX_MISC = 0x00028350, + MULTIPASS_bit = 1 << 0, + SQ_VTX_SEMANTIC_0 = 0x00028380, + SQ_VTX_SEMANTIC_0_num = 32, +/* SEMANTIC_ID_mask = 0xff << 0, */ +/* SEMANTIC_ID_shift = 0, */ + VGT_MAX_VTX_INDX = 0x00028400, + VGT_MIN_VTX_INDX = 0x00028404, + VGT_INDX_OFFSET = 0x00028408, + VGT_MULTI_PRIM_IB_RESET_INDX = 0x0002840c, + SX_ALPHA_TEST_CONTROL = 0x00028410, + ALPHA_FUNC_mask = 0x07 << 0, + ALPHA_FUNC_shift = 0, + REF_NEVER = 0x00, + REF_LESS = 0x01, + REF_EQUAL = 0x02, + REF_LEQUAL = 0x03, + REF_GREATER = 0x04, + REF_NOTEQUAL = 0x05, + REF_GEQUAL = 0x06, + REF_ALWAYS = 0x07, + ALPHA_TEST_ENABLE_bit = 1 << 3, + ALPHA_TEST_BYPASS_bit = 1 << 8, + CB_BLEND_RED = 0x00028414, + CB_BLEND_GREEN = 0x00028418, + CB_BLEND_BLUE = 0x0002841c, + CB_BLEND_ALPHA = 0x00028420, + CB_FOG_RED = 0x00028424, + CB_FOG_GREEN = 0x00028428, + CB_FOG_BLUE = 0x0002842c, + DB_STENCILREFMASK = 0x00028430, + STENCILREF_mask = 0xff << 0, + STENCILREF_shift = 0, + STENCILMASK_mask = 0xff << 8, + STENCILMASK_shift = 8, + STENCILWRITEMASK_mask = 0xff << 16, + STENCILWRITEMASK_shift = 16, + DB_STENCILREFMASK_BF = 0x00028434, + STENCILREF_BF_mask = 0xff << 0, + STENCILREF_BF_shift = 0, + STENCILMASK_BF_mask = 0xff << 8, + STENCILMASK_BF_shift = 8, + STENCILWRITEMASK_BF_mask = 0xff << 16, + STENCILWRITEMASK_BF_shift = 16, + SX_ALPHA_REF = 0x00028438, + PA_CL_VPORT_XSCALE_0 = 0x0002843c, + PA_CL_VPORT_XSCALE_0_num = 16, + PA_CL_VPORT_XSCALE_0_offset = 24, + PA_CL_VPORT_XOFFSET_0 = 0x00028440, + PA_CL_VPORT_XOFFSET_0_num = 16, + PA_CL_VPORT_XOFFSET_0_offset = 24, + PA_CL_VPORT_YSCALE_0 = 0x00028444, + PA_CL_VPORT_YSCALE_0_num = 16, + PA_CL_VPORT_YSCALE_0_offset = 24, + PA_CL_VPORT_YOFFSET_0 = 0x00028448, + PA_CL_VPORT_YOFFSET_0_num = 16, + PA_CL_VPORT_YOFFSET_0_offset = 24, + PA_CL_VPORT_ZSCALE_0 = 0x0002844c, + PA_CL_VPORT_ZSCALE_0_num = 16, + PA_CL_VPORT_ZSCALE_0_offset = 24, + PA_CL_VPORT_ZOFFSET_0 = 0x00028450, + PA_CL_VPORT_ZOFFSET_0_num = 16, + PA_CL_VPORT_ZOFFSET_0_offset = 24, + SPI_VS_OUT_ID_0 = 0x00028614, + SPI_VS_OUT_ID_0_num = 10, + SEMANTIC_0_mask = 0xff << 0, + SEMANTIC_0_shift = 0, + SEMANTIC_1_mask = 0xff << 8, + SEMANTIC_1_shift = 8, + SEMANTIC_2_mask = 0xff << 16, + SEMANTIC_2_shift = 16, + SEMANTIC_3_mask = 0xff << 24, + SEMANTIC_3_shift = 24, + SPI_PS_INPUT_CNTL_0 = 0x00028644, + SPI_PS_INPUT_CNTL_0_num = 32, + SEMANTIC_mask = 0xff << 0, + SEMANTIC_shift = 0, + DEFAULT_VAL_mask = 0x03 << 8, + DEFAULT_VAL_shift = 8, + X_0_0F = 0x00, + FLAT_SHADE_bit = 1 << 10, + SEL_CENTROID_bit = 1 << 11, + SEL_LINEAR_bit = 1 << 12, + CYL_WRAP_mask = 0x0f << 13, + CYL_WRAP_shift = 13, + PT_SPRITE_TEX_bit = 1 << 17, + SEL_SAMPLE_bit = 1 << 18, + SPI_VS_OUT_CONFIG = 0x000286c4, + VS_PER_COMPONENT_bit = 1 << 0, + VS_EXPORT_COUNT_mask = 0x1f << 1, + VS_EXPORT_COUNT_shift = 1, + VS_EXPORTS_FOG_bit = 1 << 8, + VS_OUT_FOG_VEC_ADDR_mask = 0x1f << 9, + VS_OUT_FOG_VEC_ADDR_shift = 9, + SPI_PS_IN_CONTROL_0 = 0x000286cc, + NUM_INTERP_mask = 0x3f << 0, + NUM_INTERP_shift = 0, + POSITION_ENA_bit = 1 << 8, + POSITION_CENTROID_bit = 1 << 9, + POSITION_ADDR_mask = 0x1f << 10, + POSITION_ADDR_shift = 10, + PARAM_GEN_mask = 0x0f << 15, + PARAM_GEN_shift = 15, + PARAM_GEN_ADDR_mask = 0x7f << 19, + PARAM_GEN_ADDR_shift = 19, + BARYC_SAMPLE_CNTL_mask = 0x03 << 26, + BARYC_SAMPLE_CNTL_shift = 26, + CENTROIDS_ONLY = 0x00, + CENTERS_ONLY = 0x01, + CENTROIDS_AND_CENTERS = 0x02, + UNDEF = 0x03, + PERSP_GRADIENT_ENA_bit = 1 << 28, + LINEAR_GRADIENT_ENA_bit = 1 << 29, + POSITION_SAMPLE_bit = 1 << 30, + BARYC_AT_SAMPLE_ENA_bit = 1 << 31, + SPI_PS_IN_CONTROL_1 = 0x000286d0, + GEN_INDEX_PIX_bit = 1 << 0, + GEN_INDEX_PIX_ADDR_mask = 0x7f << 1, + GEN_INDEX_PIX_ADDR_shift = 1, + FRONT_FACE_ENA_bit = 1 << 8, + FRONT_FACE_CHAN_mask = 0x03 << 9, + FRONT_FACE_CHAN_shift = 9, + FRONT_FACE_ALL_BITS_bit = 1 << 11, + FRONT_FACE_ADDR_mask = 0x1f << 12, + FRONT_FACE_ADDR_shift = 12, + FOG_ADDR_mask = 0x7f << 17, + FOG_ADDR_shift = 17, + FIXED_PT_POSITION_ENA_bit = 1 << 24, + FIXED_PT_POSITION_ADDR_mask = 0x1f << 25, + FIXED_PT_POSITION_ADDR_shift = 25, + SPI_INTERP_CONTROL_0 = 0x000286d4, + FLAT_SHADE_ENA_bit = 1 << 0, + PNT_SPRITE_ENA_bit = 1 << 1, + PNT_SPRITE_OVRD_X_mask = 0x07 << 2, + PNT_SPRITE_OVRD_X_shift = 2, + SPI_PNT_SPRITE_SEL_0 = 0x00, + SPI_PNT_SPRITE_SEL_1 = 0x01, + SPI_PNT_SPRITE_SEL_S = 0x02, + SPI_PNT_SPRITE_SEL_T = 0x03, + SPI_PNT_SPRITE_SEL_NONE = 0x04, + PNT_SPRITE_OVRD_Y_mask = 0x07 << 5, + PNT_SPRITE_OVRD_Y_shift = 5, +/* SPI_PNT_SPRITE_SEL_0 = 0x00, */ +/* SPI_PNT_SPRITE_SEL_1 = 0x01, */ +/* SPI_PNT_SPRITE_SEL_S = 0x02, */ +/* SPI_PNT_SPRITE_SEL_T = 0x03, */ +/* SPI_PNT_SPRITE_SEL_NONE = 0x04, */ + PNT_SPRITE_OVRD_Z_mask = 0x07 << 8, + PNT_SPRITE_OVRD_Z_shift = 8, +/* SPI_PNT_SPRITE_SEL_0 = 0x00, */ +/* SPI_PNT_SPRITE_SEL_1 = 0x01, */ +/* SPI_PNT_SPRITE_SEL_S = 0x02, */ +/* SPI_PNT_SPRITE_SEL_T = 0x03, */ +/* SPI_PNT_SPRITE_SEL_NONE = 0x04, */ + PNT_SPRITE_OVRD_W_mask = 0x07 << 11, + PNT_SPRITE_OVRD_W_shift = 11, +/* SPI_PNT_SPRITE_SEL_0 = 0x00, */ +/* SPI_PNT_SPRITE_SEL_1 = 0x01, */ +/* SPI_PNT_SPRITE_SEL_S = 0x02, */ +/* SPI_PNT_SPRITE_SEL_T = 0x03, */ +/* SPI_PNT_SPRITE_SEL_NONE = 0x04, */ + PNT_SPRITE_TOP_1_bit = 1 << 14, + SPI_INPUT_Z = 0x000286d8, + PROVIDE_Z_TO_SPI_bit = 1 << 0, + SPI_FOG_CNTL = 0x000286dc, + PASS_FOG_THROUGH_PS_bit = 1 << 0, + PIXEL_FOG_FUNC_mask = 0x03 << 1, + PIXEL_FOG_FUNC_shift = 1, + SPI_FOG_NONE = 0x00, + SPI_FOG_EXP = 0x01, + SPI_FOG_EXP2 = 0x02, + SPI_FOG_LINEAR = 0x03, + PIXEL_FOG_SRC_SEL_bit = 1 << 3, + VS_FOG_CLAMP_DISABLE_bit = 1 << 4, + SPI_FOG_FUNC_SCALE = 0x000286e0, + SPI_FOG_FUNC_BIAS = 0x000286e4, + CB_BLEND0_CONTROL = 0x00028780, + CB_BLEND0_CONTROL_num = 8, + COLOR_SRCBLEND_mask = 0x1f << 0, + COLOR_SRCBLEND_shift = 0, + COLOR_COMB_FCN_mask = 0x07 << 5, + COLOR_COMB_FCN_shift = 5, + COLOR_DESTBLEND_mask = 0x1f << 8, + COLOR_DESTBLEND_shift = 8, + OPACITY_WEIGHT_bit = 1 << 13, + ALPHA_SRCBLEND_mask = 0x1f << 16, + ALPHA_SRCBLEND_shift = 16, + ALPHA_COMB_FCN_mask = 0x07 << 21, + ALPHA_COMB_FCN_shift = 21, + ALPHA_DESTBLEND_mask = 0x1f << 24, + ALPHA_DESTBLEND_shift = 24, + SEPARATE_ALPHA_BLEND_bit = 1 << 29, + VGT_DMA_BASE_HI = 0x000287e4, + VGT_DMA_BASE_HI__BASE_ADDR_mask = 0xff << 0, + VGT_DMA_BASE_HI__BASE_ADDR_shift = 0, + VGT_DMA_BASE = 0x000287e8, + VGT_DRAW_INITIATOR = 0x000287f0, + SOURCE_SELECT_mask = 0x03 << 0, + SOURCE_SELECT_shift = 0, + DI_SRC_SEL_DMA = 0x00, + DI_SRC_SEL_IMMEDIATE = 0x01, + DI_SRC_SEL_AUTO_INDEX = 0x02, + DI_SRC_SEL_RESERVED = 0x03, + MAJOR_MODE_mask = 0x03 << 2, + MAJOR_MODE_shift = 2, + DI_MAJOR_MODE_0 = 0x00, + DI_MAJOR_MODE_1 = 0x01, + SPRITE_EN_bit = 1 << 4, + NOT_EOP_bit = 1 << 5, + USE_OPAQUE_bit = 1 << 6, + VGT_IMMED_DATA = 0x000287f4, + VGT_EVENT_ADDRESS_REG = 0x000287f8, + ADDRESS_LOW_mask = 0xfffffff << 0, + ADDRESS_LOW_shift = 0, + DB_DEPTH_CONTROL = 0x00028800, + STENCIL_ENABLE_bit = 1 << 0, + Z_ENABLE_bit = 1 << 1, + Z_WRITE_ENABLE_bit = 1 << 2, + ZFUNC_mask = 0x07 << 4, + ZFUNC_shift = 4, + FRAG_NEVER = 0x00, + FRAG_LESS = 0x01, + FRAG_EQUAL = 0x02, + FRAG_LEQUAL = 0x03, + FRAG_GREATER = 0x04, + FRAG_NOTEQUAL = 0x05, + FRAG_GEQUAL = 0x06, + FRAG_ALWAYS = 0x07, + BACKFACE_ENABLE_bit = 1 << 7, + STENCILFUNC_mask = 0x07 << 8, + STENCILFUNC_shift = 8, +/* REF_NEVER = 0x00, */ +/* REF_LESS = 0x01, */ +/* REF_EQUAL = 0x02, */ +/* REF_LEQUAL = 0x03, */ +/* REF_GREATER = 0x04, */ +/* REF_NOTEQUAL = 0x05, */ +/* REF_GEQUAL = 0x06, */ +/* REF_ALWAYS = 0x07, */ + STENCILFAIL_mask = 0x07 << 11, + STENCILFAIL_shift = 11, + STENCIL_KEEP = 0x00, + STENCIL_ZERO = 0x01, + STENCIL_REPLACE = 0x02, + STENCIL_INCR_CLAMP = 0x03, + STENCIL_DECR_CLAMP = 0x04, + STENCIL_INVERT = 0x05, + STENCIL_INCR_WRAP = 0x06, + STENCIL_DECR_WRAP = 0x07, + STENCILZPASS_mask = 0x07 << 14, + STENCILZPASS_shift = 14, +/* STENCIL_KEEP = 0x00, */ +/* STENCIL_ZERO = 0x01, */ +/* STENCIL_REPLACE = 0x02, */ +/* STENCIL_INCR_CLAMP = 0x03, */ +/* STENCIL_DECR_CLAMP = 0x04, */ +/* STENCIL_INVERT = 0x05, */ +/* STENCIL_INCR_WRAP = 0x06, */ +/* STENCIL_DECR_WRAP = 0x07, */ + STENCILZFAIL_mask = 0x07 << 17, + STENCILZFAIL_shift = 17, +/* STENCIL_KEEP = 0x00, */ +/* STENCIL_ZERO = 0x01, */ +/* STENCIL_REPLACE = 0x02, */ +/* STENCIL_INCR_CLAMP = 0x03, */ +/* STENCIL_DECR_CLAMP = 0x04, */ +/* STENCIL_INVERT = 0x05, */ +/* STENCIL_INCR_WRAP = 0x06, */ +/* STENCIL_DECR_WRAP = 0x07, */ + STENCILFUNC_BF_mask = 0x07 << 20, + STENCILFUNC_BF_shift = 20, +/* REF_NEVER = 0x00, */ +/* REF_LESS = 0x01, */ +/* REF_EQUAL = 0x02, */ +/* REF_LEQUAL = 0x03, */ +/* REF_GREATER = 0x04, */ +/* REF_NOTEQUAL = 0x05, */ +/* REF_GEQUAL = 0x06, */ +/* REF_ALWAYS = 0x07, */ + STENCILFAIL_BF_mask = 0x07 << 23, + STENCILFAIL_BF_shift = 23, +/* STENCIL_KEEP = 0x00, */ +/* STENCIL_ZERO = 0x01, */ +/* STENCIL_REPLACE = 0x02, */ +/* STENCIL_INCR_CLAMP = 0x03, */ +/* STENCIL_DECR_CLAMP = 0x04, */ +/* STENCIL_INVERT = 0x05, */ +/* STENCIL_INCR_WRAP = 0x06, */ +/* STENCIL_DECR_WRAP = 0x07, */ + STENCILZPASS_BF_mask = 0x07 << 26, + STENCILZPASS_BF_shift = 26, +/* STENCIL_KEEP = 0x00, */ +/* STENCIL_ZERO = 0x01, */ +/* STENCIL_REPLACE = 0x02, */ +/* STENCIL_INCR_CLAMP = 0x03, */ +/* STENCIL_DECR_CLAMP = 0x04, */ +/* STENCIL_INVERT = 0x05, */ +/* STENCIL_INCR_WRAP = 0x06, */ +/* STENCIL_DECR_WRAP = 0x07, */ + STENCILZFAIL_BF_mask = 0x07 << 29, + STENCILZFAIL_BF_shift = 29, +/* STENCIL_KEEP = 0x00, */ +/* STENCIL_ZERO = 0x01, */ +/* STENCIL_REPLACE = 0x02, */ +/* STENCIL_INCR_CLAMP = 0x03, */ +/* STENCIL_DECR_CLAMP = 0x04, */ +/* STENCIL_INVERT = 0x05, */ +/* STENCIL_INCR_WRAP = 0x06, */ +/* STENCIL_DECR_WRAP = 0x07, */ + CB_BLEND_CONTROL = 0x00028804, +/* COLOR_SRCBLEND_mask = 0x1f << 0, */ +/* COLOR_SRCBLEND_shift = 0, */ + BLEND_ZERO = 0x00, + BLEND_ONE = 0x01, + BLEND_SRC_COLOR = 0x02, + BLEND_ONE_MINUS_SRC_COLOR = 0x03, + BLEND_SRC_ALPHA = 0x04, + BLEND_ONE_MINUS_SRC_ALPHA = 0x05, + BLEND_DST_ALPHA = 0x06, + BLEND_ONE_MINUS_DST_ALPHA = 0x07, + BLEND_DST_COLOR = 0x08, + BLEND_ONE_MINUS_DST_COLOR = 0x09, + BLEND_SRC_ALPHA_SATURATE = 0x0a, + BLEND_BOTH_SRC_ALPHA = 0x0b, + BLEND_BOTH_INV_SRC_ALPHA = 0x0c, + BLEND_CONSTANT_COLOR = 0x0d, + BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0e, + BLEND_SRC1_COLOR = 0x0f, + BLEND_INV_SRC1_COLOR = 0x10, + BLEND_SRC1_ALPHA = 0x11, + BLEND_INV_SRC1_ALPHA = 0x12, + BLEND_CONSTANT_ALPHA = 0x13, + BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x14, +/* COLOR_COMB_FCN_mask = 0x07 << 5, */ +/* COLOR_COMB_FCN_shift = 5, */ + COMB_DST_PLUS_SRC = 0x00, + COMB_SRC_MINUS_DST = 0x01, + COMB_MIN_DST_SRC = 0x02, + COMB_MAX_DST_SRC = 0x03, + COMB_DST_MINUS_SRC = 0x04, +/* COLOR_DESTBLEND_mask = 0x1f << 8, */ +/* COLOR_DESTBLEND_shift = 8, */ +/* BLEND_ZERO = 0x00, */ +/* BLEND_ONE = 0x01, */ +/* BLEND_SRC_COLOR = 0x02, */ +/* BLEND_ONE_MINUS_SRC_COLOR = 0x03, */ +/* BLEND_SRC_ALPHA = 0x04, */ +/* BLEND_ONE_MINUS_SRC_ALPHA = 0x05, */ +/* BLEND_DST_ALPHA = 0x06, */ +/* BLEND_ONE_MINUS_DST_ALPHA = 0x07, */ +/* BLEND_DST_COLOR = 0x08, */ +/* BLEND_ONE_MINUS_DST_COLOR = 0x09, */ +/* BLEND_SRC_ALPHA_SATURATE = 0x0a, */ +/* BLEND_BOTH_SRC_ALPHA = 0x0b, */ +/* BLEND_BOTH_INV_SRC_ALPHA = 0x0c, */ +/* BLEND_CONSTANT_COLOR = 0x0d, */ +/* BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0e, */ +/* BLEND_SRC1_COLOR = 0x0f, */ +/* BLEND_INV_SRC1_COLOR = 0x10, */ +/* BLEND_SRC1_ALPHA = 0x11, */ +/* BLEND_INV_SRC1_ALPHA = 0x12, */ +/* BLEND_CONSTANT_ALPHA = 0x13, */ +/* BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x14, */ +/* OPACITY_WEIGHT_bit = 1 << 13, */ +/* ALPHA_SRCBLEND_mask = 0x1f << 16, */ +/* ALPHA_SRCBLEND_shift = 16, */ +/* BLEND_ZERO = 0x00, */ +/* BLEND_ONE = 0x01, */ +/* BLEND_SRC_COLOR = 0x02, */ +/* BLEND_ONE_MINUS_SRC_COLOR = 0x03, */ +/* BLEND_SRC_ALPHA = 0x04, */ +/* BLEND_ONE_MINUS_SRC_ALPHA = 0x05, */ +/* BLEND_DST_ALPHA = 0x06, */ +/* BLEND_ONE_MINUS_DST_ALPHA = 0x07, */ +/* BLEND_DST_COLOR = 0x08, */ +/* BLEND_ONE_MINUS_DST_COLOR = 0x09, */ +/* BLEND_SRC_ALPHA_SATURATE = 0x0a, */ +/* BLEND_BOTH_SRC_ALPHA = 0x0b, */ +/* BLEND_BOTH_INV_SRC_ALPHA = 0x0c, */ +/* BLEND_CONSTANT_COLOR = 0x0d, */ +/* BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0e, */ +/* BLEND_SRC1_COLOR = 0x0f, */ +/* BLEND_INV_SRC1_COLOR = 0x10, */ +/* BLEND_SRC1_ALPHA = 0x11, */ +/* BLEND_INV_SRC1_ALPHA = 0x12, */ +/* BLEND_CONSTANT_ALPHA = 0x13, */ +/* BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x14, */ +/* ALPHA_COMB_FCN_mask = 0x07 << 21, */ +/* ALPHA_COMB_FCN_shift = 21, */ +/* COMB_DST_PLUS_SRC = 0x00, */ +/* COMB_SRC_MINUS_DST = 0x01, */ +/* COMB_MIN_DST_SRC = 0x02, */ +/* COMB_MAX_DST_SRC = 0x03, */ +/* COMB_DST_MINUS_SRC = 0x04, */ +/* ALPHA_DESTBLEND_mask = 0x1f << 24, */ +/* ALPHA_DESTBLEND_shift = 24, */ +/* BLEND_ZERO = 0x00, */ +/* BLEND_ONE = 0x01, */ +/* BLEND_SRC_COLOR = 0x02, */ +/* BLEND_ONE_MINUS_SRC_COLOR = 0x03, */ +/* BLEND_SRC_ALPHA = 0x04, */ +/* BLEND_ONE_MINUS_SRC_ALPHA = 0x05, */ +/* BLEND_DST_ALPHA = 0x06, */ +/* BLEND_ONE_MINUS_DST_ALPHA = 0x07, */ +/* BLEND_DST_COLOR = 0x08, */ +/* BLEND_ONE_MINUS_DST_COLOR = 0x09, */ +/* BLEND_SRC_ALPHA_SATURATE = 0x0a, */ +/* BLEND_BOTH_SRC_ALPHA = 0x0b, */ +/* BLEND_BOTH_INV_SRC_ALPHA = 0x0c, */ +/* BLEND_CONSTANT_COLOR = 0x0d, */ +/* BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0e, */ +/* BLEND_SRC1_COLOR = 0x0f, */ +/* BLEND_INV_SRC1_COLOR = 0x10, */ +/* BLEND_SRC1_ALPHA = 0x11, */ +/* BLEND_INV_SRC1_ALPHA = 0x12, */ +/* BLEND_CONSTANT_ALPHA = 0x13, */ +/* BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x14, */ +/* SEPARATE_ALPHA_BLEND_bit = 1 << 29, */ + CB_COLOR_CONTROL = 0x00028808, + FOG_ENABLE_bit = 1 << 0, + MULTIWRITE_ENABLE_bit = 1 << 1, + DITHER_ENABLE_bit = 1 << 2, + DEGAMMA_ENABLE_bit = 1 << 3, + SPECIAL_OP_mask = 0x07 << 4, + SPECIAL_OP_shift = 4, + SPECIAL_NORMAL = 0x00, + SPECIAL_DISABLE = 0x01, + SPECIAL_FAST_CLEAR = 0x02, + SPECIAL_FORCE_CLEAR = 0x03, + SPECIAL_EXPAND_COLOR = 0x04, + SPECIAL_EXPAND_TEXTURE = 0x05, + SPECIAL_EXPAND_SAMPLES = 0x06, + SPECIAL_RESOLVE_BOX = 0x07, + PER_MRT_BLEND_bit = 1 << 7, + TARGET_BLEND_ENABLE_mask = 0xff << 8, + TARGET_BLEND_ENABLE_shift = 8, + ROP3_mask = 0xff << 16, + ROP3_shift = 16, + DB_SHADER_CONTROL = 0x0002880c, + Z_EXPORT_ENABLE_bit = 1 << 0, + STENCIL_REF_EXPORT_ENABLE_bit = 1 << 1, + Z_ORDER_mask = 0x03 << 4, + Z_ORDER_shift = 4, + LATE_Z = 0x00, + EARLY_Z_THEN_LATE_Z = 0x01, + RE_Z = 0x02, + EARLY_Z_THEN_RE_Z = 0x03, + KILL_ENABLE_bit = 1 << 6, + COVERAGE_TO_MASK_ENABLE_bit = 1 << 7, + MASK_EXPORT_ENABLE_bit = 1 << 8, + DUAL_EXPORT_ENABLE_bit = 1 << 9, + EXEC_ON_HIER_FAIL_bit = 1 << 10, + EXEC_ON_NOOP_bit = 1 << 11, + PA_CL_CLIP_CNTL = 0x00028810, + UCP_ENA_0_bit = 1 << 0, + UCP_ENA_1_bit = 1 << 1, + UCP_ENA_2_bit = 1 << 2, + UCP_ENA_3_bit = 1 << 3, + UCP_ENA_4_bit = 1 << 4, + UCP_ENA_5_bit = 1 << 5, + PS_UCP_Y_SCALE_NEG_bit = 1 << 13, + PS_UCP_MODE_mask = 0x03 << 14, + PS_UCP_MODE_shift = 14, + CLIP_DISABLE_bit = 1 << 16, + UCP_CULL_ONLY_ENA_bit = 1 << 17, + BOUNDARY_EDGE_FLAG_ENA_bit = 1 << 18, + DX_CLIP_SPACE_DEF_bit = 1 << 19, + DIS_CLIP_ERR_DETECT_bit = 1 << 20, + VTX_KILL_OR_bit = 1 << 21, + DX_LINEAR_ATTR_CLIP_ENA_bit = 1 << 24, + VTE_VPORT_PROVOKE_DISABLE_bit = 1 << 25, + ZCLIP_NEAR_DISABLE_bit = 1 << 26, + ZCLIP_FAR_DISABLE_bit = 1 << 27, + PA_SU_SC_MODE_CNTL = 0x00028814, + CULL_FRONT_bit = 1 << 0, + CULL_BACK_bit = 1 << 1, + FACE_bit = 1 << 2, + POLY_MODE_mask = 0x03 << 3, + POLY_MODE_shift = 3, + X_DISABLE_POLY_MODE = 0x00, + X_DUAL_MODE = 0x01, + POLYMODE_FRONT_PTYPE_mask = 0x07 << 5, + POLYMODE_FRONT_PTYPE_shift = 5, + X_DRAW_POINTS = 0x00, + X_DRAW_LINES = 0x01, + X_DRAW_TRIANGLES = 0x02, + POLYMODE_BACK_PTYPE_mask = 0x07 << 8, + POLYMODE_BACK_PTYPE_shift = 8, +/* X_DRAW_POINTS = 0x00, */ +/* X_DRAW_LINES = 0x01, */ +/* X_DRAW_TRIANGLES = 0x02, */ + POLY_OFFSET_FRONT_ENABLE_bit = 1 << 11, + POLY_OFFSET_BACK_ENABLE_bit = 1 << 12, + POLY_OFFSET_PARA_ENABLE_bit = 1 << 13, + VTX_WINDOW_OFFSET_ENABLE_bit = 1 << 16, + PROVOKING_VTX_LAST_bit = 1 << 19, + PERSP_CORR_DIS_bit = 1 << 20, + MULTI_PRIM_IB_ENA_bit = 1 << 21, + PA_CL_VTE_CNTL = 0x00028818, + VPORT_X_SCALE_ENA_bit = 1 << 0, + VPORT_X_OFFSET_ENA_bit = 1 << 1, + VPORT_Y_SCALE_ENA_bit = 1 << 2, + VPORT_Y_OFFSET_ENA_bit = 1 << 3, + VPORT_Z_SCALE_ENA_bit = 1 << 4, + VPORT_Z_OFFSET_ENA_bit = 1 << 5, + VTX_XY_FMT_bit = 1 << 8, + VTX_Z_FMT_bit = 1 << 9, + VTX_W0_FMT_bit = 1 << 10, + PERFCOUNTER_REF_bit = 1 << 11, + PA_CL_VS_OUT_CNTL = 0x0002881c, + CLIP_DIST_ENA_0_bit = 1 << 0, + CLIP_DIST_ENA_1_bit = 1 << 1, + CLIP_DIST_ENA_2_bit = 1 << 2, + CLIP_DIST_ENA_3_bit = 1 << 3, + CLIP_DIST_ENA_4_bit = 1 << 4, + CLIP_DIST_ENA_5_bit = 1 << 5, + CLIP_DIST_ENA_6_bit = 1 << 6, + CLIP_DIST_ENA_7_bit = 1 << 7, + CULL_DIST_ENA_0_bit = 1 << 8, + CULL_DIST_ENA_1_bit = 1 << 9, + CULL_DIST_ENA_2_bit = 1 << 10, + CULL_DIST_ENA_3_bit = 1 << 11, + CULL_DIST_ENA_4_bit = 1 << 12, + CULL_DIST_ENA_5_bit = 1 << 13, + CULL_DIST_ENA_6_bit = 1 << 14, + CULL_DIST_ENA_7_bit = 1 << 15, + USE_VTX_POINT_SIZE_bit = 1 << 16, + USE_VTX_EDGE_FLAG_bit = 1 << 17, + USE_VTX_RENDER_TARGET_INDX_bit = 1 << 18, + USE_VTX_VIEWPORT_INDX_bit = 1 << 19, + USE_VTX_KILL_FLAG_bit = 1 << 20, + VS_OUT_MISC_VEC_ENA_bit = 1 << 21, + VS_OUT_CCDIST0_VEC_ENA_bit = 1 << 22, + VS_OUT_CCDIST1_VEC_ENA_bit = 1 << 23, + PA_CL_NANINF_CNTL = 0x00028820, + VTE_XY_INF_DISCARD_bit = 1 << 0, + VTE_Z_INF_DISCARD_bit = 1 << 1, + VTE_W_INF_DISCARD_bit = 1 << 2, + VTE_0XNANINF_IS_0_bit = 1 << 3, + VTE_XY_NAN_RETAIN_bit = 1 << 4, + VTE_Z_NAN_RETAIN_bit = 1 << 5, + VTE_W_NAN_RETAIN_bit = 1 << 6, + VTE_W_RECIP_NAN_IS_0_bit = 1 << 7, + VS_XY_NAN_TO_INF_bit = 1 << 8, + VS_XY_INF_RETAIN_bit = 1 << 9, + VS_Z_NAN_TO_INF_bit = 1 << 10, + VS_Z_INF_RETAIN_bit = 1 << 11, + VS_W_NAN_TO_INF_bit = 1 << 12, + VS_W_INF_RETAIN_bit = 1 << 13, + VS_CLIP_DIST_INF_DISCARD_bit = 1 << 14, + VTE_NO_OUTPUT_NEG_0_bit = 1 << 20, + SQ_PGM_START_PS = 0x00028840, + SQ_PGM_RESOURCES_PS = 0x00028850, + NUM_GPRS_mask = 0xff << 0, + NUM_GPRS_shift = 0, + STACK_SIZE_mask = 0xff << 8, + STACK_SIZE_shift = 8, + SQ_PGM_RESOURCES_PS__DX10_CLAMP_bit = 1 << 21, + FETCH_CACHE_LINES_mask = 0x07 << 24, + FETCH_CACHE_LINES_shift = 24, + UNCACHED_FIRST_INST_bit = 1 << 28, + CLAMP_CONSTS_bit = 1 << 31, + SQ_PGM_EXPORTS_PS = 0x00028854, + EXPORT_MODE_mask = 0x1f << 0, + EXPORT_MODE_shift = 0, + SQ_PGM_START_VS = 0x00028858, + SQ_PGM_RESOURCES_VS = 0x00028868, +/* NUM_GPRS_mask = 0xff << 0, */ +/* NUM_GPRS_shift = 0, */ +/* STACK_SIZE_mask = 0xff << 8, */ +/* STACK_SIZE_shift = 8, */ + SQ_PGM_RESOURCES_VS__DX10_CLAMP_bit = 1 << 21, +/* FETCH_CACHE_LINES_mask = 0x07 << 24, */ +/* FETCH_CACHE_LINES_shift = 24, */ +/* UNCACHED_FIRST_INST_bit = 1 << 28, */ + SQ_PGM_START_GS = 0x0002886c, + SQ_PGM_RESOURCES_GS = 0x0002887c, +/* NUM_GPRS_mask = 0xff << 0, */ +/* NUM_GPRS_shift = 0, */ +/* STACK_SIZE_mask = 0xff << 8, */ +/* STACK_SIZE_shift = 8, */ + SQ_PGM_RESOURCES_GS__DX10_CLAMP_bit = 1 << 21, +/* FETCH_CACHE_LINES_mask = 0x07 << 24, */ +/* FETCH_CACHE_LINES_shift = 24, */ +/* UNCACHED_FIRST_INST_bit = 1 << 28, */ + SQ_PGM_START_ES = 0x00028880, + SQ_PGM_RESOURCES_ES = 0x00028890, +/* NUM_GPRS_mask = 0xff << 0, */ +/* NUM_GPRS_shift = 0, */ +/* STACK_SIZE_mask = 0xff << 8, */ +/* STACK_SIZE_shift = 8, */ + SQ_PGM_RESOURCES_ES__DX10_CLAMP_bit = 1 << 21, +/* FETCH_CACHE_LINES_mask = 0x07 << 24, */ +/* FETCH_CACHE_LINES_shift = 24, */ +/* UNCACHED_FIRST_INST_bit = 1 << 28, */ + SQ_PGM_START_FS = 0x00028894, + SQ_PGM_RESOURCES_FS = 0x000288a4, +/* NUM_GPRS_mask = 0xff << 0, */ +/* NUM_GPRS_shift = 0, */ +/* STACK_SIZE_mask = 0xff << 8, */ +/* STACK_SIZE_shift = 8, */ + SQ_PGM_RESOURCES_FS__DX10_CLAMP_bit = 1 << 21, + SQ_ESGS_RING_ITEMSIZE = 0x000288a8, + ITEMSIZE_mask = 0x7fff << 0, + ITEMSIZE_shift = 0, + SQ_GSVS_RING_ITEMSIZE = 0x000288ac, +/* ITEMSIZE_mask = 0x7fff << 0, */ +/* ITEMSIZE_shift = 0, */ + SQ_ESTMP_RING_ITEMSIZE = 0x000288b0, +/* ITEMSIZE_mask = 0x7fff << 0, */ +/* ITEMSIZE_shift = 0, */ + SQ_GSTMP_RING_ITEMSIZE = 0x000288b4, +/* ITEMSIZE_mask = 0x7fff << 0, */ +/* ITEMSIZE_shift = 0, */ + SQ_VSTMP_RING_ITEMSIZE = 0x000288b8, +/* ITEMSIZE_mask = 0x7fff << 0, */ +/* ITEMSIZE_shift = 0, */ + SQ_PSTMP_RING_ITEMSIZE = 0x000288bc, +/* ITEMSIZE_mask = 0x7fff << 0, */ +/* ITEMSIZE_shift = 0, */ + SQ_FBUF_RING_ITEMSIZE = 0x000288c0, +/* ITEMSIZE_mask = 0x7fff << 0, */ +/* ITEMSIZE_shift = 0, */ + SQ_REDUC_RING_ITEMSIZE = 0x000288c4, +/* ITEMSIZE_mask = 0x7fff << 0, */ +/* ITEMSIZE_shift = 0, */ + SQ_GS_VERT_ITEMSIZE = 0x000288c8, +/* ITEMSIZE_mask = 0x7fff << 0, */ +/* ITEMSIZE_shift = 0, */ + SQ_PGM_CF_OFFSET_PS = 0x000288cc, + PGM_CF_OFFSET_mask = 0xfffff << 0, + PGM_CF_OFFSET_shift = 0, + SQ_PGM_CF_OFFSET_VS = 0x000288d0, +/* PGM_CF_OFFSET_mask = 0xfffff << 0, */ +/* PGM_CF_OFFSET_shift = 0, */ + SQ_PGM_CF_OFFSET_GS = 0x000288d4, +/* PGM_CF_OFFSET_mask = 0xfffff << 0, */ +/* PGM_CF_OFFSET_shift = 0, */ + SQ_PGM_CF_OFFSET_ES = 0x000288d8, +/* PGM_CF_OFFSET_mask = 0xfffff << 0, */ +/* PGM_CF_OFFSET_shift = 0, */ + SQ_PGM_CF_OFFSET_FS = 0x000288dc, +/* PGM_CF_OFFSET_mask = 0xfffff << 0, */ +/* PGM_CF_OFFSET_shift = 0, */ + SQ_VTX_SEMANTIC_CLEAR = 0x000288e0, + SQ_ALU_CONST_CACHE_PS_0 = 0x00028940, + SQ_ALU_CONST_CACHE_PS_0_num = 16, + SQ_ALU_CONST_CACHE_VS_0 = 0x00028980, + SQ_ALU_CONST_CACHE_VS_0_num = 16, + SQ_ALU_CONST_CACHE_GS_0 = 0x000289c0, + SQ_ALU_CONST_CACHE_GS_0_num = 16, + PA_SU_POINT_SIZE = 0x00028a00, + PA_SU_POINT_SIZE__HEIGHT_mask = 0xffff << 0, + PA_SU_POINT_SIZE__HEIGHT_shift = 0, + PA_SU_POINT_SIZE__WIDTH_mask = 0xffff << 16, + PA_SU_POINT_SIZE__WIDTH_shift = 16, + PA_SU_POINT_MINMAX = 0x00028a04, + MIN_SIZE_mask = 0xffff << 0, + MIN_SIZE_shift = 0, + MAX_SIZE_mask = 0xffff << 16, + MAX_SIZE_shift = 16, + PA_SU_LINE_CNTL = 0x00028a08, + PA_SU_LINE_CNTL__WIDTH_mask = 0xffff << 0, + PA_SU_LINE_CNTL__WIDTH_shift = 0, + PA_SC_LINE_STIPPLE = 0x00028a0c, + LINE_PATTERN_mask = 0xffff << 0, + LINE_PATTERN_shift = 0, + REPEAT_COUNT_mask = 0xff << 16, + REPEAT_COUNT_shift = 16, + PATTERN_BIT_ORDER_bit = 1 << 28, + AUTO_RESET_CNTL_mask = 0x03 << 29, + AUTO_RESET_CNTL_shift = 29, + VGT_OUTPUT_PATH_CNTL = 0x00028a10, + PATH_SELECT_mask = 0x03 << 0, + PATH_SELECT_shift = 0, + VGT_OUTPATH_VTX_REUSE = 0x00, + VGT_OUTPATH_TESS_EN = 0x01, + VGT_OUTPATH_PASSTHRU = 0x02, + VGT_OUTPATH_GS_BLOCK = 0x03, + VGT_HOS_CNTL = 0x00028a14, + TESS_MODE_mask = 0x03 << 0, + TESS_MODE_shift = 0, + VGT_HOS_MAX_TESS_LEVEL = 0x00028a18, + VGT_HOS_MIN_TESS_LEVEL = 0x00028a1c, + VGT_HOS_REUSE_DEPTH = 0x00028a20, + REUSE_DEPTH_mask = 0xff << 0, + REUSE_DEPTH_shift = 0, + VGT_GROUP_PRIM_TYPE = 0x00028a24, + VGT_GROUP_PRIM_TYPE__PRIM_TYPE_mask = 0x1f << 0, + VGT_GROUP_PRIM_TYPE__PRIM_TYPE_shift = 0, + VGT_GRP_3D_POINT = 0x00, + VGT_GRP_3D_LINE = 0x01, + VGT_GRP_3D_TRI = 0x02, + VGT_GRP_3D_RECT = 0x03, + VGT_GRP_3D_QUAD = 0x04, + VGT_GRP_2D_COPY_RECT_V0 = 0x05, + VGT_GRP_2D_COPY_RECT_V1 = 0x06, + VGT_GRP_2D_COPY_RECT_V2 = 0x07, + VGT_GRP_2D_COPY_RECT_V3 = 0x08, + VGT_GRP_2D_FILL_RECT = 0x09, + VGT_GRP_2D_LINE = 0x0a, + VGT_GRP_2D_TRI = 0x0b, + VGT_GRP_PRIM_INDEX_LINE = 0x0c, + VGT_GRP_PRIM_INDEX_TRI = 0x0d, + VGT_GRP_PRIM_INDEX_QUAD = 0x0e, + VGT_GRP_3D_LINE_ADJ = 0x0f, + VGT_GRP_3D_TRI_ADJ = 0x10, + RETAIN_ORDER_bit = 1 << 14, + RETAIN_QUADS_bit = 1 << 15, + PRIM_ORDER_mask = 0x07 << 16, + PRIM_ORDER_shift = 16, + VGT_GRP_LIST = 0x00, + VGT_GRP_STRIP = 0x01, + VGT_GRP_FAN = 0x02, + VGT_GRP_LOOP = 0x03, + VGT_GRP_POLYGON = 0x04, + VGT_GROUP_FIRST_DECR = 0x00028a28, + FIRST_DECR_mask = 0x0f << 0, + FIRST_DECR_shift = 0, + VGT_GROUP_DECR = 0x00028a2c, + DECR_mask = 0x0f << 0, + DECR_shift = 0, + VGT_GROUP_VECT_0_CNTL = 0x00028a30, + COMP_X_EN_bit = 1 << 0, + COMP_Y_EN_bit = 1 << 1, + COMP_Z_EN_bit = 1 << 2, + COMP_W_EN_bit = 1 << 3, + VGT_GROUP_VECT_0_CNTL__STRIDE_mask = 0xff << 8, + VGT_GROUP_VECT_0_CNTL__STRIDE_shift = 8, + SHIFT_mask = 0xff << 16, + SHIFT_shift = 16, + VGT_GROUP_VECT_1_CNTL = 0x00028a34, +/* COMP_X_EN_bit = 1 << 0, */ +/* COMP_Y_EN_bit = 1 << 1, */ +/* COMP_Z_EN_bit = 1 << 2, */ +/* COMP_W_EN_bit = 1 << 3, */ + VGT_GROUP_VECT_1_CNTL__STRIDE_mask = 0xff << 8, + VGT_GROUP_VECT_1_CNTL__STRIDE_shift = 8, +/* SHIFT_mask = 0xff << 16, */ +/* SHIFT_shift = 16, */ + VGT_GROUP_VECT_0_FMT_CNTL = 0x00028a38, + X_CONV_mask = 0x0f << 0, + X_CONV_shift = 0, + VGT_GRP_INDEX_16 = 0x00, + VGT_GRP_INDEX_32 = 0x01, + VGT_GRP_UINT_16 = 0x02, + VGT_GRP_UINT_32 = 0x03, + VGT_GRP_SINT_16 = 0x04, + VGT_GRP_SINT_32 = 0x05, + VGT_GRP_FLOAT_32 = 0x06, + VGT_GRP_AUTO_PRIM = 0x07, + VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, + X_OFFSET_mask = 0x0f << 4, + X_OFFSET_shift = 4, + Y_CONV_mask = 0x0f << 8, + Y_CONV_shift = 8, +/* VGT_GRP_INDEX_16 = 0x00, */ +/* VGT_GRP_INDEX_32 = 0x01, */ +/* VGT_GRP_UINT_16 = 0x02, */ +/* VGT_GRP_UINT_32 = 0x03, */ +/* VGT_GRP_SINT_16 = 0x04, */ +/* VGT_GRP_SINT_32 = 0x05, */ +/* VGT_GRP_FLOAT_32 = 0x06, */ +/* VGT_GRP_AUTO_PRIM = 0x07, */ +/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */ + Y_OFFSET_mask = 0x0f << 12, + Y_OFFSET_shift = 12, + Z_CONV_mask = 0x0f << 16, + Z_CONV_shift = 16, +/* VGT_GRP_INDEX_16 = 0x00, */ +/* VGT_GRP_INDEX_32 = 0x01, */ +/* VGT_GRP_UINT_16 = 0x02, */ +/* VGT_GRP_UINT_32 = 0x03, */ +/* VGT_GRP_SINT_16 = 0x04, */ +/* VGT_GRP_SINT_32 = 0x05, */ +/* VGT_GRP_FLOAT_32 = 0x06, */ +/* VGT_GRP_AUTO_PRIM = 0x07, */ +/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */ + Z_OFFSET_mask = 0x0f << 20, + Z_OFFSET_shift = 20, + W_CONV_mask = 0x0f << 24, + W_CONV_shift = 24, +/* VGT_GRP_INDEX_16 = 0x00, */ +/* VGT_GRP_INDEX_32 = 0x01, */ +/* VGT_GRP_UINT_16 = 0x02, */ +/* VGT_GRP_UINT_32 = 0x03, */ +/* VGT_GRP_SINT_16 = 0x04, */ +/* VGT_GRP_SINT_32 = 0x05, */ +/* VGT_GRP_FLOAT_32 = 0x06, */ +/* VGT_GRP_AUTO_PRIM = 0x07, */ +/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */ + W_OFFSET_mask = 0x0f << 28, + W_OFFSET_shift = 28, + VGT_GROUP_VECT_1_FMT_CNTL = 0x00028a3c, +/* X_CONV_mask = 0x0f << 0, */ +/* X_CONV_shift = 0, */ +/* VGT_GRP_INDEX_16 = 0x00, */ +/* VGT_GRP_INDEX_32 = 0x01, */ +/* VGT_GRP_UINT_16 = 0x02, */ +/* VGT_GRP_UINT_32 = 0x03, */ +/* VGT_GRP_SINT_16 = 0x04, */ +/* VGT_GRP_SINT_32 = 0x05, */ +/* VGT_GRP_FLOAT_32 = 0x06, */ +/* VGT_GRP_AUTO_PRIM = 0x07, */ +/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */ +/* X_OFFSET_mask = 0x0f << 4, */ +/* X_OFFSET_shift = 4, */ +/* Y_CONV_mask = 0x0f << 8, */ +/* Y_CONV_shift = 8, */ +/* VGT_GRP_INDEX_16 = 0x00, */ +/* VGT_GRP_INDEX_32 = 0x01, */ +/* VGT_GRP_UINT_16 = 0x02, */ +/* VGT_GRP_UINT_32 = 0x03, */ +/* VGT_GRP_SINT_16 = 0x04, */ +/* VGT_GRP_SINT_32 = 0x05, */ +/* VGT_GRP_FLOAT_32 = 0x06, */ +/* VGT_GRP_AUTO_PRIM = 0x07, */ +/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */ +/* Y_OFFSET_mask = 0x0f << 12, */ +/* Y_OFFSET_shift = 12, */ +/* Z_CONV_mask = 0x0f << 16, */ +/* Z_CONV_shift = 16, */ +/* VGT_GRP_INDEX_16 = 0x00, */ +/* VGT_GRP_INDEX_32 = 0x01, */ +/* VGT_GRP_UINT_16 = 0x02, */ +/* VGT_GRP_UINT_32 = 0x03, */ +/* VGT_GRP_SINT_16 = 0x04, */ +/* VGT_GRP_SINT_32 = 0x05, */ +/* VGT_GRP_FLOAT_32 = 0x06, */ +/* VGT_GRP_AUTO_PRIM = 0x07, */ +/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */ +/* Z_OFFSET_mask = 0x0f << 20, */ +/* Z_OFFSET_shift = 20, */ +/* W_CONV_mask = 0x0f << 24, */ +/* W_CONV_shift = 24, */ +/* VGT_GRP_INDEX_16 = 0x00, */ +/* VGT_GRP_INDEX_32 = 0x01, */ +/* VGT_GRP_UINT_16 = 0x02, */ +/* VGT_GRP_UINT_32 = 0x03, */ +/* VGT_GRP_SINT_16 = 0x04, */ +/* VGT_GRP_SINT_32 = 0x05, */ +/* VGT_GRP_FLOAT_32 = 0x06, */ +/* VGT_GRP_AUTO_PRIM = 0x07, */ +/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */ +/* W_OFFSET_mask = 0x0f << 28, */ +/* W_OFFSET_shift = 28, */ + VGT_GS_MODE = 0x00028a40, + MODE_mask = 0x03 << 0, + MODE_shift = 0, + GS_OFF = 0x00, + GS_SCENARIO_A = 0x01, + GS_SCENARIO_B = 0x02, + GS_SCENARIO_G = 0x03, + ES_PASSTHRU_bit = 1 << 2, + CUT_MODE_mask = 0x03 << 3, + CUT_MODE_shift = 3, + GS_CUT_1024 = 0x00, + GS_CUT_512 = 0x01, + GS_CUT_256 = 0x02, + GS_CUT_128 = 0x03, + PA_SC_MPASS_PS_CNTL = 0x00028a48, + MPASS_PIX_VEC_PER_PASS_mask = 0xfffff << 0, + MPASS_PIX_VEC_PER_PASS_shift = 0, + MPASS_PS_ENA_bit = 1 << 31, + PA_SC_MODE_CNTL = 0x00028a4c, + MSAA_ENABLE_bit = 1 << 0, + CLIPRECT_ENABLE_bit = 1 << 1, + LINE_STIPPLE_ENABLE_bit = 1 << 2, + MULTI_CHIP_PRIM_DISCARD_ENAB_bit = 1 << 3, + WALK_ORDER_ENABLE_bit = 1 << 4, + HALVE_DETAIL_SAMPLE_PERF_bit = 1 << 5, + WALK_SIZE_bit = 1 << 6, + WALK_ALIGNMENT_bit = 1 << 7, + WALK_ALIGN8_PRIM_FITS_ST_bit = 1 << 8, + TILE_COVER_NO_SCISSOR_bit = 1 << 9, + KILL_PIX_POST_HI_Z_bit = 1 << 10, + KILL_PIX_POST_DETAIL_MASK_bit = 1 << 11, + MULTI_CHIP_SUPERTILE_ENABLE_bit = 1 << 12, + TILE_COVER_DISABLE_bit = 1 << 13, + FORCE_EOV_CNTDWN_ENABLE_bit = 1 << 14, + FORCE_EOV_TILE_ENABLE_bit = 1 << 15, + FORCE_EOV_REZ_ENABLE_bit = 1 << 16, + PS_ITER_SAMPLE_bit = 1 << 17, + VGT_ENHANCE = 0x00028a50, + VGT_ENHANCE__MI_TIMESTAMP_RES_mask = 0x03 << 0, + VGT_ENHANCE__MI_TIMESTAMP_RES_shift = 0, + X_0_992_CLOCKS_LATENCY_RANGE_IN_STEPS_OF_32 = 0x00, + X_0_496_CLOCKS_LATENCY_RANGE_IN_STEPS_OF_16 = 0x01, + X_0_248_CLOCKS_LATENCY_RANGE_IN_STEPS_OF_8 = 0x02, + X_0_124_CLOCKS_LATENCY_RANGE_IN_STEPS_OF_4 = 0x03, + MISC_mask = 0x3fffffff << 2, + MISC_shift = 2, + VGT_GS_OUT_PRIM_TYPE = 0x00028a6c, + OUTPRIM_TYPE_mask = 0x3f << 0, + OUTPRIM_TYPE_shift = 0, + POINTLIST = 0x00, + LINESTRIP = 0x01, + TRISTRIP = 0x02, + VGT_DMA_SIZE = 0x00028a74, + VGT_DMA_INDEX_TYPE = 0x00028a7c, +/* INDEX_TYPE_mask = 0x03 << 0, */ +/* INDEX_TYPE_shift = 0, */ + VGT_INDEX_16 = 0x00, + VGT_INDEX_32 = 0x01, + SWAP_MODE_mask = 0x03 << 2, + SWAP_MODE_shift = 2, + VGT_DMA_SWAP_NONE = 0x00, + VGT_DMA_SWAP_16_BIT = 0x01, + VGT_DMA_SWAP_32_BIT = 0x02, + VGT_DMA_SWAP_WORD = 0x03, + VGT_PRIMITIVEID_EN = 0x00028a84, + PRIMITIVEID_EN_bit = 1 << 0, + VGT_DMA_NUM_INSTANCES = 0x00028a88, + VGT_EVENT_INITIATOR = 0x00028a90, + EVENT_TYPE_mask = 0x3f << 0, + EVENT_TYPE_shift = 0, + CACHE_FLUSH_TS = 0x04, + CONTEXT_DONE = 0x05, + CACHE_FLUSH = 0x06, + VIZQUERY_START = 0x07, + VIZQUERY_END = 0x08, + SC_WAIT_WC = 0x09, + MPASS_PS_CP_REFETCH = 0x0a, + MPASS_PS_RST_START = 0x0b, + MPASS_PS_INCR_START = 0x0c, + RST_PIX_CNT = 0x0d, + RST_VTX_CNT = 0x0e, + VS_PARTIAL_FLUSH = 0x0f, + PS_PARTIAL_FLUSH = 0x10, + CACHE_FLUSH_AND_INV_TS_EVENT = 0x14, + ZPASS_DONE = 0x15, + CACHE_FLUSH_AND_INV_EVENT = 0x16, + PERFCOUNTER_START = 0x17, + PERFCOUNTER_STOP = 0x18, + PIPELINESTAT_START = 0x19, + PIPELINESTAT_STOP = 0x1a, + PERFCOUNTER_SAMPLE = 0x1b, + FLUSH_ES_OUTPUT = 0x1c, + FLUSH_GS_OUTPUT = 0x1d, + SAMPLE_PIPELINESTAT = 0x1e, + SO_VGTSTREAMOUT_FLUSH = 0x1f, + SAMPLE_STREAMOUTSTATS = 0x20, + RESET_VTX_CNT = 0x21, + BLOCK_CONTEXT_DONE = 0x22, + CR_CONTEXT_DONE = 0x23, + VGT_FLUSH = 0x24, + CR_DONE_TS = 0x25, + SQ_NON_EVENT = 0x26, + SC_SEND_DB_VPZ = 0x27, + BOTTOM_OF_PIPE_TS = 0x28, + DB_CACHE_FLUSH_AND_INV = 0x2a, + ADDRESS_HI_mask = 0xff << 19, + ADDRESS_HI_shift = 19, + EXTENDED_EVENT_bit = 1 << 27, + VGT_MULTI_PRIM_IB_RESET_EN = 0x00028a94, + RESET_EN_bit = 1 << 0, + VGT_INSTANCE_STEP_RATE_0 = 0x00028aa0, + VGT_INSTANCE_STEP_RATE_1 = 0x00028aa4, + VGT_STRMOUT_EN = 0x00028ab0, + STREAMOUT_bit = 1 << 0, + VGT_REUSE_OFF = 0x00028ab4, + REUSE_OFF_bit = 1 << 0, + VGT_VTX_CNT_EN = 0x00028ab8, + VTX_CNT_EN_bit = 1 << 0, + VGT_STRMOUT_BUFFER_SIZE_0 = 0x00028ad0, + VGT_STRMOUT_VTX_STRIDE_0 = 0x00028ad4, + VGT_STRMOUT_VTX_STRIDE_0__STRIDE_mask = 0x3ff << 0, + VGT_STRMOUT_VTX_STRIDE_0__STRIDE_shift = 0, + VGT_STRMOUT_BUFFER_BASE_0 = 0x00028ad8, + VGT_STRMOUT_BUFFER_OFFSET_0 = 0x00028adc, + VGT_STRMOUT_BUFFER_SIZE_1 = 0x00028ae0, + VGT_STRMOUT_VTX_STRIDE_1 = 0x00028ae4, + VGT_STRMOUT_VTX_STRIDE_1__STRIDE_mask = 0x3ff << 0, + VGT_STRMOUT_VTX_STRIDE_1__STRIDE_shift = 0, + VGT_STRMOUT_BUFFER_BASE_1 = 0x00028ae8, + VGT_STRMOUT_BUFFER_OFFSET_1 = 0x00028aec, + VGT_STRMOUT_BUFFER_SIZE_2 = 0x00028af0, + VGT_STRMOUT_VTX_STRIDE_2 = 0x00028af4, + VGT_STRMOUT_VTX_STRIDE_2__STRIDE_mask = 0x3ff << 0, + VGT_STRMOUT_VTX_STRIDE_2__STRIDE_shift = 0, + VGT_STRMOUT_BUFFER_BASE_2 = 0x00028af8, + VGT_STRMOUT_BUFFER_OFFSET_2 = 0x00028afc, + VGT_STRMOUT_BUFFER_SIZE_3 = 0x00028b00, + VGT_STRMOUT_VTX_STRIDE_3 = 0x00028b04, + VGT_STRMOUT_VTX_STRIDE_3__STRIDE_mask = 0x3ff << 0, + VGT_STRMOUT_VTX_STRIDE_3__STRIDE_shift = 0, + VGT_STRMOUT_BUFFER_BASE_3 = 0x00028b08, + VGT_STRMOUT_BUFFER_OFFSET_3 = 0x00028b0c, + VGT_STRMOUT_BASE_OFFSET_0 = 0x00028b10, + VGT_STRMOUT_BASE_OFFSET_1 = 0x00028b14, + VGT_STRMOUT_BASE_OFFSET_2 = 0x00028b18, + VGT_STRMOUT_BASE_OFFSET_3 = 0x00028b1c, + VGT_STRMOUT_BUFFER_EN = 0x00028b20, + BUFFER_0_EN_bit = 1 << 0, + BUFFER_1_EN_bit = 1 << 1, + BUFFER_2_EN_bit = 1 << 2, + BUFFER_3_EN_bit = 1 << 3, + VGT_STRMOUT_DRAW_OPAQUE_OFFSET = 0x00028b28, + VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE = 0x00028b2c, + VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE = 0x00028b30, + VGT_STRMOUT_BASE_OFFSET_HI_0 = 0x00028b44, + VGT_STRMOUT_BASE_OFFSET_HI_0__BASE_OFFSET_mask = 0x3f << 0, + VGT_STRMOUT_BASE_OFFSET_HI_0__BASE_OFFSET_shift = 0, + VGT_STRMOUT_BASE_OFFSET_HI_1 = 0x00028b48, + VGT_STRMOUT_BASE_OFFSET_HI_1__BASE_OFFSET_mask = 0x3f << 0, + VGT_STRMOUT_BASE_OFFSET_HI_1__BASE_OFFSET_shift = 0, + VGT_STRMOUT_BASE_OFFSET_HI_2 = 0x00028b4c, + VGT_STRMOUT_BASE_OFFSET_HI_2__BASE_OFFSET_mask = 0x3f << 0, + VGT_STRMOUT_BASE_OFFSET_HI_2__BASE_OFFSET_shift = 0, + VGT_STRMOUT_BASE_OFFSET_HI_3 = 0x00028b50, + VGT_STRMOUT_BASE_OFFSET_HI_3__BASE_OFFSET_mask = 0x3f << 0, + VGT_STRMOUT_BASE_OFFSET_HI_3__BASE_OFFSET_shift = 0, + PA_SC_LINE_CNTL = 0x00028c00, + BRES_CNTL_mask = 0xff << 0, + BRES_CNTL_shift = 0, + USE_BRES_CNTL_bit = 1 << 8, + EXPAND_LINE_WIDTH_bit = 1 << 9, + LAST_PIXEL_bit = 1 << 10, + PA_SC_AA_CONFIG = 0x00028c04, + MSAA_NUM_SAMPLES_mask = 0x03 << 0, + MSAA_NUM_SAMPLES_shift = 0, + AA_MASK_CENTROID_DTMN_bit = 1 << 4, + MAX_SAMPLE_DIST_mask = 0x0f << 13, + MAX_SAMPLE_DIST_shift = 13, + PA_SU_VTX_CNTL = 0x00028c08, + PIX_CENTER_bit = 1 << 0, + PA_SU_VTX_CNTL__ROUND_MODE_mask = 0x03 << 1, + PA_SU_VTX_CNTL__ROUND_MODE_shift = 1, + X_TRUNCATE = 0x00, + X_ROUND = 0x01, + X_ROUND_TO_EVEN = 0x02, + X_ROUND_TO_ODD = 0x03, + QUANT_MODE_mask = 0x07 << 3, + QUANT_MODE_shift = 3, + X_1_16TH = 0x00, + X_1_8TH = 0x01, + X_1_4TH = 0x02, + X_1_2 = 0x03, + X_1 = 0x04, + X_1_256TH = 0x05, + PA_CL_GB_VERT_CLIP_ADJ = 0x00028c0c, + PA_CL_GB_VERT_DISC_ADJ = 0x00028c10, + PA_CL_GB_HORZ_CLIP_ADJ = 0x00028c14, + PA_CL_GB_HORZ_DISC_ADJ = 0x00028c18, + PA_SC_AA_SAMPLE_LOCS_MCTX = 0x00028c1c, +/* S0_X_mask = 0x0f << 0, */ +/* S0_X_shift = 0, */ +/* S0_Y_mask = 0x0f << 4, */ +/* S0_Y_shift = 4, */ +/* S1_X_mask = 0x0f << 8, */ +/* S1_X_shift = 8, */ +/* S1_Y_mask = 0x0f << 12, */ +/* S1_Y_shift = 12, */ +/* S2_X_mask = 0x0f << 16, */ +/* S2_X_shift = 16, */ +/* S2_Y_mask = 0x0f << 20, */ +/* S2_Y_shift = 20, */ +/* S3_X_mask = 0x0f << 24, */ +/* S3_X_shift = 24, */ +/* S3_Y_mask = 0x0f << 28, */ +/* S3_Y_shift = 28, */ + PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX = 0x00028c20, +/* S4_X_mask = 0x0f << 0, */ +/* S4_X_shift = 0, */ +/* S4_Y_mask = 0x0f << 4, */ +/* S4_Y_shift = 4, */ +/* S5_X_mask = 0x0f << 8, */ +/* S5_X_shift = 8, */ +/* S5_Y_mask = 0x0f << 12, */ +/* S5_Y_shift = 12, */ +/* S6_X_mask = 0x0f << 16, */ +/* S6_X_shift = 16, */ +/* S6_Y_mask = 0x0f << 20, */ +/* S6_Y_shift = 20, */ +/* S7_X_mask = 0x0f << 24, */ +/* S7_X_shift = 24, */ +/* S7_Y_mask = 0x0f << 28, */ +/* S7_Y_shift = 28, */ + CB_CLRCMP_CONTROL = 0x00028c30, + CLRCMP_FCN_SRC_mask = 0x07 << 0, + CLRCMP_FCN_SRC_shift = 0, + CLRCMP_DRAW_ALWAYS = 0x00, + CLRCMP_DRAW_NEVER = 0x01, + CLRCMP_DRAW_ON_NEQ = 0x04, + CLRCMP_DRAW_ON_EQ = 0x05, + CLRCMP_FCN_DST_mask = 0x07 << 8, + CLRCMP_FCN_DST_shift = 8, +/* CLRCMP_DRAW_ALWAYS = 0x00, */ +/* CLRCMP_DRAW_NEVER = 0x01, */ +/* CLRCMP_DRAW_ON_NEQ = 0x04, */ +/* CLRCMP_DRAW_ON_EQ = 0x05, */ + CLRCMP_FCN_SEL_mask = 0x03 << 24, + CLRCMP_FCN_SEL_shift = 24, + CLRCMP_SEL_DST = 0x00, + CLRCMP_SEL_SRC = 0x01, + CLRCMP_SEL_AND = 0x02, + CB_CLRCMP_SRC = 0x00028c34, + CB_CLRCMP_DST = 0x00028c38, + CB_CLRCMP_MSK = 0x00028c3c, + PA_SC_AA_MASK = 0x00028c48, + VGT_VERTEX_REUSE_BLOCK_CNTL = 0x00028c58, + VTX_REUSE_DEPTH_mask = 0xff << 0, + VTX_REUSE_DEPTH_shift = 0, + VGT_OUT_DEALLOC_CNTL = 0x00028c5c, + DEALLOC_DIST_mask = 0x7f << 0, + DEALLOC_DIST_shift = 0, + DB_RENDER_CONTROL = 0x00028d0c, + DEPTH_CLEAR_ENABLE_bit = 1 << 0, + STENCIL_CLEAR_ENABLE_bit = 1 << 1, + DEPTH_COPY_bit = 1 << 2, + STENCIL_COPY_bit = 1 << 3, + RESUMMARIZE_ENABLE_bit = 1 << 4, + STENCIL_COMPRESS_DISABLE_bit = 1 << 5, + DEPTH_COMPRESS_DISABLE_bit = 1 << 6, + COPY_CENTROID_bit = 1 << 7, + COPY_SAMPLE_mask = 0x07 << 8, + COPY_SAMPLE_shift = 8, + ZPASS_INCREMENT_DISABLE_bit = 1 << 11, + DB_RENDER_OVERRIDE = 0x00028d10, + FORCE_HIZ_ENABLE_mask = 0x03 << 0, + FORCE_HIZ_ENABLE_shift = 0, + FORCE_OFF = 0x00, + FORCE_ENABLE = 0x01, + FORCE_DISABLE = 0x02, + FORCE_RESERVED = 0x03, + FORCE_HIS_ENABLE0_mask = 0x03 << 2, + FORCE_HIS_ENABLE0_shift = 2, +/* FORCE_OFF = 0x00, */ +/* FORCE_ENABLE = 0x01, */ +/* FORCE_DISABLE = 0x02, */ +/* FORCE_RESERVED = 0x03, */ + FORCE_HIS_ENABLE1_mask = 0x03 << 4, + FORCE_HIS_ENABLE1_shift = 4, +/* FORCE_OFF = 0x00, */ +/* FORCE_ENABLE = 0x01, */ +/* FORCE_DISABLE = 0x02, */ +/* FORCE_RESERVED = 0x03, */ + FORCE_SHADER_Z_ORDER_bit = 1 << 6, + FAST_Z_DISABLE_bit = 1 << 7, + FAST_STENCIL_DISABLE_bit = 1 << 8, + NOOP_CULL_DISABLE_bit = 1 << 9, + FORCE_COLOR_KILL_bit = 1 << 10, + FORCE_Z_READ_bit = 1 << 11, + FORCE_STENCIL_READ_bit = 1 << 12, + FORCE_FULL_Z_RANGE_mask = 0x03 << 13, + FORCE_FULL_Z_RANGE_shift = 13, +/* FORCE_OFF = 0x00, */ +/* FORCE_ENABLE = 0x01, */ +/* FORCE_DISABLE = 0x02, */ +/* FORCE_RESERVED = 0x03, */ + FORCE_QC_SMASK_CONFLICT_bit = 1 << 15, + DISABLE_VIEWPORT_CLAMP_bit = 1 << 16, + IGNORE_SC_ZRANGE_bit = 1 << 17, + DB_HTILE_SURFACE = 0x00028d24, + HTILE_WIDTH_bit = 1 << 0, + HTILE_HEIGHT_bit = 1 << 1, + LINEAR_bit = 1 << 2, + FULL_CACHE_bit = 1 << 3, + HTILE_USES_PRELOAD_WIN_bit = 1 << 4, + PRELOAD_bit = 1 << 5, + PREFETCH_WIDTH_mask = 0x3f << 6, + PREFETCH_WIDTH_shift = 6, + PREFETCH_HEIGHT_mask = 0x3f << 12, + PREFETCH_HEIGHT_shift = 12, + DB_SRESULTS_COMPARE_STATE1 = 0x00028d2c, + COMPAREFUNC1_mask = 0x07 << 0, + COMPAREFUNC1_shift = 0, +/* REF_NEVER = 0x00, */ +/* REF_LESS = 0x01, */ +/* REF_EQUAL = 0x02, */ +/* REF_LEQUAL = 0x03, */ +/* REF_GREATER = 0x04, */ +/* REF_NOTEQUAL = 0x05, */ +/* REF_GEQUAL = 0x06, */ +/* REF_ALWAYS = 0x07, */ + COMPAREVALUE1_mask = 0xff << 4, + COMPAREVALUE1_shift = 4, + COMPAREMASK1_mask = 0xff << 12, + COMPAREMASK1_shift = 12, + ENABLE1_bit = 1 << 24, + DB_PRELOAD_CONTROL = 0x00028d30, + START_X_mask = 0xff << 0, + START_X_shift = 0, + START_Y_mask = 0xff << 8, + START_Y_shift = 8, + MAX_X_mask = 0xff << 16, + MAX_X_shift = 16, + MAX_Y_mask = 0xff << 24, + MAX_Y_shift = 24, + DB_PREFETCH_LIMIT = 0x00028d34, + DEPTH_HEIGHT_TILE_MAX_mask = 0x3ff << 0, + DEPTH_HEIGHT_TILE_MAX_shift = 0, + PA_SU_POLY_OFFSET_DB_FMT_CNTL = 0x00028df8, + POLY_OFFSET_NEG_NUM_DB_BITS_mask = 0xff << 0, + POLY_OFFSET_NEG_NUM_DB_BITS_shift = 0, + POLY_OFFSET_DB_IS_FLOAT_FMT_bit = 1 << 8, + PA_SU_POLY_OFFSET_CLAMP = 0x00028dfc, + PA_SU_POLY_OFFSET_FRONT_SCALE = 0x00028e00, + PA_SU_POLY_OFFSET_FRONT_OFFSET = 0x00028e04, + PA_SU_POLY_OFFSET_BACK_SCALE = 0x00028e08, + PA_SU_POLY_OFFSET_BACK_OFFSET = 0x00028e0c, + PA_CL_POINT_X_RAD = 0x00028e10, + PA_CL_POINT_Y_RAD = 0x00028e14, + PA_CL_POINT_SIZE = 0x00028e18, + PA_CL_POINT_CULL_RAD = 0x00028e1c, + PA_CL_UCP_0_X = 0x00028e20, + PA_CL_UCP_0_X_num = 6, + PA_CL_UCP_0_X_offset = 16, + PA_CL_UCP_0_Y = 0x00028e24, + PA_CL_UCP_0_Y_num = 6, + PA_CL_UCP_0_Y_offset = 16, + PA_CL_UCP_0_Z = 0x00028e28, + PA_CL_UCP_0_Z_num = 6, + PA_CL_UCP_0_Z_offset = 16, + SQ_ALU_CONSTANT0_0 = 0x00030000, + SQ_ALU_CONSTANT1_0 = 0x00030004, + SQ_ALU_CONSTANT2_0 = 0x00030008, + SQ_ALU_CONSTANT3_0 = 0x0003000c, + SQ_VTX_CONSTANT_WORD0_0 = 0x00038000, + SQ_TEX_RESOURCE_WORD0_0 = 0x00038000, + DIM_mask = 0x07 << 0, + DIM_shift = 0, + SQ_TEX_DIM_1D = 0x00, + SQ_TEX_DIM_2D = 0x01, + SQ_TEX_DIM_3D = 0x02, + SQ_TEX_DIM_CUBEMAP = 0x03, + SQ_TEX_DIM_1D_ARRAY = 0x04, + SQ_TEX_DIM_2D_ARRAY = 0x05, + SQ_TEX_DIM_2D_MSAA = 0x06, + SQ_TEX_DIM_2D_ARRAY_MSAA = 0x07, + SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask = 0x0f << 3, + SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift = 3, + TILE_TYPE_bit = 1 << 7, + PITCH_mask = 0x7ff << 8, + PITCH_shift = 8, + TEX_WIDTH_mask = 0x1fff << 19, + TEX_WIDTH_shift = 19, + SQ_VTX_CONSTANT_WORD1_0 = 0x00038004, + SQ_TEX_RESOURCE_WORD1_0 = 0x00038004, + TEX_HEIGHT_mask = 0x1fff << 0, + TEX_HEIGHT_shift = 0, + TEX_DEPTH_mask = 0x1fff << 13, + TEX_DEPTH_shift = 13, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask = 0x3f << 26, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift = 26, + SQ_VTX_CONSTANT_WORD2_0 = 0x00038008, + BASE_ADDRESS_HI_mask = 0xff << 0, + BASE_ADDRESS_HI_shift = 0, + SQ_VTX_CONSTANT_WORD2_0__STRIDE_mask = 0x7ff << 8, + SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift = 8, + SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit = 1 << 19, + SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_mask = 0x3f << 20, + SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift = 20, + SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask = 0x03 << 26, + SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift = 26, +/* SQ_NUM_FORMAT_NORM = 0x00, */ +/* SQ_NUM_FORMAT_INT = 0x01, */ +/* SQ_NUM_FORMAT_SCALED = 0x02, */ + SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit = 1 << 28, + SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit = 1 << 29, + SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_mask = 0x03 << 30, + SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift = 30, +/* SQ_ENDIAN_NONE = 0x00, */ +/* SQ_ENDIAN_8IN16 = 0x01, */ +/* SQ_ENDIAN_8IN32 = 0x02, */ + SQ_TEX_RESOURCE_WORD2_0 = 0x00038008, + SQ_VTX_CONSTANT_WORD3_0 = 0x0003800c, + MEM_REQUEST_SIZE_mask = 0x03 << 0, + MEM_REQUEST_SIZE_shift = 0, + SQ_TEX_RESOURCE_WORD3_0 = 0x0003800c, + SQ_TEX_RESOURCE_WORD4_0 = 0x00038010, + FORMAT_COMP_X_mask = 0x03 << 0, + FORMAT_COMP_X_shift = 0, + SQ_FORMAT_COMP_UNSIGNED = 0x00, + SQ_FORMAT_COMP_SIGNED = 0x01, + SQ_FORMAT_COMP_UNSIGNED_BIASED = 0x02, + FORMAT_COMP_Y_mask = 0x03 << 2, + FORMAT_COMP_Y_shift = 2, +/* SQ_FORMAT_COMP_UNSIGNED = 0x00, */ +/* SQ_FORMAT_COMP_SIGNED = 0x01, */ +/* SQ_FORMAT_COMP_UNSIGNED_BIASED = 0x02, */ + FORMAT_COMP_Z_mask = 0x03 << 4, + FORMAT_COMP_Z_shift = 4, +/* SQ_FORMAT_COMP_UNSIGNED = 0x00, */ +/* SQ_FORMAT_COMP_SIGNED = 0x01, */ +/* SQ_FORMAT_COMP_UNSIGNED_BIASED = 0x02, */ + FORMAT_COMP_W_mask = 0x03 << 6, + FORMAT_COMP_W_shift = 6, +/* SQ_FORMAT_COMP_UNSIGNED = 0x00, */ +/* SQ_FORMAT_COMP_SIGNED = 0x01, */ +/* SQ_FORMAT_COMP_UNSIGNED_BIASED = 0x02, */ + SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_mask = 0x03 << 8, + SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift = 8, +/* SQ_NUM_FORMAT_NORM = 0x00, */ +/* SQ_NUM_FORMAT_INT = 0x01, */ +/* SQ_NUM_FORMAT_SCALED = 0x02, */ + SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit = 1 << 10, + SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit = 1 << 11, + SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_mask = 0x03 << 12, + SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift = 12, +/* SQ_ENDIAN_NONE = 0x00, */ +/* SQ_ENDIAN_8IN16 = 0x01, */ +/* SQ_ENDIAN_8IN32 = 0x02, */ + REQUEST_SIZE_mask = 0x03 << 14, + REQUEST_SIZE_shift = 14, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask = 0x07 << 16, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift = 16, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask = 0x07 << 19, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift = 19, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask = 0x07 << 22, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift = 22, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask = 0x07 << 25, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift = 25, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ + BASE_LEVEL_mask = 0x0f << 28, + BASE_LEVEL_shift = 28, + SQ_TEX_RESOURCE_WORD5_0 = 0x00038014, + LAST_LEVEL_mask = 0x0f << 0, + LAST_LEVEL_shift = 0, + BASE_ARRAY_mask = 0x1fff << 4, + BASE_ARRAY_shift = 4, + LAST_ARRAY_mask = 0x1fff << 17, + LAST_ARRAY_shift = 17, + SQ_TEX_RESOURCE_WORD6_0 = 0x00038018, + MPEG_CLAMP_mask = 0x03 << 0, + MPEG_CLAMP_shift = 0, + SQ_TEX_MPEG_CLAMP_OFF = 0x00, + SQ_TEX_MPEG_9 = 0x01, + SQ_TEX_MPEG_10 = 0x02, + PERF_MODULATION_mask = 0x07 << 5, + PERF_MODULATION_shift = 5, + INTERLACED_bit = 1 << 8, + SQ_TEX_RESOURCE_WORD6_0__TYPE_mask = 0x03 << 30, + SQ_TEX_RESOURCE_WORD6_0__TYPE_shift = 30, + SQ_TEX_VTX_INVALID_TEXTURE = 0x00, + SQ_TEX_VTX_INVALID_BUFFER = 0x01, + SQ_TEX_VTX_VALID_TEXTURE = 0x02, + SQ_TEX_VTX_VALID_BUFFER = 0x03, + SQ_VTX_CONSTANT_WORD6_0 = 0x00038018, + SQ_VTX_CONSTANT_WORD6_0__TYPE_mask = 0x03 << 30, + SQ_VTX_CONSTANT_WORD6_0__TYPE_shift = 30, +/* SQ_TEX_VTX_INVALID_TEXTURE = 0x00, */ +/* SQ_TEX_VTX_INVALID_BUFFER = 0x01, */ +/* SQ_TEX_VTX_VALID_TEXTURE = 0x02, */ +/* SQ_TEX_VTX_VALID_BUFFER = 0x03, */ + SQ_TEX_SAMPLER_WORD0_0 = 0x0003c000, + SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_mask = 0x07 << 0, + SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift = 0, + SQ_TEX_WRAP = 0x00, + SQ_TEX_MIRROR = 0x01, + SQ_TEX_CLAMP_LAST_TEXEL = 0x02, + SQ_TEX_MIRROR_ONCE_LAST_TEXEL = 0x03, + SQ_TEX_CLAMP_HALF_BORDER = 0x04, + SQ_TEX_MIRROR_ONCE_HALF_BORDER = 0x05, + SQ_TEX_CLAMP_BORDER = 0x06, + SQ_TEX_MIRROR_ONCE_BORDER = 0x07, + CLAMP_Y_mask = 0x07 << 3, + CLAMP_Y_shift = 3, +/* SQ_TEX_WRAP = 0x00, */ +/* SQ_TEX_MIRROR = 0x01, */ +/* SQ_TEX_CLAMP_LAST_TEXEL = 0x02, */ +/* SQ_TEX_MIRROR_ONCE_LAST_TEXEL = 0x03, */ +/* SQ_TEX_CLAMP_HALF_BORDER = 0x04, */ +/* SQ_TEX_MIRROR_ONCE_HALF_BORDER = 0x05, */ +/* SQ_TEX_CLAMP_BORDER = 0x06, */ +/* SQ_TEX_MIRROR_ONCE_BORDER = 0x07, */ + CLAMP_Z_mask = 0x07 << 6, + CLAMP_Z_shift = 6, +/* SQ_TEX_WRAP = 0x00, */ +/* SQ_TEX_MIRROR = 0x01, */ +/* SQ_TEX_CLAMP_LAST_TEXEL = 0x02, */ +/* SQ_TEX_MIRROR_ONCE_LAST_TEXEL = 0x03, */ +/* SQ_TEX_CLAMP_HALF_BORDER = 0x04, */ +/* SQ_TEX_MIRROR_ONCE_HALF_BORDER = 0x05, */ +/* SQ_TEX_CLAMP_BORDER = 0x06, */ +/* SQ_TEX_MIRROR_ONCE_BORDER = 0x07, */ + XY_MAG_FILTER_mask = 0x07 << 9, + XY_MAG_FILTER_shift = 9, + SQ_TEX_XY_FILTER_POINT = 0x00, + SQ_TEX_XY_FILTER_BILINEAR = 0x01, + SQ_TEX_XY_FILTER_BICUBIC = 0x02, + XY_MIN_FILTER_mask = 0x07 << 12, + XY_MIN_FILTER_shift = 12, +/* SQ_TEX_XY_FILTER_POINT = 0x00, */ +/* SQ_TEX_XY_FILTER_BILINEAR = 0x01, */ +/* SQ_TEX_XY_FILTER_BICUBIC = 0x02, */ + Z_FILTER_mask = 0x03 << 15, + Z_FILTER_shift = 15, + SQ_TEX_Z_FILTER_NONE = 0x00, + SQ_TEX_Z_FILTER_POINT = 0x01, + SQ_TEX_Z_FILTER_LINEAR = 0x02, + MIP_FILTER_mask = 0x03 << 17, + MIP_FILTER_shift = 17, +/* SQ_TEX_Z_FILTER_NONE = 0x00, */ +/* SQ_TEX_Z_FILTER_POINT = 0x01, */ +/* SQ_TEX_Z_FILTER_LINEAR = 0x02, */ + BORDER_COLOR_TYPE_mask = 0x03 << 22, + BORDER_COLOR_TYPE_shift = 22, + SQ_TEX_BORDER_COLOR_TRANS_BLACK = 0x00, + SQ_TEX_BORDER_COLOR_OPAQUE_BLACK = 0x01, + SQ_TEX_BORDER_COLOR_OPAQUE_WHITE = 0x02, + SQ_TEX_BORDER_COLOR_REGISTER = 0x03, + POINT_SAMPLING_CLAMP_bit = 1 << 24, + TEX_ARRAY_OVERRIDE_bit = 1 << 25, + DEPTH_COMPARE_FUNCTION_mask = 0x07 << 26, + DEPTH_COMPARE_FUNCTION_shift = 26, + SQ_TEX_DEPTH_COMPARE_NEVER = 0x00, + SQ_TEX_DEPTH_COMPARE_LESS = 0x01, + SQ_TEX_DEPTH_COMPARE_EQUAL = 0x02, + SQ_TEX_DEPTH_COMPARE_LESSEQUAL = 0x03, + SQ_TEX_DEPTH_COMPARE_GREATER = 0x04, + SQ_TEX_DEPTH_COMPARE_NOTEQUAL = 0x05, + SQ_TEX_DEPTH_COMPARE_GREATEREQUAL = 0x06, + SQ_TEX_DEPTH_COMPARE_ALWAYS = 0x07, + CHROMA_KEY_mask = 0x03 << 29, + CHROMA_KEY_shift = 29, + SQ_TEX_CHROMA_KEY_DISABLED = 0x00, + SQ_TEX_CHROMA_KEY_KILL = 0x01, + SQ_TEX_CHROMA_KEY_BLEND = 0x02, + LOD_USES_MINOR_AXIS_bit = 1 << 31, + SQ_TEX_SAMPLER_WORD1_0 = 0x0003c004, + MIN_LOD_mask = 0x3ff << 0, + MIN_LOD_shift = 0, + MAX_LOD_mask = 0x3ff << 10, + MAX_LOD_shift = 10, + SQ_TEX_SAMPLER_WORD1_0__LOD_BIAS_mask = 0xfff << 20, + SQ_TEX_SAMPLER_WORD1_0__LOD_BIAS_shift = 20, + SQ_TEX_SAMPLER_WORD2_0 = 0x0003c008, + LOD_BIAS_SEC_mask = 0xfff << 0, + LOD_BIAS_SEC_shift = 0, + MC_COORD_TRUNCATE_bit = 1 << 12, + SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit = 1 << 13, + HIGH_PRECISION_FILTER_bit = 1 << 14, + PERF_MIP_mask = 0x07 << 15, + PERF_MIP_shift = 15, + PERF_Z_mask = 0x03 << 18, + PERF_Z_shift = 18, + FETCH_4_bit = 1 << 26, + SAMPLE_IS_PCF_bit = 1 << 27, + SQ_TEX_SAMPLER_WORD2_0__TYPE_bit = 1 << 31, + SQ_VTX_BASE_VTX_LOC = 0x0003cff0, + SQ_VTX_START_INST_LOC = 0x0003cff4, + SQ_LOOP_CONST_DX10_0 = 0x0003e200, + SQ_LOOP_CONST_0 = 0x0003e200, + SQ_LOOP_CONST_0__COUNT_mask = 0xfff << 0, + SQ_LOOP_CONST_0__COUNT_shift = 0, + INIT_mask = 0xfff << 12, + INIT_shift = 12, + INC_mask = 0xff << 24, + INC_shift = 24, + SQ_BOOL_CONST_0 = 0x0003e380, + SQ_BOOL_CONST_0_num = 3, + +} ; + +#endif /* _AUTOREGS */ + diff --git a/src/mesa/drivers/dri/r600/r600_reg_r6xx.h b/src/mesa/drivers/dri/r600/r600_reg_r6xx.h new file mode 100644 index 0000000000..f7702c46de --- /dev/null +++ b/src/mesa/drivers/dri/r600/r600_reg_r6xx.h @@ -0,0 +1,492 @@ +/* + * RadeonHD R6xx, R7xx Register documentation + * + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * Copyright (C) 2008-2009 Matthias Hopf + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _R600_REG_R6xx_H_ +#define _R600_REG_R6xx_H_ + +/* + * Registers for R6xx chips that are not documented yet + */ + +enum { + + MM_INDEX = 0x0000, + MM_DATA = 0x0004, + + SRBM_STATUS = 0x0e50, + RLC_RQ_PENDING_bit = 1 << 3, + RCU_RQ_PENDING_bit = 1 << 4, + GRBM_RQ_PENDING_bit = 1 << 5, + HI_RQ_PENDING_bit = 1 << 6, + IO_EXTERN_SIGNAL_bit = 1 << 7, + VMC_BUSY_bit = 1 << 8, + MCB_BUSY_bit = 1 << 9, + MCDZ_BUSY_bit = 1 << 10, + MCDY_BUSY_bit = 1 << 11, + MCDX_BUSY_bit = 1 << 12, + MCDW_BUSY_bit = 1 << 13, + SEM_BUSY_bit = 1 << 14, + SRBM_STATUS__RLC_BUSY_bit = 1 << 15, + PDMA_BUSY_bit = 1 << 16, + IH_BUSY_bit = 1 << 17, + CSC_BUSY_bit = 1 << 20, + CMC7_BUSY_bit = 1 << 21, + CMC6_BUSY_bit = 1 << 22, + CMC5_BUSY_bit = 1 << 23, + CMC4_BUSY_bit = 1 << 24, + CMC3_BUSY_bit = 1 << 25, + CMC2_BUSY_bit = 1 << 26, + CMC1_BUSY_bit = 1 << 27, + CMC0_BUSY_bit = 1 << 28, + BIF_BUSY_bit = 1 << 29, + IDCT_BUSY_bit = 1 << 30, + + SRBM_READ_ERROR = 0x0e98, + READ_ADDRESS_mask = 0xffff << 2, + READ_ADDRESS_shift = 2, + READ_REQUESTER_HI_bit = 1 << 24, + READ_REQUESTER_GRBM_bit = 1 << 25, + READ_REQUESTER_RCU_bit = 1 << 26, + READ_REQUESTER_RLC_bit = 1 << 27, + READ_ERROR_bit = 1 << 31, + + SRBM_INT_STATUS = 0x0ea4, + RDERR_INT_STAT_bit = 1 << 0, + GFX_CNTX_SWITCH_INT_STAT_bit = 1 << 1, + SRBM_INT_ACK = 0x0ea8, + RDERR_INT_ACK_bit = 1 << 0, + GFX_CNTX_SWITCH_INT_ACK_bit = 1 << 1, + + R6XX_MC_VM_FB_LOCATION = 0x2180, + + VENDOR_DEVICE_ID = 0x4000, + + D1GRPH_PRIMARY_SURFACE_ADDRESS = 0x6110, + D1GRPH_PITCH = 0x6120, + D1GRPH_Y_END = 0x6138, + + GRBM_STATUS = 0x8010, + CMDFIFO_AVAIL_mask = 0x1f << 0, + CMDFIFO_AVAIL_shift = 0, + SRBM_RQ_PENDING_bit = 1 << 5, + CP_RQ_PENDING_bit = 1 << 6, + CF_RQ_PENDING_bit = 1 << 7, + PF_RQ_PENDING_bit = 1 << 8, + GRBM_EE_BUSY_bit = 1 << 10, + GRBM_STATUS__VC_BUSY_bit = 1 << 11, + DB03_CLEAN_bit = 1 << 12, + CB03_CLEAN_bit = 1 << 13, + VGT_BUSY_NO_DMA_bit = 1 << 16, + GRBM_STATUS__VGT_BUSY_bit = 1 << 17, + TA03_BUSY_bit = 1 << 18, + GRBM_STATUS__TC_BUSY_bit = 1 << 19, + SX_BUSY_bit = 1 << 20, + SH_BUSY_bit = 1 << 21, + SPI03_BUSY_bit = 1 << 22, + SMX_BUSY_bit = 1 << 23, + SC_BUSY_bit = 1 << 24, + PA_BUSY_bit = 1 << 25, + DB03_BUSY_bit = 1 << 26, + CR_BUSY_bit = 1 << 27, + CP_COHERENCY_BUSY_bit = 1 << 28, + GRBM_STATUS__CP_BUSY_bit = 1 << 29, + CB03_BUSY_bit = 1 << 30, + GUI_ACTIVE_bit = 1 << 31, + GRBM_STATUS2 = 0x8014, + CR_CLEAN_bit = 1 << 0, + SMX_CLEAN_bit = 1 << 1, + SPI0_BUSY_bit = 1 << 8, + SPI1_BUSY_bit = 1 << 9, + SPI2_BUSY_bit = 1 << 10, + SPI3_BUSY_bit = 1 << 11, + TA0_BUSY_bit = 1 << 12, + TA1_BUSY_bit = 1 << 13, + TA2_BUSY_bit = 1 << 14, + TA3_BUSY_bit = 1 << 15, + DB0_BUSY_bit = 1 << 16, + DB1_BUSY_bit = 1 << 17, + DB2_BUSY_bit = 1 << 18, + DB3_BUSY_bit = 1 << 19, + CB0_BUSY_bit = 1 << 20, + CB1_BUSY_bit = 1 << 21, + CB2_BUSY_bit = 1 << 22, + CB3_BUSY_bit = 1 << 23, + GRBM_SOFT_RESET = 0x8020, + SOFT_RESET_CP_bit = 1 << 0, + SOFT_RESET_CB_bit = 1 << 1, + SOFT_RESET_CR_bit = 1 << 2, + SOFT_RESET_DB_bit = 1 << 3, + SOFT_RESET_PA_bit = 1 << 5, + SOFT_RESET_SC_bit = 1 << 6, + SOFT_RESET_SMX_bit = 1 << 7, + SOFT_RESET_SPI_bit = 1 << 8, + SOFT_RESET_SH_bit = 1 << 9, + SOFT_RESET_SX_bit = 1 << 10, + SOFT_RESET_TC_bit = 1 << 11, + SOFT_RESET_TA_bit = 1 << 12, + SOFT_RESET_VC_bit = 1 << 13, + SOFT_RESET_VGT_bit = 1 << 14, + SOFT_RESET_GRBM_GCA_bit = 1 << 15, + + WAIT_UNTIL = 0x8040, + WAIT_CP_DMA_IDLE_bit = 1 << 8, + WAIT_CMDFIFO_bit = 1 << 10, + WAIT_2D_IDLE_bit = 1 << 14, + WAIT_3D_IDLE_bit = 1 << 15, + WAIT_2D_IDLECLEAN_bit = 1 << 16, + WAIT_3D_IDLECLEAN_bit = 1 << 17, + WAIT_EXTERN_SIG_bit = 1 << 19, + CMDFIFO_ENTRIES_mask = 0x1f << 20, + CMDFIFO_ENTRIES_shift = 20, + + GRBM_READ_ERROR = 0x8058, +/* READ_ADDRESS_mask = 0xffff << 2, */ +/* READ_ADDRESS_shift = 2, */ + READ_REQUESTER_SRBM_bit = 1 << 28, + READ_REQUESTER_CP_bit = 1 << 29, + READ_REQUESTER_WU_POLL_bit = 1 << 30, +/* READ_ERROR_bit = 1 << 31, */ + + SCRATCH_REG0 = 0x8500, + SCRATCH_REG1 = 0x8504, + SCRATCH_REG2 = 0x8508, + SCRATCH_REG3 = 0x850c, + SCRATCH_REG4 = 0x8510, + SCRATCH_REG5 = 0x8514, + SCRATCH_REG6 = 0x8518, + SCRATCH_REG7 = 0x851c, + SCRATCH_UMSK = 0x8540, + SCRATCH_ADDR = 0x8544, + + CP_COHER_CNTL = 0x85f0, + DEST_BASE_0_ENA_bit = 1 << 0, + DEST_BASE_1_ENA_bit = 1 << 1, + SO0_DEST_BASE_ENA_bit = 1 << 2, + SO1_DEST_BASE_ENA_bit = 1 << 3, + SO2_DEST_BASE_ENA_bit = 1 << 4, + SO3_DEST_BASE_ENA_bit = 1 << 5, + CB0_DEST_BASE_ENA_bit = 1 << 6, + CB1_DEST_BASE_ENA_bit = 1 << 7, + CB2_DEST_BASE_ENA_bit = 1 << 8, + CB3_DEST_BASE_ENA_bit = 1 << 9, + CB4_DEST_BASE_ENA_bit = 1 << 10, + CB5_DEST_BASE_ENA_bit = 1 << 11, + CB6_DEST_BASE_ENA_bit = 1 << 12, + CB7_DEST_BASE_ENA_bit = 1 << 13, + DB_DEST_BASE_ENA_bit = 1 << 14, + CR_DEST_BASE_ENA_bit = 1 << 15, + TC_ACTION_ENA_bit = 1 << 23, + VC_ACTION_ENA_bit = 1 << 24, + CB_ACTION_ENA_bit = 1 << 25, + DB_ACTION_ENA_bit = 1 << 26, + SH_ACTION_ENA_bit = 1 << 27, + SMX_ACTION_ENA_bit = 1 << 28, + CR0_ACTION_ENA_bit = 1 << 29, + CR1_ACTION_ENA_bit = 1 << 30, + CR2_ACTION_ENA_bit = 1 << 31, + CP_COHER_SIZE = 0x85f4, + CP_COHER_BASE = 0x85f8, + CP_COHER_STATUS = 0x85fc, + MATCHING_GFX_CNTX_mask = 0xff << 0, + MATCHING_GFX_CNTX_shift = 0, + MATCHING_CR_CNTX_mask = 0xffff << 8, + MATCHING_CR_CNTX_shift = 8, + STATUS_bit = 1 << 31, + + CP_STALLED_STAT1 = 0x8674, + RBIU_TO_DMA_NOT_RDY_TO_RCV_bit = 1 << 0, + RBIU_TO_IBS_NOT_RDY_TO_RCV_bit = 1 << 1, + RBIU_TO_SEM_NOT_RDY_TO_RCV_bit = 1 << 2, + RBIU_TO_2DREGS_NOT_RDY_TO_RCV_bit = 1 << 3, + RBIU_TO_MEMWR_NOT_RDY_TO_RCV_bit = 1 << 4, + RBIU_TO_MEMRD_NOT_RDY_TO_RCV_bit = 1 << 5, + RBIU_TO_EOPD_NOT_RDY_TO_RCV_bit = 1 << 6, + RBIU_TO_RECT_NOT_RDY_TO_RCV_bit = 1 << 7, + RBIU_TO_STRMO_NOT_RDY_TO_RCV_bit = 1 << 8, + RBIU_TO_PSTAT_NOT_RDY_TO_RCV_bit = 1 << 9, + MIU_WAITING_ON_RDREQ_FREE_bit = 1 << 16, + MIU_WAITING_ON_WRREQ_FREE_bit = 1 << 17, + MIU_NEEDS_AVAIL_WRREQ_PHASE_bit = 1 << 18, + RCIU_WAITING_ON_GRBM_FREE_bit = 1 << 24, + RCIU_WAITING_ON_VGT_FREE_bit = 1 << 25, + RCIU_STALLED_ON_ME_READ_bit = 1 << 26, + RCIU_STALLED_ON_DMA_READ_bit = 1 << 27, + RCIU_HALTED_BY_REG_VIOLATION_bit = 1 << 28, + CP_STALLED_STAT2 = 0x8678, + PFP_TO_CSF_NOT_RDY_TO_RCV_bit = 1 << 0, + PFP_TO_MEQ_NOT_RDY_TO_RCV_bit = 1 << 1, + PFP_TO_VGT_NOT_RDY_TO_RCV_bit = 1 << 2, + PFP_HALTED_BY_INSTR_VIOLATION_bit = 1 << 3, + MULTIPASS_IB_PENDING_IN_PFP_bit = 1 << 4, + ME_BRUSH_WC_NOT_RDY_TO_RCV_bit = 1 << 8, + ME_STALLED_ON_BRUSH_LOGIC_bit = 1 << 9, + CR_CNTX_NOT_AVAIL_TO_ME_bit = 1 << 10, + GFX_CNTX_NOT_AVAIL_TO_ME_bit = 1 << 11, + ME_RCIU_NOT_RDY_TO_RCV_bit = 1 << 12, + ME_TO_CONST_NOT_RDY_TO_RCV_bit = 1 << 13, + ME_WAITING_DATA_FROM_PFP_bit = 1 << 14, + ME_WAITING_ON_PARTIAL_FLUSH_bit = 1 << 15, + RECT_FIFO_NEEDS_CR_RECT_DONE_bit = 1 << 16, + RECT_FIFO_NEEDS_WR_CONFIRM_bit = 1 << 17, + EOPD_FIFO_NEEDS_SC_EOP_DONE_bit = 1 << 18, + EOPD_FIFO_NEEDS_SMX_EOP_DONE_bit = 1 << 19, + EOPD_FIFO_NEEDS_WR_CONFIRM_bit = 1 << 20, + EOPD_FIFO_NEEDS_SIGNAL_SEM_bit = 1 << 21, + SO_NUMPRIM_FIFO_NEEDS_SOADDR_bit = 1 << 22, + SO_NUMPRIM_FIFO_NEEDS_NUMPRIM_bit = 1 << 23, + PIPE_STATS_FIFO_NEEDS_SAMPLE_bit = 1 << 24, + SURF_SYNC_NEEDS_IDLE_CNTXS_bit = 1 << 30, + SURF_SYNC_NEEDS_ALL_CLEAN_bit = 1 << 31, + CP_BUSY_STAT = 0x867c, + REG_BUS_FIFO_BUSY_bit = 1 << 0, + RING_FETCHING_DATA_bit = 1 << 1, + INDR1_FETCHING_DATA_bit = 1 << 2, + INDR2_FETCHING_DATA_bit = 1 << 3, + STATE_FETCHING_DATA_bit = 1 << 4, + PRED_FETCHING_DATA_bit = 1 << 5, + COHER_CNTR_NEQ_ZERO_bit = 1 << 6, + PFP_PARSING_PACKETS_bit = 1 << 7, + ME_PARSING_PACKETS_bit = 1 << 8, + RCIU_PFP_BUSY_bit = 1 << 9, + RCIU_ME_BUSY_bit = 1 << 10, + OUTSTANDING_READ_TAGS_bit = 1 << 11, + SEM_CMDFIFO_NOT_EMPTY_bit = 1 << 12, + SEM_FAILED_AND_HOLDING_bit = 1 << 13, + SEM_POLLING_FOR_PASS_bit = 1 << 14, + _3D_BUSY_bit = 1 << 15, + _2D_BUSY_bit = 1 << 16, + CP_STAT = 0x8680, + CSF_RING_BUSY_bit = 1 << 0, + CSF_WPTR_POLL_BUSY_bit = 1 << 1, + CSF_INDIRECT1_BUSY_bit = 1 << 2, + CSF_INDIRECT2_BUSY_bit = 1 << 3, + CSF_STATE_BUSY_bit = 1 << 4, + CSF_PREDICATE_BUSY_bit = 1 << 5, + CSF_BUSY_bit = 1 << 6, + MIU_RDREQ_BUSY_bit = 1 << 7, + MIU_WRREQ_BUSY_bit = 1 << 8, + ROQ_RING_BUSY_bit = 1 << 9, + ROQ_INDIRECT1_BUSY_bit = 1 << 10, + ROQ_INDIRECT2_BUSY_bit = 1 << 11, + ROQ_STATE_BUSY_bit = 1 << 12, + ROQ_PREDICATE_BUSY_bit = 1 << 13, + ROQ_ALIGN_BUSY_bit = 1 << 14, + PFP_BUSY_bit = 1 << 15, + MEQ_BUSY_bit = 1 << 16, + ME_BUSY_bit = 1 << 17, + QUERY_BUSY_bit = 1 << 18, + SEMAPHORE_BUSY_bit = 1 << 19, + INTERRUPT_BUSY_bit = 1 << 20, + SURFACE_SYNC_BUSY_bit = 1 << 21, + DMA_BUSY_bit = 1 << 22, + RCIU_BUSY_bit = 1 << 23, + CP_STAT__CP_BUSY_bit = 1 << 31, + + CP_ME_CNTL = 0x86d8, + ME_STATMUX_mask = 0xff << 0, + ME_STATMUX_shift = 0, + ME_HALT_bit = 1 << 28, + CP_ME_STATUS = 0x86dc, + + CP_RB_RPTR = 0x8700, + RB_RPTR_mask = 0xfffff << 0, + RB_RPTR_shift = 0, + CP_RB_WPTR_DELAY = 0x8704, + PRE_WRITE_TIMER_mask = 0xfffffff << 0, + PRE_WRITE_TIMER_shift = 0, + PRE_WRITE_LIMIT_mask = 0x0f << 28, + PRE_WRITE_LIMIT_shift = 28, + + CP_ROQ_RB_STAT = 0x8780, + ROQ_RPTR_PRIMARY_mask = 0x3ff << 0, + ROQ_RPTR_PRIMARY_shift = 0, + ROQ_WPTR_PRIMARY_mask = 0x3ff << 16, + ROQ_WPTR_PRIMARY_shift = 16, + CP_ROQ_IB1_STAT = 0x8784, + ROQ_RPTR_INDIRECT1_mask = 0x3ff << 0, + ROQ_RPTR_INDIRECT1_shift = 0, + ROQ_WPTR_INDIRECT1_mask = 0x3ff << 16, + ROQ_WPTR_INDIRECT1_shift = 16, + CP_ROQ_IB2_STAT = 0x8788, + ROQ_RPTR_INDIRECT2_mask = 0x3ff << 0, + ROQ_RPTR_INDIRECT2_shift = 0, + ROQ_WPTR_INDIRECT2_mask = 0x3ff << 16, + ROQ_WPTR_INDIRECT2_shift = 16, + + CP_MEQ_STAT = 0x8794, + MEQ_RPTR_mask = 0x3ff << 0, + MEQ_RPTR_shift = 0, + MEQ_WPTR_mask = 0x3ff << 16, + MEQ_WPTR_shift = 16, + + CC_GC_SHADER_PIPE_CONFIG = 0x8950, + INACTIVE_QD_PIPES_mask = 0xff << 8, + INACTIVE_QD_PIPES_shift = 8, + R6XX_MAX_QD_PIPES = 8, + INACTIVE_SIMDS_mask = 0xff << 16, + INACTIVE_SIMDS_shift = 16, + R6XX_MAX_SIMDS = 8, + GC_USER_SHADER_PIPE_CONFIG = 0x8954, + + VC_ENHANCE = 0x9714, + DB_DEBUG = 0x9830, + PREZ_MUST_WAIT_FOR_POSTZ_DONE = 1 << 31, + + DB_WATERMARKS = 0x00009838, + DEPTH_FREE_mask = 0x1f << 0, + DEPTH_FREE_shift = 0, + DEPTH_FLUSH_mask = 0x3f << 5, + DEPTH_FLUSH_shift = 5, + FORCE_SUMMARIZE_mask = 0x0f << 11, + FORCE_SUMMARIZE_shift = 11, + DEPTH_PENDING_FREE_mask = 0x1f << 15, + DEPTH_PENDING_FREE_shift = 15, + DEPTH_CACHELINE_FREE_mask = 0x1f << 20, + DEPTH_CACHELINE_FREE_shift = 20, + EARLY_Z_PANIC_DISABLE_bit = 1 << 25, + LATE_Z_PANIC_DISABLE_bit = 1 << 26, + RE_Z_PANIC_DISABLE_bit = 1 << 27, + DB_EXTRA_DEBUG_mask = 0x0f << 28, + DB_EXTRA_DEBUG_shift = 28, + + CP_RB_BASE = 0xc100, + CP_RB_CNTL = 0xc104, + RB_BUFSZ_mask = 0x3f << 0, + CP_RB_WPTR = 0xc114, + RB_WPTR_mask = 0xfffff << 0, + RB_WPTR_shift = 0, + CP_RB_RPTR_WR = 0xc108, + RB_RPTR_WR_mask = 0xfffff << 0, + RB_RPTR_WR_shift = 0, + + CP_INT_STATUS = 0xc128, + DISABLE_CNTX_SWITCH_INT_STAT_bit = 1 << 0, + ENABLE_CNTX_SWITCH_INT_STAT_bit = 1 << 1, + SEM_SIGNAL_INT_STAT_bit = 1 << 18, + CNTX_BUSY_INT_STAT_bit = 1 << 19, + CNTX_EMPTY_INT_STAT_bit = 1 << 20, + WAITMEM_SEM_INT_STAT_bit = 1 << 21, + PRIV_INSTR_INT_STAT_bit = 1 << 22, + PRIV_REG_INT_STAT_bit = 1 << 23, + OPCODE_ERROR_INT_STAT_bit = 1 << 24, + SCRATCH_INT_STAT_bit = 1 << 25, + TIME_STAMP_INT_STAT_bit = 1 << 26, + RESERVED_BIT_ERROR_INT_STAT_bit = 1 << 27, + DMA_INT_STAT_bit = 1 << 28, + IB2_INT_STAT_bit = 1 << 29, + IB1_INT_STAT_bit = 1 << 30, + RB_INT_STAT_bit = 1 << 31, + +// SX_ALPHA_TEST_CONTROL = 0x00028410, + ALPHA_FUNC__REF_NEVER = 0, + ALPHA_FUNC__REF_ALWAYS = 7, +// DB_SHADER_CONTROL = 0x0002880c, + Z_ORDER__EARLY_Z_THEN_LATE_Z = 2, +// PA_SU_SC_MODE_CNTL = 0x00028814, +// POLY_MODE_mask = 0x03 << 3, + POLY_MODE__TRIANGLES = 0, POLY_MODE__DUAL_MODE, +// POLYMODE_FRONT_PTYPE_mask = 0x07 << 5, + POLYMODE_PTYPE__POINTS = 0, POLYMODE_PTYPE__LINES, POLYMODE_PTYPE__TRIANGLES, + PA_SC_AA_SAMPLE_LOCS_8S_WD1_M = 0x00028c20, + DB_SRESULTS_COMPARE_STATE0 = 0x00028d28, /* See autoregs: DB_SRESULTS_COMPARE_STATE1 */ +// DB_SRESULTS_COMPARE_STATE1 = 0x00028d2c, + DB_ALPHA_TO_MASK = 0x00028d44, + ALPHA_TO_MASK_ENABLE = 1 << 0, + ALPHA_TO_MASK_OFFSET0_mask = 0x03 << 8, + ALPHA_TO_MASK_OFFSET0_shift = 8, + ALPHA_TO_MASK_OFFSET1_mask = 0x03 << 8, + ALPHA_TO_MASK_OFFSET1_shift = 10, + ALPHA_TO_MASK_OFFSET2_mask = 0x03 << 8, + ALPHA_TO_MASK_OFFSET2_shift = 12, + ALPHA_TO_MASK_OFFSET3_mask = 0x03 << 8, + ALPHA_TO_MASK_OFFSET3_shift = 14, + +// SQ_VTX_CONSTANT_WORD2_0 = 0x00038008, +// SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_mask = 0x3f << 20, + FMT_INVALID=0, FMT_8, FMT_4_4, FMT_3_3_2, + FMT_16=5, FMT_16_FLOAT, FMT_8_8, + FMT_5_6_5, FMT_6_5_5, FMT_1_5_5_5, FMT_4_4_4_4, + FMT_5_5_5_1, FMT_32, FMT_32_FLOAT, FMT_16_16, + FMT_16_16_FLOAT=16, FMT_8_24, FMT_8_24_FLOAT, FMT_24_8, + FMT_24_8_FLOAT, FMT_10_11_11, FMT_10_11_11_FLOAT, FMT_11_11_10, + FMT_11_11_10_FLOAT, FMT_2_10_10_10, FMT_8_8_8_8, FMT_10_10_10_2, + FMT_X24_8_32_FLOAT, FMT_32_32, FMT_32_32_FLOAT, FMT_16_16_16_16, + FMT_16_16_16_16_FLOAT=32, FMT_32_32_32_32=34, FMT_32_32_32_32_FLOAT, + FMT_1 = 37, FMT_GB_GR=39, + FMT_BG_RG, FMT_32_AS_8, FMT_32_AS_8_8, FMT_5_9_9_9_SHAREDEXP, + FMT_8_8_8, FMT_16_16_16, FMT_16_16_16_FLOAT, FMT_32_32_32, + FMT_32_32_32_FLOAT=48, + +// High level register file lengths + SQ_ALU_CONSTANT = SQ_ALU_CONSTANT0_0, /* 256 PS, 256 VS */ + SQ_ALU_CONSTANT_ps_num = 256, + SQ_ALU_CONSTANT_vs_num = 256, + SQ_ALU_CONSTANT_all_num = 512, + SQ_ALU_CONSTANT_offset = 16, + SQ_ALU_CONSTANT_ps = 0, + SQ_ALU_CONSTANT_vs = SQ_ALU_CONSTANT_ps + SQ_ALU_CONSTANT_ps_num, + SQ_TEX_RESOURCE = SQ_TEX_RESOURCE_WORD0_0, /* 160 PS, 160 VS, 16 FS, 160 GS */ + SQ_TEX_RESOURCE_ps_num = 160, + SQ_TEX_RESOURCE_vs_num = 160, + SQ_TEX_RESOURCE_fs_num = 16, + SQ_TEX_RESOURCE_gs_num = 160, + SQ_TEX_RESOURCE_all_num = 496, + SQ_TEX_RESOURCE_offset = 28, + SQ_TEX_RESOURCE_ps = 0, + SQ_TEX_RESOURCE_vs = SQ_TEX_RESOURCE_ps + SQ_TEX_RESOURCE_ps_num, + SQ_TEX_RESOURCE_fs = SQ_TEX_RESOURCE_vs + SQ_TEX_RESOURCE_vs_num, + SQ_TEX_RESOURCE_gs = SQ_TEX_RESOURCE_fs + SQ_TEX_RESOURCE_fs_num, + SQ_VTX_RESOURCE = SQ_VTX_CONSTANT_WORD0_0, /* 160 PS, 160 VS, 16 FS, 160 GS */ + SQ_VTX_RESOURCE_ps_num = 160, + SQ_VTX_RESOURCE_vs_num = 160, + SQ_VTX_RESOURCE_fs_num = 16, + SQ_VTX_RESOURCE_gs_num = 160, + SQ_VTX_RESOURCE_all_num = 496, + SQ_VTX_RESOURCE_offset = 28, + SQ_VTX_RESOURCE_ps = 0, + SQ_VTX_RESOURCE_vs = SQ_VTX_RESOURCE_ps + SQ_VTX_RESOURCE_ps_num, + SQ_VTX_RESOURCE_fs = SQ_VTX_RESOURCE_vs + SQ_VTX_RESOURCE_vs_num, + SQ_VTX_RESOURCE_gs = SQ_VTX_RESOURCE_fs + SQ_VTX_RESOURCE_fs_num, + SQ_TEX_SAMPLER_WORD = SQ_TEX_SAMPLER_WORD0_0, /* 18 per PS, VS, GS */ + SQ_TEX_SAMPLER_WORD_ps_num = 18, + SQ_TEX_SAMPLER_WORD_vs_num = 18, + SQ_TEX_SAMPLER_WORD_gs_num = 18, + SQ_TEX_SAMPLER_WORD_all_num = 54, + SQ_TEX_SAMPLER_WORD_offset = 12, + SQ_TEX_SAMPLER_WORD_ps = 0, + SQ_TEX_SAMPLER_WORD_vs = SQ_TEX_SAMPLER_WORD_ps + SQ_TEX_SAMPLER_WORD_ps_num, + SQ_TEX_SAMPLER_WORD_gs = SQ_TEX_SAMPLER_WORD_vs + SQ_TEX_SAMPLER_WORD_vs_num, + SQ_LOOP_CONST = SQ_LOOP_CONST_0, /* 32 per PS, VS, GS */ + SQ_LOOP_CONST_ps_num = 32, + SQ_LOOP_CONST_vs_num = 32, + SQ_LOOP_CONST_gs_num = 32, + SQ_LOOP_CONST_all_num = 96, + SQ_LOOP_CONST_offset = 4, + SQ_LOOP_CONST_ps = 0, + SQ_LOOP_CONST_vs = SQ_LOOP_CONST_ps + SQ_LOOP_CONST_ps_num, + SQ_LOOP_CONST_gs = SQ_LOOP_CONST_vs + SQ_LOOP_CONST_vs_num, +} ; + + +#endif diff --git a/src/mesa/drivers/dri/r600/r600_reg_r7xx.h b/src/mesa/drivers/dri/r600/r600_reg_r7xx.h new file mode 100644 index 0000000000..e5c01c861a --- /dev/null +++ b/src/mesa/drivers/dri/r600/r600_reg_r7xx.h @@ -0,0 +1,149 @@ +/* + * RadeonHD R6xx, R7xx Register documentation + * + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * Copyright (C) 2008-2009 Matthias Hopf + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _R600_REG_R7xx_H_ +#define _R600_REG_R7xx_H_ + +/* + * Register update for R7xx chips + */ + +enum { + + R7XX_MC_VM_FB_LOCATION = 0x00002024, + +// GRBM_STATUS = 0x00008010, + R7XX_TA_BUSY_bit = 1 << 14, + + R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ = 0x00008d8c, + RING0_OFFSET_mask = 0xff << 0, + RING0_OFFSET_shift = 0, + ISOLATE_ES_ENABLE_bit = 1 << 12, + ISOLATE_GS_ENABLE_bit = 1 << 13, + VS_PC_LIMIT_ENABLE_bit = 1 << 14, + +// SQ_ALU_WORD0 = 0x00008dfc, +// SRC0_SEL_mask = 0x1ff << 0, +// SRC1_SEL_mask = 0x1ff << 13, + R7xx_SQ_ALU_SRC_1_DBL_L = 0xf4, + R7xx_SQ_ALU_SRC_1_DBL_M = 0xf5, + R7xx_SQ_ALU_SRC_0_5_DBL_L = 0xf6, + R7xx_SQ_ALU_SRC_0_5_DBL_M = 0xf7, +// INDEX_MODE_mask = 0x07 << 26, + R7xx_SQ_INDEX_GLOBAL = 0x05, + R7xx_SQ_INDEX_GLOBAL_AR_X = 0x06, + R6xx_SQ_ALU_WORD1_OP2 = 0x00008dfc, + R7xx_SQ_ALU_WORD1_OP2_V2 = 0x00008dfc, + R6xx_FOG_MERGE_bit = 1 << 5, + R6xx_OMOD_mask = 0x03 << 6, + R7xx_OMOD_mask = 0x03 << 5, + R6xx_OMOD_shift = 6, + R7xx_OMOD_shift = 5, + R6xx_SQ_ALU_WORD1_OP2__ALU_INST_mask = 0x3ff << 8, + R7xx_SQ_ALU_WORD1_OP2_V2__ALU_INST_mask = 0x7ff << 7, + R6xx_SQ_ALU_WORD1_OP2__ALU_INST_shift = 8, + R7xx_SQ_ALU_WORD1_OP2_V2__ALU_INST_shift = 7, + R7xx_SQ_OP2_INST_FREXP_64 = 0x07, + R7xx_SQ_OP2_INST_ADD_64 = 0x17, + R7xx_SQ_OP2_INST_MUL_64 = 0x1b, + R7xx_SQ_OP2_INST_FLT64_TO_FLT32 = 0x1c, + R7xx_SQ_OP2_INST_FLT32_TO_FLT64 = 0x1d, + R7xx_SQ_OP2_INST_LDEXP_64 = 0x7a, + R7xx_SQ_OP2_INST_FRACT_64 = 0x7b, + R7xx_SQ_OP2_INST_PRED_SETGT_64 = 0x7c, + R7xx_SQ_OP2_INST_PRED_SETE_64 = 0x7d, + R7xx_SQ_OP2_INST_PRED_SETGE_64 = 0x7e, +// SQ_ALU_WORD1_OP3 = 0x00008dfc, +// SRC2_SEL_mask = 0x1ff << 0, +// R7xx_SQ_ALU_SRC_1_DBL_L = 0xf4, +// R7xx_SQ_ALU_SRC_1_DBL_M = 0xf5, +// R7xx_SQ_ALU_SRC_0_5_DBL_L = 0xf6, +// R7xx_SQ_ALU_SRC_0_5_DBL_M = 0xf7, +// SQ_ALU_WORD1_OP3__ALU_INST_mask = 0x1f << 13, + R7xx_SQ_OP3_INST_MULADD_64 = 0x08, + R7xx_SQ_OP3_INST_MULADD_64_M2 = 0x09, + R7xx_SQ_OP3_INST_MULADD_64_M4 = 0x0a, + R7xx_SQ_OP3_INST_MULADD_64_D2 = 0x0b, +// SQ_CF_ALU_WORD1 = 0x00008dfc, + R6xx_USES_WATERFALL_bit = 1 << 25, + R7xx_SQ_CF_ALU_WORD1__ALT_CONST_bit = 1 << 25, +// SQ_CF_ALLOC_EXPORT_WORD0 = 0x00008dfc, +// ARRAY_BASE_mask = 0x1fff << 0, +// TYPE_mask = 0x03 << 13, +// SQ_EXPORT_PARAM = 0x02, +// X_UNUSED_FOR_SX_EXPORTS = 0x03, +// ELEM_SIZE_mask = 0x03 << 30, +// SQ_CF_ALLOC_EXPORT_WORD1 = 0x00008dfc, +// SQ_CF_ALLOC_EXPORT_WORD1__CF_INST_mask = 0x7f << 23, + R7xx_SQ_CF_INST_MEM_EXPORT = 0x3a, +// SQ_CF_WORD1 = 0x00008dfc, +// SQ_CF_WORD1__COUNT_mask = 0x07 << 10, + R7xx_COUNT_3_bit = 1 << 19, +// SQ_CF_WORD1__CF_INST_mask = 0x7f << 23, + R7xx_SQ_CF_INST_END_PROGRAM = 0x19, + R7xx_SQ_CF_INST_WAIT_ACK = 0x1a, + R7xx_SQ_CF_INST_TEX_ACK = 0x1b, + R7xx_SQ_CF_INST_VTX_ACK = 0x1c, + R7xx_SQ_CF_INST_VTX_TC_ACK = 0x1d, +// SQ_VTX_WORD0 = 0x00008dfc, +// VTX_INST_mask = 0x1f << 0, + R7xx_SQ_VTX_INST_MEM = 0x02, +// SQ_VTX_WORD2 = 0x00008dfc, + R7xx_SQ_VTX_WORD2__ALT_CONST_bit = 1 << 20, + +// SQ_TEX_WORD0 = 0x00008dfc, +// TEX_INST_mask = 0x1f << 0, + R7xx_X_MEMORY_READ = 0x02, + R7xx_SQ_TEX_INST_KEEP_GRADIENTS = 0x0a, + R7xx_X_FETCH4_LOAD4_INSTRUCTION_FOR_DX10_1 = 0x0f, + R7xx_SQ_TEX_WORD0__ALT_CONST_bit = 1 << 24, + + R7xx_PA_SC_EDGERULE = 0x00028230, + R7xx_SPI_THREAD_GROUPING = 0x000286c8, + PS_GROUPING_mask = 0x1f << 0, + PS_GROUPING_shift = 0, + VS_GROUPING_mask = 0x1f << 8, + VS_GROUPING_shift = 8, + GS_GROUPING_mask = 0x1f << 16, + GS_GROUPING_shift = 16, + ES_GROUPING_mask = 0x1f << 24, + ES_GROUPING_shift = 24, + R7xx_CB_SHADER_CONTROL = 0x000287a0, + RT0_ENABLE_bit = 1 << 0, + RT1_ENABLE_bit = 1 << 1, + RT2_ENABLE_bit = 1 << 2, + RT3_ENABLE_bit = 1 << 3, + RT4_ENABLE_bit = 1 << 4, + RT5_ENABLE_bit = 1 << 5, + RT6_ENABLE_bit = 1 << 6, + RT7_ENABLE_bit = 1 << 7, +// DB_ALPHA_TO_MASK = 0x00028d44, + R7xx_OFFSET_ROUND_bit = 1 << 16, +// SQ_TEX_SAMPLER_MISC_0 = 0x0003d03c, + R7xx_TRUNCATE_COORD_bit = 1 << 9, + R7xx_DISABLE_CUBE_WRAP_bit = 1 << 10, + +} ; + +#endif /* _R600_REG_R7xx_H_ */ diff --git a/src/mesa/drivers/dri/r600/r600_screen.c b/src/mesa/drivers/dri/r600/r600_screen.c new file mode 100644 index 0000000000..94323d0f05 --- /dev/null +++ b/src/mesa/drivers/dri/r600/r600_screen.c @@ -0,0 +1,534 @@ +/* + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * Copyright (C) 2008-2009 Matthias Hopf + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Authors: + * Matthias Hopf + * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com> + * CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com> + */ + +#include "r600_screen.h" +#include "r600_context.h" +#include "r600_id.h" +#include "r600_span.h" + +#include "server/radeon_dri.h" + +#include "utils.h" +#include "drirenderbuffer.h" +#include "vblank.h" + +#include "framebuffer.h" +#include "renderbuffer.h" + +int debug = 1; + + +/* Get 64bit param (currently drm interface is only 32bit, though). Returns -1 on error. */ +static uint64_t +r600GetParam (__DRIscreenPrivate *sPriv, int param, char *what) +{ + int r; + drm_radeon_getparam_t gp; + uint32_t val; + + gp.param = param; + gp.value = &val; + + if ( (r = drmCommandWriteRead (sPriv->fd, DRM_RADEON_GETPARAM, &gp, sizeof(gp))) >= 0) + return val; + + if (what) + drmError (r, what); + return -1; +} + +/* Map memory */ +static int +r600DrmMap (__DRIscreenPrivate *sPriv, memmap_t *map, + drm_handle_t handle, drmSize size, uint64_t offset, char *what) +{ + int r; + map->handle = handle; + map->size = size; + map->gpu = offset; + if ( (r = drmMap (sPriv->fd, handle, size, &map->cpu)) < 0) + __driUtilMessage ("%s: drmMap(%s) failed\n", __FUNCTION__, what); + return r; +} + +/* UnMap memory */ +static int +r600DrmUnMap (drmAddress address, drmSize size) +{ + int r; + if ( (r = drmUnmap(address, size)) < 0) + { + __driUtilMessage ("%s: drmMap failed\n", __FUNCTION__); + } + return r; +} + + +/* + * Free the device specific screen private data struct and close everything associated. + */ +static void * +r600FreeScreen (screen_t *screen) +{ + if (! screen) + return NULL; + + if (screen->regs.cpu) + drmUnmap (screen->regs.cpu, screen->regs.size); + if (screen->status.cpu) + drmUnmap (screen->status.cpu, screen->status.size); + if (screen->textures.cpu) + drmUnmap (screen->status.cpu, screen->status.size); + if (screen->buffers) + drmUnmapBufs (screen->buffers); + FREE (screen); + + return NULL; +} + + +/* + * Create the device specific screen private data struct. + */ +static screen_t * +r600CreateScreen (__DRIscreenPrivate *sPriv) +{ + screen_t *screen; + RADEONDRIPtr driPriv = (RADEONDRIPtr)sPriv->pDevPriv; + int i; + + if (sPriv->devPrivSize != sizeof(RADEONDRIRec)) { + fprintf (stderr,"\nERROR! sizeof(RADEONDRIRec) does not match passed size from device driver\n"); + return NULL; + } + + /* Allocate the private area */ + if (! (screen = (screen_t *) CALLOC (sizeof(*screen))) ) { + __driUtilMessage ("%s: Could not allocate memory for screen structure", + __FUNCTION__); + return NULL; + } + screen->driScreen = sPriv; + screen->sareaPrivOffset = driPriv->sarea_priv_offset; + + /* Verify chipset */ + for (i = 0; r600_chips[i].type; i++) + if (r600_chips[i].id == driPriv->deviceID) { + memcpy (&screen->chip, &r600_chips[i], sizeof (chip_t)); + break; + } + if (! r600_chips[i].type) { + fprintf (stderr, "unknown chip id 0x%x.\n", driPriv->deviceID); + return r600FreeScreen (screen); + } + + /* Framebuffer location */ + if ( (screen->fb.gpu = r600GetParam (sPriv, RADEON_PARAM_FB_LOCATION, + "RADEON_PARAM_FB_LOCATION")) == -1) + return r600FreeScreen (screen); + screen->fb.gpu = (screen->fb.gpu & 0xffff) << 24; + screen->fb.cpu = sPriv->pFB; + screen->fb.size = sPriv->fbSize; + + /* GART location */ + if ( (screen->gart.gpu = r600GetParam (sPriv, RADEON_PARAM_GART_BASE, + "RADEON_PARAM_GART_BASE")) == -1) + return r600FreeScreen (screen); + + /* Register map */ + if (r600DrmMap (sPriv, &screen->regs, + driPriv->registerHandle, driPriv->registerSize, + 0, "regs") < 0) + return r600FreeScreen (screen); + + /* Memory copies of ring read ptr, scratch regs */ + if (r600DrmMap (sPriv, &screen->status, + driPriv->statusHandle, driPriv->statusSize, + 0, "status") < 0) + return r600FreeScreen (screen); + screen->scratch = (__volatile__ u_int32_t *) + ((GLubyte *)screen->status.cpu + R600_SCRATCH_REG_OFFSET); + + /* Textures */ + /* BTW - RADEON_PARAM_GART_TEX_HANDLE does not work yet */ + if (r600DrmMap (sPriv, &screen->gartTextures, + driPriv->gartTexHandle, driPriv->gartTexMapSize, + driPriv->gartTexOffset + screen->gart.gpu, "textures") < 0) + return r600FreeScreen (screen); + screen->textures.gpu = driPriv->textureOffset + screen->fb.gpu; + screen->textures.size = driPriv->textureSize; + + /* DRM buffers */ + if (! (screen->buffers = drmMapBufs (sPriv->fd)) ) { + __driUtilMessage ("%s: drmMapBufs failed\n", __FUNCTION__); + return r600FreeScreen (screen); + } + if ( (screen->bufs.gpu = r600GetParam (sPriv, RADEON_PARAM_GART_BUFFER_OFFSET, + "RADEON_PARAM_GART_BUFFER_OFFSET")) == -1) + return r600FreeScreen (screen); + screen->bufs.cpu = screen->buffers->list[0].address; + screen->bufs.size = screen->buffers->count * screen->buffers->list[0].total; + + /* Interrupt */ + /* TODO (RADEON_PARAM_IRQ_NR) */ + + /* Targets */ + screen->cpp = driPriv->bpp / 8; /* still bpp 15 there? */ + screen->frontBuffer.gpu = driPriv->frontOffset + screen->fb.gpu; + screen->frontBuffer.cpu = driPriv->frontOffset + screen->fb.cpu; + screen->frontBuffer.size = driPriv->frontPitch * screen->cpp * driPriv->height; + screen->frontBuffer.pitch = driPriv->frontPitch; + screen->backBuffer.gpu = driPriv->backOffset + screen->fb.gpu; + screen->backBuffer.cpu = driPriv->backOffset + screen->fb.cpu; + screen->backBuffer.size = driPriv->backPitch * screen->cpp * driPriv->height; + screen->backBuffer.pitch = driPriv->backPitch; + /* TODO: currently same cpp as for front/back buffer */ + screen->depthBuffer.gpu = driPriv->depthOffset + screen->fb.gpu; + screen->depthBuffer.cpu = driPriv->depthOffset + screen->fb.cpu; + screen->depthBuffer.size = driPriv->depthPitch * screen->cpp * driPriv->height; + screen->depthBuffer.pitch = driPriv->depthPitch; + + screen->width = driPriv->width; + screen->height = driPriv->height; + + if (debug >= 1) { + fprintf (stderr, "[r600] Mappings:\n" + "\tRegisters cpu %p size 0x%08x handle 0x%08x\n" + "\tStatus cpu %p size 0x%08x handle 0x%08x\n" + "\tGART gpu 0x" PRINTF_UINT64_HEX "\n" + "\tDRM buffers gpu 0x" PRINTF_UINT64_HEX " cpu %p size 0x%08x\n" + "\tGART Textures gpu 0x" PRINTF_UINT64_HEX " cpu %p size 0x%08x handle 0x%08x\n" + "\tFramebuffer gpu 0x" PRINTF_UINT64_HEX " cpu %p size 0x%08x\n" + "\tFront Target gpu 0x" PRINTF_UINT64_HEX " cpu %p size 0x%08x pitch 0x%04x\n" + "\tBack Target gpu 0x" PRINTF_UINT64_HEX " cpu %p size 0x%08x pitch 0x%04x\n" + "\tDepth Target gpu 0x" PRINTF_UINT64_HEX " cpu %p size 0x%08x pitch 0x%04x\n\n", + screen->regs.cpu, screen->regs.size, screen->regs.handle, + screen->status.cpu, screen->status.size, screen->status.handle, + screen->gart.gpu, + screen->bufs.gpu, screen->bufs.cpu, screen->bufs.size, + screen->gartTextures.gpu, screen->gartTextures.cpu, screen->gartTextures.size, screen->gartTextures.handle, + screen->fb.gpu, screen->fb.cpu, screen->fb.size, + screen->frontBuffer.gpu, screen->frontBuffer.cpu, screen->frontBuffer.size, screen->frontBuffer.pitch, + screen->backBuffer.gpu, screen->backBuffer.cpu, screen->backBuffer.size, screen->backBuffer.pitch, + screen->depthBuffer.gpu, screen->depthBuffer.cpu, screen->depthBuffer.size, screen->depthBuffer.pitch); + } + + screen->gart_texture_offset = driPriv->gartTexOffset + screen->gart.gpu; + + if ( driPriv->textureSize == 0 ) + { + screen->texOffset[RADEON_LOCAL_TEX_HEAP] = screen->gart_texture_offset; + screen->texSize[RADEON_LOCAL_TEX_HEAP] = driPriv->gartTexMapSize; + screen->logTexGranularity[RADEON_LOCAL_TEX_HEAP] = driPriv->log2GARTTexGran; + } + else + { + screen->texOffset[RADEON_LOCAL_TEX_HEAP] = driPriv->textureOffset + screen->fb.gpu; + screen->texSize[RADEON_LOCAL_TEX_HEAP] = driPriv->textureSize; + screen->logTexGranularity[RADEON_LOCAL_TEX_HEAP] = driPriv->log2TexGran; + } + if ( !screen->gartTextures.cpu || driPriv->textureSize == 0 + || getenv( "RADEON_GARTTEXTURING_FORCE_DISABLE" ) ) + { + screen->numTexHeaps = RADEON_NR_TEX_HEAPS - 1; + screen->texOffset[RADEON_GART_TEX_HEAP] = 0; + screen->texSize[RADEON_GART_TEX_HEAP] = 0; + screen->logTexGranularity[RADEON_GART_TEX_HEAP] = 0; + } + else + { + screen->numTexHeaps = RADEON_NR_TEX_HEAPS; + screen->texOffset[RADEON_GART_TEX_HEAP] = screen->gart_texture_offset; + screen->texSize[RADEON_GART_TEX_HEAP] = driPriv->gartTexMapSize; + screen->logTexGranularity[RADEON_GART_TEX_HEAP] = driPriv->log2GARTTexGran; + } + + return screen; +} + + +/* + * Create visual list + */ +static const __DRIconfig ** +r600FillInModes (__DRIscreenPrivate *psp, + unsigned pixel_bits, unsigned depth_bits, + unsigned stencil_bits) +{ + __DRIconfig **configs; + GLenum fb_format; + GLenum fb_type; + + uint8_t depth_bits_array[1]; + uint8_t stencil_bits_array[1]; + + static const GLenum back_buffer_modes[] = { + GLX_NONE, GLX_SWAP_UNDEFINED_OML + }; + + /* Only support a single buffered and a double buffered mode, with + * everything enabled including depth and stencil buffer. Features + * not used won't use computation time, and we have a shared depth + * buffer anyway. */ + + depth_bits_array[0] = depth_bits; + stencil_bits_array[0] = (stencil_bits == 0) ? 8 : stencil_bits; + + if ( pixel_bits == 16 ) { + fb_format = GL_RGB; + fb_type = GL_UNSIGNED_SHORT_5_6_5; + } + else { + fb_format = GL_BGRA; + fb_type = GL_UNSIGNED_INT_8_8_8_8_REV; + } + + configs = driCreateConfigs (fb_format, fb_type, + depth_bits_array, stencil_bits_array, + 1, back_buffer_modes, 2); + if (configs == NULL) { + fprintf( stderr, "[%s:%u] Error creating FBConfig!\n", + __func__, __LINE__ ); + return NULL; + } + + return (const __DRIconfig **) configs; +} + +/* + * API + */ + +static void +r600DestroyScreen (__DRIscreenPrivate *sPriv) +{ + screen_t *screen = (screen_t *)sPriv->private; + if (screen->gartTextures.cpu) + { + r600DrmUnMap(screen->gartTextures.cpu, screen->gartTextures.size); + } + + r600FreeScreen (sPriv->private); + sPriv->private = NULL; +} + +static const __DRIconfig ** +r600InitScreen (__DRIscreenPrivate *sPriv) +{ + RADEONDRIPtr driPriv = (RADEONDRIPtr)sPriv->pDevPriv; + static const char *driver_name = "R600"; + static const __DRIutilversion2 ddx_expected = { 4, 5, 0, 0 }; + static const __DRIversion dri_expected = { 4, 0, 0 }; + static const __DRIversion drm_expected = { 1, 29, 0 }; + static const struct dri_extension no_extensions[] = { { NULL, NULL } }; + + if (! driCheckDriDdxDrmVersions3 (driver_name, + &sPriv->dri_version, &dri_expected, + &sPriv->ddx_version, &ddx_expected, + &sPriv->drm_version, &drm_expected) ) { + return NULL; + } + /* Even if we don't support any extensions, this has to be called + * in order to init driDispatchRemapTable + * (otherwise glNewList dispatch vector gets nuked) */ + driInitExtensions (NULL, no_extensions, GL_FALSE); + + if (! (sPriv->private = r600CreateScreen (sPriv)) ) + return NULL; + +#if 0 + if (! r600InitDriver (sPriv)) { + r600DestroyScreen (sPriv); + return NULL; + } +#endif + + return r600FillInModes (sPriv, + driPriv->bpp, + (driPriv->bpp == 16) ? 16 : 24, + (driPriv->bpp == 16) ? 0 : 8); +} + + +static GLboolean +r600CreateBuffer (__DRIscreenPrivate *sPriv, + __DRIdrawablePrivate *dPriv, + const __GLcontextModes *mesaVis, + GLboolean isPixmap) +{ + screen_t *screen = (screen_t *) sPriv->private; + const GLboolean swAccum = mesaVis->accumRedBits > 0; + const GLboolean swStencil = mesaVis->stencilBits > 0 && mesaVis->depthBits != 24; + const int mesaDepth = mesaVis->depthBits == 24 ? + GL_DEPTH_COMPONENT24 : GL_DEPTH_COMPONENT16; + struct gl_framebuffer *fb; + + + if (isPixmap) + return GL_FALSE; /* not implemented */ + + fb = _mesa_create_framebuffer(mesaVis); + + /* front color renderbuffer */ + { + driRenderbuffer *frontRb + = driNewRenderbuffer (GL_RGBA, + screen->frontBuffer.cpu, screen->cpp, + screen->frontBuffer.cpu - screen->fb.cpu, + screen->frontBuffer.pitch, dPriv); + r600SetSpanFunctions (frontRb, mesaVis); + _mesa_add_renderbuffer (fb, BUFFER_FRONT_LEFT, &frontRb->Base); + } + + /* back color renderbuffer */ + if (mesaVis->doubleBufferMode) { + driRenderbuffer *backRb + = driNewRenderbuffer (GL_RGBA, + screen->backBuffer.cpu, screen->cpp, + screen->backBuffer.cpu - screen->fb.cpu, + screen->backBuffer.pitch, dPriv); + r600SetSpanFunctions (backRb, mesaVis); + _mesa_add_renderbuffer (fb, BUFFER_BACK_LEFT, &backRb->Base); + } + + /* depth renderbuffer */ + if (mesaVis->depthBits == 16 || mesaVis->depthBits == 24) { + driRenderbuffer *depthRb + = driNewRenderbuffer (mesaDepth, + screen->depthBuffer.cpu, screen->cpp, + screen->depthBuffer.cpu - screen->fb.cpu, + screen->depthBuffer.pitch, dPriv); + r600SetSpanFunctions (depthRb, mesaVis); + _mesa_add_renderbuffer (fb, BUFFER_DEPTH, &depthRb->Base); + depthRb->depthHasSurface = 1; + } + + /* stencil renderbuffer */ + if (mesaVis->stencilBits > 0 && !swStencil) { + driRenderbuffer *stencilRb + = driNewRenderbuffer (GL_STENCIL_INDEX8_EXT, + screen->depthBuffer.cpu, screen->cpp, + screen->depthBuffer.cpu - screen->fb.cpu, + screen->depthBuffer.pitch, dPriv); + r600SetSpanFunctions (stencilRb, mesaVis); + _mesa_add_renderbuffer (fb, BUFFER_STENCIL, &stencilRb->Base); + stencilRb->depthHasSurface = 1; + } + + _mesa_add_soft_renderbuffers (fb, + GL_FALSE, /* color */ + GL_FALSE, /* depth */ + swStencil, + swAccum, + GL_FALSE, /* alpha */ + GL_FALSE); /* aux */ + + _mesa_resize_framebuffer (NULL, fb, screen->driScreen->fbWidth, screen->driScreen->fbHeight); + + dPriv->driverPrivate = (void *) fb; + + return (fb != NULL); +} + + +static void +r600DestroyBuffer (__DRIdrawablePrivate *dPriv) +{ + _mesa_unreference_framebuffer ((GLframebuffer **) (&(dPriv->driverPrivate))); +} + + +/* TODO: temporary in software */ +static void +r600CopyBuffer (__DRIdrawablePrivate *dPriv) +{ + context_t *context = (context_t *) dPriv->driContextPriv->driverPrivate; + screen_t *screen = context->screen; + int i, y; + unsigned int frontPitch = screen->frontBuffer.pitch * screen->cpp; + unsigned int backPitch = screen->backBuffer.pitch * screen->cpp; + + GLcontext *ctx = ((context_t *) dPriv->driContextPriv->driverPrivate)->ctx; + + DEBUG_FUNC; + +/* LOCK_HARDWARE (context); */ + for (i = 0; i < dPriv->numClipRects; i++) + { + unsigned int size = (dPriv->pClipRects[i].x2 - dPriv->pClipRects[i].x1) * screen->cpp; + void *front = screen->frontBuffer.cpu + dPriv->pClipRects[i].x1 * screen->cpp; + void *back = screen->backBuffer.cpu + dPriv->pClipRects[i].x1 * screen->cpp; + DEBUGF ("ClipRect %d:%d-%d:%d\n", + dPriv->pClipRects[i].x1, dPriv->pClipRects[i].y1, + dPriv->pClipRects[i].x2, dPriv->pClipRects[i].y2); + for (y = dPriv->pClipRects[i].y1; y < dPriv->pClipRects[i].y2; y++) + { + memcpy (front + frontPitch * y, back + backPitch * y, size); + } + } +/* UNLOCK_HARDWARE(radeon); */ +} + +static void +r600SwapBuffers (__DRIdrawablePrivate *dPriv) +{ + GLcontext *ctx;; + + if (! dPriv->driContextPriv || !dPriv->driContextPriv->driverPrivate) { + _mesa_problem (NULL, "%s: drawable has no context!", __FUNCTION__); + return; + } + + ctx = ((context_t *) dPriv->driContextPriv->driverPrivate) ->ctx; + + if (ctx->Visual.doubleBufferMode) { + _mesa_notifySwapBuffers (ctx); /* flush pending rendering comands */ + r600CopyBuffer (dPriv); + } +} + + +const struct __DriverAPIRec driDriverAPI = { + .InitScreen = r600InitScreen, + .DestroyScreen = r600DestroyScreen, + .CreateContext = r600CreateContext, + .DestroyContext = r600DestroyContext, + .CreateBuffer = r600CreateBuffer, + .DestroyBuffer = r600DestroyBuffer, + .SwapBuffers = r600SwapBuffers, + .MakeCurrent = r600MakeCurrent, + .UnbindContext = r600UnbindContext, +// .GetSwapInfo = getSwapInfo, + .GetDrawableMSC = driDrawableGetMSC32, + .WaitForMSC = driWaitForMSC32, +// .WaitForSBC = NULL, +// .SwapBuffersMSC = NULL, +// .CopySubBuffer = NULL, /* driCopySubBufferExtension.base */ +}; + diff --git a/src/mesa/drivers/dri/r600/r600_screen.h b/src/mesa/drivers/dri/r600/r600_screen.h new file mode 100644 index 0000000000..83f11f848b --- /dev/null +++ b/src/mesa/drivers/dri/r600/r600_screen.h @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * Copyright (C) 2008-2009 Matthias Hopf + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _R600_SCREEN_H +#define _R600_SCREEN_H + +#include "r600_common.h" +#include "r600_id.h" + +#include "radeon_drm.h" +#include "dri_util.h" + + +struct screen_s { + __DRIscreenPrivate *driScreen; + uint32_t sareaPrivOffset; + chip_t chip; + + /* DRM mapped memory by ourself */ + memmap_t regs; + memmap_t status; /* memory copy of ring read ptr, scratch regs */ + memmap_t gartTextures; + drmBufMapPtr buffers; + + /* Known pointers and externally mapped */ + mem_t fb; + mem_t bufs; + gpu_t gart; + __volatile__ uint32_t *scratch; + + /* Buffers */ + buffer_t vertBuffer; /* buffer for sending vertices */ + buffer_t ioBuffer; /* buffer for up/downloading data (textures) */ + + /* Targets */ + int cpp; + target_t frontBuffer; + target_t backBuffer; + target_t depthBuffer; + + /* Dimension */ + int width; + int height; + + /* Textures */ + mem_t textures; /* .gpu and .size only */ + + /* Shared texture data */ + int numTexHeaps; + int texOffset[RADEON_NR_TEX_HEAPS]; + int texSize[RADEON_NR_TEX_HEAPS]; + int logTexGranularity[RADEON_NR_TEX_HEAPS]; + + unsigned int gart_buffer_offset; /* offset in card memory space */ + unsigned int gart_texture_offset; /* offset in card memory space */ + +} ; + + +#endif /* _R600_SCREEN_H */ diff --git a/src/mesa/drivers/dri/r600/r600_span.c b/src/mesa/drivers/dri/r600/r600_span.c new file mode 100644 index 0000000000..a0e877bdbf --- /dev/null +++ b/src/mesa/drivers/dri/r600/r600_span.c @@ -0,0 +1,198 @@ +/* + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * Copyright (C) 2008-2009 Matthias Hopf + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Authors: Matthias Hopf + */ + +#include "r600_span.h" + +#include "swrast/swrast.h" + +/* #include "r600_state.h" */ +/* #include "r600_ioctl.h" */ + +#define DBG 0 + +/* + * Note that all information needed to access pixels in a renderbuffer + * should be obtained through the gl_renderbuffer parameter, not per-context + * information. + */ +#define LOCAL_VARS \ + driRenderbuffer *drb = (driRenderbuffer *) rb; \ + const __DRIdrawablePrivate *dPriv = drb->dPriv; \ + const GLuint bottom = dPriv->h - 1; \ + GLubyte *buf = (GLubyte *) drb->flippedData \ + + (dPriv->y * drb->flippedPitch + dPriv->x) * drb->cpp; \ + GLuint p; \ + (void) p; + +#define LOCAL_DEPTH_VARS \ + driRenderbuffer *drb = (driRenderbuffer *) rb; \ + const __DRIdrawablePrivate *dPriv = drb->dPriv; \ + const GLuint bottom = dPriv->h - 1; \ + GLuint xo = dPriv->x; \ + GLuint yo = dPriv->y; \ + GLubyte *buf = (GLubyte *) drb->Base.Data; + +#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS + +#define Y_FLIP(Y) (bottom - (Y)) + +#define HW_LOCK() + +#define HW_UNLOCK() + +/* ================================================================ + * Color buffer + */ + +/* 16 bit, RGB565 color spanline and pixel functions */ +#define SPANTMP_PIXEL_FMT GL_RGB +#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5 + +#define TAG(x) r600##x##_RGB565 +#define TAG2(x,y) r600##x##_RGB565##y +#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 2) +#include "spantmp2.h" + +/* 32 bit, ARGB8888 color spanline and pixel functions */ +#define SPANTMP_PIXEL_FMT GL_BGRA +#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV + +#define TAG(x) r600##x##_ARGB8888 +#define TAG2(x,y) r600##x##_ARGB8888##y +#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 4) +#include "spantmp2.h" + +/* ================================================================ + * Depth buffer + */ + +/* 16-bit depth buffer functions */ +#define VALUE_TYPE GLushort + +#define GET_PTR(X,Y) ((GLushort *)(buf + ((Y) * drb->pitch + (X)) * 2)) + +#define WRITE_DEPTH( _x, _y, d ) \ + *(GET_PTR ((_x)+xo, (_y)+yo)) = d; + +#define READ_DEPTH( d, _x, _y ) \ + d = *(GET_PTR ((_x)+xo, (_y)+yo)); + +#define TAG(x) r600##x##_z16 +#include "depthtmp.h" +#undef GET_PTR + +/* 24 bit depth, 8 bit stencil depthbuffer functions */ +#define VALUE_TYPE GLuint + +#define GET_PTR(X,Y) ((GLuint *)(buf + ((Y) * drb->pitch + (X)) * 4)) +#define WRITE_DEPTH( _x, _y, d ) \ + do { \ + GLuint *ptr = GET_PTR ((_x)+xo, (_y)+yo); \ + *ptr = (*ptr & 0xff) | ((d) << 8); \ + } while (0) + +#define READ_DEPTH( d, _x, _y ) \ + d = (*(GET_PTR ((_x)+xo, (_y)+yo))) >> 8 + +#define TAG(x) r600##x##_z24_s8 +#include "depthtmp.h" +#undef GET_PTR + +/* ================================================================ + * Stencil buffer + */ + +/* 24 bit depth, 8 bit stencil depthbuffer functions */ +#define GET_PTR(X,Y) ((GLuint *)(buf + ((Y) * drb->pitch + (X)) * 4)) +#define WRITE_STENCIL( _x, _y, d ) \ + do { \ + GLuint *ptr = GET_PTR ((_x)+xo, (_y)+yo); \ + *ptr = (*ptr & 0xffffff00) | ((d) & 0xff); \ + } while (0) + +#define READ_STENCIL( d, _x, _y ) \ + d = *(GET_PTR ((_x)+xo, (_y)+yo)) & 0xff + +#define TAG(x) r600##x##_z24_s8 +#include "stenciltmp.h" +#undef GET_PTR + +/* Move locking out to get reasonable span performance (10x better + * than doing this in HW_LOCK above). WaitForIdle() is the main + * culprit. */ + +static void +r600SpanRenderStart (GLcontext * ctx) +{ + R600_CONTEXT; +#if 0 + r600WaitForIdle (context); /* emit flush cache, emit fence, flush buffer, wait */ + LOCK_HARDWARE (context); +#endif +} + +static void +r600SpanRenderFinish(GLcontext * ctx) +{ + R600_CONTEXT; + +// _swrast_flush (ctx); // implicit +#if 0 + r600EmitFlushInputCache (context, context->target.rt.gpu, context->target.rt.size); + UNLOCK_HARDWARE (context); +#endif +} + +void +r600InitSpanFuncs (GLcontext * ctx) +{ + struct swrast_device_driver *swdd = + _swrast_GetDeviceDriverReference (ctx); + swdd->SpanRenderStart = r600SpanRenderStart; + swdd->SpanRenderFinish = r600SpanRenderFinish; +} + +/** + * Plug in the Get/Put routines for the given driRenderbuffer. + */ +void +r600SetSpanFunctions (driRenderbuffer * drb, const GLvisual * vis) +{ + if (drb->Base.InternalFormat == GL_RGBA) { + if (vis->redBits == 5 && vis->greenBits == 6 + && vis->blueBits == 5) { + r600InitPointers_RGB565(&drb->Base); + } else { + r600InitPointers_ARGB8888(&drb->Base); + } + } else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT16) { + r600InitDepthPointers_z16(&drb->Base); + } else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT24) { + r600InitDepthPointers_z24_s8(&drb->Base); + } else if (drb->Base.InternalFormat == GL_STENCIL_INDEX8_EXT) { + r600InitStencilPointers_z24_s8(&drb->Base); + } +} diff --git a/src/mesa/drivers/dri/r600/r600_span.h b/src/mesa/drivers/dri/r600/r600_span.h new file mode 100644 index 0000000000..6b0bf05caa --- /dev/null +++ b/src/mesa/drivers/dri/r600/r600_span.h @@ -0,0 +1,40 @@ +/* + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * Copyright (C) 2008-2009 Matthias Hopf + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Authors: + * Matthias Hopf + * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com> + * CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com> + */ + +#ifndef __R600_SPAN_H__ +#define __R600_SPAN_H__ + +#include "r600_common.h" + +#include "drirenderbuffer.h" + +extern void r600InitSpanFuncs (GLcontext * ctx); +extern void r600SetSpanFunctions (driRenderbuffer * rb, const GLvisual * vis); + +#endif diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c new file mode 100644 index 0000000000..ac9487c5d2 --- /dev/null +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -0,0 +1,4067 @@ +/* + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Authors: + * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com> + */ + +#include <stdio.h> +#include <stdarg.h> +#include <stdlib.h> +#include <string.h> +#include <math.h> + +#include "main/imports.h" + +#include "r700_interface.h" +#include "r700_chip.h" +#include "r700_debug.h" + +#include "r700_assembler.h" + +BITS addrmode_PVSDST(PVSDST * pPVSDST) +{ + return pPVSDST->addrmode0 | ((BITS)pPVSDST->addrmode1 << 1); +} + +void setaddrmode_PVSDST(PVSDST * pPVSDST, BITS addrmode) +{ + pPVSDST->addrmode0 = addrmode & 1; + pPVSDST->addrmode1 = (addrmode >> 1) & 1; +} + +void nomask_PVSDST(PVSDST * pPVSDST) +{ + pPVSDST->writex = pPVSDST->writey = pPVSDST->writez = pPVSDST->writew = 1; +} + +BITS addrmode_PVSSRC(PVSSRC* pPVSSRC) +{ + return pPVSSRC->addrmode0 | ((BITS)pPVSSRC->addrmode1 << 1); +} + +void setaddrmode_PVSSRC(PVSSRC* pPVSSRC, BITS addrmode) +{ + pPVSSRC->addrmode0 = addrmode & 1; + pPVSSRC->addrmode1 = (addrmode >> 1) & 1; +} + + +void setswizzle_PVSSRC(PVSSRC* pPVSSRC, BITS swz) +{ + pPVSSRC->swizzlex = + pPVSSRC->swizzley = + pPVSSRC->swizzlez = + pPVSSRC->swizzlew = swz; +} + +void noswizzle_PVSSRC(PVSSRC* pPVSSRC) +{ + pPVSSRC->swizzlex = SQ_SEL_X; + pPVSSRC->swizzley = SQ_SEL_Y; + pPVSSRC->swizzlez = SQ_SEL_Z; + pPVSSRC->swizzlew = SQ_SEL_W; +} + +void +swizzleagain_PVSSRC(PVSSRC * pPVSSRC, BITS x, BITS y, BITS z, BITS w) +{ + switch (x) + { + case SQ_SEL_X: x = pPVSSRC->swizzlex; + break; + case SQ_SEL_Y: x = pPVSSRC->swizzley; + break; + case SQ_SEL_Z: x = pPVSSRC->swizzlez; + break; + case SQ_SEL_W: x = pPVSSRC->swizzlew; + break; + default:; + } + + switch (y) + { + case SQ_SEL_X: y = pPVSSRC->swizzlex; + break; + case SQ_SEL_Y: y = pPVSSRC->swizzley; + break; + case SQ_SEL_Z: y = pPVSSRC->swizzlez; + break; + case SQ_SEL_W: y = pPVSSRC->swizzlew; + break; + default:; + } + + switch (z) + { + case SQ_SEL_X: z = pPVSSRC->swizzlex; + break; + case SQ_SEL_Y: z = pPVSSRC->swizzley; + break; + case SQ_SEL_Z: z = pPVSSRC->swizzlez; + break; + case SQ_SEL_W: z = pPVSSRC->swizzlew; + break; + default:; + } + + switch (w) + { + case SQ_SEL_X: w = pPVSSRC->swizzlex; + break; + case SQ_SEL_Y: w = pPVSSRC->swizzley; + break; + case SQ_SEL_Z: w = pPVSSRC->swizzlez; + break; + case SQ_SEL_W: w = pPVSSRC->swizzlew; + break; + default:; + } + + pPVSSRC->swizzlex = x; + pPVSSRC->swizzley = y; + pPVSSRC->swizzlez = z; + pPVSSRC->swizzlew = w; +} + +void neg_PVSSRC(PVSSRC* pPVSSRC) +{ + pPVSSRC->negx = 1; + pPVSSRC->negy = 1; + pPVSSRC->negz = 1; + pPVSSRC->negw = 1; +} + +void noneg_PVSSRC(PVSSRC* pPVSSRC) +{ + pPVSSRC->negx = 0; + pPVSSRC->negy = 0; + pPVSSRC->negz = 0; + pPVSSRC->negw = 0; +} + +// negate argument (for SUB instead of ADD and alike) +void flipneg_PVSSRC(PVSSRC* pPVSSRC) +{ + pPVSSRC->negx = !pPVSSRC->negx; + pPVSSRC->negy = !pPVSSRC->negy; + pPVSSRC->negz = !pPVSSRC->negz; + pPVSSRC->negw = !pPVSSRC->negw; +} + +void zerocomp_PVSSRC(PVSSRC* pPVSSRC, int c) +{ + switch (c) + { + case 0: pPVSSRC->swizzlex = SQ_SEL_0; pPVSSRC->negx = 0; break; + case 1: pPVSSRC->swizzley = SQ_SEL_0; pPVSSRC->negy = 0; break; + case 2: pPVSSRC->swizzlez = SQ_SEL_0; pPVSSRC->negz = 0; break; + case 3: pPVSSRC->swizzlew = SQ_SEL_0; pPVSSRC->negw = 0; break; + default:; + } +} + +void onecomp_PVSSRC(PVSSRC* pPVSSRC, int c) +{ + switch (c) + { + case 0: pPVSSRC->swizzlex = SQ_SEL_1; pPVSSRC->negx = 0; break; + case 1: pPVSSRC->swizzley = SQ_SEL_1; pPVSSRC->negy = 0; break; + case 2: pPVSSRC->swizzlez = SQ_SEL_1; pPVSSRC->negz = 0; break; + case 3: pPVSSRC->swizzlew = SQ_SEL_1; pPVSSRC->negw = 0; break; + default:; + } +} + +BITS is_misc_component_exported(VAP_OUT_VTX_FMT_0* pOutVTXFmt0) +{ + return (pOutVTXFmt0->point_size | + pOutVTXFmt0->edge_flag | + pOutVTXFmt0->rta_index | + pOutVTXFmt0->kill_flag | + pOutVTXFmt0->viewport_index); +} + +BITS is_depth_component_exported(OUT_FRAGMENT_FMT_0* pFPOutFmt) +{ + return (pFPOutFmt->depth | + pFPOutFmt->stencil_ref | + pFPOutFmt->mask | + pFPOutFmt->coverage_to_mask); +} + +GLboolean is_reduction_opcode(PVSDWORD* dest) +{ + if (dest->dst.op3 == 0) + { + if ( (dest->dst.opcode == SQ_OP2_INST_DOT4 || dest->dst.opcode == SQ_OP2_INST_DOT4_IEEE) ) + { + return GL_TRUE; + } + } + return GL_FALSE; +} + +GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size) +{ + GLuint format = FMT_INVALID; + GLuint uiElemSize = 0; + + switch (eType) + { + case GL_BYTE: + case GL_UNSIGNED_BYTE: + uiElemSize = 1; + switch(nChannels) + { + case 1: + format = FMT_8; break; + case 2: + format = FMT_8_8; break; + case 3: + format = FMT_8_8_8; break; + case 4: + format = FMT_8_8_8_8; break; + default: + break; + } + break; + + case GL_UNSIGNED_SHORT: + case GL_SHORT: + uiElemSize = 2; + switch(nChannels) + { + case 1: + format = FMT_16; break; + case 2: + format = FMT_16_16; break; + case 3: + format = FMT_16_16_16; break; + case 4: + format = FMT_16_16_16_16; break; + default: + break; + } + break; + + case GL_UNSIGNED_INT: + case GL_INT: + uiElemSize = 4; + switch(nChannels) + { + case 1: + format = FMT_32; break; + case 2: + format = FMT_32_32; break; + case 3: + format = FMT_32_32_32; break; + case 4: + format = FMT_32_32_32_32; break; + default: + break; + } + break; + + case GL_FLOAT: + uiElemSize = 4; + switch(nChannels) + { + case 1: + format = FMT_32_FLOAT; break; + case 2: + format = FMT_32_32_FLOAT; break; + case 3: + format = FMT_32_32_32_FLOAT; break; + case 4: + format = FMT_32_32_32_32_FLOAT; break; + default: + break; + } + break; + case GL_DOUBLE: + uiElemSize = 8; + switch(nChannels) + { + case 1: + format = FMT_32_FLOAT; break; + case 2: + format = FMT_32_32_FLOAT; break; + case 3: + format = FMT_32_32_32_FLOAT; break; + case 4: + format = FMT_32_32_32_32_FLOAT; break; + default: + break; + } + break; + default: + ; + //GL_ASSERT_NO_CASE(); + } + + if(NULL != pClient_size) + { + *pClient_size = uiElemSize * nChannels; + } + + return(format); +} + +unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm) +{ + if(pAsm->D.dst.op3) + { + return 3; + } + + switch (pAsm->D.dst.opcode) + { + case SQ_OP2_INST_ADD: + case SQ_OP2_INST_MUL: + case SQ_OP2_INST_MAX: + case SQ_OP2_INST_MIN: + //case SQ_OP2_INST_MAX_DX10: + //case SQ_OP2_INST_MIN_DX10: + case SQ_OP2_INST_SETGT: + case SQ_OP2_INST_SETGE: + case SQ_OP2_INST_PRED_SETE: + case SQ_OP2_INST_PRED_SETGT: + case SQ_OP2_INST_PRED_SETGE: + case SQ_OP2_INST_PRED_SETNE: + case SQ_OP2_INST_DOT4: + case SQ_OP2_INST_DOT4_IEEE: + return 2; + + case SQ_OP2_INST_MOV: + case SQ_OP2_INST_FRACT: + case SQ_OP2_INST_FLOOR: + case SQ_OP2_INST_KILLGT: + case SQ_OP2_INST_EXP_IEEE: + case SQ_OP2_INST_LOG_CLAMPED: + case SQ_OP2_INST_LOG_IEEE: + case SQ_OP2_INST_RECIP_IEEE: + case SQ_OP2_INST_RECIPSQRT_IEEE: + case SQ_OP2_INST_FLT_TO_INT: + case SQ_OP2_INST_SIN: + case SQ_OP2_INST_COS: + return 1; + + default: r700_error(TODO_ASM_NEEDIMPINST, + "Need instruction operand number. \n");; + }; + + return 3; +} + +int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700_Shader* pShader) +{ + GLuint i; + + Init_R700_Shader(pShader); + pAsm->pR700Shader = pShader; + pAsm->currentShaderType = spt; + + pAsm->cf_last_export_ptr = NULL; + + pAsm->cf_current_export_clause_ptr = NULL; + pAsm->cf_current_alu_clause_ptr = NULL; + pAsm->cf_current_tex_clause_ptr = NULL; + pAsm->cf_current_vtx_clause_ptr = NULL; + pAsm->cf_current_cf_clause_ptr = NULL; + + // No clause has been created yet + pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE; + + pAsm->number_of_colorandz_exports = 0; + pAsm->number_of_exports = 0; + pAsm->number_of_export_opcodes = 0; + + + pAsm->D.bits = 0; + pAsm->S[0].bits = 0; + pAsm->S[1].bits = 0; + pAsm->S[2].bits = 0; + + pAsm->uLastPosUpdate = 0; + + *(BITS *) &pAsm->fp_stOutFmt0 = 0; + + pAsm->uIIns = 0; + pAsm->uOIns = 0; + pAsm->number_used_registers = 0; + pAsm->uUsedConsts = 256; + + + // Fragment programs + pAsm->uBoolConsts = 0; + pAsm->uIntConsts = 0; + pAsm->uInsts = 0; + pAsm->uConsts = 0; + + pAsm->FCSP = 0; + pAsm->fc_stack[0].type = FC_NONE; + + pAsm->branch_depth = 0; + pAsm->max_branch_depth = 0; + + pAsm->aArgSubst[0] = + pAsm->aArgSubst[1] = + pAsm->aArgSubst[2] = + pAsm->aArgSubst[3] = (-1); + + pAsm->uOutputs = 0; + + for (i=0; i<NUMBER_OF_OUTPUT_COLORS; i++) + { + pAsm->color_export_register_number[i] = (-1); + } + + + pAsm->depth_export_register_number = (-1); + pAsm->stencil_export_register_number = (-1); + pAsm->coverage_to_mask_export_register_number = (-1); + pAsm->mask_export_register_number = (-1); + + pAsm->starting_export_register_number = 0; + pAsm->starting_vfetch_register_number = 0; + pAsm->starting_temp_register_number = 0; + pAsm->uFirstHelpReg = 0; + + + pAsm->input_position_is_used = GL_FALSE; + pAsm->input_normal_is_used = GL_FALSE; + + + for (i=0; i<NUMBER_OF_INPUT_COLORS; i++) + { + pAsm->input_color_is_used[ i ] = GL_FALSE; + } + + for (i=0; i<NUMBER_OF_TEXTURE_UNITS; i++) + { + pAsm->input_texture_unit_is_used[ i ] = GL_FALSE; + } + + for (i=0; i<VERT_ATTRIB_MAX; i++) + { + pAsm->vfetch_instruction_ptr_array[ i ] = NULL; + } + + pAsm->number_of_inputs = 0; + + return 0; +} + +GLboolean IsTex(gl_inst_opcode Opcode) +{ + if( (OPCODE_TEX==Opcode) || (OPCODE_TXP==Opcode) || (OPCODE_TXB==Opcode) ) + { + return GL_TRUE; + } + return GL_FALSE; +} + +GLboolean IsAlu(gl_inst_opcode Opcode) +{ + //TODO : more for fc and ex for higher spec. + if( IsTex(Opcode) ) + { + return GL_FALSE; + } + return GL_TRUE; +} + +int check_current_clause(r700_AssemblerBase* pAsm, + CF_CLAUSE_TYPE new_clause_type) +{ + if (pAsm->cf_current_clause_type != new_clause_type) + { //Close last open clause + switch (pAsm->cf_current_clause_type) + { + case CF_ALU_CLAUSE: + if ( pAsm->cf_current_alu_clause_ptr != NULL) + { + pAsm->cf_current_alu_clause_ptr = NULL; + } + break; + case CF_VTX_CLAUSE: + if ( pAsm->cf_current_vtx_clause_ptr != NULL) + { + pAsm->cf_current_vtx_clause_ptr = NULL; + } + break; + case CF_TEX_CLAUSE: + if ( pAsm->cf_current_tex_clause_ptr != NULL) + { + pAsm->cf_current_tex_clause_ptr = NULL; + } + break; + case CF_EXPORT_CLAUSE: + if ( pAsm->cf_current_export_clause_ptr != NULL) + { + pAsm->cf_current_export_clause_ptr = NULL; + } + break; + case CF_OTHER_CLAUSE: + if ( pAsm->cf_current_cf_clause_ptr != NULL) + { + pAsm->cf_current_cf_clause_ptr = NULL; + } + break; + case CF_EMPTY_CLAUSE: + break; + default: + r700_error(ERROR_ASM_VTX_CLAUSE, + "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type); + return GL_FALSE; + } + + pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE; + + // Create new clause + switch (new_clause_type) + { + case CF_ALU_CLAUSE: + pAsm->cf_current_clause_type = CF_ALU_CLAUSE; + break; + case CF_VTX_CLAUSE: + pAsm->cf_current_clause_type = CF_VTX_CLAUSE; + break; + case CF_TEX_CLAUSE: + pAsm->cf_current_clause_type = CF_TEX_CLAUSE; + break; + case CF_EXPORT_CLAUSE: + { + R700ControlFlowSXClause* pR700ControlFlowSXClause + = (R700ControlFlowSXClause*) CALLOC_STRUCT(R700ControlFlowSXClause); + + // Add new export instruction to control flow program + if (pR700ControlFlowSXClause != 0) + { + pAsm->cf_current_export_clause_ptr = pR700ControlFlowSXClause; + Init_R700ControlFlowSXClause(pR700ControlFlowSXClause); + AddCFInstruction( pAsm->pR700Shader, + (R700ControlFlowInstruction *)pR700ControlFlowSXClause ); + } + else + { + r700_error(ERROR_ASM_ALLOCEXPORTCF, + "Error allocating new EXPORT CF instruction in check_current_clause. \n"); + return GL_FALSE; + } + pAsm->cf_current_clause_type = CF_EXPORT_CLAUSE; + } + break; + case CF_EMPTY_CLAUSE: + break; + case CF_OTHER_CLAUSE: + pAsm->cf_current_clause_type = CF_OTHER_CLAUSE; + break; + default: + r700_error(ERROR_ASM_UNKOWNCLAUSE, + "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type); + return GL_FALSE; + } + } + + return GL_TRUE; +} + +GLboolean add_vfetch_instruction(r700_AssemblerBase* pAsm, + R700VertexInstruction* vertex_instruction_ptr) +{ + if( GL_FALSE == check_current_clause(pAsm, CF_VTX_CLAUSE) ) + { + return GL_FALSE; + } + + if( pAsm->cf_current_vtx_clause_ptr == NULL || + ( (pAsm->cf_current_vtx_clause_ptr != NULL) && + (pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count >= GetCFMaxInstructions(pAsm->cf_current_vtx_clause_ptr->m_ShaderInstType)-1) + ) ) + { + // Create new Vfetch control flow instruction for this new clause + pAsm->cf_current_vtx_clause_ptr = (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause); + + if (pAsm->cf_current_vtx_clause_ptr != NULL) + { + Init_R700ControlFlowGenericClause(pAsm->cf_current_vtx_clause_ptr); + AddCFInstruction( pAsm->pR700Shader, + (R700ControlFlowInstruction *)pAsm->cf_current_vtx_clause_ptr ); + } + else + { + r700_error(ERROR_ASM_ALLOCVTXCF, "Could not allocate a new VFetch CF instruction."); + return GL_FALSE; + } + + pAsm->cf_current_vtx_clause_ptr->m_Word1.f.pop_count = 0x0; + pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count = 0x0; + pAsm->cf_current_vtx_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_vtx_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_VTX; + pAsm->cf_current_vtx_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + pAsm->cf_current_vtx_clause_ptr->m_Word1.f.barrier = 0x1; + + LinkVertexInstruction(pAsm->cf_current_vtx_clause_ptr, vertex_instruction_ptr ); + } + else + { + pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count++; + } + + AddVTXInstruction(pAsm->pR700Shader, vertex_instruction_ptr); + + return GL_TRUE; +} + +GLboolean add_tex_instruction(r700_AssemblerBase* pAsm, + R700TextureInstruction* tex_instruction_ptr) +{ + if ( GL_FALSE == check_current_clause(pAsm, CF_TEX_CLAUSE) ) + { + return GL_FALSE; + } + + if ( pAsm->cf_current_tex_clause_ptr == NULL || + ( (pAsm->cf_current_tex_clause_ptr != NULL) && + (pAsm->cf_current_tex_clause_ptr->m_Word1.f.count >= GetCFMaxInstructions(pAsm->cf_current_tex_clause_ptr->m_ShaderInstType)-1) + ) ) + { + // new tex cf instruction for this new clause + pAsm->cf_current_tex_clause_ptr = (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause); + + if (pAsm->cf_current_tex_clause_ptr != NULL) + { + Init_R700ControlFlowGenericClause(pAsm->cf_current_tex_clause_ptr); + AddCFInstruction( pAsm->pR700Shader, + (R700ControlFlowInstruction *)pAsm->cf_current_tex_clause_ptr ); + } + else + { + r700_error(ERROR_ASM_ALLOCTEXCF, "Could not allocate a new TEX CF instruction."); + return GL_FALSE; + } + + pAsm->cf_current_tex_clause_ptr->m_Word1.f.pop_count = 0x0; + pAsm->cf_current_tex_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_tex_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_tex_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_tex_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_tex_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_TEX; + pAsm->cf_current_tex_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + pAsm->cf_current_tex_clause_ptr->m_Word1.f.barrier = 0x0; //0x1; + } + else + { + pAsm->cf_current_tex_clause_ptr->m_Word1.f.count++; + } + + // If this clause constains any TEX instruction that is dependent on a previous instruction, + // set the barrier bit + if( pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) ) + { + pAsm->cf_current_tex_clause_ptr->m_Word1.f.barrier = 0x1; + } + + if(NULL == pAsm->cf_current_tex_clause_ptr->m_pLinkedTEXInstruction) + { + pAsm->cf_current_tex_clause_ptr->m_pLinkedTEXInstruction = tex_instruction_ptr; + tex_instruction_ptr->m_pLinkedGenericClause = pAsm->cf_current_tex_clause_ptr; + } + + AddTEXInstruction(pAsm->pR700Shader, tex_instruction_ptr); + + return GL_TRUE; +} + +GLboolean assemble_vfetch_instruction(r700_AssemblerBase* pAsm, + GLuint gl_client_id, + GLuint destination_register, + GLuint number_of_elements, + GLenum dataElementType, + VTX_FETCH_METHOD* pFetchMethod) +{ + GLuint client_size_inbyte; + GLuint data_format; + GLuint mega_fetch_count; + GLuint is_mega_fetch_flag; + + R700VertexGenericFetch* vfetch_instruction_ptr; + R700VertexGenericFetch* assembled_vfetch_instruction_ptr = pAsm->vfetch_instruction_ptr_array[ gl_client_id ]; + + if (assembled_vfetch_instruction_ptr == NULL) + { + vfetch_instruction_ptr = (R700VertexGenericFetch*) CALLOC_STRUCT(R700VertexGenericFetch); + if (vfetch_instruction_ptr == NULL) + { + return GL_FALSE; + } + Init_R700VertexGenericFetch(vfetch_instruction_ptr); + } + else + { + vfetch_instruction_ptr = assembled_vfetch_instruction_ptr; + } + + data_format = GetSurfaceFormat(dataElementType, number_of_elements, &client_size_inbyte); + + if(GL_TRUE == pFetchMethod->bEnableMini) //More conditions here + { + //TODO : mini fetch + } + else + { + mega_fetch_count = MEGA_FETCH_BYTES - 1; + is_mega_fetch_flag = 0x1; + pFetchMethod->mega_fetch_remainder = MEGA_FETCH_BYTES - client_size_inbyte; + } + + vfetch_instruction_ptr->m_Word0.f.vtx_inst = SQ_VTX_INST_FETCH; + vfetch_instruction_ptr->m_Word0.f.fetch_type = SQ_VTX_FETCH_VERTEX_DATA; + vfetch_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0; + + vfetch_instruction_ptr->m_Word0.f.buffer_id = gl_client_id; + vfetch_instruction_ptr->m_Word0.f.src_gpr = 0x0; + vfetch_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE; + vfetch_instruction_ptr->m_Word0.f.src_sel_x = SQ_SEL_X; + vfetch_instruction_ptr->m_Word0.f.mega_fetch_count = mega_fetch_count; + + vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (number_of_elements < 1) ? SQ_SEL_0 : SQ_SEL_X; + vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (number_of_elements < 2) ? SQ_SEL_0 : SQ_SEL_Y; + vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (number_of_elements < 3) ? SQ_SEL_0 : SQ_SEL_Z; + vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (number_of_elements < 4) ? SQ_SEL_1 : SQ_SEL_W; + + vfetch_instruction_ptr->m_Word1.f.use_const_fields = 1; + + // Destination register + vfetch_instruction_ptr->m_Word1_GPR.f.dst_gpr = destination_register; + vfetch_instruction_ptr->m_Word1_GPR.f.dst_rel = SQ_ABSOLUTE; + + vfetch_instruction_ptr->m_Word2.f.offset = 0; + vfetch_instruction_ptr->m_Word2.f.const_buf_no_stride = 0x0; + + vfetch_instruction_ptr->m_Word2.f.mega_fetch = is_mega_fetch_flag; + + if (assembled_vfetch_instruction_ptr == NULL) + { + if ( GL_FALSE == add_vfetch_instruction(pAsm, (R700VertexInstruction *)vfetch_instruction_ptr) ) + { + return GL_FALSE; + } + + if (pAsm->vfetch_instruction_ptr_array[ gl_client_id ] != NULL) + { + return GL_FALSE; + } + else + { + pAsm->vfetch_instruction_ptr_array[ gl_client_id ] = vfetch_instruction_ptr; + } + } + + return GL_TRUE; +} + +GLuint gethelpr(r700_AssemblerBase* pAsm) +{ + GLuint r = pAsm->uHelpReg; + pAsm->uHelpReg++; + if (pAsm->uHelpReg > pAsm->number_used_registers) + { + pAsm->number_used_registers = pAsm->uHelpReg; + } + return r; +} +void resethelpr(r700_AssemblerBase* pAsm) +{ + pAsm->uHelpReg = pAsm->uFirstHelpReg; +} + +void checkop_init(r700_AssemblerBase* pAsm) +{ + resethelpr(pAsm); + pAsm->aArgSubst[0] = + pAsm->aArgSubst[1] = + pAsm->aArgSubst[2] = + pAsm->aArgSubst[3] = -1; +} + +GLboolean mov_temp(r700_AssemblerBase* pAsm, int src) +{ + GLuint tmp = gethelpr(pAsm); + + //mov src to temp helper gpr. + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + + nomask_PVSDST(&(pAsm->D.dst)); + + if( GL_FALSE == assemble_src(pAsm, src, 0) ) + { + return GL_FALSE; + } + + noswizzle_PVSSRC(&(pAsm->S[0].src)); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + pAsm->aArgSubst[1 + src] = tmp; + + return GL_TRUE; +} + +GLboolean checkop1(r700_AssemblerBase* pAsm) +{ + checkop_init(pAsm); + return GL_TRUE; +} + +GLboolean checkop2(r700_AssemblerBase* pAsm) +{ + GLboolean bSrcConst[2]; + struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); + + checkop_init(pAsm); + + if( (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) || + (pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) || + (pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) || + (pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) ) + { + bSrcConst[0] = GL_TRUE; + } + else + { + bSrcConst[0] = GL_FALSE; + } + if( (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) || + (pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) || + (pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) || + (pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) ) + { + bSrcConst[1] = GL_TRUE; + } + else + { + bSrcConst[1] = GL_FALSE; + } + + if( (bSrcConst[0] == GL_TRUE) && (bSrcConst[1] == GL_TRUE) ) + { + if(pILInst->SrcReg[0].Index != pILInst->SrcReg[1].Index) + { + if( GL_FALSE == mov_temp(pAsm, 1) ) + { + return GL_FALSE; + } + } + } + + return GL_TRUE; +} + +GLboolean checkop3(r700_AssemblerBase* pAsm) +{ + GLboolean bSrcConst[3]; + struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); + + checkop_init(pAsm); + + if( (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) || + (pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) || + (pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) || + (pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) ) + { + bSrcConst[0] = GL_TRUE; + } + else + { + bSrcConst[0] = GL_FALSE; + } + if( (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) || + (pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) || + (pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) || + (pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) ) + { + bSrcConst[1] = GL_TRUE; + } + else + { + bSrcConst[1] = GL_FALSE; + } + if( (pILInst->SrcReg[2].File == PROGRAM_CONSTANT) || + (pILInst->SrcReg[2].File == PROGRAM_LOCAL_PARAM) || + (pILInst->SrcReg[2].File == PROGRAM_ENV_PARAM) || + (pILInst->SrcReg[2].File == PROGRAM_STATE_VAR) ) + { + bSrcConst[2] = GL_TRUE; + } + else + { + bSrcConst[2] = GL_FALSE; + } + + if( (GL_TRUE == bSrcConst[0]) && + (GL_TRUE == bSrcConst[1]) && + (GL_TRUE == bSrcConst[2]) ) + { + if( GL_FALSE == mov_temp(pAsm, 1) ) + { + return GL_FALSE; + } + if( GL_FALSE == mov_temp(pAsm, 2) ) + { + return GL_FALSE; + } + + return GL_TRUE; + } + else if( (GL_TRUE == bSrcConst[0]) && + (GL_TRUE == bSrcConst[1]) ) + { + if(pILInst->SrcReg[0].Index != pILInst->SrcReg[1].Index) + { + if( GL_FALSE == mov_temp(pAsm, 1) ) + { + return 1; + } + } + + return GL_TRUE; + } + else if ( (GL_TRUE == bSrcConst[0]) && + (GL_TRUE == bSrcConst[2]) ) + { + if(pILInst->SrcReg[0].Index != pILInst->SrcReg[2].Index) + { + if( GL_FALSE == mov_temp(pAsm, 2) ) + { + return GL_FALSE; + } + } + + return GL_TRUE; + } + else if( (GL_TRUE == bSrcConst[1]) && + (GL_TRUE == bSrcConst[2]) ) + { + if(pILInst->SrcReg[1].Index != pILInst->SrcReg[2].Index) + { + if( GL_FALSE == mov_temp(pAsm, 2) ) + { + return GL_FALSE; + } + } + + return GL_TRUE; + } + + return GL_TRUE; +} + +GLboolean assemble_src(r700_AssemblerBase *pAsm, + int src, + int fld) +{ + struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); + + if (fld == -1) + { + fld = src; + } + + if(pAsm->aArgSubst[1+src] >= 0) + { + setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE); + pAsm->S[fld].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[fld].src.reg = pAsm->aArgSubst[1+src]; + } + else + { + switch (pILInst->SrcReg[src].File) + { + case PROGRAM_TEMPORARY: + setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE); + pAsm->S[fld].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index + pAsm->starting_temp_register_number; + break; + case PROGRAM_CONSTANT: + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_STATE_VAR: + if (1 == pILInst->SrcReg[src].RelAddr) + { + setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_RELATIVE_A0); + } + else + { + setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE); + } + + pAsm->S[fld].src.rtype = SRC_REG_CONSTANT; + pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index; + break; + case PROGRAM_INPUT: + setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE); + pAsm->S[fld].src.rtype = SRC_REG_INPUT; + switch (pAsm->currentShaderType) + { + case SPT_FP: + pAsm->S[fld].src.reg = pAsm->uiFP_AttributeMap[pILInst->SrcReg[src].Index]; + break; + case SPT_VP: + pAsm->S[fld].src.reg = pAsm->ucVP_AttributeMap[pILInst->SrcReg[src].Index]; + break; + } + break; + default: + r700_error(ERROR_ASM_SRCARGUMENT, "Invalid source argument type"); + return GL_FALSE; + } + } + + pAsm->S[fld].src.swizzlex = pILInst->SrcReg[src].Swizzle & 0x7; + pAsm->S[fld].src.swizzley = (pILInst->SrcReg[src].Swizzle >> 3) & 0x7; + pAsm->S[fld].src.swizzlez = (pILInst->SrcReg[src].Swizzle >> 6) & 0x7; + pAsm->S[fld].src.swizzlew = (pILInst->SrcReg[src].Swizzle >> 9) & 0x7; + + pAsm->S[fld].src.negx = pILInst->SrcReg[src].NegateBase & 0x1; + pAsm->S[fld].src.negy = (pILInst->SrcReg[src].NegateBase >> 1) & 0x1; + pAsm->S[fld].src.negz = (pILInst->SrcReg[src].NegateBase >> 2) & 0x1; + pAsm->S[fld].src.negw = (pILInst->SrcReg[src].NegateBase >> 3) & 0x1; + + return GL_TRUE; +} + +GLboolean assemble_dst(r700_AssemblerBase *pAsm) +{ + struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); + switch (pILInst->DstReg.File) + { + case PROGRAM_TEMPORARY: + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = pILInst->DstReg.Index + pAsm->starting_temp_register_number; + break; + case PROGRAM_ADDRESS: + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_A0; + pAsm->D.dst.reg = 0; + break; + case PROGRAM_OUTPUT: + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_OUT; + switch (pAsm->currentShaderType) + { + case SPT_FP: + pAsm->D.dst.reg = pAsm->uiFP_OutputMap[pILInst->DstReg.Index]; + break; + case SPT_VP: + pAsm->D.dst.reg = pAsm->ucVP_OutputMap[pILInst->DstReg.Index]; + break; + } + break; + default: + r700_error(ERROR_ASM_DSTARGUMENT, "Invalid destination output argument type"); + return GL_FALSE; + } + + pAsm->D.dst.writex = pILInst->DstReg.WriteMask & 0x1; + pAsm->D.dst.writey = (pILInst->DstReg.WriteMask >> 1) & 0x1; + pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1; + pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1; + + return GL_TRUE; +} + +GLboolean tex_dst(r700_AssemblerBase *pAsm) +{ + struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); + + if(PROGRAM_TEMPORARY == pILInst->DstReg.File) + { + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = pAsm->pILInst[pAsm->uiCurInst].DstReg.Index + pAsm->starting_temp_register_number; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + } + else if(PROGRAM_OUTPUT == pILInst->DstReg.File) + { + pAsm->D.dst.rtype = DST_REG_OUT; + switch (pAsm->currentShaderType) + { + case SPT_FP: + pAsm->D.dst.reg = pAsm->uiFP_OutputMap[pILInst->DstReg.Index]; + break; + case SPT_VP: + pAsm->D.dst.reg = pAsm->ucVP_OutputMap[pILInst->DstReg.Index]; + break; + } + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + } + else + { + r700_error(ERROR_ASM_DSTARGUMENT, "Invalid destination output argument type"); + return GL_FALSE; + } + + pAsm->D.dst.writex = pILInst->DstReg.WriteMask & 0x1; + pAsm->D.dst.writey = (pILInst->DstReg.WriteMask >> 1) & 0x1; + pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1; + pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1; + + return GL_TRUE; +} + +GLboolean tex_src(r700_AssemblerBase *pAsm) +{ + struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); + + GLboolean bValidTexCoord = GL_FALSE; + + switch (pILInst->SrcReg[0].File) + { + case PROGRAM_TEMPORARY: + bValidTexCoord = GL_TRUE; + + pAsm->S[0].src.reg = pILInst->SrcReg[0].Index + pAsm->starting_temp_register_number; + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + + break; + case PROGRAM_INPUT: + switch (pILInst->SrcReg[0].Index) + { + case FRAG_ATTRIB_COL0: + case FRAG_ATTRIB_COL1: + case FRAG_ATTRIB_TEX0: + case FRAG_ATTRIB_TEX1: + case FRAG_ATTRIB_TEX2: + case FRAG_ATTRIB_TEX3: + case FRAG_ATTRIB_TEX4: + case FRAG_ATTRIB_TEX5: + case FRAG_ATTRIB_TEX6: + case FRAG_ATTRIB_TEX7: + bValidTexCoord = GL_TRUE; + + pAsm->S[0].src.reg = pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index]; + pAsm->S[0].src.rtype = SRC_REG_INPUT; + } + break; + } + + if(GL_TRUE == bValidTexCoord) + { + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + } + else + { + r700_error(ERROR_ASM_BADTEXSRC, "Invalid source texcoord for TEX instruction"); + return GL_FALSE; + } + + pAsm->S[0].src.swizzlex = pILInst->SrcReg[0].Swizzle & 0x7; + pAsm->S[0].src.swizzley = (pILInst->SrcReg[0].Swizzle >> 3) & 0x7; + pAsm->S[0].src.swizzlez = (pILInst->SrcReg[0].Swizzle >> 6) & 0x7; + pAsm->S[0].src.swizzlew = (pILInst->SrcReg[0].Swizzle >> 9) & 0x7; + + pAsm->S[0].src.negx = pILInst->SrcReg[0].NegateBase & 0x1; + pAsm->S[0].src.negy = (pILInst->SrcReg[0].NegateBase >> 1) & 0x1; + pAsm->S[0].src.negz = (pILInst->SrcReg[0].NegateBase >> 2) & 0x1; + pAsm->S[0].src.negw = (pILInst->SrcReg[0].NegateBase >> 3) & 0x1; + + return GL_TRUE; +} + +GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm) +{ + PVSSRC * texture_coordinate_source; + PVSSRC * texture_unit_source; + + R700TextureInstruction* tex_instruction_ptr = (R700TextureInstruction*) CALLOC_STRUCT(R700TextureInstruction); + if (tex_instruction_ptr == NULL) + { + return GL_FALSE; + } + Init_R700TextureInstruction(tex_instruction_ptr); + + texture_coordinate_source = &(pAsm->S[0].src); + texture_unit_source = &(pAsm->S[1].src); + + tex_instruction_ptr->m_Word0.f.tex_inst = pAsm->D.dst.opcode; + tex_instruction_ptr->m_Word0.f.bc_frac_mode = 0x0; + tex_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0; + + tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg; + + tex_instruction_ptr->m_Word1.f.lod_bias = 0x0; + tex_instruction_ptr->m_Word1.f.coord_type_x = SQ_TEX_NORMALIZED; + tex_instruction_ptr->m_Word1.f.coord_type_y = SQ_TEX_NORMALIZED; + tex_instruction_ptr->m_Word1.f.coord_type_z = SQ_TEX_NORMALIZED; + tex_instruction_ptr->m_Word1.f.coord_type_w = SQ_TEX_NORMALIZED; + + tex_instruction_ptr->m_Word2.f.offset_x = 0x0; + tex_instruction_ptr->m_Word2.f.offset_y = 0x0; + tex_instruction_ptr->m_Word2.f.offset_z = 0x0; + + tex_instruction_ptr->m_Word2.f.sampler_id = texture_unit_source->reg; + + // dst + if ( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) || + (pAsm->D.dst.rtype == DST_REG_OUT) ) + { + tex_instruction_ptr->m_Word0.f.src_gpr = texture_coordinate_source->reg; + tex_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE; + + tex_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg; + tex_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE; + + tex_instruction_ptr->m_Word1.f.dst_sel_x = (pAsm->D.dst.writex ? texture_unit_source->swizzlex : SQ_SEL_MASK); + tex_instruction_ptr->m_Word1.f.dst_sel_y = (pAsm->D.dst.writey ? texture_unit_source->swizzley : SQ_SEL_MASK); + tex_instruction_ptr->m_Word1.f.dst_sel_z = (pAsm->D.dst.writez ? texture_unit_source->swizzlez : SQ_SEL_MASK); + tex_instruction_ptr->m_Word1.f.dst_sel_w = (pAsm->D.dst.writew ? texture_unit_source->swizzlew : SQ_SEL_MASK); + + + tex_instruction_ptr->m_Word2.f.src_sel_x = texture_coordinate_source->swizzlex; + tex_instruction_ptr->m_Word2.f.src_sel_y = texture_coordinate_source->swizzley; + tex_instruction_ptr->m_Word2.f.src_sel_z = texture_coordinate_source->swizzlez; + tex_instruction_ptr->m_Word2.f.src_sel_w = texture_coordinate_source->swizzlew; + } + else + { + r700_error(ERROR_ASM_TEXDSTBADTYPE, "Only temp destination registers supported for TEX dest regs."); + return GL_FALSE; + } + + if( GL_FALSE == add_tex_instruction(pAsm, tex_instruction_ptr) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +void initialize(r700_AssemblerBase *pAsm) +{ + GLuint cycle, component; + + for (cycle=0; cycle<NUMBER_OF_CYCLES; cycle++) + { + for (component=0; component<NUMBER_OF_COMPONENTS; component++) + { + pAsm->hw_gpr[cycle][component] = (-1); + } + } + for (component=0; component<NUMBER_OF_COMPONENTS; component++) + { + pAsm->hw_cfile_addr[component] = (-1); + pAsm->hw_cfile_chan[component] = (-1); + } +} + +GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr, + int source_index, + PVSSRC* pSource, + BITS scalar_channel_index) +{ + BITS src_sel; + BITS src_rel; + BITS src_chan; + BITS src_neg; + + //-------------------------------------------------------------------------- + // Source for operands src0, src1. + // Values [0,127] correspond to GPR[0..127]. + // Values [256,511] correspond to cfile constants c[0..255]. + + //-------------------------------------------------------------------------- + // Other special values are shown in the list below. + + // 248 SQ_ALU_SRC_0: special constant 0.0. + // 249 SQ_ALU_SRC_1: special constant 1.0 float. + + // 250 SQ_ALU_SRC_1_INT: special constant 1 integer. + // 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer. + + // 252 SQ_ALU_SRC_0_5: special constant 0.5 float. + // 253 SQ_ALU_SRC_LITERAL: literal constant. + + // 254 SQ_ALU_SRC_PV: previous vector result. + // 255 SQ_ALU_SRC_PS: previous scalar result. + //-------------------------------------------------------------------------- + + BITS channel_swizzle; + switch (scalar_channel_index) + { + case 0: channel_swizzle = pSource->swizzlex; break; + case 1: channel_swizzle = pSource->swizzley; break; + case 2: channel_swizzle = pSource->swizzlez; break; + case 3: channel_swizzle = pSource->swizzlew; break; + default: channel_swizzle = SQ_SEL_MASK; break; + } + + if(channel_swizzle == SQ_SEL_0) + { + src_sel = SQ_ALU_SRC_0; + } + else if (channel_swizzle == SQ_SEL_1) + { + src_sel = SQ_ALU_SRC_1; + } + else + { + if ( (pSource->rtype == SRC_REG_TEMPORARY) || + (pSource->rtype == SRC_REG_INPUT) + ) + { + src_sel = pSource->reg; + } + else if (pSource->rtype == SRC_REG_CONSTANT) + { + src_sel = pSource->reg + CFILE_REGISTER_OFFSET; + } + else + { + r700_error(ERROR_ASM_ALUSRCBADTYPE, "Source (%d) register type (%d) not one of TEMP, INPUT, or CONSTANT.", + source_index, pSource->rtype); + return GL_FALSE; + } + } + + if( ADDR_ABSOLUTE == addrmode_PVSSRC(pSource) ) + { + src_rel = SQ_ABSOLUTE; + } + else + { + src_rel = SQ_RELATIVE; + } + + switch (channel_swizzle) + { + case SQ_SEL_X: + src_chan = SQ_CHAN_X; + break; + case SQ_SEL_Y: + src_chan = SQ_CHAN_Y; + break; + case SQ_SEL_Z: + src_chan = SQ_CHAN_Z; + break; + case SQ_SEL_W: + src_chan = SQ_CHAN_W; + break; + case SQ_SEL_0: + case SQ_SEL_1: + // Does not matter since src_sel controls + src_chan = SQ_CHAN_X; + break; + default: + r700_error(ERROR_ASM_ALUSRCSELECT, "Unknown source select value (%d) in assemble_alu_src()."); + return GL_FALSE; + break; + } + + switch (scalar_channel_index) + { + case 0: src_neg = pSource->negx; break; + case 1: src_neg = pSource->negy; break; + case 2: src_neg = pSource->negz; break; + case 3: src_neg = pSource->negw; break; + default: src_neg = 0; break; + } + + switch (source_index) + { + case 0: + alu_instruction_ptr->m_Word0.f.src0_sel = src_sel; + alu_instruction_ptr->m_Word0.f.src0_rel = src_rel; + alu_instruction_ptr->m_Word0.f.src0_chan = src_chan; + alu_instruction_ptr->m_Word0.f.src0_neg = src_neg; + break; + case 1: + alu_instruction_ptr->m_Word0.f.src1_sel = src_sel; + alu_instruction_ptr->m_Word0.f.src1_rel = src_rel; + alu_instruction_ptr->m_Word0.f.src1_chan = src_chan; + alu_instruction_ptr->m_Word0.f.src1_neg = src_neg; + break; + case 2: + alu_instruction_ptr->m_Word1_OP3.f.src2_sel = src_sel; + alu_instruction_ptr->m_Word1_OP3.f.src2_rel = src_rel; + alu_instruction_ptr->m_Word1_OP3.f.src2_chan = src_chan; + alu_instruction_ptr->m_Word1_OP3.f.src2_neg = src_neg; + break; + default: + r700_error(ERROR_ASM_ALUSRCNUMBER, "Only three sources allowed in ALU opcodes."); + return GL_FALSE; + break; + } + + return GL_TRUE; +} + +GLboolean add_alu_instruction(r700_AssemblerBase* pAsm, + R700ALUInstruction* alu_instruction_ptr, + GLuint contiguous_slots_needed) +{ + if( GL_FALSE == check_current_clause(pAsm, CF_ALU_CLAUSE) ) + { + return GL_FALSE; + } + + if ( pAsm->cf_current_alu_clause_ptr == NULL || + ( (pAsm->cf_current_alu_clause_ptr != NULL) && + (pAsm->cf_current_alu_clause_ptr->m_Word1.f.count >= (GetCFMaxInstructions(pAsm->cf_current_alu_clause_ptr->m_ShaderInstType)-contiguous_slots_needed-1) ) + ) ) + { + + //new cf inst for this clause + pAsm->cf_current_alu_clause_ptr = (R700ControlFlowALUClause*) CALLOC_STRUCT(R700ControlFlowALUClause); + + // link the new cf to cf segment + if(NULL != pAsm->cf_current_alu_clause_ptr) + { + Init_R700ControlFlowALUClause(pAsm->cf_current_alu_clause_ptr); + AddCFInstruction( pAsm->pR700Shader, + (R700ControlFlowInstruction *)pAsm->cf_current_alu_clause_ptr ); + } + else + { + r700_error(ERROR_ASM_ALLOCALUCF, "Could not allocate a new ALU CF instruction."); + return GL_FALSE; + } + + pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank0 = 0x0; + pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank1 = 0x0; + pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_mode0 = SQ_CF_KCACHE_NOP; + + pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_mode1 = SQ_CF_KCACHE_NOP; + pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr0 = 0x0; + pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr1 = 0x0; + + //cf_current_alu_clause_ptr->m_Word1.f.count = number_of_scalar_operations - 1; + pAsm->cf_current_alu_clause_ptr->m_Word1.f.count = 0x0; + pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ALU; + + pAsm->cf_current_alu_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_alu_clause_ptr->m_Word1.f.barrier = 0x1; + } + else + { + pAsm->cf_current_alu_clause_ptr->m_Word1.f.count++; + } + + // If this clause constains any instruction that is forward dependent on a TEX instruction, + // set the whole_quad_mode for this clause + if ( pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) ) + { + pAsm->cf_current_alu_clause_ptr->m_Word1.f.whole_quad_mode = 0x1; + } + + if (pAsm->cf_current_alu_clause_ptr->m_Word1.f.count >= (GetCFMaxInstructions(pAsm->cf_current_alu_clause_ptr->m_ShaderInstType)-1) ) + { + alu_instruction_ptr->m_Word0.f.last = 1; + } + + if(NULL == pAsm->cf_current_alu_clause_ptr->m_pLinkedALUInstruction) + { + pAsm->cf_current_alu_clause_ptr->m_pLinkedALUInstruction = alu_instruction_ptr; + alu_instruction_ptr->m_pLinkedALUClause = pAsm->cf_current_alu_clause_ptr; + } + + AddALUInstruction(pAsm->pR700Shader, alu_instruction_ptr); + + return GL_TRUE; +} + +void get_src_properties(R700ALUInstruction* alu_instruction_ptr, + int source_index, + BITS* psrc_sel, + BITS* psrc_rel, + BITS* psrc_chan, + BITS* psrc_neg) +{ + switch (source_index) + { + case 0: + *psrc_sel = alu_instruction_ptr->m_Word0.f.src0_sel ; + *psrc_rel = alu_instruction_ptr->m_Word0.f.src0_rel ; + *psrc_chan = alu_instruction_ptr->m_Word0.f.src0_chan; + *psrc_neg = alu_instruction_ptr->m_Word0.f.src0_neg ; + break; + + case 1: + *psrc_sel = alu_instruction_ptr->m_Word0.f.src1_sel ; + *psrc_rel = alu_instruction_ptr->m_Word0.f.src1_rel ; + *psrc_chan = alu_instruction_ptr->m_Word0.f.src1_chan; + *psrc_neg = alu_instruction_ptr->m_Word0.f.src1_neg ; + break; + + case 2: + *psrc_sel = alu_instruction_ptr->m_Word1_OP3.f.src2_sel; + *psrc_rel = alu_instruction_ptr->m_Word1_OP3.f.src2_rel; + *psrc_chan = alu_instruction_ptr->m_Word1_OP3.f.src2_chan; + *psrc_neg = alu_instruction_ptr->m_Word1_OP3.f.src2_neg; + break; + } +} + +int is_cfile(BITS sel) +{ + if (sel > 255 && sel < 512) + { + return 1; + } + return 0; +} + +int is_const(BITS sel) +{ + if (is_cfile(sel)) + { + return 1; + } + else if(sel >= SQ_ALU_SRC_0 && sel <= SQ_ALU_SRC_LITERAL) + { + return 1; + } + return 0; +} + +int is_gpr(BITS sel) +{ + if (sel >= 0 && sel < 128) + { + return 1; + } + return 0; +} + +const GLuint BANK_SWIZZLE_VEC[8] = {SQ_ALU_VEC_210, //000 + SQ_ALU_VEC_120, //001 + SQ_ALU_VEC_102, //010 + + SQ_ALU_VEC_201, //011 + SQ_ALU_VEC_012, //100 + SQ_ALU_VEC_021, //101 + + SQ_ALU_VEC_012, //110 + SQ_ALU_VEC_012}; //111 + +const GLuint BANK_SWIZZLE_SCL[8] = {SQ_ALU_SCL_210, //000 + SQ_ALU_SCL_122, //001 + SQ_ALU_SCL_122, //010 + + SQ_ALU_SCL_221, //011 + SQ_ALU_SCL_212, //100 + SQ_ALU_SCL_122, //101 + + SQ_ALU_SCL_122, //110 + SQ_ALU_SCL_122}; //111 + +GLboolean reserve_cfile(r700_AssemblerBase* pAsm, + GLuint sel, + GLuint chan) +{ + int res_match = (-1); + int res_empty = (-1); + + GLint res; + + for (res=3; res>=0; res--) + { + if(pAsm->hw_cfile_addr[ res] < 0) + { + res_empty = res; + } + else if( (pAsm->hw_cfile_addr[res] == (int)sel) + && + (pAsm->hw_cfile_chan[ res ] == (int) chan) ) + { + res_match = res; + } + } + + if(res_match >= 0) + { + // Read for this scalar component already reserved, nothing to do here. + ; + } + else if(res_empty >= 0) + { + pAsm->hw_cfile_addr[ res_empty ] = sel; + pAsm->hw_cfile_chan[ res_empty ] = chan; + } + else + { + r700_error(ERROR_ASM_CONSTCHANNEL, "All cfile read ports are used, cannot reference C$sel, channel $chan."); + return GL_FALSE; + } + return GL_TRUE; +} + +GLboolean reserve_gpr(r700_AssemblerBase* pAsm, GLuint sel, GLuint chan, GLuint cycle) +{ + if(pAsm->hw_gpr[cycle][chan] < 0) + { + pAsm->hw_gpr[cycle][chan] = sel; + } + else if(pAsm->hw_gpr[cycle][chan] != (int)sel) + { + r700_error(ERROR_ASM_BADGPRRESERVE, "Another scalar operation has already used GPR read port for given channel"); + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean cycle_for_scalar_bank_swizzle(const int swiz, const int sel, GLuint* pCycle) +{ + switch (swiz) + { + case SQ_ALU_SCL_210: + { + int table[3] = {2, 1, 0}; + *pCycle = table[sel]; + return GL_TRUE; + } + break; + case SQ_ALU_SCL_122: + { + int table[3] = {1, 2, 2}; + *pCycle = table[sel]; + return GL_TRUE; + } + break; + case SQ_ALU_SCL_212: + { + int table[3] = {2, 1, 2}; + *pCycle = table[sel]; + return GL_TRUE; + } + break; + case SQ_ALU_SCL_221: + { + int table[3] = {2, 2, 1}; + *pCycle = table[sel]; + return GL_TRUE; + } + break; + default: + r700_error(ERROR_ASM_BADSCALARBZ, "Bad Scalar bank swizzle value"); + break; + } + + return GL_FALSE; +} + +GLboolean cycle_for_vector_bank_swizzle(const int swiz, const int sel, GLuint* pCycle) +{ + switch (swiz) + { + case SQ_ALU_VEC_012: + { + int table[3] = {0, 1, 2}; + *pCycle = table[sel]; + } + break; + case SQ_ALU_VEC_021: + { + int table[3] = {0, 2, 1}; + *pCycle = table[sel]; + } + break; + case SQ_ALU_VEC_120: + { + int table[3] = {1, 2, 0}; + *pCycle = table[sel]; + } + break; + case SQ_ALU_VEC_102: + { + int table[3] = {1, 0, 2}; + *pCycle = table[sel]; + } + break; + case SQ_ALU_VEC_201: + { + int table[3] = {2, 0, 1}; + *pCycle = table[sel]; + } + break; + case SQ_ALU_VEC_210: + { + int table[3] = {2, 1, 0}; + *pCycle = table[sel]; + } + break; + default: + r700_error(ERROR_ASM_BADVECTORBZ, "Bad Vec bank swizzle value"); + return GL_FALSE; + break; + } + + return GL_TRUE; +} + +GLboolean check_scalar(r700_AssemblerBase* pAsm, + R700ALUInstruction* alu_instruction_ptr) +{ + GLuint cycle; + GLuint bank_swizzle; + GLuint const_count = 0; + + BITS sel; + BITS chan; + BITS rel; + BITS neg; + + GLuint src; + + BITS src_sel [3] = {0,0,0}; + BITS src_chan[3] = {0,0,0}; + BITS src_rel [3] = {0,0,0}; + BITS src_neg [3] = {0,0,0}; + + GLuint swizzle_key; + + GLuint number_of_operands = r700GetNumOperands(pAsm); + + for (src=0; src<number_of_operands; src++) + { + get_src_properties(alu_instruction_ptr, + src, + &(src_sel[src]), + &(src_rel[src]), + &(src_chan[src]), + &(src_neg[src]) ); + } + + + swizzle_key = ( (is_const( src_sel[0] ) ? 4 : 0) + + (is_const( src_sel[1] ) ? 2 : 0) + + (is_const( src_sel[2] ) ? 1 : 0) ); + + alu_instruction_ptr->m_Word1.f.bank_swizzle = BANK_SWIZZLE_SCL[ swizzle_key ]; + + for (src=0; src<number_of_operands; src++) + { + sel = src_sel [src]; + chan = src_chan[src]; + rel = src_rel [src]; + neg = src_neg [src]; + + if (is_const( sel )) + { + // Any constant, including literal and inline constants + const_count++; + + if (is_cfile( sel )) + { + reserve_cfile(pAsm, sel, chan); + } + + } + } + + for (src=0; src<number_of_operands; src++) + { + sel = src_sel [src]; + chan = src_chan[src]; + rel = src_rel [src]; + neg = src_neg [src]; + + if( is_gpr(sel) ) + { + bank_swizzle = alu_instruction_ptr->m_Word1.f.bank_swizzle; + + if( GL_FALSE == cycle_for_scalar_bank_swizzle(bank_swizzle, src, &cycle) ) + { + return GL_FALSE; + } + + if(cycle < const_count) + { + if( GL_FALSE == reserve_gpr(pAsm, sel, chan, cycle) ) + { + return GL_FALSE; + } + } + } + } + + return GL_TRUE; +} + +GLboolean check_vector(r700_AssemblerBase* pAsm, + R700ALUInstruction* alu_instruction_ptr) +{ + GLuint cycle; + GLuint bank_swizzle; + GLuint const_count = 0; + + GLuint src; + + BITS sel; + BITS chan; + BITS rel; + BITS neg; + + BITS src_sel [3] = {0,0,0}; + BITS src_chan[3] = {0,0,0}; + BITS src_rel [3] = {0,0,0}; + BITS src_neg [3] = {0,0,0}; + + GLuint swizzle_key; + + GLuint number_of_operands = r700GetNumOperands(pAsm); + + for (src=0; src<number_of_operands; src++) + { + get_src_properties(alu_instruction_ptr, + src, + &(src_sel[src]), + &(src_rel[src]), + &(src_chan[src]), + &(src_neg[src]) ); + } + + + swizzle_key = ( (is_const( src_sel[0] ) ? 4 : 0) + + (is_const( src_sel[1] ) ? 2 : 0) + + (is_const( src_sel[2] ) ? 1 : 0) + ); + + alu_instruction_ptr->m_Word1.f.bank_swizzle = BANK_SWIZZLE_VEC[swizzle_key]; + + for (src=0; src<number_of_operands; src++) + { + sel = src_sel [src]; + chan = src_chan[src]; + rel = src_rel [src]; + neg = src_neg [src]; + + + bank_swizzle = alu_instruction_ptr->m_Word1.f.bank_swizzle; + + if( is_gpr(sel) ) + { + if( GL_FALSE == cycle_for_vector_bank_swizzle(bank_swizzle, src, &cycle) ) + { + return GL_FALSE; + } + + if ( (src == 1) && + (sel == src_sel[0]) && + (chan == src_chan[0]) ) + { + } + else + { + if( GL_FALSE == reserve_gpr(pAsm, sel, chan, cycle) ) + { + return GL_FALSE; + } + } + } + else if( is_const(sel) ) + { + const_count++; + + if( is_cfile(sel) ) + { + if( GL_FALSE == reserve_cfile(pAsm, sel, chan) ) + { + return GL_FALSE; + } + } + } + } + + return GL_TRUE; +} + +GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) +{ + GLuint number_of_scalar_operations; + GLboolean is_single_scalar_operation; + GLuint scalar_channel_index; + + PVSSRC * pcurrent_source; + int current_source_index; + GLuint contiguous_slots_needed; + + GLuint uNumSrc = r700GetNumOperands(pAsm); + GLuint channel_swizzle, j; + GLuint chan_counter[4] = {0, 0, 0, 0}; + PVSSRC * pSource[3]; + GLboolean bSplitInst = GL_FALSE; + + if (1 == pAsm->D.dst.math) + { + is_single_scalar_operation = GL_TRUE; + number_of_scalar_operations = 1; + } + else + { + is_single_scalar_operation = GL_FALSE; + number_of_scalar_operations = 4; + + /* check read port, only very preliminary algorithm, not count in + src0/1 same comp case and prev slot repeat case; also not count relative + addressing. TODO: improve performance. */ + for(j=0; j<uNumSrc; j++) + { + pSource[j] = &(pAsm->S[j].src); + } + for(scalar_channel_index=0; scalar_channel_index<4; scalar_channel_index++) + { + for(j=0; j<uNumSrc; j++) + { + switch (scalar_channel_index) + { + case 0: channel_swizzle = pSource[j]->swizzlex; break; + case 1: channel_swizzle = pSource[j]->swizzley; break; + case 2: channel_swizzle = pSource[j]->swizzlez; break; + case 3: channel_swizzle = pSource[j]->swizzlew; break; + default: channel_swizzle = SQ_SEL_MASK; break; + } + if ( ((pSource[j]->rtype == SRC_REG_TEMPORARY) || + (pSource[j]->rtype == SRC_REG_INPUT)) + && (channel_swizzle <= SQ_SEL_W) ) + { + chan_counter[channel_swizzle]++; + } + } + } + if( (chan_counter[SQ_SEL_X] > 3) + || (chan_counter[SQ_SEL_Y] > 3) + || (chan_counter[SQ_SEL_Z] > 3) + || (chan_counter[SQ_SEL_W] > 3) ) /* each chan bank has only 3 ports. */ + { + bSplitInst = GL_TRUE; + } + } + + contiguous_slots_needed = 0; + + if(GL_TRUE == is_reduction_opcode(&(pAsm->D)) ) + { + contiguous_slots_needed = 4; + } + + initialize(pAsm); + + for (scalar_channel_index=0; + scalar_channel_index < number_of_scalar_operations; + scalar_channel_index++) + { + R700ALUInstruction* alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction); + if (alu_instruction_ptr == NULL) + { + return GL_FALSE; + } + Init_R700ALUInstruction(alu_instruction_ptr); + + //src 0 + current_source_index = 0; + pcurrent_source = &(pAsm->S[0].src); + + if (GL_FALSE == assemble_alu_src(alu_instruction_ptr, + current_source_index, + pcurrent_source, + scalar_channel_index) ) + { + return GL_FALSE; + } + + if (pAsm->D.dst.math == 0) + { + // Process source 1 + current_source_index = 1; + pcurrent_source = &(pAsm->S[current_source_index].src); + + if (GL_FALSE == assemble_alu_src(alu_instruction_ptr, + current_source_index, + pcurrent_source, + scalar_channel_index) ) + { + return GL_FALSE; + } + } + + //other bits + alu_instruction_ptr->m_Word0.f.index_mode = SQ_INDEX_LOOP; + + if( (is_single_scalar_operation == GL_TRUE) + || (GL_TRUE == bSplitInst) ) + { + alu_instruction_ptr->m_Word0.f.last = 1; + } + else + { + alu_instruction_ptr->m_Word0.f.last = (scalar_channel_index == 3) ? 1 : 0; + } + + alu_instruction_ptr->m_Word0.f.pred_sel = 0x0; + alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0; + alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0; + + // dst + if( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) || + (pAsm->D.dst.rtype == DST_REG_OUT) ) + { + alu_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg; + } + else + { + r700_error(ERROR_ASM_ALUDSTBADTYPE, "Only temp destination registers supported for ALU dest regs."); + return GL_FALSE; + } + + alu_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE; //D.rtype + + if ( is_single_scalar_operation == GL_TRUE ) + { + // Override scalar_channel_index since only one scalar value will be written + if(pAsm->D.dst.writex) + { + scalar_channel_index = 0; + } + else if(pAsm->D.dst.writey) + { + scalar_channel_index = 1; + } + else if(pAsm->D.dst.writez) + { + scalar_channel_index = 2; + } + else if(pAsm->D.dst.writew) + { + scalar_channel_index = 3; + } + } + + alu_instruction_ptr->m_Word1.f.dst_chan = scalar_channel_index; + + alu_instruction_ptr->m_Word1.f.clamp = pAsm->pILInst[pAsm->uiCurInst].SaturateMode; + + if (pAsm->D.dst.op3) + { + //op3 + + alu_instruction_ptr->m_Word1_OP3.f.alu_inst = pAsm->D.dst.opcode; + + //There's 3rd src for op3 + current_source_index = 2; + pcurrent_source = &(pAsm->S[current_source_index].src); + + if ( GL_FALSE == assemble_alu_src(alu_instruction_ptr, + current_source_index, + pcurrent_source, + scalar_channel_index) ) + { + return GL_FALSE; + } + } + else + { + //op2 + if (pAsm->bR6xx) + { + alu_instruction_ptr->m_Word1_OP2.f6.alu_inst = pAsm->D.dst.opcode; + + alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = 0x0; + alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = 0x0; + + //alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0; + //alu_instruction_ptr->m_Word1_OP2.f6.update_pred = 0x0; + switch (scalar_channel_index) + { + case 0: + alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writex; + break; + case 1: + alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writey; + break; + case 2: + alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writez; + break; + case 3: + alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writew; + break; + default: + alu_instruction_ptr->m_Word1_OP2.f6.write_mask = SQ_SEL_MASK; + break; + } + alu_instruction_ptr->m_Word1_OP2.f6.omod = SQ_ALU_OMOD_OFF; + } + else + { + alu_instruction_ptr->m_Word1_OP2.f.alu_inst = pAsm->D.dst.opcode; + + alu_instruction_ptr->m_Word1_OP2.f.src0_abs = 0x0; + alu_instruction_ptr->m_Word1_OP2.f.src1_abs = 0x0; + + //alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0; + //alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0; + switch (scalar_channel_index) + { + case 0: + alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writex; + break; + case 1: + alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writey; + break; + case 2: + alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writez; + break; + case 3: + alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writew; + break; + default: + alu_instruction_ptr->m_Word1_OP2.f.write_mask = SQ_SEL_MASK; + break; + } + alu_instruction_ptr->m_Word1_OP2.f.omod = SQ_ALU_OMOD_OFF; + } + } + + if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) ) + { + return GL_FALSE; + } + + /* + * Judge the type of current instruction, is it vector or scalar + * instruction. + */ + if (is_single_scalar_operation) + { + if(GL_FALSE == check_scalar(pAsm, alu_instruction_ptr) ) + { + return GL_FALSE; + } + } + else + { + if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) ) + { + return 1; + } + } + + contiguous_slots_needed = 0; + } + + return GL_TRUE; +} + +GLboolean next_ins(r700_AssemblerBase *pAsm) +{ + struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); + + if( GL_TRUE == IsTex(pILInst->Opcode) ) + { + if( GL_FALSE == assemble_tex_instruction(pAsm) ) + { + r700_error(ERROR_ASM_TEXINSTRUCTION, "Error assembling TEX instruction"); + return GL_FALSE; + } + } + else + { //ALU + if( GL_FALSE == assemble_alu_instruction(pAsm) ) + { + r700_error(ERROR_ASM_TEXINSTRUCTION, "Error assembling ALU instruction"); + return GL_FALSE; + } + } + + if(pAsm->D.dst.rtype == DST_REG_OUT) + { + if(pAsm->D.dst.op3) + { + // There is no mask for OP3 instructions, so all channels are written + pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number] = 0xF; + } + else + { + pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number] + |= (unsigned char)pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask; + } + } + + //reset for next inst. + pAsm->D.bits = 0; + pAsm->S[0].bits = 0; + pAsm->S[1].bits = 0; + pAsm->S[2].bits = 0; + + return GL_TRUE; +} + +GLboolean assemble_math_function(r700_AssemblerBase* pAsm, BITS opcode) +{ + BITS tmp; + + checkop1(pAsm); + + tmp = gethelpr(pAsm); + + // opcode tmp.x, a.x + // MOV dst, tmp.x + + pAsm->D.dst.opcode = opcode; + pAsm->D.dst.math = 1; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writex = 1; + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + // Now replicate result to all necessary channels in destination + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_ABS(r700_AssemblerBase *pAsm) +{ + checkop1(pAsm); + + pAsm->D.dst.opcode = SQ_OP2_INST_MAX; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + pAsm->S[1].bits = pAsm->S[0].bits; + flipneg_PVSSRC(&(pAsm->S[1].src)); + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_ADD(r700_AssemblerBase *pAsm) +{ + if( GL_FALSE == checkop2(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_ADD; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 1, -1) ) + { + return GL_FALSE; + } + + if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_SUB) + { + flipneg_PVSSRC(&(pAsm->S[1].src)); + } + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_BAD(char *opcode_str) +{ + r700_error(TODO_ASM_NEEDIMPINST, "Not yet implemented instruction (%s)", opcode_str); + return GL_FALSE; +} + +GLboolean assemble_CMP(r700_AssemblerBase *pAsm) +{ + int tmp; + + if( GL_FALSE == checkop3(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP3_INST_CNDGE; + pAsm->D.dst.op3 = 1; + + tmp = (-1); + + if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask) + { + //OP3 has no support for write mask + tmp = gethelpr(pAsm); + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + + nomask_PVSDST(&(pAsm->D.dst)); + } + else + { + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + } + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 2, 1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 1, 2) ) + { + return GL_FALSE; + } + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + if (0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask) + { + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + //tmp for source + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + + noneg_PVSSRC(&(pAsm->S[0].src)); + noswizzle_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + } + + return GL_TRUE; +} + +GLboolean assemble_COS(r700_AssemblerBase *pAsm) +{ + return assemble_math_function(pAsm, SQ_OP2_INST_COS); +} + +GLboolean assemble_DOT(r700_AssemblerBase *pAsm) +{ + if( GL_FALSE == checkop2(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_DOT4; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 1, -1) ) + { + return GL_FALSE; + } + + if(OPCODE_DP3 == pAsm->pILInst[pAsm->uiCurInst].Opcode) + { + zerocomp_PVSSRC(&(pAsm->S[0].src), 3); + zerocomp_PVSSRC(&(pAsm->S[1].src), 3); + } + else if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_DPH) + { + onecomp_PVSSRC(&(pAsm->S[1].src), 3); + } + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_DST(r700_AssemblerBase *pAsm) +{ + if( GL_FALSE == checkop2(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_MUL; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 1, -1) ) + { + return GL_FALSE; + } + + onecomp_PVSSRC(&(pAsm->S[0].src), 0); + onecomp_PVSSRC(&(pAsm->S[0].src), 3); + + onecomp_PVSSRC(&(pAsm->S[1].src), 0); + onecomp_PVSSRC(&(pAsm->S[1].src), 2); + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_EX2(r700_AssemblerBase *pAsm) +{ + return assemble_math_function(pAsm, SQ_OP2_INST_EXP_IEEE); +} + +GLboolean assemble_FLR(r700_AssemblerBase *pAsm) +{ + checkop1(pAsm); + + pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR; + + if ( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + if ( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_FLR_INT(r700_AssemblerBase *pAsm) +{ + return assemble_math_function(pAsm, SQ_OP2_INST_FLT_TO_INT); +} + +GLboolean assemble_FRC(r700_AssemblerBase *pAsm) +{ + checkop1(pAsm); + + pAsm->D.dst.opcode = SQ_OP2_INST_FRACT; + + if ( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + if ( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_KIL(r700_AssemblerBase *pAsm) +{ + checkop1(pAsm); + + pAsm->D.dst.opcode = SQ_OP2_INST_KILLGT; + + if ( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.writex = 0; + pAsm->D.dst.writey = 0; + pAsm->D.dst.writez = 0; + pAsm->D.dst.writew = 0; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = 0; + + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_0); + noneg_PVSSRC(&(pAsm->S[0].src)); + + pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; + + if(PROGRAM_TEMPORARY == pAsm->pILInst[pAsm->uiCurInst].DstReg.File) + { + pAsm->S[1].src.reg = pAsm->pILInst[pAsm->uiCurInst].DstReg.Index + pAsm->starting_temp_register_number; + } + else + { //PROGRAM_OUTPUT + pAsm->S[1].src.reg = pAsm->uiFP_OutputMap[pAsm->pILInst[pAsm->uiCurInst].DstReg.Index]; + } + + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + noswizzle_PVSSRC(&(pAsm->S[1].src)); + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + pAsm->pR700Shader->killIsUsed = GL_TRUE; + + return GL_TRUE; +} + +GLboolean assemble_LG2(r700_AssemblerBase *pAsm) +{ + return assemble_math_function(pAsm, SQ_OP2_INST_LOG_IEEE); +} + +GLboolean assemble_LRP(r700_AssemblerBase *pAsm) +{ + BITS tmp; + + if( GL_FALSE == checkop3(pAsm) ) + { + return GL_FALSE; + } + + tmp = gethelpr(pAsm); + + pAsm->D.dst.opcode = SQ_OP2_INST_ADD; + + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + nomask_PVSDST(&(pAsm->D.dst)); + + + if( GL_FALSE == assemble_src(pAsm, 1, 0) ) + { + return GL_FALSE; + } + + if ( GL_FALSE == assemble_src(pAsm, 2, 1) ) + { + return GL_FALSE; + } + + neg_PVSSRC(&(pAsm->S[1].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + pAsm->D.dst.op3 = 1; + + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + nomask_PVSDST(&(pAsm->D.dst)); + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + noswizzle_PVSSRC(&(pAsm->S[0].src)); + + + if( GL_FALSE == assemble_src(pAsm, 0, 1) ) + { + return GL_FALSE; + } + if( GL_FALSE == assemble_src(pAsm, 2, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + noswizzle_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_MAD(struct r700_AssemblerBase *pAsm) +{ + int tmp, ii; + GLboolean bReplaceDst = GL_FALSE; + struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); + + if( GL_FALSE == checkop3(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + pAsm->D.dst.op3 = 1; + + tmp = (-1); + + if(PROGRAM_TEMPORARY == pILInst->DstReg.File) + { /* TODO : more investigation on MAD src and dst using same register */ + for(ii=0; ii<3; ii++) + { + if( (PROGRAM_TEMPORARY == pILInst->SrcReg[ii].File) + && (pILInst->DstReg.Index == pILInst->SrcReg[ii].Index) ) + { + bReplaceDst = GL_TRUE; + break; + } + } + } + if(0xF != pILInst->DstReg.WriteMask) + { /* OP3 has no support for write mask */ + bReplaceDst = GL_TRUE; + } + + if(GL_TRUE == bReplaceDst) + { + tmp = gethelpr(pAsm); + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + + nomask_PVSDST(&(pAsm->D.dst)); + } + else + { + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + } + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 1, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 2, -1) ) + { + return GL_FALSE; + } + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + if (GL_TRUE == bReplaceDst) + { + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + //tmp for source + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + + noneg_PVSSRC(&(pAsm->S[0].src)); + noswizzle_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + } + + return GL_TRUE; +} + +/* LIT dst, src */ +GLboolean assemble_LIT(r700_AssemblerBase *pAsm) +{ + unsigned int dstReg; + unsigned int dstType; + unsigned int srcReg; + unsigned int srcType; + checkop1(pAsm); + int tmp = gethelpr(pAsm); + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + dstReg = pAsm->D.dst.reg; + dstType = pAsm->D.dst.rtype; + srcReg = pAsm->S[0].src.reg; + srcType = pAsm->S[0].src.rtype; + + /* dst.xw, <- 1.0 */ + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + pAsm->D.dst.rtype = dstType; + pAsm->D.dst.reg = dstReg; + pAsm->D.dst.writex = 1; + pAsm->D.dst.writey = 0; + pAsm->D.dst.writez = 0; + pAsm->D.dst.writew = 1; + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + noneg_PVSSRC(&(pAsm->S[0].src)); + pAsm->S[0].src.swizzlex = SQ_SEL_1; + pAsm->S[0].src.swizzley = SQ_SEL_1; + pAsm->S[0].src.swizzlez = SQ_SEL_1; + pAsm->S[0].src.swizzlew = SQ_SEL_1; + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + /* dst.y = max(src.x, 0.0) */ + pAsm->D.dst.opcode = SQ_OP2_INST_MAX; + pAsm->D.dst.rtype = dstType; + pAsm->D.dst.reg = dstReg; + pAsm->D.dst.writex = 0; + pAsm->D.dst.writey = 1; + pAsm->D.dst.writez = 0; + pAsm->D.dst.writew = 0; + pAsm->S[0].src.rtype = srcType; + pAsm->S[0].src.reg = srcReg; + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + noneg_PVSSRC(&(pAsm->S[0].src)); + pAsm->S[0].src.swizzlex = SQ_SEL_X; + pAsm->S[0].src.swizzley = SQ_SEL_X; + pAsm->S[0].src.swizzlez = SQ_SEL_X; + pAsm->S[0].src.swizzlew = SQ_SEL_X; + pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[1].src.reg = tmp; + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + noneg_PVSSRC(&(pAsm->S[1].src)); + pAsm->S[1].src.swizzlex = SQ_SEL_0; + pAsm->S[1].src.swizzley = SQ_SEL_0; + pAsm->S[1].src.swizzlez = SQ_SEL_0; + pAsm->S[1].src.swizzlew = SQ_SEL_0; + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + /* dst.w = log(src.y) */ + pAsm->D.dst.opcode = SQ_OP2_INST_LOG_CLAMPED; + pAsm->D.dst.math = 1; + pAsm->D.dst.rtype = dstType; + pAsm->D.dst.reg = dstReg; + pAsm->D.dst.writex = 0; + pAsm->D.dst.writey = 0; + pAsm->D.dst.writez = 0; + pAsm->D.dst.writew = 1; + pAsm->S[0].src.rtype = srcType; + pAsm->S[0].src.reg = srcReg; + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + noneg_PVSSRC(&(pAsm->S[0].src)); + pAsm->S[0].src.swizzlex = SQ_SEL_Y; + pAsm->S[0].src.swizzley = SQ_SEL_Y; + pAsm->S[0].src.swizzlez = SQ_SEL_Y; + pAsm->S[0].src.swizzlew = SQ_SEL_Y; + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + /* tmp.x = amd MUL_LIT(src.w, dst.w, src.x ) */ + pAsm->D.dst.opcode = SQ_OP3_INST_MUL_LIT; + pAsm->D.dst.op3 = 1; + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writex = 1; + pAsm->D.dst.writey = 0; + pAsm->D.dst.writez = 0; + pAsm->D.dst.writew = 0; + + pAsm->S[0].src.rtype = srcType; + pAsm->S[0].src.reg = srcReg; + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + noneg_PVSSRC(&(pAsm->S[0].src)); + pAsm->S[0].src.swizzlex = SQ_SEL_W; + pAsm->S[0].src.swizzley = SQ_SEL_W; + pAsm->S[0].src.swizzlez = SQ_SEL_W; + pAsm->S[0].src.swizzlew = SQ_SEL_W; + + pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[1].src.reg = dstReg; + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + noneg_PVSSRC(&(pAsm->S[1].src)); + pAsm->S[1].src.swizzlex = SQ_SEL_W; + pAsm->S[1].src.swizzley = SQ_SEL_W; + pAsm->S[1].src.swizzlez = SQ_SEL_W; + pAsm->S[1].src.swizzlew = SQ_SEL_W; + + pAsm->S[2].src.rtype = srcType; + pAsm->S[2].src.reg = srcReg; + setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE); + noneg_PVSSRC(&(pAsm->S[2].src)); + pAsm->S[2].src.swizzlex = SQ_SEL_X; + pAsm->S[2].src.swizzley = SQ_SEL_X; + pAsm->S[2].src.swizzlez = SQ_SEL_X; + pAsm->S[2].src.swizzlew = SQ_SEL_X; + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + /* dst.z = exp(tmp.x) */ + pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE; + pAsm->D.dst.math = 1; + pAsm->D.dst.rtype = dstType; + pAsm->D.dst.reg = dstReg; + pAsm->D.dst.writex = 0; + pAsm->D.dst.writey = 0; + pAsm->D.dst.writez = 1; + pAsm->D.dst.writew = 0; + + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + noneg_PVSSRC(&(pAsm->S[0].src)); + pAsm->S[0].src.swizzlex = SQ_SEL_X; + pAsm->S[0].src.swizzley = SQ_SEL_X; + pAsm->S[0].src.swizzlez = SQ_SEL_X; + pAsm->S[0].src.swizzlew = SQ_SEL_X; + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_MAX(r700_AssemblerBase *pAsm) +{ + if( GL_FALSE == checkop2(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_MAX; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 1, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_MIN(r700_AssemblerBase *pAsm) +{ + if( GL_FALSE == checkop2(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_MIN; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 1, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_MOV(r700_AssemblerBase *pAsm) +{ + checkop1(pAsm); + + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + if (GL_FALSE == assemble_dst(pAsm)) + { + return GL_FALSE; + } + + if (GL_FALSE == assemble_src(pAsm, 0, -1)) + { + return GL_FALSE; + } + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_MUL(r700_AssemblerBase *pAsm) +{ + if( GL_FALSE == checkop2(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_MUL; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 1, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_POW(r700_AssemblerBase *pAsm) +{ + BITS tmp; + + checkop1(pAsm); + + tmp = gethelpr(pAsm); + + // LG2 tmp.x, a.swizzle + pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE; + pAsm->D.dst.math = 1; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + nomask_PVSDST(&(pAsm->D.dst)); + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + // MUL tmp.x, tmp.x, b.swizzle + pAsm->D.dst.opcode = SQ_OP2_INST_MUL; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + nomask_PVSDST(&(pAsm->D.dst)); + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == assemble_src(pAsm, 1, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + // EX2 dst.mask, tmp.x + // EX2 tmp.x, tmp.x + pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE; + pAsm->D.dst.math = 1; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + nomask_PVSDST(&(pAsm->D.dst)); + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + // Now replicate result to all necessary channels in destination + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_RCP(r700_AssemblerBase *pAsm) +{ + return assemble_math_function(pAsm, SQ_OP2_INST_RECIP_IEEE); +} + +GLboolean assemble_RSQ(r700_AssemblerBase *pAsm) +{ + return assemble_math_function(pAsm, SQ_OP2_INST_RECIPSQRT_IEEE); +} + +GLboolean assemble_SIN(r700_AssemblerBase *pAsm) +{ + return assemble_math_function(pAsm, SQ_OP2_INST_SIN); +} + +GLboolean assemble_SCS(r700_AssemblerBase *pAsm) +{ + BITS tmp; + + checkop1(pAsm); + + tmp = gethelpr(pAsm); + + // COS tmp.x, a.x + pAsm->D.dst.opcode = SQ_OP2_INST_COS; + pAsm->D.dst.math = 1; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writex = 1; + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + // SIN tmp.y, a.x + pAsm->D.dst.opcode = SQ_OP2_INST_SIN; + pAsm->D.dst.math = 1; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writey = 1; + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + // MOV dst.mask, tmp + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + + noswizzle_PVSSRC(&(pAsm->S[0].src)); + pAsm->S[0].src.swizzlez = SQ_SEL_0; + pAsm->S[0].src.swizzlew = SQ_SEL_0; + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_SGE(r700_AssemblerBase *pAsm) +{ + if( GL_FALSE == checkop2(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_SETGE; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 1, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_SLT(r700_AssemblerBase *pAsm) +{ + if( GL_FALSE == checkop2(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_SETGT; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, 1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 1, 0) ) + { + return GL_FALSE; + } + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_STP(r700_AssemblerBase *pAsm) +{ + return GL_TRUE; +} + +GLboolean assemble_TEX(r700_AssemblerBase *pAsm) +{ + GLboolean src_const; + + switch (pAsm->pILInst[pAsm->uiCurInst].SrcReg[0].File) + { + case PROGRAM_CONSTANT: + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_STATE_VAR: + src_const = GL_TRUE; + case PROGRAM_TEMPORARY: + case PROGRAM_INPUT: + src_const = GL_FALSE; + } + + if (GL_TRUE == src_const) + { + r700_error(TODO_ASM_CONSTTEXADDR, "TODO: Texture coordinates from a constant register not supported."); + return GL_FALSE; + } + + switch (pAsm->pILInst[pAsm->uiCurInst].Opcode) + { + case OPCODE_TEX: + pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; + break; + case OPCODE_TXB: + r700_error(TODO_ASM_TXB, "do not support TXB yet"); + return GL_FALSE; + break; + case OPCODE_TXP: + /* TODO : tex proj version : divid first 3 components by 4th */ + pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; + break; + default: + r700_error(ERROR_ASM_BADTEXINST, "Internal error: bad texture op (not TEX)"); + return GL_FALSE; + break; + } + + // Set src1 to tex unit id + pAsm->S[1].src.reg = pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit; + pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; + + //No sw info from mesa compiler, so hard code here. + pAsm->S[1].src.swizzlex = SQ_SEL_X; + pAsm->S[1].src.swizzley = SQ_SEL_Y; + pAsm->S[1].src.swizzlez = SQ_SEL_Z; + pAsm->S[1].src.swizzlew = SQ_SEL_W; + + if( GL_FALSE == tex_dst(pAsm) ) + { + return GL_FALSE; + } + + if( GL_FALSE == tex_src(pAsm) ) + { + return GL_FALSE; + } + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_XPD(r700_AssemblerBase *pAsm) +{ + BITS tmp; + + if( GL_FALSE == checkop2(pAsm) ) + { + return GL_FALSE; + } + + tmp = gethelpr(pAsm); + + pAsm->D.dst.opcode = SQ_OP2_INST_MUL; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + nomask_PVSDST(&(pAsm->D.dst)); + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 1, -1) ) + { + return GL_FALSE; + } + + swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_0); + swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_0); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + pAsm->D.dst.op3 = 1; + + if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask) + { + tmp = gethelpr(pAsm); + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + + nomask_PVSDST(&(pAsm->D.dst)); + } + else + { + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + } + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 1, -1) ) + { + return GL_FALSE; + } + + swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_0); + swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_0); + + // result1 + (neg) result0 + setaddrmode_PVSSRC(&(pAsm->S[2].src),ADDR_ABSOLUTE); + pAsm->S[2].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[2].src.reg = tmp; + + neg_PVSSRC(&(pAsm->S[2].src)); + noswizzle_PVSSRC(&(pAsm->S[2].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + + if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask) + { + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + // Use tmp as source + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + + noneg_PVSSRC(&(pAsm->S[0].src)); + noswizzle_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + } + + return GL_TRUE; +} + +GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm) +{ + return GL_TRUE; +} + +GLboolean assemble_IF(r700_AssemblerBase *pAsm) +{ + return GL_TRUE; +} + +GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm) +{ + return GL_TRUE; +} + +GLboolean AssembleInstr(GLuint uiNumberInsts, + struct prog_instruction *pILInst, + r700_AssemblerBase *pR700AsmCode) +{ + GLuint i; + + pR700AsmCode->pILInst = pILInst; + for(i=0; i<uiNumberInsts; i++) + { + pR700AsmCode->uiCurInst = i; + + switch (pILInst[i].Opcode) + { + case OPCODE_ABS: + if ( GL_FALSE == assemble_ABS(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_ADD: + case OPCODE_SUB: + if ( GL_FALSE == assemble_ADD(pR700AsmCode) ) + return GL_FALSE; + break; + + case OPCODE_ARL: + r700_error(TODO_ASM_NEEDIMPINST, "Not yet implemented instruction OPCODE_ARL "); + //if ( GL_FALSE == assemble_BAD("ARL") ) + return GL_FALSE; + break; + case OPCODE_ARR: + r700_error(TODO_ASM_NEEDIMPINST, "Not yet implemented instruction OPCODE_ARR "); + //if ( GL_FALSE == assemble_BAD("ARR") ) + return GL_FALSE; + break; + + case OPCODE_CMP: + if ( GL_FALSE == assemble_CMP(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_COS: + if ( GL_FALSE == assemble_COS(pR700AsmCode) ) + return GL_FALSE; + break; + + case OPCODE_DP3: + case OPCODE_DP4: + case OPCODE_DPH: + if ( GL_FALSE == assemble_DOT(pR700AsmCode) ) + return GL_FALSE; + break; + + case OPCODE_DST: + if ( GL_FALSE == assemble_DST(pR700AsmCode) ) + return GL_FALSE; + break; + + case OPCODE_EX2: + if ( GL_FALSE == assemble_EX2(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_EXP: + r700_error(TODO_ASM_NEEDIMPINST, "Not yet implemented instruction OPCODE_EXP "); + //if ( GL_FALSE == assemble_BAD("EXP") ) + return GL_FALSE; + break; // approx of EX2 + + case OPCODE_FLR: + if ( GL_FALSE == assemble_FLR(pR700AsmCode) ) + return GL_FALSE; + break; + //case OP_FLR_INT: + // if ( GL_FALSE == assemble_FLR_INT() ) + // return GL_FALSE; + // break; + + case OPCODE_FRC: + if ( GL_FALSE == assemble_FRC(pR700AsmCode) ) + return GL_FALSE; + break; + + case OPCODE_KIL: + if ( GL_FALSE == assemble_KIL(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_LG2: + if ( GL_FALSE == assemble_LG2(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_LIT: + if ( GL_FALSE == assemble_LIT(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_LRP: + if ( GL_FALSE == assemble_LRP(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_LOG: + r700_error(TODO_ASM_NEEDIMPINST, "Not yet implemented instruction OPCODE_LOG "); + //if ( GL_FALSE == assemble_BAD("LOG") ) + return GL_FALSE; + break; // approx of LG2 + + case OPCODE_MAD: + if ( GL_FALSE == assemble_MAD(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_MAX: + if ( GL_FALSE == assemble_MAX(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_MIN: + if ( GL_FALSE == assemble_MIN(pR700AsmCode) ) + return GL_FALSE; + break; + + case OPCODE_MOV: + if ( GL_FALSE == assemble_MOV(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_MUL: + if ( GL_FALSE == assemble_MUL(pR700AsmCode) ) + return GL_FALSE; + break; + + case OPCODE_POW: + if ( GL_FALSE == assemble_POW(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_RCP: + if ( GL_FALSE == assemble_RCP(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_RSQ: + if ( GL_FALSE == assemble_RSQ(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_SIN: + if ( GL_FALSE == assemble_SIN(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_SCS: + if ( GL_FALSE == assemble_SCS(pR700AsmCode) ) + return GL_FALSE; + break; + + case OPCODE_SGE: + if ( GL_FALSE == assemble_SGE(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_SLT: + if ( GL_FALSE == assemble_SLT(pR700AsmCode) ) + return GL_FALSE; + break; + + //case OP_STP: + // if ( GL_FALSE == assemble_STP(pR700AsmCode) ) + // return GL_FALSE; + // break; + + case OPCODE_SWZ: + if ( GL_FALSE == assemble_MOV(pR700AsmCode) ) + { + return GL_FALSE; + } + else + { + if( (i+1)<uiNumberInsts ) + { + if(OPCODE_END != pILInst[i+1].Opcode) + { + if( GL_TRUE == IsTex(pILInst[i+1].Opcode) ) + { + pR700AsmCode->pInstDeps[i+1].nDstDep = i+1; //=1? + } + } + } + } + break; + + case OPCODE_TEX: + case OPCODE_TXB: + case OPCODE_TXP: + if ( GL_FALSE == assemble_TEX(pR700AsmCode) ) + return GL_FALSE; + break; + + case OPCODE_XPD: + if ( GL_FALSE == assemble_XPD(pR700AsmCode) ) + return GL_FALSE; + break; + + case OPCODE_IF : + if ( GL_FALSE == assemble_IF(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_ELSE : + r700_error(TODO_ASM_NEEDIMPINST, "Not yet implemented instruction OPCODE_ELSE "); + //if ( GL_FALSE == assemble_BAD("ELSE") ) + return GL_FALSE; + break; + case OPCODE_ENDIF: + if ( GL_FALSE == assemble_ENDIF(pR700AsmCode) ) + return GL_FALSE; + break; + + //case OPCODE_EXPORT: + // if ( GL_FALSE == assemble_EXPORT() ) + // return GL_FALSE; + // break; + + case OPCODE_END: + //pR700AsmCode->uiCurInst = i; + //This is to remaind that if in later exoort there is depth/stencil + //export, we need a mov to re-arrange DST channel, where using a + //psuedo inst, we will use this end inst to do it. + return GL_TRUE; + + default: + r700_error(ERROR_ASM_UNKNOWNILINST, "internal: unknown instruction"); + return GL_FALSE; + } + } + + return GL_TRUE; +} + +GLboolean Process_Export(r700_AssemblerBase* pAsm, + GLuint type, + GLuint export_starting_index, + GLuint export_count, + GLuint starting_register_number, + GLboolean is_depth_export) +{ + unsigned char ucWriteMask; + + check_current_clause(pAsm, CF_EMPTY_CLAUSE); + check_current_clause(pAsm, CF_EXPORT_CLAUSE); //alloc the cf_current_export_clause_ptr + + pAsm->cf_current_export_clause_ptr->m_Word0.f.type = type; + + switch (type) + { + case SQ_EXPORT_PIXEL: + if(GL_TRUE == is_depth_export) + { + pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_PIXEL_Z; + } + else + { + pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_PIXEL_MRT0 + export_starting_index; + } + break; + + case SQ_EXPORT_POS: + pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_POS_0 + export_starting_index; + break; + + case SQ_EXPORT_PARAM: + pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = 0x0 + export_starting_index; + break; + + default: + r700_error(ERROR_ASM_BADEXPORTTYPE, "Unknown export type: %d", type); + return GL_FALSE; + break; + } + + pAsm->cf_current_export_clause_ptr->m_Word0.f.rw_gpr = starting_register_number; + + pAsm->cf_current_export_clause_ptr->m_Word0.f.rw_rel = SQ_ABSOLUTE; + pAsm->cf_current_export_clause_ptr->m_Word0.f.index_gpr = 0x0; + pAsm->cf_current_export_clause_ptr->m_Word0.f.elem_size = 0x3; + + pAsm->cf_current_export_clause_ptr->m_Word1.f.burst_count = (export_count - 1); + pAsm->cf_current_export_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_export_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_export_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT; // _DONE + pAsm->cf_current_export_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + pAsm->cf_current_export_clause_ptr->m_Word1.f.barrier = 0x1; + + if (export_count == 1) + { + ucWriteMask = pAsm->pucOutMask[starting_register_number - pAsm->starting_export_register_number]; + + if( (ucWriteMask & 0x1) != 0) + { + pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_X; + } + else + { + pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_MASK; + } + if( ((ucWriteMask>>1) & 0x1) != 0) + { + pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_Y; + } + else + { + pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_MASK; + } + if( ((ucWriteMask>>2) & 0x1) != 0) + { + pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_Z; + } + else + { + pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_MASK; + } + if( ((ucWriteMask>>3) & 0x1) != 0) + { + pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_W; + } + else + { + pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_MASK; + } + } + else + { + // This should only be used if all components for all registers have been written + pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_X; + pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_Y; + pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_Z; + pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_W; + } + + pAsm->cf_last_export_ptr = pAsm->cf_current_export_clause_ptr; + + return GL_TRUE; +} + +GLboolean Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase *pAsm, BITS depth_channel_select) +{ + gl_inst_opcode Opcode_save = pAsm->pILInst[pAsm->uiCurInst].Opcode; //Should be OPCODE_END + pAsm->pILInst[pAsm->uiCurInst].Opcode = OPCODE_MOV; + + // MOV depth_export_register.hw_depth_channel, depth_export_register.depth_channel_select + + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = pAsm->depth_export_register_number; + + pAsm->D.dst.writex = 1; // depth goes in R channel for HW + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = pAsm->depth_export_register_number; + + setswizzle_PVSSRC(&(pAsm->S[0].src), depth_channel_select); + + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + pAsm->pILInst[pAsm->uiCurInst].Opcode = Opcode_save; + + return GL_TRUE; +} + +GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode, + GLbitfield OutputsWritten) +{ + unsigned int unBit; + + if(pR700AsmCode->depth_export_register_number >= 0) + { + if( GL_FALSE == Move_Depth_Exports_To_Correct_Channels(pR700AsmCode, SQ_SEL_Z) ) // depth + { + return GL_FALSE; + } + } + + unBit = 1 << FRAG_RESULT_COLR; + if(OutputsWritten & unBit) + { + if( GL_FALSE == Process_Export(pR700AsmCode, + SQ_EXPORT_PIXEL, + 0, + 1, + pR700AsmCode->uiFP_OutputMap[FRAG_RESULT_COLR], + GL_FALSE) ) + { + return GL_FALSE; + } + } + unBit = 1 << FRAG_RESULT_DEPR; + if(OutputsWritten & unBit) + { + if( GL_FALSE == Process_Export(pR700AsmCode, + SQ_EXPORT_PIXEL, + 0, + 1, + pR700AsmCode->uiFP_OutputMap[FRAG_RESULT_DEPR], + GL_TRUE)) + { + return GL_FALSE; + } + } + + if(pR700AsmCode->cf_last_export_ptr != NULL) + { + pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE; + pR700AsmCode->cf_last_export_ptr->m_Word1.f.end_of_program = 0x1; + } + + return GL_TRUE; +} + +GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode, + GLbitfield OutputsWritten) +{ + unsigned int unBit; + unsigned int i; + + GLuint export_starting_index = 0; + GLuint export_count = pR700AsmCode->number_of_exports; + + unBit = 1 << VERT_RESULT_HPOS; + if(OutputsWritten & unBit) + { + if( GL_FALSE == Process_Export(pR700AsmCode, + SQ_EXPORT_POS, + export_starting_index, + 1, + pR700AsmCode->ucVP_OutputMap[VERT_RESULT_HPOS], + GL_FALSE) ) + { + return GL_FALSE; + } + + export_count--; + + pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE; + } + + pR700AsmCode->number_of_exports = export_count; + + unBit = 1 << VERT_RESULT_COL0; + if(OutputsWritten & unBit) + { + if( GL_FALSE == Process_Export(pR700AsmCode, + SQ_EXPORT_PARAM, + export_starting_index, + 1, + pR700AsmCode->ucVP_OutputMap[VERT_RESULT_COL0], + GL_FALSE) ) + { + return GL_FALSE; + } + + export_starting_index++; + } + + unBit = 1 << VERT_RESULT_COL1; + if(OutputsWritten & unBit) + { + if( GL_FALSE == Process_Export(pR700AsmCode, + SQ_EXPORT_PARAM, + export_starting_index, + 1, + pR700AsmCode->ucVP_OutputMap[VERT_RESULT_COL1], + GL_FALSE) ) + { + return GL_FALSE; + } + + export_starting_index++; + } + + for(i=0; i<8; i++) + { + unBit = 1 << (VERT_RESULT_TEX0 + i); + if(OutputsWritten & unBit) + { + if( GL_FALSE == Process_Export(pR700AsmCode, + SQ_EXPORT_PARAM, + export_starting_index, + 1, + pR700AsmCode->ucVP_OutputMap[VERT_RESULT_TEX0 + i], + GL_FALSE) ) + { + return GL_FALSE; + } + + export_starting_index++; + } + } + + // At least one param should be exported + if (export_count) + { + pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE; + } + else + { + if( GL_FALSE == Process_Export(pR700AsmCode, + SQ_EXPORT_PARAM, + 0, + 1, + 0, + GL_FALSE) ) + { + return GL_FALSE; + } + + pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_0; + pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_0; + pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_0; + pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_1; + pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE; + } + + pR700AsmCode->cf_last_export_ptr->m_Word1.f.end_of_program = 0x1; + + return GL_TRUE; +} + +GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode) +{ + FREE(pR700AsmCode->pucOutMask); + FREE(pR700AsmCode->pInstDeps); + return GL_TRUE; +} + diff --git a/src/mesa/drivers/dri/r600/r700_assembler.h b/src/mesa/drivers/dri/r600/r700_assembler.h new file mode 100644 index 0000000000..e9b21b802e --- /dev/null +++ b/src/mesa/drivers/dri/r600/r700_assembler.h @@ -0,0 +1,512 @@ +/* + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Authors: + * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com> + */ + +#ifndef _R700_ASSEMBLER_H_ +#define _R700_ASSEMBLER_H_ + +#include "main/mtypes.h" +#include "shader/prog_instruction.h" + +#include "r700_chip.h" +#include "r700_shaderinst.h" +#include "r700_shader.h" + +typedef enum SHADER_PIPE_TYPE +{ + SPT_VP = 0, + SPT_FP = 1 +} SHADER_PIPE_TYPE; + +typedef enum ConstantCycles +{ + NUMBER_OF_CYCLES = 3, + NUMBER_OF_COMPONENTS = 4 +} ConstantCycles; + +typedef enum HARDWARE_LIMIT_VALUES +{ + TEMPORARY_REGISTER_OFFSET = SQ_ALU_SRC_GPR_BASE, + MAX_TEMPORARY_REGISTERS = SQ_ALU_SRC_GPR_SIZE, + MAX_CONSTANT_REGISTERS = SQ_ALU_SRC_CFILE_SIZE, + CFILE_REGISTER_OFFSET = SQ_ALU_SRC_CFILE_BASE, + NUMBER_OF_INPUT_COLORS = 2, + NUMBER_OF_OUTPUT_COLORS = 8, + NUMBER_OF_TEXTURE_UNITS = 16, + MEGA_FETCH_BYTES = 32 +} HARDWARE_LIMIT_VALUES; + +typedef enum AddressMode +{ + ADDR_ABSOLUTE = 0, + ADDR_RELATIVE_A0 = 1, + ADDR_RELATIVE_FLI_0 = 2, + NUMBER_OF_ADDR_MOD = 3 +} AddressMode; + +typedef enum SrcRegisterType +{ + SRC_REG_TEMPORARY = 0, + SRC_REG_INPUT = 1, + SRC_REG_CONSTANT = 2, + SRC_REG_ALT_TEMPORARY = 3, + NUMBER_OF_SRC_REG_TYPE = 4 +} SrcRegisterType; + +typedef enum DstRegisterType +{ + DST_REG_TEMPORARY = 0, + DST_REG_A0 = 1, + DST_REG_OUT = 2, + DST_REG_OUT_X_REPL = 3, + DST_REG_ALT_TEMPORARY = 4, + DST_REG_INPUT = 5, + NUMBER_OF_DST_REG_TYPE = 6 +} DstRegisterType; + +typedef unsigned int BITS; + +typedef struct PVSDSTtag +{ + BITS opcode:8; //(:6) //@@@ really should be 10 bits for OP2 + BITS math:1; + BITS predicated:1; //10 //8 + BITS pred_inv :1; //11 //8 + + BITS rtype:3; + BITS reg:10; //24 //20 + + BITS writex:1; + BITS writey:1; + BITS writez:1; + BITS writew:1; //28 + + BITS op3:1; // 29 Represents *_OP3_* ALU opcode + + BITS dualop:1; // 30 //26 + + BITS addrmode0:1; //31 //29 + BITS addrmode1:1; //32 +} PVSDST; + +typedef struct PVSSRCtag +{ + BITS rtype:4; + BITS addrmode0:1; + BITS reg:10; //15 (8) + BITS swizzlex:3; + BITS swizzley:3; + BITS swizzlez:3; + BITS swizzlew:3; //27 + + BITS negx:1; + BITS negy:1; + BITS negz:1; + BITS negw:1; //31 + //BITS addrsel:2; + BITS addrmode1:1; //32 +} PVSSRC; + +typedef struct PVSMATHtag +{ + BITS rtype:4; + BITS spare:1; + BITS reg:8; + BITS swizzlex:3; + BITS swizzley:3; + BITS dstoff:2; // 2 bits of dest offset into alt ram + BITS opcode:4; + BITS negx:1; + BITS negy:1; + BITS dstcomp:2; // select dest component + BITS spare2:3; +} PVSMATH; + +typedef union PVSDWORDtag +{ + BITS bits; + PVSDST dst; + PVSSRC src; + PVSMATH math; + float f; +} PVSDWORD; + +typedef struct VAP_OUT_VTX_FMT_0tag +{ + BITS pos:1; // 0 + BITS misc:1; + BITS clip_dist0:1; + BITS clip_dist1:1; + BITS pos_param:1; // 4 + + BITS color0:1; // 5 + BITS color1:1; + BITS color2:1; + BITS color3:1; + BITS color4:1; + BITS color5:1; + BITS color6:1; + BITS color7:1; + + BITS normal:1; + + BITS depth:1; // 14 + + BITS point_size:1; // 15 + BITS edge_flag:1; + BITS rta_index:1; // shares same channel as kill_flag + BITS kill_flag:1; + BITS viewport_index:1; // 19 + + BITS resvd1:12; // 20 +} VAP_OUT_VTX_FMT_0; + +typedef struct VAP_OUT_VTX_FMT_1tag +{ + BITS tex0comp:3; + BITS tex1comp:3; + BITS tex2comp:3; + BITS tex3comp:3; + BITS tex4comp:3; + BITS tex5comp:3; + BITS tex6comp:3; + BITS tex7comp:3; + + BITS resvd:8; +} VAP_OUT_VTX_FMT_1; + +typedef struct VAP_OUT_VTX_FMT_2tag +{ + BITS tex8comp :3; + BITS tex9comp :3; + BITS tex10comp:3; + BITS tex11comp:3; + BITS tex12comp:3; + BITS tex13comp:3; + BITS tex14comp:3; + BITS tex15comp:3; + + BITS resvd:8; +} VAP_OUT_VTX_FMT_2; + +typedef struct OUT_FRAGMENT_FMT_0tag +{ + BITS color0:1; + BITS color1:1; + BITS color2:1; + BITS color3:1; + BITS color4:1; + BITS color5:1; + BITS color6:1; + BITS color7:1; + + BITS depth:1; + BITS stencil_ref:1; + BITS coverage_to_mask:1; + BITS mask:1; + + BITS resvd1:20; +} OUT_FRAGMENT_FMT_0; + +typedef enum CF_CLAUSE_TYPE +{ + CF_EXPORT_CLAUSE, + CF_ALU_CLAUSE, + CF_TEX_CLAUSE, + CF_VTX_CLAUSE, + CF_OTHER_CLAUSE, + CF_EMPTY_CLAUSE, + NUMBER_CF_CLAUSE_TYPES +} CF_CLAUSE_TYPE; + +enum +{ + MAX_BOOL_CONSTANTS = 32, + MAX_INT_CONSTANTS = 32, + MAX_FLOAT_CONSTANTS = 256, + + FC_NONE = 0, + FC_IF = 1, + FC_LOOP = 2, + FC_REP = 3, + + COND_NONE = 0, + COND_BOOL = 1, + COND_PRED = 2, + COND_ALU = 3, + + SAFEDIST_TEX = 6, ///< safe distance for using result of texture lookup in alu or another tex lookup + SAFEDIST_ALU = 6 ///< the same for alu->fc +}; + +typedef struct FC_LEVEL +{ + unsigned int first; ///< first fc instruction on level (if, rep, loop) + unsigned int* mid; ///< middle instructions - else or all breaks on this level + unsigned int midLen; + unsigned int type; + unsigned int cond; + unsigned int inv; + unsigned int bpush; ///< 1 if first instruction does branch stack push + int id; ///< id of bool or int variable +} FC_LEVEL; + +typedef struct VTX_FETCH_METHOD +{ + GLboolean bEnableMini; + GLuint mega_fetch_remainder; +} VTX_FETCH_METHOD; + +typedef struct r700_AssemblerBase +{ + R700ControlFlowSXClause* cf_last_export_ptr; + R700ControlFlowSXClause* cf_current_export_clause_ptr; + R700ControlFlowALUClause* cf_current_alu_clause_ptr; + R700ControlFlowGenericClause* cf_current_tex_clause_ptr; + R700ControlFlowGenericClause* cf_current_vtx_clause_ptr; + R700ControlFlowGenericClause* cf_current_cf_clause_ptr; + + //Result shader + R700_Shader * pR700Shader; + + // No clause has been created yet + CF_CLAUSE_TYPE cf_current_clause_type; + + GLuint number_of_exports; + GLuint number_of_colorandz_exports; + GLuint number_of_export_opcodes; + + PVSDWORD D; + PVSDWORD S[3]; + + unsigned int uLastPosUpdate; + + OUT_FRAGMENT_FMT_0 fp_stOutFmt0; + + unsigned int uIIns; + unsigned int uOIns; + unsigned int number_used_registers; + unsigned int uUsedConsts; + + // Fragment programs + unsigned int uiFP_AttributeMap[FRAG_ATTRIB_MAX]; + unsigned int uiFP_OutputMap[FRAG_RESULT_MAX]; + unsigned int uBoolConsts; + unsigned int uIntConsts; + unsigned int uInsts; + unsigned int uConsts; + + // Vertex programs + unsigned char ucVP_AttributeMap[VERT_ATTRIB_MAX]; + unsigned char ucVP_OutputMap[VERT_RESULT_MAX]; + + unsigned char * pucOutMask; + + //----------------------------------------------------------------------------------- + // flow control members + //----------------------------------------------------------------------------------- + unsigned int FCSP; + FC_LEVEL fc_stack[32]; + + unsigned int branch_depth; + unsigned int max_branch_depth; + + //----------------------------------------------------------------------------------- + // ArgSubst used in Assemble_Source() function + //----------------------------------------------------------------------------------- + int aArgSubst[4]; + + GLint hw_gpr[ NUMBER_OF_CYCLES ][ NUMBER_OF_COMPONENTS ]; + GLint hw_cfile_addr[ NUMBER_OF_COMPONENTS ]; + GLint hw_cfile_chan[ NUMBER_OF_COMPONENTS ]; + + GLuint uOutputs; + + GLint color_export_register_number[NUMBER_OF_OUTPUT_COLORS]; + GLint depth_export_register_number; + + GLint stencil_export_register_number; + GLint coverage_to_mask_export_register_number; + GLint mask_export_register_number; + + GLuint starting_export_register_number; + GLuint starting_vfetch_register_number; + GLuint starting_temp_register_number; + GLuint uHelpReg; + GLuint uFirstHelpReg; + + GLboolean input_position_is_used; + GLboolean input_normal_is_used; + + GLboolean input_color_is_used[NUMBER_OF_INPUT_COLORS]; + + GLboolean input_texture_unit_is_used[NUMBER_OF_TEXTURE_UNITS]; + + R700VertexGenericFetch* vfetch_instruction_ptr_array[VERT_ATTRIB_MAX]; + + GLuint number_of_inputs; + + InstDeps *pInstDeps; + + SHADER_PIPE_TYPE currentShaderType; + struct prog_instruction * pILInst; + GLuint uiCurInst; + GLboolean bR6xx; +} r700_AssemblerBase; + +//Internal use +BITS addrmode_PVSDST(PVSDST * pPVSDST); +void setaddrmode_PVSDST(PVSDST * pPVSDST, BITS addrmode); +void nomask_PVSDST(PVSDST * pPVSDST); +BITS addrmode_PVSSRC(PVSSRC* pPVSSRC); +void setaddrmode_PVSSRC(PVSSRC* pPVSSRC, BITS addrmode); +void setswizzle_PVSSRC(PVSSRC* pPVSSRC, BITS swz); +void noswizzle_PVSSRC(PVSSRC* pPVSSRC); +void swizzleagain_PVSSRC(PVSSRC * pPVSSRC, BITS x, BITS y, BITS z, BITS w); +void neg_PVSSRC(PVSSRC* pPVSSRC); +void noneg_PVSSRC(PVSSRC* pPVSSRC); +void flipneg_PVSSRC(PVSSRC* pPVSSRC); +void zerocomp_PVSSRC(PVSSRC* pPVSSRC, int c); +void onecomp_PVSSRC(PVSSRC* pPVSSRC, int c); +BITS is_misc_component_exported(VAP_OUT_VTX_FMT_0* pOutVTXFmt0); +BITS is_depth_component_exported(OUT_FRAGMENT_FMT_0* pFPOutFmt) ; +GLboolean is_reduction_opcode(PVSDWORD * dest); +GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size); + +unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm); + +GLboolean IsTex(gl_inst_opcode Opcode); +GLboolean IsAlu(gl_inst_opcode Opcode); +int check_current_clause(r700_AssemblerBase* pAsm, + CF_CLAUSE_TYPE new_clause_type); +GLboolean add_vfetch_instruction(r700_AssemblerBase* pAsm, + R700VertexInstruction* vertex_instruction_ptr); +GLboolean add_tex_instruction(r700_AssemblerBase* pAsm, + R700TextureInstruction* tex_instruction_ptr); +GLboolean assemble_vfetch_instruction(r700_AssemblerBase* pAsm, + GLuint gl_client_id, + GLuint destination_register, + GLuint number_of_elements, + GLenum dataElementType, + VTX_FETCH_METHOD* pFetchMethod); +GLuint gethelpr(r700_AssemblerBase* pAsm); +void resethelpr(r700_AssemblerBase* pAsm); +void checkop_init(r700_AssemblerBase* pAsm); +GLboolean mov_temp(r700_AssemblerBase* pAsm, int src); +GLboolean checkop1(r700_AssemblerBase* pAsm); +GLboolean checkop2(r700_AssemblerBase* pAsm); +GLboolean checkop3(r700_AssemblerBase* pAsm); +GLboolean assemble_src(r700_AssemblerBase *pAsm, + int src, + int fld); +GLboolean assemble_dst(r700_AssemblerBase *pAsm); +GLboolean tex_dst(r700_AssemblerBase *pAsm); +GLboolean tex_src(r700_AssemblerBase *pAsm); +GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm); +void initialize(r700_AssemblerBase *pAsm); +GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr, + int source_index, + PVSSRC* pSource, + BITS scalar_channel_index); +GLboolean add_alu_instruction(r700_AssemblerBase* pAsm, + R700ALUInstruction* alu_instruction_ptr, + GLuint contiguous_slots_needed); +void get_src_properties(R700ALUInstruction* alu_instruction_ptr, + int source_index, + BITS* psrc_sel, + BITS* psrc_rel, + BITS* psrc_chan, + BITS* psrc_neg); +int is_cfile(BITS sel); +int is_const(BITS sel); +int is_gpr(BITS sel); +GLboolean reserve_cfile(r700_AssemblerBase* pAsm, + GLuint sel, + GLuint chan); +GLboolean reserve_gpr(r700_AssemblerBase* pAsm, GLuint sel, GLuint chan, GLuint cycle); +GLboolean cycle_for_scalar_bank_swizzle(const int swiz, const int sel, GLuint* pCycle); +GLboolean cycle_for_vector_bank_swizzle(const int swiz, const int sel, GLuint* pCycle); +GLboolean check_scalar(r700_AssemblerBase* pAsm, + R700ALUInstruction* alu_instruction_ptr); +GLboolean check_vector(r700_AssemblerBase* pAsm, + R700ALUInstruction* alu_instruction_ptr); +GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm); +GLboolean next_ins(r700_AssemblerBase *pAsm); +GLboolean assemble_math_function(r700_AssemblerBase* pAsm, BITS opcode); +GLboolean assemble_ABS(r700_AssemblerBase *pAsm); +GLboolean assemble_ADD(r700_AssemblerBase *pAsm); +GLboolean assemble_BAD(char *opcode_str); +GLboolean assemble_CMP(r700_AssemblerBase *pAsm); +GLboolean assemble_COS(r700_AssemblerBase *pAsm); +GLboolean assemble_DOT(r700_AssemblerBase *pAsm); +GLboolean assemble_DST(r700_AssemblerBase *pAsm); +GLboolean assemble_EX2(r700_AssemblerBase *pAsm); +GLboolean assemble_FLR(r700_AssemblerBase *pAsm); +GLboolean assemble_FLR_INT(r700_AssemblerBase *pAsm); +GLboolean assemble_FRC(r700_AssemblerBase *pAsm); +GLboolean assemble_KIL(r700_AssemblerBase *pAsm); +GLboolean assemble_LG2(r700_AssemblerBase *pAsm); +GLboolean assemble_LRP(r700_AssemblerBase *pAsm); +GLboolean assemble_MAD(r700_AssemblerBase *pAsm); +GLboolean assemble_LIT(r700_AssemblerBase *pAsm); +GLboolean assemble_MAX(r700_AssemblerBase *pAsm); +GLboolean assemble_MIN(r700_AssemblerBase *pAsm); +GLboolean assemble_MOV(r700_AssemblerBase *pAsm); +GLboolean assemble_MUL(r700_AssemblerBase *pAsm); +GLboolean assemble_POW(r700_AssemblerBase *pAsm); +GLboolean assemble_RCP(r700_AssemblerBase *pAsm); +GLboolean assemble_RSQ(r700_AssemblerBase *pAsm); +GLboolean assemble_SIN(r700_AssemblerBase *pAsm); +GLboolean assemble_SCS(r700_AssemblerBase *pAsm); +GLboolean assemble_SGE(r700_AssemblerBase *pAsm); +GLboolean assemble_SLT(r700_AssemblerBase *pAsm); +GLboolean assemble_STP(r700_AssemblerBase *pAsm); +GLboolean assemble_TEX(r700_AssemblerBase *pAsm); +GLboolean assemble_XPD(r700_AssemblerBase *pAsm); +GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm); +GLboolean assemble_IF(r700_AssemblerBase *pAsm); +GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm); + +GLboolean Process_Export(r700_AssemblerBase* pAsm, + GLuint type, + GLuint export_starting_index, + GLuint export_count, + GLuint starting_register_number, + GLboolean is_depth_export); +GLboolean Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase *pAsm, + BITS depth_channel_select); + + +//Interface +GLboolean AssembleInstr(GLuint uiNumberInsts, + struct prog_instruction *pILInst, + r700_AssemblerBase *pR700AsmCode); +GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode, GLbitfield OutputsWritten); +GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode, GLbitfield OutputsWritten); + +int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700_Shader* pShader); +GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode); + +#endif //_R700_ASSEMBLER_H_ diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c new file mode 100644 index 0000000000..34c9351177 --- /dev/null +++ b/src/mesa/drivers/dri/r600/r700_chip.c @@ -0,0 +1,415 @@ +/* + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Authors: + * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com> + * CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com> + */ + +#include "main/imports.h" +#include "main/glheader.h" + +#include "r700_interface.h" + +#include "r700_chip.h" +#include "r700_state.h" +#include "r700_oglprog.h" +#include "r700_tex.h" +#include "r700_emit.h" + +extern const struct tnl_pipeline_stage *r700_pipeline[]; + +static GLboolean r700DestroyChipObj(void* pvChipObj) +{ + R700_CHIP_CONTEXT *r700; + + if(NULL == pvChipObj) + { + return GL_TRUE; + } + + r700 = (R700_CHIP_CONTEXT *)pvChipObj; + + FREE(r700->pStateList); + + FREE(r700); + + return GL_TRUE; +} + +static void r700InitFuncs(struct dd_function_table *functions) +{ + r700InitStateFuncs(functions); + r700InitTextureFuncs(functions); + r700InitShaderFuncs(functions); + r700InitClearFuncs(functions); +} + +#define LINK_STATES(reg) \ +do \ +{ \ + pStateListWork->puiValue = (unsigned int*)&(r700->reg); \ + pStateListWork->unOffset = mm##reg - ASIC_CONTEXT_BASE_INDEX; \ + pStateListWork->pNext = pStateListWork + 1; \ + pStateListWork++; \ +}while(0) + +GLboolean r700InitChipObject(context_t *context) +{ + ContextState * pStateListWork; + + R700_CHIP_CONTEXT *r700 = CALLOC( sizeof(R700_CHIP_CONTEXT) ); + + context->chipobj.pvChipObj = (void*)r700; + + context->chipobj.DestroyChipObj = r700DestroyChipObj; + + context->chipobj.GetTexObjSize = r700GetTexObjSize; + + context->chipobj.stages = r700_pipeline; + + context->chipobj.InitFuncs = r700InitFuncs; + + context->chipobj.InitState = r700InitState; + + /* init state list */ + r700->pStateList = (ContextState*) MALLOC (sizeof(ContextState)*sizeof(R700_CHIP_CONTEXT)/sizeof(unsigned int)); + pStateListWork = r700->pStateList; + + LINK_STATES(DB_DEPTH_SIZE); + LINK_STATES(DB_DEPTH_VIEW); + + LINK_STATES(DB_DEPTH_BASE); + LINK_STATES(DB_DEPTH_INFO); + LINK_STATES(DB_HTILE_DATA_BASE); + + LINK_STATES(DB_STENCIL_CLEAR); + LINK_STATES(DB_DEPTH_CLEAR); + + LINK_STATES(PA_SC_SCREEN_SCISSOR_TL); + LINK_STATES(PA_SC_SCREEN_SCISSOR_BR); + + LINK_STATES(CB_COLOR0_BASE); + + LINK_STATES(CB_COLOR0_SIZE); + + LINK_STATES(CB_COLOR0_VIEW); + + LINK_STATES(CB_COLOR0_INFO); + LINK_STATES(CB_COLOR1_INFO); + LINK_STATES(CB_COLOR2_INFO); + LINK_STATES(CB_COLOR3_INFO); + LINK_STATES(CB_COLOR4_INFO); + LINK_STATES(CB_COLOR5_INFO); + LINK_STATES(CB_COLOR6_INFO); + LINK_STATES(CB_COLOR7_INFO); + + LINK_STATES(CB_COLOR0_TILE); + + LINK_STATES(CB_COLOR0_FRAG); + + LINK_STATES(CB_COLOR0_MASK); + + LINK_STATES(PA_SC_WINDOW_OFFSET); + LINK_STATES(PA_SC_WINDOW_SCISSOR_TL); + LINK_STATES(PA_SC_WINDOW_SCISSOR_BR); + LINK_STATES(PA_SC_CLIPRECT_RULE); + LINK_STATES(PA_SC_CLIPRECT_0_TL); + LINK_STATES(PA_SC_CLIPRECT_0_BR); + LINK_STATES(PA_SC_CLIPRECT_1_TL); + LINK_STATES(PA_SC_CLIPRECT_1_BR); + LINK_STATES(PA_SC_CLIPRECT_2_TL); + LINK_STATES(PA_SC_CLIPRECT_2_BR); + LINK_STATES(PA_SC_CLIPRECT_3_TL); + LINK_STATES(PA_SC_CLIPRECT_3_BR); + + LINK_STATES(PA_SC_EDGERULE); + + LINK_STATES(CB_TARGET_MASK); + LINK_STATES(CB_SHADER_MASK); + LINK_STATES(PA_SC_GENERIC_SCISSOR_TL); + LINK_STATES(PA_SC_GENERIC_SCISSOR_BR); + + LINK_STATES(PA_SC_VPORT_SCISSOR_0_TL); + LINK_STATES(PA_SC_VPORT_SCISSOR_0_BR); + LINK_STATES(PA_SC_VPORT_SCISSOR_1_TL); + LINK_STATES(PA_SC_VPORT_SCISSOR_1_BR); + + LINK_STATES(PA_SC_VPORT_ZMIN_0); + LINK_STATES(PA_SC_VPORT_ZMAX_0); + + LINK_STATES(SX_MISC); + + LINK_STATES(SQ_VTX_SEMANTIC_0); + LINK_STATES(SQ_VTX_SEMANTIC_1); + LINK_STATES(SQ_VTX_SEMANTIC_2); + LINK_STATES(SQ_VTX_SEMANTIC_3); + LINK_STATES(SQ_VTX_SEMANTIC_4); + LINK_STATES(SQ_VTX_SEMANTIC_5); + LINK_STATES(SQ_VTX_SEMANTIC_6); + LINK_STATES(SQ_VTX_SEMANTIC_7); + LINK_STATES(SQ_VTX_SEMANTIC_8); + LINK_STATES(SQ_VTX_SEMANTIC_9); + LINK_STATES(SQ_VTX_SEMANTIC_10); + LINK_STATES(SQ_VTX_SEMANTIC_11); + LINK_STATES(SQ_VTX_SEMANTIC_12); + LINK_STATES(SQ_VTX_SEMANTIC_13); + LINK_STATES(SQ_VTX_SEMANTIC_14); + LINK_STATES(SQ_VTX_SEMANTIC_15); + LINK_STATES(SQ_VTX_SEMANTIC_16); + LINK_STATES(SQ_VTX_SEMANTIC_17); + LINK_STATES(SQ_VTX_SEMANTIC_18); + LINK_STATES(SQ_VTX_SEMANTIC_19); + LINK_STATES(SQ_VTX_SEMANTIC_20); + LINK_STATES(SQ_VTX_SEMANTIC_21); + LINK_STATES(SQ_VTX_SEMANTIC_22); + LINK_STATES(SQ_VTX_SEMANTIC_23); + LINK_STATES(SQ_VTX_SEMANTIC_24); + LINK_STATES(SQ_VTX_SEMANTIC_25); + LINK_STATES(SQ_VTX_SEMANTIC_26); + LINK_STATES(SQ_VTX_SEMANTIC_27); + LINK_STATES(SQ_VTX_SEMANTIC_28); + LINK_STATES(SQ_VTX_SEMANTIC_29); + LINK_STATES(SQ_VTX_SEMANTIC_30); + LINK_STATES(SQ_VTX_SEMANTIC_31); + + LINK_STATES(VGT_MAX_VTX_INDX); + LINK_STATES(VGT_MIN_VTX_INDX); + LINK_STATES(VGT_INDX_OFFSET); + LINK_STATES(VGT_MULTI_PRIM_IB_RESET_INDX); + LINK_STATES(SX_ALPHA_TEST_CONTROL); + + LINK_STATES(CB_BLEND_RED); + LINK_STATES(CB_BLEND_GREEN); + LINK_STATES(CB_BLEND_BLUE); + LINK_STATES(CB_BLEND_ALPHA); + + LINK_STATES(PA_CL_VPORT_XSCALE); + LINK_STATES(PA_CL_VPORT_XOFFSET); + LINK_STATES(PA_CL_VPORT_YSCALE); + LINK_STATES(PA_CL_VPORT_YOFFSET); + LINK_STATES(PA_CL_VPORT_ZSCALE); + LINK_STATES(PA_CL_VPORT_ZOFFSET); + + LINK_STATES(SPI_VS_OUT_ID_0); + LINK_STATES(SPI_VS_OUT_ID_1); + LINK_STATES(SPI_VS_OUT_ID_2); + LINK_STATES(SPI_VS_OUT_ID_3); + LINK_STATES(SPI_VS_OUT_ID_4); + LINK_STATES(SPI_VS_OUT_ID_5); + LINK_STATES(SPI_VS_OUT_ID_6); + LINK_STATES(SPI_VS_OUT_ID_7); + LINK_STATES(SPI_VS_OUT_ID_8); + LINK_STATES(SPI_VS_OUT_ID_9); + + LINK_STATES(SPI_PS_INPUT_CNTL_0); + LINK_STATES(SPI_PS_INPUT_CNTL_1); + LINK_STATES(SPI_PS_INPUT_CNTL_2); + LINK_STATES(SPI_PS_INPUT_CNTL_3); + LINK_STATES(SPI_PS_INPUT_CNTL_4); + LINK_STATES(SPI_PS_INPUT_CNTL_5); + LINK_STATES(SPI_PS_INPUT_CNTL_6); + LINK_STATES(SPI_PS_INPUT_CNTL_7); + LINK_STATES(SPI_PS_INPUT_CNTL_8); + LINK_STATES(SPI_PS_INPUT_CNTL_9); + LINK_STATES(SPI_PS_INPUT_CNTL_10); + LINK_STATES(SPI_PS_INPUT_CNTL_11); + LINK_STATES(SPI_PS_INPUT_CNTL_12); + LINK_STATES(SPI_PS_INPUT_CNTL_13); + LINK_STATES(SPI_PS_INPUT_CNTL_14); + LINK_STATES(SPI_PS_INPUT_CNTL_15); + LINK_STATES(SPI_PS_INPUT_CNTL_16); + LINK_STATES(SPI_PS_INPUT_CNTL_17); + LINK_STATES(SPI_PS_INPUT_CNTL_18); + LINK_STATES(SPI_PS_INPUT_CNTL_19); + LINK_STATES(SPI_PS_INPUT_CNTL_20); + LINK_STATES(SPI_PS_INPUT_CNTL_21); + LINK_STATES(SPI_PS_INPUT_CNTL_22); + LINK_STATES(SPI_PS_INPUT_CNTL_23); + LINK_STATES(SPI_PS_INPUT_CNTL_24); + LINK_STATES(SPI_PS_INPUT_CNTL_25); + LINK_STATES(SPI_PS_INPUT_CNTL_26); + LINK_STATES(SPI_PS_INPUT_CNTL_27); + LINK_STATES(SPI_PS_INPUT_CNTL_28); + LINK_STATES(SPI_PS_INPUT_CNTL_29); + LINK_STATES(SPI_PS_INPUT_CNTL_30); + LINK_STATES(SPI_PS_INPUT_CNTL_31); + LINK_STATES(SPI_VS_OUT_CONFIG); + LINK_STATES(SPI_THREAD_GROUPING); + LINK_STATES(SPI_PS_IN_CONTROL_0); + LINK_STATES(SPI_PS_IN_CONTROL_1); + + LINK_STATES(SPI_INPUT_Z); + LINK_STATES(SPI_FOG_CNTL); + + LINK_STATES(CB_BLEND0_CONTROL); + + LINK_STATES(CB_SHADER_CONTROL); + + /*LINK_STATES(VGT_DRAW_INITIATOR); */ + + LINK_STATES(DB_DEPTH_CONTROL); + + LINK_STATES(CB_COLOR_CONTROL); + LINK_STATES(DB_SHADER_CONTROL); + LINK_STATES(PA_CL_CLIP_CNTL); + LINK_STATES(PA_SU_SC_MODE_CNTL); + LINK_STATES(PA_CL_VTE_CNTL); + LINK_STATES(PA_CL_VS_OUT_CNTL); + LINK_STATES(PA_CL_NANINF_CNTL); + + LINK_STATES(SQ_PGM_START_PS); + LINK_STATES(SQ_PGM_RESOURCES_PS); + LINK_STATES(SQ_PGM_EXPORTS_PS); + LINK_STATES(SQ_PGM_START_VS); + LINK_STATES(SQ_PGM_RESOURCES_VS); + LINK_STATES(SQ_PGM_START_GS); + LINK_STATES(SQ_PGM_RESOURCES_GS); + LINK_STATES(SQ_PGM_START_ES); + LINK_STATES(SQ_PGM_RESOURCES_ES); + LINK_STATES(SQ_PGM_START_FS); + LINK_STATES(SQ_PGM_RESOURCES_FS); + LINK_STATES(SQ_ESGS_RING_ITEMSIZE); + LINK_STATES(SQ_GSVS_RING_ITEMSIZE); + LINK_STATES(SQ_ESTMP_RING_ITEMSIZE); + LINK_STATES(SQ_GSTMP_RING_ITEMSIZE); + LINK_STATES(SQ_VSTMP_RING_ITEMSIZE); + LINK_STATES(SQ_PSTMP_RING_ITEMSIZE); + LINK_STATES(SQ_FBUF_RING_ITEMSIZE); + LINK_STATES(SQ_REDUC_RING_ITEMSIZE); + LINK_STATES(SQ_GS_VERT_ITEMSIZE); + LINK_STATES(SQ_PGM_CF_OFFSET_PS); + LINK_STATES(SQ_PGM_CF_OFFSET_VS); + LINK_STATES(SQ_PGM_CF_OFFSET_GS); + LINK_STATES(SQ_PGM_CF_OFFSET_ES); + LINK_STATES(SQ_PGM_CF_OFFSET_FS); + + LINK_STATES(PA_SU_POINT_SIZE); + LINK_STATES(PA_SU_POINT_MINMAX); + LINK_STATES(PA_SU_LINE_CNTL); + LINK_STATES(PA_SC_LINE_STIPPLE); + LINK_STATES(VGT_OUTPUT_PATH_CNTL); + + LINK_STATES(VGT_GS_MODE); + + LINK_STATES(PA_SC_MPASS_PS_CNTL); + LINK_STATES(PA_SC_MODE_CNTL); + + LINK_STATES(VGT_PRIMITIVEID_EN); + LINK_STATES(VGT_DMA_NUM_INSTANCES); + + LINK_STATES(VGT_MULTI_PRIM_IB_RESET_EN); + + LINK_STATES(VGT_INSTANCE_STEP_RATE_0); + LINK_STATES(VGT_INSTANCE_STEP_RATE_1); + + LINK_STATES(VGT_STRMOUT_EN); + LINK_STATES(VGT_REUSE_OFF); + + LINK_STATES(PA_SC_LINE_CNTL); + LINK_STATES(PA_SC_AA_CONFIG); + LINK_STATES(PA_SU_VTX_CNTL); + LINK_STATES(PA_CL_GB_VERT_CLIP_ADJ); + LINK_STATES(PA_CL_GB_VERT_DISC_ADJ); + LINK_STATES(PA_CL_GB_HORZ_CLIP_ADJ); + LINK_STATES(PA_CL_GB_HORZ_DISC_ADJ); + LINK_STATES(PA_SC_AA_SAMPLE_LOCS_MCTX); + LINK_STATES(PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX); + + LINK_STATES(CB_CLRCMP_CONTROL); + LINK_STATES(CB_CLRCMP_SRC); + LINK_STATES(CB_CLRCMP_DST); + LINK_STATES(CB_CLRCMP_MSK); + + LINK_STATES(PA_SC_AA_MASK); + + LINK_STATES(VGT_VERTEX_REUSE_BLOCK_CNTL); + LINK_STATES(VGT_OUT_DEALLOC_CNTL); + + LINK_STATES(DB_RENDER_CONTROL); + LINK_STATES(DB_RENDER_OVERRIDE); + + LINK_STATES(DB_HTILE_SURFACE); + + LINK_STATES(DB_ALPHA_TO_MASK); + + LINK_STATES(PA_SU_POLY_OFFSET_DB_FMT_CNTL); + LINK_STATES(PA_SU_POLY_OFFSET_CLAMP); + LINK_STATES(PA_SU_POLY_OFFSET_FRONT_SCALE); + LINK_STATES(PA_SU_POLY_OFFSET_FRONT_OFFSET); + LINK_STATES(PA_SU_POLY_OFFSET_BACK_SCALE); + + pStateListWork->puiValue = (unsigned int*)&(r700->PA_SU_POLY_OFFSET_BACK_OFFSET); + pStateListWork->unOffset = mmPA_SU_POLY_OFFSET_BACK_OFFSET - ASIC_CONTEXT_BASE_INDEX; + pStateListWork->pNext = NULL; /* END OF STATE LIST */ + + /* TODO : may need order sorting in case someone break the order of states in R700_CHIP_CONTEXT. */ + + return GL_TRUE; +} + +GLboolean r700SendContextStates(context_t *context) +{ + R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); + + ContextState * pState = r700->pStateList; + ContextState * pInit; + unsigned int toSend; + unsigned int ui; + + while(NULL != pState) + { + toSend = 1; + + pInit = pState; + + while(NULL != pState->pNext) + { + if( (pState->pNext->unOffset - pState->unOffset) > 1 ) + { + break; + } + else + { + pState = pState->pNext; + toSend++; + } + }; + + pState = pState->pNext; + + R700_CMDBUF_CHECK_SPACE(toSend + 2); + R700EP3(context, IT_SET_CONTEXT_REG, toSend); + R700E32(context, pInit->unOffset); + + for(ui=0; ui<toSend; ui++) + { + R700E32(context, *(pInit->puiValue)); + pInit = pInit->pNext; + }; + }; + + return GL_TRUE; +} + + + diff --git a/src/mesa/drivers/dri/r600/r700_chip.h b/src/mesa/drivers/dri/r600/r700_chip.h new file mode 100644 index 0000000000..6105e61593 --- /dev/null +++ b/src/mesa/drivers/dri/r600/r700_chip.h @@ -0,0 +1,456 @@ +/* + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Authors: + * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com> + */ + +#ifndef _R700_CHIP_H_ +#define _R700_CHIP_H_ + +#include "r600_reg.h" +#include "r600_reg_auto_r6xx.h" +#include "r600_reg_r6xx.h" +#include "r600_reg_r7xx.h" + +#include "r700_chipoffset.h" + +#define SETfield(x, val, shift, mask) ( (x) = ((x) & ~(mask)) | ((val) << (shift)) ) /* u32All */ +#define CLEARfield(x, mask) ( (x) &= ~(mask) ) +#define SETbit(x, bit) ( (x) |= (bit) ) +#define CLEARbit(x, bit) ( (x) &= ~(bit) ) + +#define R700_TEXTURE_NUMBERUNITS 16 + +/* Enum not show in r600_*.h */ + +#define FETCH_RESOURCE_STRIDE 7 + +#define ASIC_CONFIG_BASE_INDEX 0x2000 +#define ASIC_CONTEXT_BASE_INDEX 0xA000 +#define ASIC_CTL_CONST_BASE_INDEX 0xF3FC + +enum +{ + SQ_ABSOLUTE = 0x00000000, + SQ_RELATIVE = 0x00000001, +}; + +enum +{ + SQ_ALU_SCL_210 = 0x00000000, + SQ_ALU_SCL_122 = 0x00000001, + SQ_ALU_SCL_212 = 0x00000002, + SQ_ALU_SCL_221 = 0x00000003, +}; + +enum +{ + SQ_TEX_UNNORMALIZED = 0x00000000, + SQ_TEX_NORMALIZED = 0x00000001, +}; + +enum +{ + SQ_CF_PIXEL_MRT0 = 0x00000000, + SQ_CF_PIXEL_MRT1 = 0x00000001, + SQ_CF_PIXEL_MRT2 = 0x00000002, + SQ_CF_PIXEL_MRT3 = 0x00000003, + SQ_CF_PIXEL_MRT4 = 0x00000004, + SQ_CF_PIXEL_MRT5 = 0x00000005, + SQ_CF_PIXEL_MRT6 = 0x00000006, + SQ_CF_PIXEL_MRT7 = 0x00000007, + SQ_CF_PIXEL_Z = 0x0000003d, +}; + +typedef enum ENUM_SQ_CF_ARRAY_BASE_POS { +SQ_CF_POS_0 = 0x0000003c, +SQ_CF_POS_1 = 0x0000003d, +SQ_CF_POS_2 = 0x0000003e, +SQ_CF_POS_3 = 0x0000003f, +} ENUM_SQ_CF_ARRAY_BASE_POS; + +enum +{ + PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit = 23, +}; + +enum +{ + TEX_XYFilter_Point = 0x00000000, + TEX_XYFilter_Linear = 0x00000001, + TEX_XYFilter_Cubic = 0x00000002, + TEX_XYFilter_Cleartype = 0x00000003, + + TEX_MipFilter_None = 0x00000000, + TEX_MipFilter_Point = 0x00000001, + TEX_MipFilter_Linear = 0x00000002, +}; + +enum +{ + SQ_EXPORT_WRITE = 0x00000000, + SQ_EXPORT_WRITE_IND = 0x00000001, + SQ_EXPORT_WRITE_ACK = 0x00000002, + SQ_EXPORT_WRITE_IND_ACK = 0x00000003, +}; + +/* --------------------------------- */ + +enum +{ + R700_PM4_PACKET0_NOP = 0x00000000, + R700_PM4_PACKET1_NOP = 0x40000000, + R700_PM4_PACKET2_NOP = 0x80000000, + R700_PM4_PACKET3_NOP = 0xC0000000, +}; + +#define PM4_OPCODE_SET_INDEX_TYPE (R700_PM4_PACKET3_NOP | (IT_INDEX_TYPE << 8)) + +#define PM4_OPCODE_DRAW_INDEX_AUTO (R700_PM4_PACKET3_NOP | (IT_DRAW_INDEX_AUTO << 8)) +#define PM4_OPCODE_DRAW_INDEX_IMMD (R700_PM4_PACKET3_NOP | (IT_DRAW_INDEX_IMMD << 8)) +#define PM4_OPCODE_WAIT_REG_MEM (R700_PM4_PACKET3_NOP | (IT_WAIT_REG_MEM << 8)) +#define PM4_OPCODE_SET_CONTEXT_REG (R700_PM4_PACKET3_NOP | (IT_SET_CONTEXT_REG << 8)) +#define PM4_OPCODE_SET_CONFIG_REG (R700_PM4_PACKET3_NOP | (IT_SET_CONFIG_REG << 8)) +#define PM4_OPCODE_SET_ALU_CONST (R700_PM4_PACKET3_NOP | (IT_SET_ALU_CONST << 8)) +#define PM4_OPCODE_SET_RESOURCE (R700_PM4_PACKET3_NOP | (IT_SET_RESOURCE << 8)) +#define PM4_OPCODE_SET_SAMPLER (R700_PM4_PACKET3_NOP | (IT_SET_SAMPLER << 8)) +#define PM4_OPCODE_CONTEXT_CONTROL (R700_PM4_PACKET3_NOP | (IT_CONTEXT_CONTROL << 8)) + +union UINT_FLOAT +{ + unsigned int u32All; + float f32All; +}; + +typedef struct _TEXTURE_STATE_STRUCT +{ + union UINT_FLOAT SQ_TEX_RESOURCE0; + union UINT_FLOAT SQ_TEX_RESOURCE1; + union UINT_FLOAT SQ_TEX_RESOURCE2; + union UINT_FLOAT SQ_TEX_RESOURCE3; + union UINT_FLOAT SQ_TEX_RESOURCE4; + union UINT_FLOAT SQ_TEX_RESOURCE5; + union UINT_FLOAT SQ_TEX_RESOURCE6; + GLboolean enabled; +} TEXTURE_STATE_STRUCT; + +typedef struct _SAMPLER_STATE_STRUCT +{ + union UINT_FLOAT SQ_TEX_SAMPLER0; + union UINT_FLOAT SQ_TEX_SAMPLER1; + union UINT_FLOAT SQ_TEX_SAMPLER2; + GLboolean enabled; +} SAMPLER_STATE_STRUCT; + +typedef struct _R700_TEXTURE_STATES +{ + TEXTURE_STATE_STRUCT *textures[R700_TEXTURE_NUMBERUNITS]; + SAMPLER_STATE_STRUCT *samplers[R700_TEXTURE_NUMBERUNITS]; +} R700_TEXTURE_STATES; + +typedef struct ContextState +{ + unsigned int * puiValue; + unsigned int unOffset; + struct ContextState * pNext; +} ContextState; + +typedef struct _R700_CHIP_CONTEXT +{ + union UINT_FLOAT DB_DEPTH_SIZE ; /* 0xA000 */ + union UINT_FLOAT DB_DEPTH_VIEW ; /* 0xA001 */ + + union UINT_FLOAT DB_DEPTH_BASE ; /* 0xA003 */ + union UINT_FLOAT DB_DEPTH_INFO ; /* 0xA004 */ + union UINT_FLOAT DB_HTILE_DATA_BASE ; /* 0xA005 */ + + union UINT_FLOAT DB_STENCIL_CLEAR ; /* 0xA00A */ + union UINT_FLOAT DB_DEPTH_CLEAR ; /* 0xA00B */ + + union UINT_FLOAT PA_SC_SCREEN_SCISSOR_TL ; /* 0xA00C */ + union UINT_FLOAT PA_SC_SCREEN_SCISSOR_BR ; /* 0xA00D */ + + union UINT_FLOAT CB_COLOR0_BASE ; /* 0xA010 */ + + union UINT_FLOAT CB_COLOR0_SIZE ; /* 0xA018 */ + + union UINT_FLOAT CB_COLOR0_VIEW ; /* 0xA020 */ + + union UINT_FLOAT CB_COLOR0_INFO ; /* 0xA028 */ + union UINT_FLOAT CB_COLOR1_INFO ; /* 0xA029 */ + union UINT_FLOAT CB_COLOR2_INFO ; /* 0xA02A */ + union UINT_FLOAT CB_COLOR3_INFO ; /* 0xA02B */ + union UINT_FLOAT CB_COLOR4_INFO ; /* 0xA02C */ + union UINT_FLOAT CB_COLOR5_INFO ; /* 0xA02D */ + union UINT_FLOAT CB_COLOR6_INFO ; /* 0xA02E */ + union UINT_FLOAT CB_COLOR7_INFO ; /* 0xA02F */ + + union UINT_FLOAT CB_COLOR0_TILE ; /* 0xA030 */ + + union UINT_FLOAT CB_COLOR0_FRAG ; /* 0xA038 */ + + union UINT_FLOAT CB_COLOR0_MASK ; /* 0xA040 */ + + union UINT_FLOAT PA_SC_WINDOW_OFFSET ; /* 0xA080 */ + union UINT_FLOAT PA_SC_WINDOW_SCISSOR_TL ; /* 0xA081 */ + union UINT_FLOAT PA_SC_WINDOW_SCISSOR_BR ; /* 0xA082 */ + union UINT_FLOAT PA_SC_CLIPRECT_RULE ; /* 0xA083 */ + union UINT_FLOAT PA_SC_CLIPRECT_0_TL ; /* 0xA084 */ + union UINT_FLOAT PA_SC_CLIPRECT_0_BR ; /* 0xA085 */ + union UINT_FLOAT PA_SC_CLIPRECT_1_TL ; /* 0xA086 */ + union UINT_FLOAT PA_SC_CLIPRECT_1_BR ; /* 0xA087 */ + union UINT_FLOAT PA_SC_CLIPRECT_2_TL ; /* 0xA088 */ + union UINT_FLOAT PA_SC_CLIPRECT_2_BR ; /* 0xA089 */ + union UINT_FLOAT PA_SC_CLIPRECT_3_TL ; /* 0xA08A */ + union UINT_FLOAT PA_SC_CLIPRECT_3_BR ; /* 0xA08B */ + + union UINT_FLOAT PA_SC_EDGERULE ; /* 0xA08C */ + + union UINT_FLOAT CB_TARGET_MASK ; /* 0xA08E */ + union UINT_FLOAT CB_SHADER_MASK ; /* 0xA08F */ + union UINT_FLOAT PA_SC_GENERIC_SCISSOR_TL ; /* 0xA090 */ + union UINT_FLOAT PA_SC_GENERIC_SCISSOR_BR ; /* 0xA091 */ + + union UINT_FLOAT PA_SC_VPORT_SCISSOR_0_TL ; /* 0xA094 */ + union UINT_FLOAT PA_SC_VPORT_SCISSOR_0_BR ; /* 0xA095 */ + union UINT_FLOAT PA_SC_VPORT_SCISSOR_1_TL ; /* 0xA096 */ + union UINT_FLOAT PA_SC_VPORT_SCISSOR_1_BR ; /* 0xA097 */ + + union UINT_FLOAT PA_SC_VPORT_ZMIN_0 ; /* 0xA0B4 */ + union UINT_FLOAT PA_SC_VPORT_ZMAX_0 ; /* 0xA0B5 */ + + union UINT_FLOAT SX_MISC ; /* 0xA0D4 */ + + union UINT_FLOAT SQ_VTX_SEMANTIC_0 ; /* 0xA0E0 */ + union UINT_FLOAT SQ_VTX_SEMANTIC_1 ; /* 0xA0E1 */ + union UINT_FLOAT SQ_VTX_SEMANTIC_2 ; /* 0xA0E2 */ + union UINT_FLOAT SQ_VTX_SEMANTIC_3 ; /* 0xA0E3 */ + union UINT_FLOAT SQ_VTX_SEMANTIC_4 ; /* 0xA0E4 */ + union UINT_FLOAT SQ_VTX_SEMANTIC_5 ; /* 0xA0E5 */ + union UINT_FLOAT SQ_VTX_SEMANTIC_6 ; /* 0xA0E6 */ + union UINT_FLOAT SQ_VTX_SEMANTIC_7 ; /* 0xA0E7 */ + union UINT_FLOAT SQ_VTX_SEMANTIC_8 ; /* 0xA0E8 */ + union UINT_FLOAT SQ_VTX_SEMANTIC_9 ; /* 0xA0E9 */ + union UINT_FLOAT SQ_VTX_SEMANTIC_10 ; /* 0xA0EA */ + union UINT_FLOAT SQ_VTX_SEMANTIC_11 ; /* 0xA0EB */ + union UINT_FLOAT SQ_VTX_SEMANTIC_12 ; /* 0xA0EC */ + union UINT_FLOAT SQ_VTX_SEMANTIC_13 ; /* 0xA0ED */ + union UINT_FLOAT SQ_VTX_SEMANTIC_14 ; /* 0xA0EE */ + union UINT_FLOAT SQ_VTX_SEMANTIC_15 ; /* 0xA0EF */ + union UINT_FLOAT SQ_VTX_SEMANTIC_16 ; /* 0xA0F0 */ + union UINT_FLOAT SQ_VTX_SEMANTIC_17 ; /* 0xA0F1 */ + union UINT_FLOAT SQ_VTX_SEMANTIC_18 ; /* 0xA0F2 */ + union UINT_FLOAT SQ_VTX_SEMANTIC_19 ; /* 0xA0F3 */ + union UINT_FLOAT SQ_VTX_SEMANTIC_20 ; /* 0xA0F4 */ + union UINT_FLOAT SQ_VTX_SEMANTIC_21 ; /* 0xA0F5 */ + union UINT_FLOAT SQ_VTX_SEMANTIC_22 ; /* 0xA0F6 */ + union UINT_FLOAT SQ_VTX_SEMANTIC_23 ; /* 0xA0F7 */ + union UINT_FLOAT SQ_VTX_SEMANTIC_24 ; /* 0xA0F8 */ + union UINT_FLOAT SQ_VTX_SEMANTIC_25 ; /* 0xA0F9 */ + union UINT_FLOAT SQ_VTX_SEMANTIC_26 ; /* 0xA0FA */ + union UINT_FLOAT SQ_VTX_SEMANTIC_27 ; /* 0xA0FB */ + union UINT_FLOAT SQ_VTX_SEMANTIC_28 ; /* 0xA0FC */ + union UINT_FLOAT SQ_VTX_SEMANTIC_29 ; /* 0xA0FD */ + union UINT_FLOAT SQ_VTX_SEMANTIC_30 ; /* 0xA0FE */ + union UINT_FLOAT SQ_VTX_SEMANTIC_31 ; /* 0xA0FF */ + + union UINT_FLOAT VGT_MAX_VTX_INDX ; /* 0xA100 */ + union UINT_FLOAT VGT_MIN_VTX_INDX ; /* 0xA101 */ + union UINT_FLOAT VGT_INDX_OFFSET ; /* 0xA102 */ + union UINT_FLOAT VGT_MULTI_PRIM_IB_RESET_INDX; /* 0xA103 */ + union UINT_FLOAT SX_ALPHA_TEST_CONTROL ; /* 0xA104 */ + + union UINT_FLOAT CB_BLEND_RED ; /* 0xA105 */ + union UINT_FLOAT CB_BLEND_GREEN ; /* 0xA106 */ + union UINT_FLOAT CB_BLEND_BLUE ; /* 0xA107 */ + union UINT_FLOAT CB_BLEND_ALPHA ; /* 0xA108 */ + + union UINT_FLOAT PA_CL_VPORT_XSCALE ; /* 0xA10F */ + union UINT_FLOAT PA_CL_VPORT_XOFFSET ; /* 0xA110 */ + union UINT_FLOAT PA_CL_VPORT_YSCALE ; /* 0xA111 */ + union UINT_FLOAT PA_CL_VPORT_YOFFSET ; /* 0xA112 */ + union UINT_FLOAT PA_CL_VPORT_ZSCALE ; /* 0xA113 */ + union UINT_FLOAT PA_CL_VPORT_ZOFFSET ; /* 0xA114 */ + + union UINT_FLOAT SPI_VS_OUT_ID_0 ; /* 0xA185 */ + union UINT_FLOAT SPI_VS_OUT_ID_1 ; /* 0xA186 */ + union UINT_FLOAT SPI_VS_OUT_ID_2 ; /* 0xA187 */ + union UINT_FLOAT SPI_VS_OUT_ID_3 ; /* 0xA188 */ + union UINT_FLOAT SPI_VS_OUT_ID_4 ; /* 0xA189 */ + union UINT_FLOAT SPI_VS_OUT_ID_5 ; /* 0xA18A */ + union UINT_FLOAT SPI_VS_OUT_ID_6 ; /* 0xA18B */ + union UINT_FLOAT SPI_VS_OUT_ID_7 ; /* 0xA18C */ + union UINT_FLOAT SPI_VS_OUT_ID_8 ; /* 0xA18D */ + union UINT_FLOAT SPI_VS_OUT_ID_9 ; /* 0xA18E */ + + union UINT_FLOAT SPI_PS_INPUT_CNTL_0 ; /* 0xA191 */ + union UINT_FLOAT SPI_PS_INPUT_CNTL_1 ; /* 0xA192 */ + union UINT_FLOAT SPI_PS_INPUT_CNTL_2 ; /* 0xA193 */ + union UINT_FLOAT SPI_PS_INPUT_CNTL_3 ; /* 0xA194 */ + union UINT_FLOAT SPI_PS_INPUT_CNTL_4 ; /* 0xA195 */ + union UINT_FLOAT SPI_PS_INPUT_CNTL_5 ; /* 0xA196 */ + union UINT_FLOAT SPI_PS_INPUT_CNTL_6 ; /* 0xA197 */ + union UINT_FLOAT SPI_PS_INPUT_CNTL_7 ; /* 0xA198 */ + union UINT_FLOAT SPI_PS_INPUT_CNTL_8 ; /* 0xA199 */ + union UINT_FLOAT SPI_PS_INPUT_CNTL_9 ; /* 0xA19A */ + union UINT_FLOAT SPI_PS_INPUT_CNTL_10 ; /* 0xA19B */ + union UINT_FLOAT SPI_PS_INPUT_CNTL_11 ; /* 0xA19C */ + union UINT_FLOAT SPI_PS_INPUT_CNTL_12 ; /* 0xA19D */ + union UINT_FLOAT SPI_PS_INPUT_CNTL_13 ; /* 0xA19E */ + union UINT_FLOAT SPI_PS_INPUT_CNTL_14 ; /* 0xA19F */ + union UINT_FLOAT SPI_PS_INPUT_CNTL_15 ; /* 0xA1A0 */ + union UINT_FLOAT SPI_PS_INPUT_CNTL_16 ; /* 0xA1A1 */ + union UINT_FLOAT SPI_PS_INPUT_CNTL_17 ; /* 0xA1A2 */ + union UINT_FLOAT SPI_PS_INPUT_CNTL_18 ; /* 0xA1A3 */ + union UINT_FLOAT SPI_PS_INPUT_CNTL_19 ; /* 0xA1A4 */ + union UINT_FLOAT SPI_PS_INPUT_CNTL_20 ; /* 0xA1A5 */ + union UINT_FLOAT SPI_PS_INPUT_CNTL_21 ; /* 0xA1A6 */ + union UINT_FLOAT SPI_PS_INPUT_CNTL_22 ; /* 0xA1A7 */ + union UINT_FLOAT SPI_PS_INPUT_CNTL_23 ; /* 0xA1A8 */ + union UINT_FLOAT SPI_PS_INPUT_CNTL_24 ; /* 0xA1A9 */ + union UINT_FLOAT SPI_PS_INPUT_CNTL_25 ; /* 0xA1AA */ + union UINT_FLOAT SPI_PS_INPUT_CNTL_26 ; /* 0xA1AB */ + union UINT_FLOAT SPI_PS_INPUT_CNTL_27 ; /* 0xA1AC */ + union UINT_FLOAT SPI_PS_INPUT_CNTL_28 ; /* 0xA1AD */ + union UINT_FLOAT SPI_PS_INPUT_CNTL_29 ; /* 0xA1AE */ + union UINT_FLOAT SPI_PS_INPUT_CNTL_30 ; /* 0xA1AF */ + union UINT_FLOAT SPI_PS_INPUT_CNTL_31 ; /* 0xA1B0 */ + union UINT_FLOAT SPI_VS_OUT_CONFIG ; /* 0xA1B1 */ + union UINT_FLOAT SPI_THREAD_GROUPING ; /* 0xA1B2 */ + union UINT_FLOAT SPI_PS_IN_CONTROL_0 ; /* 0xA1B3 */ + union UINT_FLOAT SPI_PS_IN_CONTROL_1 ; /* 0xA1B4 */ + + union UINT_FLOAT SPI_INPUT_Z ; /* 0xA1B6 */ + union UINT_FLOAT SPI_FOG_CNTL ; /* 0xA1B7 */ + + union UINT_FLOAT CB_BLEND0_CONTROL ; /* 0xA1E0 */ + + union UINT_FLOAT CB_SHADER_CONTROL ; /* 0xA1E8 */ + + /*union UINT_FLOAT VGT_DRAW_INITIATOR*/ ; /* 0xA1FC */ + + union UINT_FLOAT DB_DEPTH_CONTROL ; /* 0xA200 */ + + union UINT_FLOAT CB_COLOR_CONTROL ; /* 0xA202 */ + union UINT_FLOAT DB_SHADER_CONTROL ; /* 0xA203 */ + union UINT_FLOAT PA_CL_CLIP_CNTL ; /* 0xA204 */ + union UINT_FLOAT PA_SU_SC_MODE_CNTL ; /* 0xA205 */ + union UINT_FLOAT PA_CL_VTE_CNTL ; /* 0xA206 */ + union UINT_FLOAT PA_CL_VS_OUT_CNTL ; /* 0xA207 */ + union UINT_FLOAT PA_CL_NANINF_CNTL ; /* 0xA208 */ + + union UINT_FLOAT SQ_PGM_START_PS ; /* 0xA210 */ + union UINT_FLOAT SQ_PGM_RESOURCES_PS ; /* 0xA214 */ + union UINT_FLOAT SQ_PGM_EXPORTS_PS ; /* 0xA215 */ + union UINT_FLOAT SQ_PGM_START_VS ; /* 0xA216 */ + union UINT_FLOAT SQ_PGM_RESOURCES_VS ; /* 0xA21A */ + union UINT_FLOAT SQ_PGM_START_GS ; /* 0xA21B */ + union UINT_FLOAT SQ_PGM_RESOURCES_GS ; /* 0xA21F */ + union UINT_FLOAT SQ_PGM_START_ES ; /* 0xA220 */ + union UINT_FLOAT SQ_PGM_RESOURCES_ES ; /* 0xA224 */ + union UINT_FLOAT SQ_PGM_START_FS ; /* 0xA225 */ + union UINT_FLOAT SQ_PGM_RESOURCES_FS ; /* 0xA229 */ + union UINT_FLOAT SQ_ESGS_RING_ITEMSIZE ; /* 0xA22A */ + union UINT_FLOAT SQ_GSVS_RING_ITEMSIZE ; /* 0xA22B */ + union UINT_FLOAT SQ_ESTMP_RING_ITEMSIZE ; /* 0xA22C */ + union UINT_FLOAT SQ_GSTMP_RING_ITEMSIZE ; /* 0xA22D */ + union UINT_FLOAT SQ_VSTMP_RING_ITEMSIZE ; /* 0xA22E */ + union UINT_FLOAT SQ_PSTMP_RING_ITEMSIZE ; /* 0xA22F */ + union UINT_FLOAT SQ_FBUF_RING_ITEMSIZE ; /* 0xA230 */ + union UINT_FLOAT SQ_REDUC_RING_ITEMSIZE ; /* 0xA231 */ + union UINT_FLOAT SQ_GS_VERT_ITEMSIZE ; /* 0xA232 */ + union UINT_FLOAT SQ_PGM_CF_OFFSET_PS ; /* 0xA233 */ + union UINT_FLOAT SQ_PGM_CF_OFFSET_VS ; /* 0xA234 */ + union UINT_FLOAT SQ_PGM_CF_OFFSET_GS ; /* 0xA235 */ + union UINT_FLOAT SQ_PGM_CF_OFFSET_ES ; /* 0xA236 */ + union UINT_FLOAT SQ_PGM_CF_OFFSET_FS ; /* 0xA237 */ + + union UINT_FLOAT PA_SU_POINT_SIZE ; /* 0xA280 */ + union UINT_FLOAT PA_SU_POINT_MINMAX ; /* 0xA281 */ + union UINT_FLOAT PA_SU_LINE_CNTL ; /* 0xA282 */ + union UINT_FLOAT PA_SC_LINE_STIPPLE ; /* 0xA283 */ + union UINT_FLOAT VGT_OUTPUT_PATH_CNTL ; /* 0xA284 */ + + union UINT_FLOAT VGT_GS_MODE ; /* 0xA290 */ + + union UINT_FLOAT PA_SC_MPASS_PS_CNTL ; /* 0xA292 */ + union UINT_FLOAT PA_SC_MODE_CNTL ; /* 0xA293 */ + + union UINT_FLOAT VGT_PRIMITIVEID_EN ; /* 0xA2A1 */ + union UINT_FLOAT VGT_DMA_NUM_INSTANCES ; /* 0xA2A2 */ + + union UINT_FLOAT VGT_MULTI_PRIM_IB_RESET_EN; /* 0xA2A5 */ + + union UINT_FLOAT VGT_INSTANCE_STEP_RATE_0 ; /* 0xA2A8 */ + union UINT_FLOAT VGT_INSTANCE_STEP_RATE_1 ; /* 0xA2A9 */ + + union UINT_FLOAT VGT_STRMOUT_EN ; /* 0xA2AC */ + union UINT_FLOAT VGT_REUSE_OFF ; /* 0xA2AD */ + + union UINT_FLOAT PA_SC_LINE_CNTL ; /* 0xA300 */ + union UINT_FLOAT PA_SC_AA_CONFIG ; /* 0xA301 */ + union UINT_FLOAT PA_SU_VTX_CNTL ; /* 0xA302 */ + union UINT_FLOAT PA_CL_GB_VERT_CLIP_ADJ ; /* 0xA303 */ + union UINT_FLOAT PA_CL_GB_VERT_DISC_ADJ ; /* 0xA304 */ + union UINT_FLOAT PA_CL_GB_HORZ_CLIP_ADJ ; /* 0xA305 */ + union UINT_FLOAT PA_CL_GB_HORZ_DISC_ADJ ; /* 0xA306 */ + union UINT_FLOAT PA_SC_AA_SAMPLE_LOCS_MCTX ; /* 0xA307 */ + union UINT_FLOAT PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX; /* 0xA308 */ + + union UINT_FLOAT CB_CLRCMP_CONTROL ; /* 0xA30C */ + union UINT_FLOAT CB_CLRCMP_SRC ; /* 0xA30D */ + union UINT_FLOAT CB_CLRCMP_DST ; /* 0xA30E */ + union UINT_FLOAT CB_CLRCMP_MSK ; /* 0xA30F */ + + union UINT_FLOAT PA_SC_AA_MASK ; /* 0xA312 */ + + union UINT_FLOAT VGT_VERTEX_REUSE_BLOCK_CNTL; /* 0xA316 */ + union UINT_FLOAT VGT_OUT_DEALLOC_CNTL ; /* 0xA317 */ + + union UINT_FLOAT DB_RENDER_CONTROL ; /* 0xA343 */ + union UINT_FLOAT DB_RENDER_OVERRIDE ; /* 0xA344 */ + + union UINT_FLOAT DB_HTILE_SURFACE ; /* 0xA349 */ + + union UINT_FLOAT DB_ALPHA_TO_MASK ; /* 0xA351 */ + + union UINT_FLOAT PA_SU_POLY_OFFSET_DB_FMT_CNTL; /* 0xA37E */ + union UINT_FLOAT PA_SU_POLY_OFFSET_CLAMP ; /* 0xA37F */ + union UINT_FLOAT PA_SU_POLY_OFFSET_FRONT_SCALE; /* 0xA380 */ + union UINT_FLOAT PA_SU_POLY_OFFSET_FRONT_OFFSET; /* 0xA381 */ + union UINT_FLOAT PA_SU_POLY_OFFSET_BACK_SCALE; /* 0xA382 */ + union UINT_FLOAT PA_SU_POLY_OFFSET_BACK_OFFSET; /* 0xA383 */ + + ContextState * pStateList; + + R700_TEXTURE_STATES texture_states; + +} R700_CHIP_CONTEXT; + +#define R700_CONTEXT_STATES(context) ((R700_CHIP_CONTEXT *)(context->chipobj.pvChipObj)) + +extern GLboolean r700InitChipObject(context_t *context); +extern GLboolean r700SendContextStates(context_t *context); + +#endif /* _R700_CHIP_H_ */ + diff --git a/src/mesa/drivers/dri/r600/r700_chipoffset.h b/src/mesa/drivers/dri/r600/r700_chipoffset.h new file mode 100644 index 0000000000..9050b9a715 --- /dev/null +++ b/src/mesa/drivers/dri/r600/r700_chipoffset.h @@ -0,0 +1,684 @@ +/* + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Authors: + * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com> + * CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com> + */ + +#ifndef _R700_CHIPOFFSET_H_ +#define _R700_CHIPOFFSET_H_ + +#define mmWAIT_UNTIL 0x2010 +#define mmSCRATCH_REG0 0x2140 +#define mmGUI_SCRATCH_REG0 0x2140 +#define mmSCRATCH_REG1 0x2141 +#define mmGUI_SCRATCH_REG1 0x2141 +#define mmSCRATCH_REG2 0x2142 +#define mmGUI_SCRATCH_REG2 0x2142 +#define mmSCRATCH_REG3 0x2143 +#define mmGUI_SCRATCH_REG3 0x2143 +#define mmSCRATCH_REG4 0x2144 +#define mmGUI_SCRATCH_REG4 0x2144 +#define mmSCRATCH_REG5 0x2145 +#define mmGUI_SCRATCH_REG5 0x2145 +#define mmSCRATCH_REG6 0x2146 +#define mmGUI_SCRATCH_REG6 0x2146 +#define mmSCRATCH_REG7 0x2147 +#define mmGUI_SCRATCH_REG7 0x2147 + +#define mmCP_COHER_CNTL 0x217C +#define mmCP_COHER_SIZE 0x217D +#define mmCP_COHER_BASE 0x217E +#define mmCP_COHER_STATUS 0x217F + +#define mmPA_CL_VPORT_XSCALE 0xA10F +#define mmPA_CL_VPORT_XOFFSET 0xA110 +#define mmPA_CL_VPORT_YSCALE 0xA111 +#define mmPA_CL_VPORT_YOFFSET 0xA112 +#define mmPA_CL_VPORT_ZSCALE 0xA113 +#define mmPA_CL_VPORT_ZOFFSET 0xA114 +#define mmPA_CL_VPORT_XSCALE_1 0xA115 +#define mmPA_CL_VPORT_XSCALE_2 0xA11B +#define mmPA_CL_VPORT_XSCALE_3 0xA121 +#define mmPA_CL_VPORT_XSCALE_4 0xA127 +#define mmPA_CL_VPORT_XSCALE_5 0xA12D +#define mmPA_CL_VPORT_XSCALE_6 0xA133 +#define mmPA_CL_VPORT_XSCALE_7 0xA139 +#define mmPA_CL_VPORT_XSCALE_8 0xA13F +#define mmPA_CL_VPORT_XSCALE_9 0xA145 +#define mmPA_CL_VPORT_XSCALE_10 0xA14B +#define mmPA_CL_VPORT_XSCALE_11 0xA151 +#define mmPA_CL_VPORT_XSCALE_12 0xA157 +#define mmPA_CL_VPORT_XSCALE_13 0xA15D +#define mmPA_CL_VPORT_XSCALE_14 0xA163 +#define mmPA_CL_VPORT_XSCALE_15 0xA169 +#define mmPA_CL_VPORT_XOFFSET_1 0xA116 +#define mmPA_CL_VPORT_XOFFSET_2 0xA11C +#define mmPA_CL_VPORT_XOFFSET_3 0xA122 +#define mmPA_CL_VPORT_XOFFSET_4 0xA128 +#define mmPA_CL_VPORT_XOFFSET_5 0xA12E +#define mmPA_CL_VPORT_XOFFSET_6 0xA134 +#define mmPA_CL_VPORT_XOFFSET_7 0xA13A +#define mmPA_CL_VPORT_XOFFSET_8 0xA140 +#define mmPA_CL_VPORT_XOFFSET_9 0xA146 +#define mmPA_CL_VPORT_XOFFSET_10 0xA14C +#define mmPA_CL_VPORT_XOFFSET_11 0xA152 +#define mmPA_CL_VPORT_XOFFSET_12 0xA158 +#define mmPA_CL_VPORT_XOFFSET_13 0xA15E +#define mmPA_CL_VPORT_XOFFSET_14 0xA164 +#define mmPA_CL_VPORT_XOFFSET_15 0xA16A +#define mmPA_CL_VPORT_YSCALE_1 0xA117 +#define mmPA_CL_VPORT_YSCALE_2 0xA11D +#define mmPA_CL_VPORT_YSCALE_3 0xA123 +#define mmPA_CL_VPORT_YSCALE_4 0xA129 +#define mmPA_CL_VPORT_YSCALE_5 0xA12F +#define mmPA_CL_VPORT_YSCALE_6 0xA135 +#define mmPA_CL_VPORT_YSCALE_7 0xA13B +#define mmPA_CL_VPORT_YSCALE_8 0xA141 +#define mmPA_CL_VPORT_YSCALE_9 0xA147 +#define mmPA_CL_VPORT_YSCALE_10 0xA14D +#define mmPA_CL_VPORT_YSCALE_11 0xA153 +#define mmPA_CL_VPORT_YSCALE_12 0xA159 +#define mmPA_CL_VPORT_YSCALE_13 0xA15F +#define mmPA_CL_VPORT_YSCALE_14 0xA165 +#define mmPA_CL_VPORT_YSCALE_15 0xA16B +#define mmPA_CL_VPORT_YOFFSET_1 0xA118 +#define mmPA_CL_VPORT_YOFFSET_2 0xA11E +#define mmPA_CL_VPORT_YOFFSET_3 0xA124 +#define mmPA_CL_VPORT_YOFFSET_4 0xA12A +#define mmPA_CL_VPORT_YOFFSET_5 0xA130 +#define mmPA_CL_VPORT_YOFFSET_6 0xA136 +#define mmPA_CL_VPORT_YOFFSET_7 0xA13C +#define mmPA_CL_VPORT_YOFFSET_8 0xA142 +#define mmPA_CL_VPORT_YOFFSET_9 0xA148 +#define mmPA_CL_VPORT_YOFFSET_10 0xA14E +#define mmPA_CL_VPORT_YOFFSET_11 0xA154 +#define mmPA_CL_VPORT_YOFFSET_12 0xA15A +#define mmPA_CL_VPORT_YOFFSET_13 0xA160 +#define mmPA_CL_VPORT_YOFFSET_14 0xA166 +#define mmPA_CL_VPORT_YOFFSET_15 0xA16C +#define mmPA_CL_VPORT_ZSCALE_1 0xA119 +#define mmPA_CL_VPORT_ZSCALE_2 0xA11F +#define mmPA_CL_VPORT_ZSCALE_3 0xA125 +#define mmPA_CL_VPORT_ZSCALE_4 0xA12B +#define mmPA_CL_VPORT_ZSCALE_5 0xA131 +#define mmPA_CL_VPORT_ZSCALE_6 0xA137 +#define mmPA_CL_VPORT_ZSCALE_7 0xA13D +#define mmPA_CL_VPORT_ZSCALE_8 0xA143 +#define mmPA_CL_VPORT_ZSCALE_9 0xA149 +#define mmPA_CL_VPORT_ZSCALE_10 0xA14F +#define mmPA_CL_VPORT_ZSCALE_11 0xA155 +#define mmPA_CL_VPORT_ZSCALE_12 0xA15B +#define mmPA_CL_VPORT_ZSCALE_13 0xA161 +#define mmPA_CL_VPORT_ZSCALE_14 0xA167 +#define mmPA_CL_VPORT_ZSCALE_15 0xA16D +#define mmPA_CL_VPORT_ZOFFSET_1 0xA11A +#define mmPA_CL_VPORT_ZOFFSET_2 0xA120 +#define mmPA_CL_VPORT_ZOFFSET_3 0xA126 +#define mmPA_CL_VPORT_ZOFFSET_4 0xA12C +#define mmPA_CL_VPORT_ZOFFSET_5 0xA132 +#define mmPA_CL_VPORT_ZOFFSET_6 0xA138 +#define mmPA_CL_VPORT_ZOFFSET_7 0xA13E +#define mmPA_CL_VPORT_ZOFFSET_8 0xA144 +#define mmPA_CL_VPORT_ZOFFSET_9 0xA14A +#define mmPA_CL_VPORT_ZOFFSET_10 0xA150 +#define mmPA_CL_VPORT_ZOFFSET_11 0xA156 +#define mmPA_CL_VPORT_ZOFFSET_12 0xA15C +#define mmPA_CL_VPORT_ZOFFSET_13 0xA162 +#define mmPA_CL_VPORT_ZOFFSET_14 0xA168 +#define mmPA_CL_VPORT_ZOFFSET_15 0xA16E +#define mmPA_CL_VTE_CNTL 0xA206 +#define mmPA_CL_VS_OUT_CNTL 0xA207 +#define mmPA_CL_NANINF_CNTL 0xA208 +#define mmPA_CL_CLIP_CNTL 0xA204 +#define mmPA_CL_GB_VERT_CLIP_ADJ 0xA303 +#define mmPA_CL_GB_VERT_DISC_ADJ 0xA304 +#define mmPA_CL_GB_HORZ_CLIP_ADJ 0xA305 +#define mmPA_CL_GB_HORZ_DISC_ADJ 0xA306 +#define mmPA_CL_UCP_0_X 0xA388 +#define mmPA_CL_UCP_0_Y 0xA389 +#define mmPA_CL_UCP_0_Z 0xA38A +#define mmPA_CL_UCP_0_W 0xA38B +#define mmPA_CL_UCP_1_X 0xA38C +#define mmPA_CL_UCP_1_Y 0xA38D +#define mmPA_CL_UCP_1_Z 0xA38E +#define mmPA_CL_UCP_1_W 0xA38F +#define mmPA_CL_UCP_2_X 0xA390 +#define mmPA_CL_UCP_2_Y 0xA391 +#define mmPA_CL_UCP_2_Z 0xA392 +#define mmPA_CL_UCP_2_W 0xA393 +#define mmPA_CL_UCP_3_X 0xA394 +#define mmPA_CL_UCP_3_Y 0xA395 +#define mmPA_CL_UCP_3_Z 0xA396 +#define mmPA_CL_UCP_3_W 0xA397 +#define mmPA_CL_UCP_4_X 0xA398 +#define mmPA_CL_UCP_4_Y 0xA399 +#define mmPA_CL_UCP_4_Z 0xA39A +#define mmPA_CL_UCP_4_W 0xA39B +#define mmPA_CL_UCP_5_X 0xA39C +#define mmPA_CL_UCP_5_Y 0xA39D +#define mmPA_CL_UCP_5_Z 0xA39E +#define mmPA_CL_UCP_5_W 0xA39F +#define mmPA_CL_POINT_X_RAD 0xA384 +#define mmPA_CL_POINT_Y_RAD 0xA385 +#define mmPA_CL_POINT_SIZE 0xA386 +#define mmPA_CL_POINT_CULL_RAD 0xA387 + +#define mmPA_SU_VTX_CNTL 0xA302 +#define mmPA_SU_POINT_SIZE 0xA280 +#define mmPA_SU_POINT_MINMAX 0xA281 +#define mmPA_SU_LINE_CNTL 0xA282 +#define mmPA_SU_SC_MODE_CNTL 0xA205 +#define mmPA_SU_POLY_OFFSET_DB_FMT_CNTL 0xA37E +#define mmPA_SU_POLY_OFFSET_CLAMP 0xA37F +#define mmPA_SU_POLY_OFFSET_FRONT_SCALE 0xA380 +#define mmPA_SU_POLY_OFFSET_FRONT_OFFSET 0xA381 +#define mmPA_SU_POLY_OFFSET_BACK_SCALE 0xA382 +#define mmPA_SU_POLY_OFFSET_BACK_OFFSET 0xA383 + +#define mmPA_SC_WINDOW_OFFSET 0xA080 +#define mmPA_SC_AA_CONFIG 0xA301 +#define mmPA_SC_AA_MASK 0xA312 +#define mmPA_SC_AA_SAMPLE_LOCS_MCTX 0xA307 +#define mmPA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX 0xA308 +#define mmPA_SC_LINE_STIPPLE 0xA283 +#define mmPA_SC_LINE_CNTL 0xA300 +#define mmPA_SC_SCREEN_SCISSOR_TL 0xA00C +#define mmPA_SC_SCREEN_SCISSOR_BR 0xA00D +#define mmPA_SC_WINDOW_SCISSOR_TL 0xA081 +#define mmPA_SC_WINDOW_SCISSOR_BR 0xA082 +#define mmPA_SC_CLIPRECT_RULE 0xA083 +#define mmPA_SC_CLIPRECT_0_TL 0xA084 +#define mmPA_SC_CLIPRECT_0_BR 0xA085 +#define mmPA_SC_CLIPRECT_1_TL 0xA086 +#define mmPA_SC_CLIPRECT_1_BR 0xA087 +#define mmPA_SC_CLIPRECT_2_TL 0xA088 +#define mmPA_SC_CLIPRECT_2_BR 0xA089 +#define mmPA_SC_CLIPRECT_3_TL 0xA08A +#define mmPA_SC_CLIPRECT_3_BR 0xA08B +#define mmPA_SC_EDGERULE 0xA08C +#define mmPA_SC_GENERIC_SCISSOR_TL 0xA090 +#define mmPA_SC_GENERIC_SCISSOR_BR 0xA091 +#define mmPA_SC_VPORT_SCISSOR_0_TL 0xA094 +#define mmPA_SC_VPORT_SCISSOR_1_TL 0xA096 +#define mmPA_SC_VPORT_SCISSOR_2_TL 0xA098 +#define mmPA_SC_VPORT_SCISSOR_3_TL 0xA09A +#define mmPA_SC_VPORT_SCISSOR_4_TL 0xA09C +#define mmPA_SC_VPORT_SCISSOR_5_TL 0xA09E +#define mmPA_SC_VPORT_SCISSOR_6_TL 0xA0A0 +#define mmPA_SC_VPORT_SCISSOR_7_TL 0xA0A2 +#define mmPA_SC_VPORT_SCISSOR_8_TL 0xA0A4 +#define mmPA_SC_VPORT_SCISSOR_9_TL 0xA0A6 +#define mmPA_SC_VPORT_SCISSOR_10_TL 0xA0A8 +#define mmPA_SC_VPORT_SCISSOR_11_TL 0xA0AA +#define mmPA_SC_VPORT_SCISSOR_12_TL 0xA0AC +#define mmPA_SC_VPORT_SCISSOR_13_TL 0xA0AE +#define mmPA_SC_VPORT_SCISSOR_14_TL 0xA0B0 +#define mmPA_SC_VPORT_SCISSOR_15_TL 0xA0B2 +#define mmPA_SC_VPORT_SCISSOR_0_BR 0xA095 +#define mmPA_SC_VPORT_SCISSOR_1_BR 0xA097 +#define mmPA_SC_VPORT_SCISSOR_2_BR 0xA099 +#define mmPA_SC_VPORT_SCISSOR_3_BR 0xA09B +#define mmPA_SC_VPORT_SCISSOR_4_BR 0xA09D +#define mmPA_SC_VPORT_SCISSOR_5_BR 0xA09F +#define mmPA_SC_VPORT_SCISSOR_6_BR 0xA0A1 +#define mmPA_SC_VPORT_SCISSOR_7_BR 0xA0A3 +#define mmPA_SC_VPORT_SCISSOR_8_BR 0xA0A5 +#define mmPA_SC_VPORT_SCISSOR_9_BR 0xA0A7 +#define mmPA_SC_VPORT_SCISSOR_10_BR 0xA0A9 +#define mmPA_SC_VPORT_SCISSOR_11_BR 0xA0AB +#define mmPA_SC_VPORT_SCISSOR_12_BR 0xA0AD +#define mmPA_SC_VPORT_SCISSOR_13_BR 0xA0AF +#define mmPA_SC_VPORT_SCISSOR_14_BR 0xA0B1 +#define mmPA_SC_VPORT_SCISSOR_15_BR 0xA0B3 +#define mmPA_SC_VPORT_ZMIN_0 0xA0B4 +#define mmPA_SC_VPORT_ZMIN_1 0xA0B6 +#define mmPA_SC_VPORT_ZMIN_2 0xA0B8 +#define mmPA_SC_VPORT_ZMIN_3 0xA0BA +#define mmPA_SC_VPORT_ZMIN_4 0xA0BC +#define mmPA_SC_VPORT_ZMIN_5 0xA0BE +#define mmPA_SC_VPORT_ZMIN_6 0xA0C0 +#define mmPA_SC_VPORT_ZMIN_7 0xA0C2 +#define mmPA_SC_VPORT_ZMIN_8 0xA0C4 +#define mmPA_SC_VPORT_ZMIN_9 0xA0C6 +#define mmPA_SC_VPORT_ZMIN_10 0xA0C8 +#define mmPA_SC_VPORT_ZMIN_11 0xA0CA +#define mmPA_SC_VPORT_ZMIN_12 0xA0CC +#define mmPA_SC_VPORT_ZMIN_13 0xA0CE +#define mmPA_SC_VPORT_ZMIN_14 0xA0D0 +#define mmPA_SC_VPORT_ZMIN_15 0xA0D2 +#define mmPA_SC_VPORT_ZMAX_0 0xA0B5 +#define mmPA_SC_VPORT_ZMAX_1 0xA0B7 +#define mmPA_SC_VPORT_ZMAX_2 0xA0B9 +#define mmPA_SC_VPORT_ZMAX_3 0xA0BB +#define mmPA_SC_VPORT_ZMAX_4 0xA0BD +#define mmPA_SC_VPORT_ZMAX_5 0xA0BF +#define mmPA_SC_VPORT_ZMAX_6 0xA0C1 +#define mmPA_SC_VPORT_ZMAX_7 0xA0C3 +#define mmPA_SC_VPORT_ZMAX_8 0xA0C5 +#define mmPA_SC_VPORT_ZMAX_9 0xA0C7 +#define mmPA_SC_VPORT_ZMAX_10 0xA0C9 +#define mmPA_SC_VPORT_ZMAX_11 0xA0CB +#define mmPA_SC_VPORT_ZMAX_12 0xA0CD +#define mmPA_SC_VPORT_ZMAX_13 0xA0CF +#define mmPA_SC_VPORT_ZMAX_14 0xA0D1 +#define mmPA_SC_VPORT_ZMAX_15 0xA0D3 +#define mmPA_SC_MODE_CNTL 0xA293 +#define mmPA_SC_MPASS_PS_CNTL 0xA292 + +#define mmVGT_DRAW_INITIATOR 0xA1FC +#define mmVGT_EVENT_INITIATOR 0xA2A4 +#define mmVGT_EVENT_ADDRESS_REG 0xA1FE +#define mmVGT_DMA_BASE_HI 0xA1F9 +#define mmVGT_DMA_BASE 0xA1FA +#define mmVGT_DMA_INDEX_TYPE 0xA29F +#define mmVGT_DMA_NUM_INSTANCES 0xA2A2 +#define mmVGT_DMA_SIZE 0xA29D + +#define mmVGT_IMMED_DATA 0xA1FD +#define mmVGT_INDEX_TYPE 0x2257 +#define mmVGT_NUM_INDICES 0x225C +#define mmVGT_NUM_INSTANCES 0x225D +#define mmVGT_PRIMITIVE_TYPE 0x2256 +#define mmVGT_PRIMITIVEID_EN 0xA2A1 +#define mmVGT_VTX_CNT_EN 0xA2AE +#define mmVGT_REUSE_OFF 0xA2AD +#define mmVGT_INSTANCE_STEP_RATE_0 0xA2A8 +#define mmVGT_INSTANCE_STEP_RATE_1 0xA2A9 +#define mmVGT_MAX_VTX_INDX 0xA100 +#define mmVGT_MIN_VTX_INDX 0xA101 +#define mmVGT_INDX_OFFSET 0xA102 +#define mmVGT_VERTEX_REUSE_BLOCK_CNTL 0xA316 +#define mmVGT_OUT_DEALLOC_CNTL 0xA317 +#define mmVGT_MULTI_PRIM_IB_RESET_INDX 0xA103 +#define mmVGT_MULTI_PRIM_IB_RESET_EN 0xA2A5 +#define mmVGT_ENHANCE 0xA294 +#define mmVGT_OUTPUT_PATH_CNTL 0xA284 +#define mmVGT_HOS_CNTL 0xA285 +#define mmVGT_HOS_MAX_TESS_LEVEL 0xA286 +#define mmVGT_HOS_MIN_TESS_LEVEL 0xA287 +#define mmVGT_HOS_REUSE_DEPTH 0xA288 +#define mmVGT_GROUP_PRIM_TYPE 0xA289 +#define mmVGT_GROUP_FIRST_DECR 0xA28A +#define mmVGT_GROUP_DECR 0xA28B +#define mmVGT_GROUP_VECT_0_CNTL 0xA28C +#define mmVGT_GROUP_VECT_1_CNTL 0xA28D +#define mmVGT_GROUP_VECT_0_FMT_CNTL 0xA28E +#define mmVGT_GROUP_VECT_1_FMT_CNTL 0xA28F +#define mmVGT_GS_MODE 0xA290 +#define mmVGT_GS_OUT_PRIM_TYPE 0xA29B + +#define mmVGT_STRMOUT_EN 0xA2AC +#define mmVGT_STRMOUT_BUFFER_SIZE_0 0xA2B4 +#define mmVGT_STRMOUT_BUFFER_SIZE_1 0xA2B8 +#define mmVGT_STRMOUT_BUFFER_SIZE_2 0xA2BC +#define mmVGT_STRMOUT_BUFFER_SIZE_3 0xA2C0 +#define mmVGT_STRMOUT_BUFFER_OFFSET_0 0xA2B7 +#define mmVGT_STRMOUT_BUFFER_OFFSET_1 0xA2BB +#define mmVGT_STRMOUT_BUFFER_OFFSET_2 0xA2BF +#define mmVGT_STRMOUT_BUFFER_OFFSET_3 0xA2C3 +#define mmVGT_STRMOUT_VTX_STRIDE_0 0xA2B5 +#define mmVGT_STRMOUT_VTX_STRIDE_1 0xA2B9 +#define mmVGT_STRMOUT_VTX_STRIDE_2 0xA2BD +#define mmVGT_STRMOUT_VTX_STRIDE_3 0xA2C1 +#define mmVGT_STRMOUT_BUFFER_BASE_0 0xA2B6 +#define mmVGT_STRMOUT_BUFFER_BASE_1 0xA2BA +#define mmVGT_STRMOUT_BUFFER_BASE_2 0xA2BE +#define mmVGT_STRMOUT_BUFFER_BASE_3 0xA2C2 +#define mmVGT_STRMOUT_BUFFER_EN 0xA2C8 +#define mmVGT_STRMOUT_BASE_OFFSET_0 0xA2C4 +#define mmVGT_STRMOUT_BASE_OFFSET_1 0xA2C5 +#define mmVGT_STRMOUT_BASE_OFFSET_2 0xA2C6 +#define mmVGT_STRMOUT_BASE_OFFSET_3 0xA2C7 +#define mmVGT_STRMOUT_BASE_OFFSET_HI_0 0xA2D1 +#define mmVGT_STRMOUT_BASE_OFFSET_HI_1 0xA2D2 +#define mmVGT_STRMOUT_BASE_OFFSET_HI_2 0xA2D3 +#define mmVGT_STRMOUT_BASE_OFFSET_HI_3 0xA2D4 +#define mmVGT_STRMOUT_DRAW_OPAQUE_OFFSET 0xA2CA +#define mmVGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE 0xA2CB +#define mmVGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE 0xA2CC + +#define mmSQ_PGM_START_PS 0xA210 +#define mmSQ_PGM_CF_OFFSET_PS 0xA233 +#define mmSQ_PGM_RESOURCES_PS 0xA214 +#define mmSQ_PGM_EXPORTS_PS 0xA215 +#define mmSQ_PGM_START_VS 0xA216 +#define mmSQ_PGM_CF_OFFSET_VS 0xA234 +#define mmSQ_PGM_RESOURCES_VS 0xA21A +#define mmSQ_PGM_START_GS 0xA21B +#define mmSQ_PGM_CF_OFFSET_GS 0xA235 +#define mmSQ_PGM_RESOURCES_GS 0xA21F +#define mmSQ_PGM_START_ES 0xA220 +#define mmSQ_PGM_CF_OFFSET_ES 0xA236 +#define mmSQ_PGM_RESOURCES_ES 0xA224 +#define mmSQ_PGM_START_FS 0xA225 +#define mmSQ_PGM_CF_OFFSET_FS 0xA237 +#define mmSQ_PGM_RESOURCES_FS 0xA229 +#define mmSQ_ESGS_RING_ITEMSIZE 0xA22A +#define mmSQ_GSVS_RING_ITEMSIZE 0xA22B +#define mmSQ_ESTMP_RING_ITEMSIZE 0xA22C +#define mmSQ_GSTMP_RING_ITEMSIZE 0xA22D +#define mmSQ_VSTMP_RING_ITEMSIZE 0xA22E +#define mmSQ_PSTMP_RING_ITEMSIZE 0xA22F +#define mmSQ_FBUF_RING_ITEMSIZE 0xA230 +#define mmSQ_REDUC_RING_ITEMSIZE 0xA231 +#define mmSQ_GS_VERT_ITEMSIZE 0xA232 +#define mmSQ_VTX_SEMANTIC_CLEAR 0xA238 + +#define mmSQ_VTX_SEMANTIC_0 0xA0E0 +#define mmSQ_VTX_SEMANTIC_1 0xA0E1 +#define mmSQ_VTX_SEMANTIC_2 0xA0E2 +#define mmSQ_VTX_SEMANTIC_3 0xA0E3 +#define mmSQ_VTX_SEMANTIC_4 0xA0E4 +#define mmSQ_VTX_SEMANTIC_5 0xA0E5 +#define mmSQ_VTX_SEMANTIC_6 0xA0E6 +#define mmSQ_VTX_SEMANTIC_7 0xA0E7 +#define mmSQ_VTX_SEMANTIC_8 0xA0E8 +#define mmSQ_VTX_SEMANTIC_9 0xA0E9 +#define mmSQ_VTX_SEMANTIC_10 0xA0EA +#define mmSQ_VTX_SEMANTIC_11 0xA0EB +#define mmSQ_VTX_SEMANTIC_12 0xA0EC +#define mmSQ_VTX_SEMANTIC_13 0xA0ED +#define mmSQ_VTX_SEMANTIC_14 0xA0EE +#define mmSQ_VTX_SEMANTIC_15 0xA0EF +#define mmSQ_VTX_SEMANTIC_16 0xA0F0 +#define mmSQ_VTX_SEMANTIC_17 0xA0F1 +#define mmSQ_VTX_SEMANTIC_18 0xA0F2 +#define mmSQ_VTX_SEMANTIC_19 0xA0F3 +#define mmSQ_VTX_SEMANTIC_20 0xA0F4 +#define mmSQ_VTX_SEMANTIC_21 0xA0F5 +#define mmSQ_VTX_SEMANTIC_22 0xA0F6 +#define mmSQ_VTX_SEMANTIC_23 0xA0F7 +#define mmSQ_VTX_SEMANTIC_24 0xA0F8 +#define mmSQ_VTX_SEMANTIC_25 0xA0F9 +#define mmSQ_VTX_SEMANTIC_26 0xA0FA +#define mmSQ_VTX_SEMANTIC_27 0xA0FB +#define mmSQ_VTX_SEMANTIC_28 0xA0FC +#define mmSQ_VTX_SEMANTIC_29 0xA0FD +#define mmSQ_VTX_SEMANTIC_30 0xA0FE +#define mmSQ_VTX_SEMANTIC_31 0xA0FF + +#define mmSQ_ALU_CONST_CACHE_PS_0 0xA250 +#define mmSQ_ALU_CONST_CACHE_PS_1 0xA251 +#define mmSQ_ALU_CONST_CACHE_PS_2 0xA252 +#define mmSQ_ALU_CONST_CACHE_PS_3 0xA253 +#define mmSQ_ALU_CONST_CACHE_PS_4 0xA254 +#define mmSQ_ALU_CONST_CACHE_PS_5 0xA255 +#define mmSQ_ALU_CONST_CACHE_PS_6 0xA256 +#define mmSQ_ALU_CONST_CACHE_PS_7 0xA257 +#define mmSQ_ALU_CONST_CACHE_PS_8 0xA258 +#define mmSQ_ALU_CONST_CACHE_PS_9 0xA259 +#define mmSQ_ALU_CONST_CACHE_PS_10 0xA25A +#define mmSQ_ALU_CONST_CACHE_PS_11 0xA25B +#define mmSQ_ALU_CONST_CACHE_PS_12 0xA25C +#define mmSQ_ALU_CONST_CACHE_PS_13 0xA25D +#define mmSQ_ALU_CONST_CACHE_PS_14 0xA25E +#define mmSQ_ALU_CONST_CACHE_PS_15 0xA25F +#define mmSQ_ALU_CONST_CACHE_VS_0 0xA260 +#define mmSQ_ALU_CONST_CACHE_VS_1 0xA261 +#define mmSQ_ALU_CONST_CACHE_VS_2 0xA262 +#define mmSQ_ALU_CONST_CACHE_VS_3 0xA263 +#define mmSQ_ALU_CONST_CACHE_VS_4 0xA264 +#define mmSQ_ALU_CONST_CACHE_VS_5 0xA265 +#define mmSQ_ALU_CONST_CACHE_VS_6 0xA266 +#define mmSQ_ALU_CONST_CACHE_VS_7 0xA267 +#define mmSQ_ALU_CONST_CACHE_VS_8 0xA268 +#define mmSQ_ALU_CONST_CACHE_VS_9 0xA269 +#define mmSQ_ALU_CONST_CACHE_VS_10 0xA26A +#define mmSQ_ALU_CONST_CACHE_VS_11 0xA26B +#define mmSQ_ALU_CONST_CACHE_VS_12 0xA26C +#define mmSQ_ALU_CONST_CACHE_VS_13 0xA26D +#define mmSQ_ALU_CONST_CACHE_VS_14 0xA26E +#define mmSQ_ALU_CONST_CACHE_VS_15 0xA26F +#define mmSQ_ALU_CONST_CACHE_GS_0 0xA270 +#define mmSQ_ALU_CONST_CACHE_GS_1 0xA271 +#define mmSQ_ALU_CONST_CACHE_GS_2 0xA272 +#define mmSQ_ALU_CONST_CACHE_GS_3 0xA273 +#define mmSQ_ALU_CONST_CACHE_GS_4 0xA274 +#define mmSQ_ALU_CONST_CACHE_GS_5 0xA275 +#define mmSQ_ALU_CONST_CACHE_GS_6 0xA276 +#define mmSQ_ALU_CONST_CACHE_GS_7 0xA277 +#define mmSQ_ALU_CONST_CACHE_GS_8 0xA278 +#define mmSQ_ALU_CONST_CACHE_GS_9 0xA279 +#define mmSQ_ALU_CONST_CACHE_GS_10 0xA27A +#define mmSQ_ALU_CONST_CACHE_GS_11 0xA27B +#define mmSQ_ALU_CONST_CACHE_GS_12 0xA27C +#define mmSQ_ALU_CONST_CACHE_GS_13 0xA27D +#define mmSQ_ALU_CONST_CACHE_GS_14 0xA27E +#define mmSQ_ALU_CONST_CACHE_GS_15 0xA27F +#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_0 0xA050 +#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_1 0xA051 +#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_2 0xA052 +#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_3 0xA053 +#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_4 0xA054 +#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_5 0xA055 +#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_6 0xA056 +#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_7 0xA057 +#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_8 0xA058 +#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_9 0xA059 +#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_10 0xA05A +#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_11 0xA05B +#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_12 0xA05C +#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_13 0xA05D +#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_14 0xA05E +#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_15 0xA05F +#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_0 0xA060 +#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_1 0xA061 +#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_2 0xA062 +#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_3 0xA063 +#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_4 0xA064 +#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_5 0xA065 +#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_6 0xA066 +#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_7 0xA067 +#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_8 0xA068 +#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_9 0xA069 +#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_10 0xA06A +#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_11 0xA06B +#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_12 0xA06C +#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_13 0xA06D +#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_14 0xA06E +#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_15 0xA06F +#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_0 0xA070 +#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_1 0xA071 +#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_2 0xA072 +#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_3 0xA073 +#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_4 0xA074 +#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_5 0xA075 +#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_6 0xA076 +#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_7 0xA077 +#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_8 0xA078 +#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_9 0xA079 +#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_10 0xA07A +#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_11 0xA07B +#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_12 0xA07C +#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_13 0xA07D +#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_14 0xA07E +#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_15 0xA07F + +#define mmSPI_VS_OUT_ID_0 0xA185 +#define mmSPI_VS_OUT_ID_1 0xA186 +#define mmSPI_VS_OUT_ID_2 0xA187 +#define mmSPI_VS_OUT_ID_3 0xA188 +#define mmSPI_VS_OUT_ID_4 0xA189 +#define mmSPI_VS_OUT_ID_5 0xA18A +#define mmSPI_VS_OUT_ID_6 0xA18B +#define mmSPI_VS_OUT_ID_7 0xA18C +#define mmSPI_VS_OUT_ID_8 0xA18D +#define mmSPI_VS_OUT_ID_9 0xA18E +#define mmSPI_PS_INPUT_CNTL_0 0xA191 +#define mmSPI_PS_INPUT_CNTL_1 0xA192 +#define mmSPI_PS_INPUT_CNTL_2 0xA193 +#define mmSPI_PS_INPUT_CNTL_3 0xA194 +#define mmSPI_PS_INPUT_CNTL_4 0xA195 +#define mmSPI_PS_INPUT_CNTL_5 0xA196 +#define mmSPI_PS_INPUT_CNTL_6 0xA197 +#define mmSPI_PS_INPUT_CNTL_7 0xA198 +#define mmSPI_PS_INPUT_CNTL_8 0xA199 +#define mmSPI_PS_INPUT_CNTL_9 0xA19A +#define mmSPI_PS_INPUT_CNTL_10 0xA19B +#define mmSPI_PS_INPUT_CNTL_11 0xA19C +#define mmSPI_PS_INPUT_CNTL_12 0xA19D +#define mmSPI_PS_INPUT_CNTL_13 0xA19E +#define mmSPI_PS_INPUT_CNTL_14 0xA19F +#define mmSPI_PS_INPUT_CNTL_15 0xA1A0 +#define mmSPI_PS_INPUT_CNTL_16 0xA1A1 +#define mmSPI_PS_INPUT_CNTL_17 0xA1A2 +#define mmSPI_PS_INPUT_CNTL_18 0xA1A3 +#define mmSPI_PS_INPUT_CNTL_19 0xA1A4 +#define mmSPI_PS_INPUT_CNTL_20 0xA1A5 +#define mmSPI_PS_INPUT_CNTL_21 0xA1A6 +#define mmSPI_PS_INPUT_CNTL_22 0xA1A7 +#define mmSPI_PS_INPUT_CNTL_23 0xA1A8 +#define mmSPI_PS_INPUT_CNTL_24 0xA1A9 +#define mmSPI_PS_INPUT_CNTL_25 0xA1AA +#define mmSPI_PS_INPUT_CNTL_26 0xA1AB +#define mmSPI_PS_INPUT_CNTL_27 0xA1AC +#define mmSPI_PS_INPUT_CNTL_28 0xA1AD +#define mmSPI_PS_INPUT_CNTL_29 0xA1AE +#define mmSPI_PS_INPUT_CNTL_30 0xA1AF +#define mmSPI_PS_INPUT_CNTL_31 0xA1B0 +#define mmSPI_VS_OUT_CONFIG 0xA1B1 +#define mmSPI_THREAD_GROUPING 0xA1B2 +#define mmSPI_PS_IN_CONTROL_0 0xA1B3 +#define mmSPI_PS_IN_CONTROL_1 0xA1B4 +#define mmSPI_INTERP_CONTROL_0 0xA1B5 +#define mmSPI_INPUT_Z 0xA1B6 +#define mmSPI_FOG_CNTL 0xA1B7 +#define mmSPI_FOG_FUNC_SCALE 0xA1B8 +#define mmSPI_FOG_FUNC_BIAS 0xA1B9 + +#define mmSX_MISC 0xA0D4 + +#define mmSX_ALPHA_TEST_CONTROL 0xA104 +#define mmSX_ALPHA_REF 0xA10E + +#define mmDB_DEPTH_BASE 0xA003 +#define mmDB_DEPTH_INFO 0xA004 +#define mmDB_HTILE_DATA_BASE 0xA005 +#define mmDB_DEPTH_SIZE 0xA000 +#define mmDB_DEPTH_VIEW 0xA001 +#define mmDB_RENDER_CONTROL 0xA343 +#define mmDB_RENDER_OVERRIDE 0xA344 +#define mmDB_SHADER_CONTROL 0xA203 +#define mmDB_STENCIL_CLEAR 0xA00A +#define mmDB_DEPTH_CLEAR 0xA00B +#define mmDB_HTILE_SURFACE 0xA349 +#define mmDB_PRELOAD_CONTROL 0xA34C +#define mmDB_PREFETCH_LIMIT 0xA34D +#define mmDB_STENCILREFMASK 0xA10C +#define mmDB_STENCILREFMASK_BF 0xA10D +#define mmDB_SRESULTS_COMPARE_STATE0 0xA34A +#define mmDB_SRESULTS_COMPARE_STATE1 0xA34B +#define mmDB_DEPTH_CONTROL 0xA200 +#define mmDB_ALPHA_TO_MASK 0xA351 + +#define mmCB_BLEND_RED 0xA105 +#define mmCB_BLEND_GREEN 0xA106 +#define mmCB_BLEND_BLUE 0xA107 +#define mmCB_BLEND_ALPHA 0xA108 +#define mmCB_FOG_RED_R6XX 0xA109 +#define mmCB_FOG_GREEN_R6XX 0xA10A +#define mmCB_FOG_BLUE_R6XX 0xA10B +#define mmCB_BLEND_CONTROL 0xA201 +#define mmCB_COLOR_CONTROL 0xA202 +#define mmCB_BLEND0_CONTROL 0xA1E0 +#define mmCB_BLEND1_CONTROL 0xA1E1 +#define mmCB_BLEND2_CONTROL 0xA1E2 +#define mmCB_BLEND3_CONTROL 0xA1E3 +#define mmCB_BLEND4_CONTROL 0xA1E4 +#define mmCB_BLEND5_CONTROL 0xA1E5 +#define mmCB_BLEND6_CONTROL 0xA1E6 +#define mmCB_BLEND7_CONTROL 0xA1E7 +#define mmCB_CLRCMP_CONTROL 0xA30C +#define mmCB_CLRCMP_SRC 0xA30D +#define mmCB_CLRCMP_DST 0xA30E +#define mmCB_CLRCMP_MSK 0xA30F +#define mmCB_COLOR0_BASE 0xA010 +#define mmCB_COLOR1_BASE 0xA011 +#define mmCB_COLOR2_BASE 0xA012 +#define mmCB_COLOR3_BASE 0xA013 +#define mmCB_COLOR4_BASE 0xA014 +#define mmCB_COLOR5_BASE 0xA015 +#define mmCB_COLOR6_BASE 0xA016 +#define mmCB_COLOR7_BASE 0xA017 +#define mmCB_COLOR0_SIZE 0xA018 +#define mmCB_COLOR1_SIZE 0xA019 +#define mmCB_COLOR2_SIZE 0xA01A +#define mmCB_COLOR3_SIZE 0xA01B +#define mmCB_COLOR4_SIZE 0xA01C +#define mmCB_COLOR5_SIZE 0xA01D +#define mmCB_COLOR6_SIZE 0xA01E +#define mmCB_COLOR7_SIZE 0xA01F +#define mmCB_COLOR0_VIEW 0xA020 +#define mmCB_COLOR1_VIEW 0xA021 +#define mmCB_COLOR2_VIEW 0xA022 +#define mmCB_COLOR3_VIEW 0xA023 +#define mmCB_COLOR4_VIEW 0xA024 +#define mmCB_COLOR5_VIEW 0xA025 +#define mmCB_COLOR6_VIEW 0xA026 +#define mmCB_COLOR7_VIEW 0xA027 +#define mmCB_COLOR0_INFO 0xA028 +#define mmCB_COLOR1_INFO 0xA029 +#define mmCB_COLOR2_INFO 0xA02A +#define mmCB_COLOR3_INFO 0xA02B +#define mmCB_COLOR4_INFO 0xA02C +#define mmCB_COLOR5_INFO 0xA02D +#define mmCB_COLOR6_INFO 0xA02E +#define mmCB_COLOR7_INFO 0xA02F +#define mmCB_COLOR0_TILE 0xA030 +#define mmCB_COLOR1_TILE 0xA031 +#define mmCB_COLOR2_TILE 0xA032 +#define mmCB_COLOR3_TILE 0xA033 +#define mmCB_COLOR4_TILE 0xA034 +#define mmCB_COLOR5_TILE 0xA035 +#define mmCB_COLOR6_TILE 0xA036 +#define mmCB_COLOR7_TILE 0xA037 +#define mmCB_COLOR0_FRAG 0xA038 +#define mmCB_COLOR1_FRAG 0xA039 +#define mmCB_COLOR2_FRAG 0xA03A +#define mmCB_COLOR3_FRAG 0xA03B +#define mmCB_COLOR4_FRAG 0xA03C +#define mmCB_COLOR5_FRAG 0xA03D +#define mmCB_COLOR6_FRAG 0xA03E +#define mmCB_COLOR7_FRAG 0xA03F +#define mmCB_COLOR0_MASK 0xA040 +#define mmCB_COLOR1_MASK 0xA041 +#define mmCB_COLOR2_MASK 0xA042 +#define mmCB_COLOR3_MASK 0xA043 +#define mmCB_COLOR4_MASK 0xA044 +#define mmCB_COLOR5_MASK 0xA045 +#define mmCB_COLOR6_MASK 0xA046 +#define mmCB_COLOR7_MASK 0xA047 +#define mmCB_CLEAR_RED_R6XX 0xA048 +#define mmCB_CLEAR_GREEN_R6XX 0xA049 +#define mmCB_CLEAR_BLUE_R6XX 0xA04A +#define mmCB_CLEAR_ALPHA_R6XX 0xA04B +#define mmCB_TARGET_MASK 0xA08E +#define mmCB_SHADER_MASK 0xA08F +#define mmCB_SHADER_CONTROL 0xA1E8 + +#define mmSQ_VTX_BASE_VTX_LOC 0xF3FC +#define mmSQ_VTX_START_INST_LOC 0xF3FD + +#endif /* _R700_CHIPOFFSET_H_ */ + diff --git a/src/mesa/drivers/dri/r600/r700_clear.c b/src/mesa/drivers/dri/r600/r700_clear.c new file mode 100644 index 0000000000..93f873beef --- /dev/null +++ b/src/mesa/drivers/dri/r600/r700_clear.c @@ -0,0 +1,262 @@ +/* + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Authors: + * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com> + * CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com> + */ + +#include "glheader.h" +#include "context.h" +#include "macros.h" +#include "imports.h" +#include "mtypes.h" +#include "enums.h" + +#include "r700_interface.h" +#include "r700_chip.h" +#include "r700_shaderinst.h" +#include "r700_emit.h" + +extern void r700InitState (GLcontext * ctx); +extern void r700SetupVTXConstans(GLcontext * ctx, + unsigned int nStreamID, + unsigned int aos_offset, + unsigned int size, /* number of elements in vector */ + unsigned int stride, + unsigned int count); +extern GLboolean r700SendContextStates(context_t *context); +extern GLboolean r700SyncSurf(context_t *context); + +static GLboolean r700ClearFast(context_t *context, GLbitfield mask) +{ + /* TODO, fast clear need implementation */ + return GL_FALSE; +} + +static GLboolean r700ClearWithDraw(context_t *context, GLbitfield mask) +{ + GLcontext *ctx = context->ctx; + + R700_CHIP_CONTEXT r700Saved; + R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(context->chipobj.pvChipObj); + struct r600_dma_region aos_vs; + struct r600_dma_region aos_fs; + struct r600_dma_region aos_vb; + aos_vs.buf = NULL; + aos_fs.buf = NULL; + aos_vb.buf = NULL; + + + unsigned int ui; + GLfloat fTemp; + GLfloat fVb[] = { 1.0f, 1.0f, 1.0f, 1.0f, + -1.0f, -1.0f, 1.0f, 1.0f, + 1.0f, -1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, + -1.0f, 1.0f, 1.0f, 1.0f, + -1.0f, -1.0f, 1.0f, 1.0f}; /* TODO : Z set here */ + unsigned int uVs[] = { 0xC, 0x81000000, 0x4, 0xA01C0000, + 0xC001203C, 0x94000688, 0xC001C000, 0x94200688, + 0x10000001, 0x540C90, 0x10000401, 0x20540C90, + 0x10000801, 0x40540C90, 0x90000C01, 0x60400C90, + 0x10000100, 0x600C90, 0x10000500, 0x20600C90, + 0x10000900, 0x40600C90, 0x90000D00, 0x60680C90, + 0x7C000000, 0x2D1001, 0x80000, 0xBEADEAF }; + unsigned int uFs[] = { 0x2, 0xA00C0000, 0xC0008000, 0x94200688, + 0x10000000, 0x340C90, 0x10000400, 0x20340C90, + 0x10000800, 0x40340C90, 0x90000C00, 0x60200C90}; + + if (context->screen->chip.type <= CHIP_TYPE_RV670) + { + uVs[9] = 0x541910; + uVs[11] = 0x20541910; + uVs[13] = 0x40541910; + uVs[15] = 0x60401910; + uVs[17] = 0x601910; + uVs[19] = 0x20601910; + uVs[21] = 0x40601910; + uVs[23] = 0x60681910; + uFs[5] = 0x341910; + uFs[7] = 0x20341910; + uFs[9] = 0x40341910; + uFs[11] = 0x60201910; + } + + r700SyncSurf(context); + + /* Save current chip object. */ + memcpy(&r700Saved, r700, sizeof(R700_CHIP_CONTEXT)); + + r700InitState(ctx); + + r700->CB_COLOR0_BASE.u32All = context->target.rt.gpu >> 8; + + /* Turn off perspective divid. */ + SETbit(r700->PA_CL_VTE_CNTL.u32All, VTX_XY_FMT_bit); + SETbit(r700->PA_CL_VTE_CNTL.u32All, VTX_Z_FMT_bit); + SETbit(r700->PA_CL_VTE_CNTL.u32All, VTX_W0_FMT_bit); + + if( (mask & BUFFER_BIT_FRONT_LEFT) || (mask & BUFFER_BIT_BACK_LEFT) ) + { /* Enable render target output. */ + SETfield(r700->CB_TARGET_MASK.u32All, 0xF, TARGET0_ENABLE_shift, TARGET0_ENABLE_mask); + } + else + { /* Disable render target output. */ + CLEARfield(r700->CB_TARGET_MASK.u32All, TARGET0_ENABLE_mask); /* TODO : OGL need 4 rt. */ + } + if (mask & BUFFER_BIT_DEPTH) + { + /* Set correct Z to clear. */ + SETbit(r700->DB_DEPTH_CONTROL.u32All, Z_WRITE_ENABLE_bit); + SETbit(r700->DB_DEPTH_CONTROL.u32All, Z_ENABLE_bit); + SETfield(r700->DB_DEPTH_CONTROL.u32All, FRAG_ALWAYS, ZFUNC_shift, ZFUNC_mask); + fTemp = ctx->Depth.Clear; + for(ui=2; ui<24;) + { + fVb[ui] = fTemp; + ui += 4; + } + } + else + { + /* Disable Z write. */ + CLEARbit(r700->DB_DEPTH_CONTROL.u32All, Z_WRITE_ENABLE_bit); + CLEARbit(r700->DB_DEPTH_CONTROL.u32All, Z_ENABLE_bit); + } + + /* Setup vb */ + R700_CMDBUF_CHECK_SPACE(6); + R700EP3 (context, IT_SET_CTL_CONST, 1); + R700E32 (context, mmSQ_VTX_BASE_VTX_LOC - ASIC_CTL_CONST_BASE_INDEX); + R700E32 (context, 0); + R700EP3 (context, IT_SET_CTL_CONST, 1); + R700E32 (context, mmSQ_VTX_START_INST_LOC - ASIC_CTL_CONST_BASE_INDEX); + R700E32 (context, 0); + (context->chipobj.EmitVec)(ctx, &aos_vb, (GLvoid *)fVb, 4, 16, 6); + r700SetupVTXConstans(ctx, VERT_ATTRIB_POS, (unsigned int)aos_vb.aos_offset, 4, 16, 6); + + /* Setup shaders, copied from dump */ + r700->SQ_PGM_RESOURCES_PS.u32All = 0; + r700->SQ_PGM_RESOURCES_VS.u32All = 0; + SETbit(r700->SQ_PGM_RESOURCES_PS.u32All, PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit); + SETbit(r700->SQ_PGM_RESOURCES_VS.u32All, PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit); + /* vs */ + (context->chipobj.EmitShader)(ctx, &aos_vs, (GLvoid *)(&uVs[0]), 28); + r700->SQ_PGM_START_VS.u32All = (aos_vs.aos_offset >> 8) & 0x00FFFFFF; + r700->SQ_PGM_RESOURCES_VS.u32All = 0x00800004; + /* vs const */ /* TODO : Set color here */ + R700_CMDBUF_CHECK_SPACE(4 + 2); + R700EP3 (context, IT_SET_ALU_CONST, 4); + R700E32 (context, SQ_ALU_CONSTANT_VS_OFFSET * 4); + R700E32 (context, *((unsigned int*)&(ctx->Color.ClearColor[0]))); + R700E32 (context, *((unsigned int*)&(ctx->Color.ClearColor[1]))); + R700E32 (context, *((unsigned int*)&(ctx->Color.ClearColor[2]))); + R700E32 (context, *((unsigned int*)&(ctx->Color.ClearColor[3]))); + + r700->SPI_VS_OUT_CONFIG.u32All = 0x00000000; + r700->SPI_PS_IN_CONTROL_0.u32All = 0x20000001; + /* ps */ + (context->chipobj.EmitShader)(ctx, &aos_fs, (GLvoid *)(&uFs[0]), 12); + r700->SQ_PGM_START_PS.u32All = (aos_fs.aos_offset >> 8) & 0x00FFFFFF; + r700->SQ_PGM_RESOURCES_PS.u32All = 0x00800002; + r700->SQ_PGM_EXPORTS_PS.u32All = 0x00000002; + r700->DB_SHADER_CONTROL.u32All = 0x00000200; + + r700->CB_SHADER_CONTROL.u32All = 0x00000001; + + /* set a valid base address to make the command checker happy */ + r700->SQ_PGM_START_FS.u32All = (aos_fs.aos_offset >> 8) & 0x00FFFFFF; + r700->SQ_PGM_START_ES.u32All = (aos_fs.aos_offset >> 8) & 0x00FFFFFF; + r700->SQ_PGM_START_GS.u32All = (aos_fs.aos_offset >> 8) & 0x00FFFFFF; + + /* Now, send the states */ + r700SendContextStates(context); + + /* Draw */ + GLuint numEntires, j; + GLuint numIndices = 6; + unsigned int VGT_DRAW_INITIATOR = 0; + unsigned int VGT_INDEX_TYPE = 0; + unsigned int VGT_PRIMITIVE_TYPE = 0; + unsigned int VGT_NUM_INDICES = 0; + + numEntires = 2 /* VGT_INDEX_TYPE */ + + 3 /* VGT_PRIMITIVE_TYPE */ + + numIndices + 3; /* DRAW_INDEX_IMMD */ + + R700_CMDBUF_CHECK_SPACE(numEntires); + + SETfield(VGT_INDEX_TYPE, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask); + + R700EP3(context, IT_INDEX_TYPE, 0); + R700E32(context, VGT_INDEX_TYPE); + + VGT_NUM_INDICES = numIndices; + + SETfield(VGT_PRIMITIVE_TYPE, DI_PT_TRILIST, VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask); + R700EP3(context, IT_SET_CONFIG_REG, 1); + R700E32(context, mmVGT_PRIMITIVE_TYPE - ASIC_CONFIG_BASE_INDEX); + R700E32(context, VGT_PRIMITIVE_TYPE); + + SETfield(VGT_DRAW_INITIATOR, DI_SRC_SEL_IMMEDIATE, SOURCE_SELECT_shift, SOURCE_SELECT_mask); + SETfield(VGT_DRAW_INITIATOR, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask); + + R700EP3(context, IT_DRAW_INDEX_IMMD, (numIndices + 1)); + R700E32(context, VGT_NUM_INDICES); + R700E32(context, VGT_DRAW_INITIATOR); + + for (j=0; j<numIndices; j++) + { + R700E32(context, j); + } + + (context->chipobj.FlushCmdBuffer)(context); + + /* TODO : keep these in context, don't load and release every time. */ + (context->chipobj.FreeDmaRegion)(context, &aos_vs); + (context->chipobj.FreeDmaRegion)(context, &aos_fs); + (context->chipobj.FreeDmaRegion)(context, &aos_vb); + + /* Restore chip object. */ + memcpy(r700, &r700Saved, sizeof(R700_CHIP_CONTEXT)); + + return GL_TRUE; +} + +void r700Clear(GLcontext * ctx, GLbitfield mask) +{ + context_t *context = R700_CONTEXT(ctx); + + if( GL_TRUE == r700ClearFast(context, mask) ) + { + return; + } + + r700ClearWithDraw(context, mask); +} + +void r700InitClearFuncs(struct dd_function_table *functions) +{ + functions->Clear = r700Clear; +} + diff --git a/src/mesa/drivers/dri/r600/r700_clear.h b/src/mesa/drivers/dri/r600/r700_clear.h new file mode 100644 index 0000000000..a0b7a8e0bd --- /dev/null +++ b/src/mesa/drivers/dri/r600/r700_clear.h @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Authors: + * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com> + * CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com> + */ + +#ifndef __r700_CLEAR_H__ +#define __r700_CLEAR_H__ + +extern void r700InitClearFuncs(struct dd_function_table *functions); + +#endif /* __r700_CLEAR_H__ */ diff --git a/src/mesa/drivers/dri/r600/r700_debug.c b/src/mesa/drivers/dri/r600/r700_debug.c new file mode 100644 index 0000000000..d8e4731c96 --- /dev/null +++ b/src/mesa/drivers/dri/r600/r700_debug.c @@ -0,0 +1,80 @@ +/* + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Authors: + * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com> + * CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com> + */ + +#include <stdio.h> +#include <stdarg.h> +#include <stdlib.h> +#include <string.h> + +#include "main/glheader.h" + +#include "r700_debug.h" +#include "r600_common.h" + +void NormalizeLogErrorCode(int nError) +{ + //TODO +} + +void r700_error(int nLocalError, char* fmt, ...) +{ + va_list args; + + NormalizeLogErrorCode(nLocalError); + + va_start(args, fmt); + fprintf(stderr, fmt, args); + va_end(args); +} + +void DumpHwBinary(int type, void *addr, int size) +{ + int i; + unsigned int *pHw = (unsigned int *)addr; + switch (type) + { + case DUMP_PIXEL_SHADER: + DEBUGF("Pixel Shader\n"); + break; + case DUMP_VERTEX_SHADER: + DEBUGF("Vertex Shader\n"); + break; + case DUMP_FETCH_SHADER: + DEBUGF("Fetch Shader\n"); + break; + } + + for (i = 0; i < size; i++) + { + DEBUGP("0x%08x,\t", *pHw); + if (i%4 == 3) + DEBUGP("\n", *pHw); + pHw++; + + } +} + diff --git a/src/mesa/drivers/dri/r600/r700_debug.h b/src/mesa/drivers/dri/r600/r700_debug.h new file mode 100644 index 0000000000..689cfbe30c --- /dev/null +++ b/src/mesa/drivers/dri/r600/r700_debug.h @@ -0,0 +1,90 @@ +/* + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Authors: + * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com> + * CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com> + */ + +#ifndef _R700_DEBUG_H_ +#define _R700_DEBUG_H_ + +enum R700_ERROR +{ + ERROR_ASM_VTX_CLAUSE = 0x1000, + ERROR_ASM_UNKOWNCLAUSE = 0x1001, + ERROR_ASM_ALLOCEXPORTCF = 0x1002, + ERROR_ASM_ALLOCVTXCF = 0x1003, + ERROR_ASM_ALLOCTEXCF = 0x1004, + ERROR_ASM_ALLOCALUCF = 0x1005, + ERROR_ASM_UNKNOWNILINST = 0x1006, + ERROR_ASM_SRCARGUMENT = 0x1007, + ERROR_ASM_DSTARGUMENT = 0x1008, + ERROR_ASM_TEXINSTRUCTION = 0x1009, + ERROR_ASM_ALUINSTRUCTION = 0x100A, + ERROR_ASM_INSTDSTTRACK = 0x100B, + ERROR_ASM_TEXDSTBADTYPE = 0x100C, + ERROR_ASM_ALUSRCBADTYPE = 0x100D, + ERROR_ASM_ALUSRCSELECT = 0x100E, + ERROR_ASM_ALUSRCNUMBER = 0x100F, + ERROR_ASM_ALUDSTBADTYPE = 0x1010, + ERROR_ASM_CONSTCHANNEL = 0x1011, + ERROR_ASM_BADSCALARBZ = 0x1012, + ERROR_ASM_BADGPRRESERVE = 0x1013, + ERROR_ASM_BADVECTORBZ = 0x1014, + ERROR_ASM_BADTEXINST = 0x1015, + ERROR_ASM_BADTEXSRC = 0x1016, + ERROR_ASM_BADEXPORTTYPE = 0x1017, + + + TODO_ASM_CONSTTEXADDR = 0x8000, + TODO_ASM_NEEDIMPINST = 0x8001, + TODO_ASM_TXB = 0x8002, + TODO_ASM_TXP = 0x8003 +}; + +enum R700_DUMP_TYPE +{ + DUMP_VERTEX_SHADER = 0x1, + DUMP_PIXEL_SHADER = 0x2, + DUMP_FETCH_SHADER = 0x4, +}; + +void NormalizeLogErrorCode(int nError); +/*NormalizeLogErrorCode(nLocalError); */ +void r700_error(int nLocalError, char *fmt, ...); +extern void DumpHwBinary(int, void *, int); + +#ifdef STANDALONE_COMPILER +#ifdef __cplusplus +extern "C" +{ +#endif //__cplusplus + +void LogString(char* szStr); + +#ifdef __cplusplus +} +#endif //__cplusplus +#endif /*STANDALONE_COMPILER*/ + +#endif /*_R700_DEBUG_H_*/ diff --git a/src/mesa/drivers/dri/r600/r700_emit.h b/src/mesa/drivers/dri/r600/r700_emit.h new file mode 100644 index 0000000000..5694e6ec1b --- /dev/null +++ b/src/mesa/drivers/dri/r600/r700_emit.h @@ -0,0 +1,86 @@ +/* + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Authors: + * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com> + * CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com> + */ + +#ifndef __R700_EMIT_H__ +#define __R700_EMIT_H__ + +#include "r700_chip.h" + +/* R6xx knows 2 ways to set registers: + * 1. Packet0, + * 2. A set of Packet3 commands, + * In current file, register setting use Packet3 commands. + */ + +/* R700_PM4_PACKET0 uses register index rather than offset. */ +#define R700_PM4_PACKET0(reg, n) (R700_PM4_PACKET0_NOP | ((n)<<16) | (reg)) +#define R700_PM4_PACKET3(cmd, n) (R700_PM4_PACKET3_NOP | ((n)<<16) | ((cmd)<<8)) + +#define R700_CMDBUF_BEGIN \ + uint32_t *dest = context->cmdbuf.cmd_buf + context->cmdbuf.count_used; + +#define R700_CMDBUF_CHECK_SPACE(n) \ + do \ + { \ + if (context->cmdbuf.count_used + (n) > context->cmdbuf.size) \ + { \ + fprintf (stderr, "Insufficient cmdbuffer size-aborting\n"); \ + _mesa_exit(-1); \ + } \ + } while (0) + +#define R700_CMDBUF_END(n) \ + do \ + { \ + context->cmdbuf.count_used += (n); \ + } while (0) + +/* Emit a single uint32_t value */ +#define R700E32(context, dword) \ + do \ + { \ + R700_CMDBUF_BEGIN; \ + R700_CMDBUF_CHECK_SPACE(1); \ + *dest = (dword); \ + R700_CMDBUF_END(1); \ + } while(0) + +/* Emit packet 3 header */ +#define R700EP3(context, cmd, num) \ + do \ + { \ + R700E32 (context, R700_PM4_PACKET3 ((cmd), (num))); \ + }while(0) + +/* Emit packet 0 header */ +#define R700EP0(context, cmd, num) \ + do \ + { \ + R700E32 (context, R700_PM4_PACKET0 ((cmd), (num))); \ + }while(0) + +#endif /* __R700_EMIT_H__ */ diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c new file mode 100644 index 0000000000..7123e5f94c --- /dev/null +++ b/src/mesa/drivers/dri/r600/r700_fragprog.c @@ -0,0 +1,355 @@ +/* + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Authors: + * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com> + * CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com> + */ + +#include <stdio.h> +#include <stdarg.h> +#include <stdlib.h> +#include <string.h> +#include <math.h> + +#include "main/imports.h" +#include "shader/prog_parameter.h" +#include "shader/prog_statevars.h" + +#include "r700_interface.h" + +#include "r700_chip.h" +#include "r700_fragprog.h" +#include "r700_emit.h" +#include "r700_debug.h" + +//TODO : Validate FP input with VP output. +void Map_Fragment_Program(r700_AssemblerBase *pAsm, + struct gl_fragment_program *mesa_fp) +{ + unsigned int unBit; + unsigned int i; + GLuint ui; + + pAsm->number_used_registers = 0; + +//Input mapping : mesa_fp->Base.InputsRead set the flag, set in + //The flags parsed in parse_attrib_binding. FRAG_ATTRIB_COLx, FRAG_ATTRIB_TEXx, ... + //MUST match order in Map_Vertex_Output + unBit = 1 << FRAG_ATTRIB_COL0; + if(mesa_fp->Base.InputsRead & unBit) + { + pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL0] = pAsm->number_used_registers++; + } + + unBit = 1 << FRAG_ATTRIB_COL1; + if(mesa_fp->Base.InputsRead & unBit) + { + pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL1] = pAsm->number_used_registers++; + } + + for(i=0; i<8; i++) + { + unBit = 1 << (FRAG_ATTRIB_TEX0 + i); + if(mesa_fp->Base.InputsRead & unBit) + { + pAsm->uiFP_AttributeMap[FRAG_ATTRIB_TEX0 + i] = pAsm->number_used_registers++; + } + } + +/* Map temporary registers (GPRs) */ + pAsm->starting_temp_register_number = pAsm->number_used_registers; + + if(mesa_fp->Base.NumNativeTemporaries >= mesa_fp->Base.NumTemporaries) + { + pAsm->number_used_registers += mesa_fp->Base.NumNativeTemporaries; + } + else + { + pAsm->number_used_registers += mesa_fp->Base.NumTemporaries; + } + +/* Output mapping */ + pAsm->number_of_exports = 0; + pAsm->number_of_colorandz_exports = 0; /* don't include stencil and mask out. */ + pAsm->starting_export_register_number = pAsm->number_used_registers; + unBit = 1 << FRAG_RESULT_COLR; + if(mesa_fp->Base.OutputsWritten & unBit) + { + pAsm->uiFP_OutputMap[FRAG_RESULT_COLR] = pAsm->number_used_registers++; + pAsm->number_of_exports++; + pAsm->number_of_colorandz_exports++; + } + unBit = 1 << FRAG_RESULT_DEPR; + if(mesa_fp->Base.OutputsWritten & unBit) + { + pAsm->depth_export_register_number = pAsm->number_used_registers; + pAsm->uiFP_OutputMap[FRAG_RESULT_DEPR] = pAsm->number_used_registers++; + pAsm->number_of_exports++; + pAsm->number_of_colorandz_exports++; + } + + pAsm->pucOutMask = (unsigned char*) MALLOC(pAsm->number_of_exports); + for(ui=0; ui<pAsm->number_of_exports; ui++) + { + pAsm->pucOutMask[ui] = 0x0; + } + + pAsm->uFirstHelpReg = pAsm->number_used_registers; +} + +GLboolean Find_Instruction_Dependencies_fp(struct r700_fragment_program *fp, + struct gl_fragment_program *mesa_fp) +{ + GLuint i, j; + GLint * puiTEMPwrites; + struct prog_instruction * pILInst; + InstDeps *pInstDeps; + struct prog_instruction * texcoord_DepInst; + GLint nDepInstID; + + puiTEMPwrites = (GLint*) MALLOC(sizeof(GLuint)*mesa_fp->Base.NumTemporaries); + for(i=0; i<mesa_fp->Base.NumTemporaries; i++) + { + puiTEMPwrites[i] = -1; + } + + pInstDeps = (InstDeps*)MALLOC(sizeof(InstDeps)*mesa_fp->Base.NumInstructions); + + for(i=0; i<mesa_fp->Base.NumInstructions; i++) + { + pInstDeps[i].nDstDep = -1; + pILInst = &(mesa_fp->Base.Instructions[i]); + + //Dst + if(pILInst->DstReg.File == PROGRAM_TEMPORARY) + { + //Set lastwrite for the temp + puiTEMPwrites[pILInst->DstReg.Index] = i; + } + + //Src + for(j=0; j<3; j++) + { + if(pILInst->SrcReg[j].File == PROGRAM_TEMPORARY) + { + //Set dep. + pInstDeps[i].nSrcDeps[j] = puiTEMPwrites[pILInst->SrcReg[j].Index]; + } + else + { + pInstDeps[i].nSrcDeps[j] = -1; + } + } + } + + fp->r700AsmCode.pInstDeps = pInstDeps; + + FREE(puiTEMPwrites); + + //Find dep for tex inst + for(i=0; i<mesa_fp->Base.NumInstructions; i++) + { + pILInst = &(mesa_fp->Base.Instructions[i]); + + if(GL_TRUE == IsTex(pILInst->Opcode)) + { //src0 is the tex coord register, src1 is texunit, src2 is textype + nDepInstID = pInstDeps[i].nSrcDeps[0]; + if(nDepInstID >= 0) + { + texcoord_DepInst = &(mesa_fp->Base.Instructions[nDepInstID]); + if(GL_TRUE == IsAlu(texcoord_DepInst->Opcode) ) + { + pInstDeps[nDepInstID].nDstDep = i; + pInstDeps[i].nDstDep = i; + } + else if(GL_TRUE == IsTex(texcoord_DepInst->Opcode) ) + { + pInstDeps[i].nDstDep = i; + } + else + { //... other deps? + } + } + } + } + + return GL_TRUE; +} + +GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp, + struct gl_fragment_program *mesa_fp) +{ + GLuint number_of_colors_exported; + GLboolean z_enabled = GL_FALSE; + GLuint unBit; + + //Init_Program + Init_r700_AssemblerBase( SPT_FP, &(fp->r700AsmCode), &(fp->r700Shader) ); + Map_Fragment_Program(&(fp->r700AsmCode), mesa_fp); + + if( GL_FALSE == Find_Instruction_Dependencies_fp(fp, mesa_fp) ) + { + return GL_FALSE; + } + + if( GL_FALSE == AssembleInstr(mesa_fp->Base.NumInstructions, + &(mesa_fp->Base.Instructions[0]), + &(fp->r700AsmCode)) ) + { + return GL_FALSE; + } + + if(GL_FALSE == Process_Fragment_Exports(&(fp->r700AsmCode), mesa_fp->Base.OutputsWritten) ) + { + return GL_FALSE; + } + + fp->r700Shader.nRegs = (fp->r700AsmCode.number_used_registers == 0) ? 0 + : (fp->r700AsmCode.number_used_registers - 1); + + fp->r700Shader.nParamExports = fp->r700AsmCode.number_of_exports; + + number_of_colors_exported = fp->r700AsmCode.number_of_colorandz_exports; + + unBit = 1 << FRAG_RESULT_DEPR; + if(mesa_fp->Base.OutputsWritten & unBit) + { + z_enabled = GL_TRUE; + number_of_colors_exported--; + } + + fp->r700Shader.exportMode = number_of_colors_exported << 1 | z_enabled; + + fp->translated = GL_TRUE; + + return GL_TRUE; +} + +GLboolean r700SetupFragmentProgram(GLcontext * ctx) +{ + context_t *context = R700_CONTEXT(ctx); + + R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(context->chipobj.pvChipObj); + + struct r700_fragment_program *fp = (struct r700_fragment_program *) + (ctx->FragmentProgram._Current); + + struct gl_program_parameter_list *paramList; + unsigned int unNumParamData; + unsigned int ui; + + unsigned int unNumOfReg; + + if(GL_FALSE == fp->loaded) + { + if(fp->r700Shader.bNeedsAssembly == GL_TRUE) + { + Assemble( &(fp->r700Shader) ); + } + + /* Load fp to gpu */ + (context->chipobj.EmitShader)(ctx, + &(fp->shadercode), + (GLvoid *)(fp->r700Shader.pProgram), + fp->r700Shader.uShaderBinaryDWORDSize); + + fp->loaded = GL_TRUE; + } + + DumpHwBinary(DUMP_PIXEL_SHADER, (GLvoid *)(fp->r700Shader.pProgram), + fp->r700Shader.uShaderBinaryDWORDSize); + + /* TODO : enable this after MemUse fixed *= + (context->chipobj.MemUse)(context, fp->shadercode.buf->id); + */ + + r700->SQ_PGM_START_PS.u32All = (fp->shadercode.aos_offset >> 8) & 0x00FFFFFF; + + unNumOfReg = fp->r700Shader.nRegs + 1; + + ui = (r700->SPI_PS_IN_CONTROL_0.u32All & NUM_INTERP_mask) / (1 << NUM_INTERP_shift); + + ui = ui ? unNumOfReg : ui; + + SETfield(r700->SQ_PGM_RESOURCES_PS.u32All, ui, NUM_GPRS_shift, NUM_GPRS_mask); + + CLEARbit(r700->SQ_PGM_RESOURCES_PS.u32All, UNCACHED_FIRST_INST_bit); + + if(fp->r700Shader.uStackSize) /* we don't use branch for now, it should be zero. */ + { + SETfield(r700->SQ_PGM_RESOURCES_PS.u32All, fp->r700Shader.uStackSize, + STACK_SIZE_shift, STACK_SIZE_mask); + } + + SETfield(r700->SQ_PGM_EXPORTS_PS.u32All, fp->r700Shader.exportMode, + EXPORT_MODE_shift, EXPORT_MODE_mask); + + if(fp->r700Shader.killIsUsed) + { + SETbit(r700->DB_SHADER_CONTROL.u32All, KILL_ENABLE_bit); + } + else + { + CLEARbit(r700->DB_SHADER_CONTROL.u32All, KILL_ENABLE_bit); + } + + if(fp->r700Shader.depthIsExported) + { + SETbit(r700->DB_SHADER_CONTROL.u32All, Z_EXPORT_ENABLE_bit); + } + else + { + CLEARbit(r700->DB_SHADER_CONTROL.u32All, Z_EXPORT_ENABLE_bit); + } + + /* sent out shader constants. */ + + paramList = fp->mesa_program.Base.Parameters; + + if(NULL != paramList) + { + _mesa_load_state_parameters(ctx, paramList); + + unNumParamData = paramList->NumParameters * 4; + R700_CMDBUF_CHECK_SPACE(2 + unNumParamData); + + R700EP3(context, IT_SET_ALU_CONST, unNumParamData); + /* assembler map const from very beginning. */ + R700E32(context, SQ_ALU_CONSTANT_PS_OFFSET * 4); + + unNumParamData = paramList->NumParameters; + + for(ui=0; ui<unNumParamData; ui++) + { + R700E32(context, *((unsigned int*)&(paramList->ParameterValues[ui][0]))); + R700E32(context, *((unsigned int*)&(paramList->ParameterValues[ui][1]))); + R700E32(context, *((unsigned int*)&(paramList->ParameterValues[ui][2]))); + R700E32(context, *((unsigned int*)&(paramList->ParameterValues[ui][3]))); + } + } + + return GL_TRUE; +} + + + diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.h b/src/mesa/drivers/dri/r600/r700_fragprog.h new file mode 100644 index 0000000000..54fe1f96fc --- /dev/null +++ b/src/mesa/drivers/dri/r600/r700_fragprog.h @@ -0,0 +1,61 @@ +/* + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Authors: + * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com> + */ + +#ifndef _R700_FRAGPROG_H_ +#define _R700_FRAGPROG_H_ + +#include "r700_interface.h" +#include "r700_assembler.h" + +struct r700_fragment_program +{ + struct gl_fragment_program mesa_program; + + r700_AssemblerBase r700AsmCode; + R700_Shader r700Shader; + + GLboolean translated; + GLboolean loaded; + GLboolean error; + + struct r600_dma_region shadercode; + + GLboolean WritesDepth; + GLuint optimization; +}; + +/* Internal */ +void Map_Fragment_Program(r700_AssemblerBase *pAsm, + struct gl_fragment_program *mesa_fp); +GLboolean Find_Instruction_Dependencies_fp(struct r700_fragment_program *fp, + struct gl_fragment_program *mesa_fp); + +/* Interface */ +extern GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp, + struct gl_fragment_program *mesa_vp); +extern GLboolean r700SetupFragmentProgram(GLcontext * ctx); + +#endif /*_R700_FRAGPROG_H_*/ diff --git a/src/mesa/drivers/dri/r600/r700_gem.c b/src/mesa/drivers/dri/r600/r700_gem.c new file mode 100644 index 0000000000..5c29ed6a96 --- /dev/null +++ b/src/mesa/drivers/dri/r600/r700_gem.c @@ -0,0 +1,27 @@ +/* + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Authors: + * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com> + * CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com> + */ + diff --git a/src/mesa/drivers/dri/r600/r700_interface.h b/src/mesa/drivers/dri/r600/r700_interface.h new file mode 100644 index 0000000000..524c80b4b4 --- /dev/null +++ b/src/mesa/drivers/dri/r600/r700_interface.h @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Authors: + * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com> + */ + +#ifndef _R700_INTERFACE_H_ +#define _R700_INTERFACE_H_ + +#include "r600_common.h" +#include "r600_context.h" + +/* the eag... */ +extern GLboolean r700InitChipObject(context_t *context); + +#endif /* _R700_INTERFACE_H_ */ + diff --git a/src/mesa/drivers/dri/r600/r700_oglprog.c b/src/mesa/drivers/dri/r600/r700_oglprog.c new file mode 100644 index 0000000000..d22398bf6d --- /dev/null +++ b/src/mesa/drivers/dri/r600/r700_oglprog.c @@ -0,0 +1,133 @@ +/* + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Authors: + * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com> + */ + +#include "main/glheader.h" +#include "main/imports.h" +#include <string.h> + +#include "shader/program.h" +#include "tnl/tnl.h" + +#include "r700_interface.h" + +#include "r700_chip.h" +#include "r700_oglprog.h" +#include "r700_fragprog.h" +#include "r700_vertprog.h" + +static struct gl_program *r700NewProgram(GLcontext * ctx, + GLenum target, + GLuint id) +{ + struct gl_program *pProgram = NULL; + + struct r700_vertex_program *vp; + struct r700_fragment_program *fp; + + switch (target) + { + case GL_VERTEX_STATE_PROGRAM_NV: + case GL_VERTEX_PROGRAM_ARB: + vp = CALLOC_STRUCT(r700_vertex_program); + pProgram = _mesa_init_vertex_program(ctx, + &vp->mesa_program, + target, + id); + vp->translated = GL_FALSE; + vp->loaded = GL_FALSE; + vp->shadercode.buf = NULL; + break; + case GL_FRAGMENT_PROGRAM_NV: + case GL_FRAGMENT_PROGRAM_ARB: + fp = CALLOC_STRUCT(r700_fragment_program); + pProgram = _mesa_init_fragment_program(ctx, + &fp->mesa_program, + target, + id); + fp->translated = GL_FALSE; + fp->loaded = GL_FALSE; + fp->shadercode.buf = NULL; + break; + default: + _mesa_problem(ctx, "Bad target in r700NewProgram"); + } + + return pProgram; +} + +static void r700DeleteProgram(GLcontext * ctx, struct gl_program *prog) +{ + + struct r700_vertex_program * vp; + struct r700_fragment_program * fp; + context_t *context = R700_CONTEXT(ctx); + + switch (prog->Target) + { + case GL_VERTEX_STATE_PROGRAM_NV: + case GL_VERTEX_PROGRAM_ARB: + vp = (struct r700_vertex_program*)prog; + /* Release DMA region */ + (context->chipobj.FreeDmaRegion)(context, &(vp->shadercode)); + /* Clean up */ + Clean_Up_Assembler(&(vp->r700AsmCode)); + Clean_Up_Shader(&(vp->r700Shader)); + break; + case GL_FRAGMENT_PROGRAM_NV: + case GL_FRAGMENT_PROGRAM_ARB: + fp = (struct r700_fragment_program*)prog; + /* Release DMA region */ + (context->chipobj.FreeDmaRegion)(context, &(fp->shadercode)); + /* Clean up */ + Clean_Up_Assembler(&(fp->r700AsmCode)); + Clean_Up_Shader(&(fp->r700Shader)); + break; + default: + _mesa_problem(ctx, "Bad target in r700NewProgram"); + } + + _mesa_delete_program(ctx, prog); +} + +static void +r700ProgramStringNotify(GLcontext * ctx, GLenum target, struct gl_program *prog) +{ + +} + +static GLboolean r700IsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog) +{ + + return GL_TRUE; +} + +void r700InitShaderFuncs(struct dd_function_table *functions) +{ + functions->NewProgram = r700NewProgram; + functions->DeleteProgram = r700DeleteProgram; + functions->ProgramStringNotify = r700ProgramStringNotify; + functions->IsProgramNative = r700IsProgramNative; +} diff --git a/src/mesa/drivers/dri/r600/r700_oglprog.h b/src/mesa/drivers/dri/r600/r700_oglprog.h new file mode 100644 index 0000000000..78867f2600 --- /dev/null +++ b/src/mesa/drivers/dri/r600/r700_oglprog.h @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Authors: + * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com> + * CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com> + */ + +#ifndef _R700_OGLPROG_H_ +#define _R700_OGLPROG_H_ +#include "r600_common.h" + +extern void r700InitShaderFuncs(struct dd_function_table *functions); + +#endif /*_R700_OGLPROG_H_*/ diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c new file mode 100644 index 0000000000..c71475d1c7 --- /dev/null +++ b/src/mesa/drivers/dri/r600/r700_render.c @@ -0,0 +1,452 @@ +/* + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Authors: + * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com> + * CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com> + */ + +#include "glheader.h" +#include "state.h" +#include "imports.h" +#include "enums.h" +#include "macros.h" +#include "context.h" +#include "dd.h" +#include "simple_list.h" +#include "api_arrayelt.h" +#include "swrast/swrast.h" +#include "swrast_setup/swrast_setup.h" +#include "vbo/vbo.h" + +#include "tnl/tnl.h" +#include "tnl/t_vp_build.h" +#include "tnl/t_context.h" +#include "tnl/t_vertex.h" +#include "tnl/t_pipeline.h" + +#include "r700_interface.h" + +#include "r700_chip.h" +#include "r700_vertprog.h" +#include "r700_fragprog.h" +#include "r700_state.h" +#include "r700_tex.h" +#include "r700_emit.h" + +void r700WaitForIdle(context_t *context) +{ + R700EP3 (context, IT_SET_CONFIG_REG, 1); + R700E32 (context, mmWAIT_UNTIL - ASIC_CONFIG_BASE_INDEX); + R700E32 (context, 1 << 15); +} + +void r700WaitForIdleClean(context_t *context) +{ + R700EP3 (context, IT_EVENT_WRITE, 0); + R700E32 (context, 0x16); + + R700EP3 (context, IT_SET_CONFIG_REG, 1); + R700E32 (context, mmWAIT_UNTIL - ASIC_CONFIG_BASE_INDEX); + R700E32 (context, 1 << 17); +} + +static void r700Start3D(context_t *context) +{ + if (context->screen->chip.type <= CHIP_TYPE_RV670) + { + R700EP3 (context, IT_START_3D_CMDBUF, 1); + R700E32 (context, 0); + } + + R700EP3 (context, IT_CONTEXT_CONTROL, 1); + R700E32 (context, 0x80000000); + R700E32 (context, 0x80000000); + r700WaitForIdleClean(context); +} + + +static int r700SetupStreams(GLcontext * ctx) +{ + context_t *context = R700_CONTEXT(ctx); + + struct r700_vertex_program *vpc + = (struct r700_vertex_program *)ctx->VertexProgram._Current; + + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *vb = &tnl->vb; + + unsigned int unBit; + unsigned int i; + + R700_CMDBUF_CHECK_SPACE(6); + R700EP3 (context, IT_SET_CTL_CONST, 1); + R700E32 (context, mmSQ_VTX_BASE_VTX_LOC - ASIC_CTL_CONST_BASE_INDEX); + R700E32 (context, 0); + + R700EP3 (context, IT_SET_CTL_CONST, 1); + R700E32 (context, mmSQ_VTX_START_INST_LOC - ASIC_CTL_CONST_BASE_INDEX); + R700E32 (context, 0); + + context->aos_count = 0; + for(i=0; i<VERT_ATTRIB_MAX; i++) + { + unBit = 1 << i; + if(vpc->mesa_program.Base.InputsRead & unBit) + { + (context->chipobj.EmitVec)(ctx, + &(context->aos[context->aos_count]), + vb->AttribPtr[i]->data, + vb->AttribPtr[i]->size, + vb->AttribPtr[i]->stride, + vb->Count); + + context->aos[context->aos_count].aos_size = vb->AttribPtr[i]->size; + + /* currently aos are packed */ + r700SetupVTXConstans(ctx, + i, + (unsigned int)context->aos[context->aos_count].aos_offset, + (unsigned int)vb->AttribPtr[i]->size, + (unsigned int)(vb->AttribPtr[i]->size * 4), + (unsigned int)vb->Count); + /* TODO : enable this after MemUse fixed *= + (context->chipobj.MemUse)(context, context->aos[context->aos_count].buf->id); + */ + + context->aos_count++; + } + } + for(i=context->aos_count; i<VERT_ATTRIB_MAX; i++) + { + context->aos[i].buf = NULL; + } + + return R600_FALLBACK_NONE; +} + +static GLboolean r700SetupShaders(GLcontext * ctx) +{ + context_t *context = R700_CONTEXT(ctx); + + R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(context->chipobj.pvChipObj); + + GLuint exportCount; + + r700->SQ_PGM_RESOURCES_PS.u32All = 0; + r700->SQ_PGM_RESOURCES_VS.u32All = 0; + + SETbit(r700->SQ_PGM_RESOURCES_PS.u32All, PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit); + SETbit(r700->SQ_PGM_RESOURCES_VS.u32All, PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit); + + r700SetupVertexProgram(ctx); + + r700SetupFragmentProgram(ctx); + + exportCount = (r700->SQ_PGM_EXPORTS_PS.u32All & EXPORT_MODE_mask) / (1 << EXPORT_MODE_shift); + r700->CB_SHADER_CONTROL.u32All = (1 << exportCount) - 1; + + return GL_TRUE; +} + +GLboolean r700SendTextureState(context_t *context) +{ + unsigned int i; + + R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(context->chipobj.pvChipObj); + + for(i=0; i<R700_TEXTURE_NUMBERUNITS; i++) + { + if(r700->texture_states.textures[i] != 0) + { + R700_CMDBUF_CHECK_SPACE(9); + R700EP3 (context, IT_SET_RESOURCE, 7); + R700E32 (context, i * 7); + R700E32 (context, r700->texture_states.textures[i]->SQ_TEX_RESOURCE0.u32All); + R700E32 (context, r700->texture_states.textures[i]->SQ_TEX_RESOURCE1.u32All); + R700E32 (context, r700->texture_states.textures[i]->SQ_TEX_RESOURCE2.u32All); + R700E32 (context, r700->texture_states.textures[i]->SQ_TEX_RESOURCE3.u32All); + R700E32 (context, r700->texture_states.textures[i]->SQ_TEX_RESOURCE4.u32All); + R700E32 (context, r700->texture_states.textures[i]->SQ_TEX_RESOURCE5.u32All); + R700E32 (context, r700->texture_states.textures[i]->SQ_TEX_RESOURCE6.u32All); + } + + if(r700->texture_states.samplers[i] != 0) + { + R700_CMDBUF_CHECK_SPACE(5); + R700EP3 (context, IT_SET_SAMPLER, 3); + R700E32 (context, i * 3); // Base at 0x7000 + R700E32 (context, r700->texture_states.samplers[i]->SQ_TEX_SAMPLER0.u32All); + R700E32 (context, r700->texture_states.samplers[i]->SQ_TEX_SAMPLER1.u32All); + R700E32 (context, r700->texture_states.samplers[i]->SQ_TEX_SAMPLER2.u32All); + } + } + + return GL_TRUE; +} + +GLboolean r700SyncSurf(context_t *context) +{ + /* TODO : too heavy? */ + unsigned int CP_COHER_CNTL = 0; + + CP_COHER_CNTL |= TC_ACTION_ENA_bit + |VC_ACTION_ENA_bit + |CB_ACTION_ENA_bit + |DB_ACTION_ENA_bit + |SH_ACTION_ENA_bit + |SMX_ACTION_ENA_bit; + + + R700_CMDBUF_CHECK_SPACE(5); + R700EP3(context, IT_SURFACE_SYNC, 3); + R700E32(context, CP_COHER_CNTL); + R700E32(context, 0xFFFFFFFF); + R700E32(context, 0x00000000); + R700E32(context, 10); + + return GL_TRUE; +} + +static void r700SetRenderTarget(context_t *context) +{ + R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(context->chipobj.pvChipObj); + + r700->CB_COLOR0_BASE.u32All = context->target.rt.gpu >> 8; +} + +unsigned int r700PrimitiveType(int prim) +{ + switch (prim & PRIM_MODE_MASK) + { + case GL_POINTS: + return DI_PT_POINTLIST; + break; + case GL_LINES: + return DI_PT_LINELIST; + break; + case GL_LINE_STRIP: + return DI_PT_LINESTRIP; + break; + case GL_LINE_LOOP: + return DI_PT_LINELOOP; + break; + case GL_TRIANGLES: + return DI_PT_TRILIST; + break; + case GL_TRIANGLE_STRIP: + return DI_PT_TRISTRIP; + break; + case GL_TRIANGLE_FAN: + return DI_PT_TRIFAN; + break; + case GL_QUADS: + return DI_PT_QUADLIST; + break; + case GL_QUAD_STRIP: + return DI_PT_QUADSTRIP; + break; + case GL_POLYGON: + return DI_PT_POLYGON; + break; + default: + assert(0); + return -1; + break; + } +} + +static GLboolean r700RunRender(GLcontext * ctx, + struct tnl_pipeline_stage *stage) +{ + context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(context->chipobj.pvChipObj); + unsigned int i, j; + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *vb = &tnl->vb; + + struct r700_fragment_program *fp = (struct r700_fragment_program *) + (ctx->FragmentProgram._Current); + if (context->screen->chip.type <= CHIP_TYPE_RV670) + { + fp->r700AsmCode.bR6xx = 1; + } + + r700Start3D(context); /* TODO : this is too much. */ + + r700SyncSurf(context); /* TODO : make it light. */ + + r700UpdateShaders(ctx); + + r700SetRenderTarget(context); + + if(r700SetupStreams(ctx)) + { + return GL_TRUE; + } + + r700UpdateTextureState(context); + r700SendTextureState(context); + + if(GL_FALSE == fp->translated) + { + if( GL_FALSE == r700TranslateFragmentShader(fp, &(fp->mesa_program)) ) + { + return GL_TRUE; + } + } + + r700SetupShaders(ctx); + + /* set a valid base address to make the command checker happy */ + r700->SQ_PGM_START_FS.u32All = r700->SQ_PGM_START_PS.u32All; + r700->SQ_PGM_START_ES.u32All = r700->SQ_PGM_START_PS.u32All; + r700->SQ_PGM_START_GS.u32All = r700->SQ_PGM_START_PS.u32All; + + r700SendContextStates(context); + + /* richard test code */ + for (i = 0; i < vb->PrimitiveCount; i++) + { + GLuint prim = _tnl_translate_prim(&vb->Primitive[i]); + GLuint start = vb->Primitive[i].start; + GLuint end = vb->Primitive[i].start + vb->Primitive[i].count; + GLuint numIndices = vb->Primitive[i].count; + GLuint numEntires; + //r300RunRenderPrimitive(rmesa, ctx, start, end, prim); + + unsigned int VGT_DRAW_INITIATOR = 0; + unsigned int VGT_INDEX_TYPE = 0; + unsigned int VGT_PRIMITIVE_TYPE = 0; + unsigned int VGT_NUM_INDICES = 0; + + numEntires = 2 /* VGT_INDEX_TYPE */ + + 3 /* VGT_PRIMITIVE_TYPE */ + + numIndices + 3 /* DRAW_INDEX_IMMD */ + + 2; /* test stamp */ + + R700_CMDBUF_CHECK_SPACE(numEntires); + + VGT_INDEX_TYPE |= DI_INDEX_SIZE_32_BIT << INDEX_TYPE_shift; + + R700EP3(context, IT_INDEX_TYPE, 0); + R700E32(context, VGT_INDEX_TYPE); + + VGT_NUM_INDICES = numIndices; + + VGT_PRIMITIVE_TYPE |= r700PrimitiveType(prim) << VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift; + R700EP3(context, IT_SET_CONFIG_REG, 1); + R700E32(context, mmVGT_PRIMITIVE_TYPE - ASIC_CONFIG_BASE_INDEX); + R700E32(context, VGT_PRIMITIVE_TYPE); + + VGT_DRAW_INITIATOR |= DI_SRC_SEL_IMMEDIATE << SOURCE_SELECT_shift; + VGT_DRAW_INITIATOR |= DI_MAJOR_MODE_0 << MAJOR_MODE_shift; + + R700EP3(context, IT_DRAW_INDEX_IMMD, (numIndices + 1)); + R700E32(context, VGT_NUM_INDICES); + R700E32(context, VGT_DRAW_INITIATOR); + + for (j=0; j<numIndices; j++) + { + R700E32(context, j); + } + + /* test stamp, write a number to mmSCRATCH4 */ + R700EP3(context, IT_SET_CONFIG_REG, 1); + R700E32(context, 0x2144 - 0x2000); + R700E32(context, 0x12341234); + } + + /* Flush render op cached for last several quads. */ + R700_CMDBUF_CHECK_SPACE(2); + R700EP3 (context, IT_EVENT_WRITE, 0); + R700E32 (context, CACHE_FLUSH_AND_INV_EVENT); + + (context->chipobj.FlushCmdBuffer)(context); + + /* free aos => TODO : cache mgr */ + for (i = 0; i < context->aos_count; i++) + { + (context->chipobj.FreeDmaRegion)(context, &(context->aos[i])); + } + + return GL_FALSE; +} + +static GLboolean r700RunNonTCLRender(GLcontext * ctx, + struct tnl_pipeline_stage *stage) /* -------------------- */ +{ + GLboolean bRet = GL_TRUE; + + return bRet; +} + +static GLboolean r700RunTCLRender(GLcontext * ctx, /*----------------------*/ + struct tnl_pipeline_stage *stage) +{ + GLboolean bRet = GL_FALSE; + context_t *context = R700_CONTEXT(ctx); + + r700UpdateShaders(ctx); + + bRet = r700RunRender(ctx, stage); + + return bRet; + //GL_FALSE will stop to do other pipe stage in _tnl_run_pipeline + //The render here DOES finish the whole pipe, so GL_FALSE should be returned for success. +} + +const struct tnl_pipeline_stage _r700_render_stage = { + "r700 Hardware Rasterization", + NULL, + NULL, + NULL, + NULL, + r700RunNonTCLRender +}; + +const struct tnl_pipeline_stage _r700_tcl_stage = { + "r700 Hardware Transform, Clipping and Lighting", + NULL, + NULL, + NULL, + NULL, + r700RunTCLRender +}; + +const struct tnl_pipeline_stage *r700_pipeline[] = +{ + &_r700_tcl_stage, + &_tnl_vertex_transform_stage, + &_tnl_normal_transform_stage, + &_tnl_lighting_stage, + &_tnl_fog_coordinate_stage, + &_tnl_texgen_stage, + &_tnl_texture_transform_stage, + &_tnl_vertex_program_stage, + + &_r700_render_stage, + &_tnl_render_stage, + 0, +}; + + diff --git a/src/mesa/drivers/dri/r600/r700_shader.c b/src/mesa/drivers/dri/r600/r700_shader.c new file mode 100644 index 0000000000..394aa8d846 --- /dev/null +++ b/src/mesa/drivers/dri/r600/r700_shader.c @@ -0,0 +1,528 @@ +/* + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Authors: + * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com> + */ + +#include <stdio.h> +#include <stdarg.h> +#include <stdlib.h> +#include <string.h> +#include <math.h> + +#include "main/imports.h" + +#include "main/glheader.h" + +#include "r700_interface.h" +#include "r700_chip.h" +#include "r700_debug.h" + +#include "r700_shader.h" + +void r700ShaderInit(GLcontext * ctx) +{ +} + +void AddInstToList(TypedShaderList * plstCFInstructions, R700ShaderInstruction * pInst) +{ + if(NULL == plstCFInstructions->pTail) + { //first + plstCFInstructions->pHead = pInst; + plstCFInstructions->pTail = pInst; + } + else + { + plstCFInstructions->pTail->pNextInst = pInst; + plstCFInstructions->pTail = pInst; + } + pInst->pNextInst = NULL; + + plstCFInstructions->uNumOfNode++; +} + +void Init_R700_Shader(R700_Shader * pShader) +{ + pShader->Type = R700_SHADER_INVALID; + pShader->pProgram = NULL; + pShader->bBinaryShader = GL_FALSE; + pShader->bFetchShaderRequired = GL_FALSE; + pShader->bNeedsAssembly = GL_FALSE; + pShader->bLinksDirty = GL_TRUE; + pShader->uShaderBinaryDWORDSize = 0; + pShader->nRegs = 0; + pShader->nParamExports = 0; + pShader->nMemExports = 0; + pShader->resource = 0; + + pShader->exportMode = 0; + pShader->depthIsImported = GL_FALSE; + + pShader->positionVectorIsExported = GL_FALSE; + pShader->miscVectorIsExported = GL_FALSE; + pShader->renderTargetArrayIndexIsExported = GL_FALSE; + pShader->ccDist0VectorIsExported = GL_FALSE; + pShader->ccDist1VectorIsExported = GL_FALSE; + + + pShader->depthIsExported = GL_FALSE; + pShader->stencilRefIsExported = GL_FALSE; + pShader->coverageToMaskIsExported = GL_FALSE; + pShader->maskIsExported = GL_FALSE; + pShader->killIsUsed = GL_FALSE; + + pShader->uCFOffset = 0; + pShader->uStackSize = 0; + pShader->uMaxCallDepth = 0; + + pShader->bSurfAllocated = GL_FALSE; + + pShader->lstCFInstructions.pHead=NULL; + pShader->lstCFInstructions.pTail=NULL; + pShader->lstCFInstructions.uNumOfNode=0; + pShader->lstALUInstructions.pHead=NULL; + pShader->lstALUInstructions.pTail=NULL; + pShader->lstALUInstructions.uNumOfNode=0; + pShader->lstTEXInstructions.pHead=NULL; + pShader->lstTEXInstructions.pTail=NULL; + pShader->lstTEXInstructions.uNumOfNode=0; + pShader->lstVTXInstructions.pHead=NULL; + pShader->lstVTXInstructions.pTail=NULL; + pShader->lstVTXInstructions.uNumOfNode=0; +} + +void AddCFInstruction(R700_Shader *pShader, R700ControlFlowInstruction *pCFInst) +{ + R700ControlFlowSXClause* pSXClause; + R700ControlFlowSMXClause* pSMXClause; + + pCFInst->m_uIndex = pShader->lstCFInstructions.uNumOfNode; + AddInstToList(&(pShader->lstCFInstructions), + (R700ShaderInstruction*)pCFInst); + pShader->uShaderBinaryDWORDSize += GetInstructionSize(pCFInst->m_ShaderInstType); + + pSXClause = NULL; + pSMXClause = NULL; + switch (pCFInst->m_ShaderInstType) + { + case SIT_CF_ALL_EXP_SX: + pSXClause = (R700ControlFlowSXClause*)pCFInst; + break; + case SIT_CF_ALL_EXP_SMX: + pSMXClause = (R700ControlFlowSMXClause*)pCFInst; + break; + default: + break; + }; + + if((pSXClause != NULL) && (pSXClause->m_Word0.f.type == SQ_EXPORT_PARAM)) + { + pShader->nParamExports += pSXClause->m_Word1.f.burst_count + 1; + } + else if ((pSMXClause != NULL) && (pSMXClause->m_Word1.f.cf_inst == SQ_CF_INST_MEM_RING) && + (pSMXClause->m_Word0.f.type == SQ_EXPORT_WRITE || pSMXClause->m_Word0.f.type == SQ_EXPORT_WRITE_IND)) + { + pShader->nMemExports += pSMXClause->m_Word1.f.burst_count + 1; + } + + pShader->bLinksDirty = GL_TRUE; + pShader->bNeedsAssembly = GL_TRUE; + + pCFInst->useCount++; +} + +void AddVTXInstruction(R700_Shader *pShader, R700VertexInstruction *pVTXInst) +{ + pVTXInst->m_uIndex = pShader->lstVTXInstructions.uNumOfNode; + AddInstToList(&(pShader->lstVTXInstructions), + (R700ShaderInstruction*)pVTXInst); + pShader->uShaderBinaryDWORDSize += GetInstructionSize(pVTXInst->m_ShaderInstType); + + if(pVTXInst->m_ShaderInstType == SIT_VTX_GENERIC) + { + R700VertexGenericFetch* pVTXGenericClause = (R700VertexGenericFetch*)pVTXInst; + pShader->nRegs = (pShader->nRegs < pVTXGenericClause->m_Word1_GPR.f.dst_gpr) ? pVTXGenericClause->m_Word1_GPR.f.dst_gpr : pShader->nRegs; + } + + pShader->bLinksDirty = GL_TRUE; + pShader->bNeedsAssembly = GL_TRUE; + + pVTXInst->useCount++; +} + +void AddTEXInstruction(R700_Shader *pShader, R700TextureInstruction *pTEXInst) +{ + pTEXInst->m_uIndex = pShader->lstTEXInstructions.uNumOfNode; + AddInstToList(&(pShader->lstTEXInstructions), + (R700ShaderInstruction*)pTEXInst); + pShader->uShaderBinaryDWORDSize += GetInstructionSize(pTEXInst->m_ShaderInstType); + + pShader->nRegs = (pShader->nRegs < pTEXInst->m_Word1.f.dst_gpr) ? pTEXInst->m_Word1.f.dst_gpr : pShader->nRegs; + + pShader->bLinksDirty = GL_TRUE; + pShader->bNeedsAssembly = GL_TRUE; + + pTEXInst->useCount++; +} + +void AddALUInstruction(R700_Shader *pShader, R700ALUInstruction *pALUInst) +{ + pALUInst->m_uIndex = pShader->lstALUInstructions.uNumOfNode; + AddInstToList(&(pShader->lstALUInstructions), + (R700ShaderInstruction*)pALUInst); + pShader->uShaderBinaryDWORDSize += GetInstructionSize(pALUInst->m_ShaderInstType); + + pShader->nRegs = (pShader->nRegs < pALUInst->m_Word1.f.dst_gpr) ? pALUInst->m_Word1.f.dst_gpr : pShader->nRegs; + + pShader->bLinksDirty = GL_TRUE; + pShader->bNeedsAssembly = GL_TRUE; + + pALUInst->useCount++; +} + +void ResolveLinks(R700_Shader *pShader) +{ + GLuint uiSize; + R700ShaderInstruction *pInst; + R700ALUInstruction *pALUinst; + R700TextureInstruction *pTEXinst; + R700VertexInstruction *pVTXinst; + + GLuint vtxOffset; + + GLuint cfOffset = 0x0; + + GLuint aluOffset = cfOffset + pShader->lstCFInstructions.uNumOfNode * GetInstructionSize(SIT_CF); + + GLuint texOffset = aluOffset; // + m_lstALUInstructions.size() * R700ALUInstruction::SIZE, + + pInst = pShader->lstALUInstructions.pHead; + while(NULL != pInst) + { + texOffset += GetInstructionSize(pInst->m_ShaderInstType); + + pInst = pInst->pNextInst; + }; + + vtxOffset = texOffset + pShader->lstTEXInstructions.uNumOfNode * GetInstructionSize(SIT_TEX); + + if ( ((pShader->lstTEXInstructions.uNumOfNode > 0) && (texOffset % 4 != 0)) || + ((pShader->lstVTXInstructions.uNumOfNode > 0) && (vtxOffset % 4 != 0)) ) + { + pALUinst = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction); + Init_R700ALUInstruction(pALUinst); + AddALUInstruction(pShader, pALUinst); + texOffset += GetInstructionSize(SIT_ALU); + vtxOffset += GetInstructionSize(SIT_ALU); + } + + pInst = pShader->lstALUInstructions.pHead; + uiSize = 0; + while(NULL != pInst) + { + pALUinst = (R700ALUInstruction*)pInst; + + if(pALUinst->m_pLinkedALUClause != NULL) + { + // This address is quad-word aligned + pALUinst->m_pLinkedALUClause->m_Word0.f.addr = (aluOffset + uiSize) >> 1; + } + + uiSize += GetInstructionSize(pALUinst->m_ShaderInstType); + + pInst = pInst->pNextInst; + }; + + pInst = pShader->lstTEXInstructions.pHead; + uiSize = 0; + while(NULL != pInst) + { + pTEXinst = (R700TextureInstruction*)pInst; + + if (pTEXinst->m_pLinkedGenericClause != NULL) + { + pTEXinst->m_pLinkedGenericClause->m_Word0.f.addr = (texOffset + uiSize) >> 1; + } + + uiSize += GetInstructionSize(pTEXinst->m_ShaderInstType); + + pInst = pInst->pNextInst; + }; + + pInst = pShader->lstVTXInstructions.pHead; + uiSize = 0; + while(NULL != pInst) + { + pVTXinst = (R700VertexInstruction*)pInst; + + if (pVTXinst->m_pLinkedGenericClause != NULL) + { + pVTXinst->m_pLinkedGenericClause->m_Word0.f.addr = (vtxOffset + uiSize) >> 1; + } + + uiSize += GetInstructionSize(pVTXinst->m_ShaderInstType); + + pInst = pInst->pNextInst; + }; + + pShader->bLinksDirty = GL_FALSE; +} + +void Assemble(R700_Shader *pShader) +{ + GLuint i; + GLuint *pShaderBinary; + GLuint size_of_program; + GLuint *pCurrPos; + + GLuint end_of_cf_instructions; + GLuint number_of_alu_dwords; + + R700ShaderInstruction *pInst; + + if(GL_TRUE == pShader->bBinaryShader) + { + return; + } + + if(pShader->bLinksDirty == GL_TRUE) + { + ResolveLinks(pShader); + } + + size_of_program = pShader->uShaderBinaryDWORDSize; + + pShaderBinary = (GLuint*) MALLOC(sizeof(GLuint)*size_of_program); + + pCurrPos = pShaderBinary; + + for (i = 0; i < size_of_program; i++) + { + pShaderBinary[i] = 0; + } + + pInst = pShader->lstCFInstructions.pHead; + while(NULL != pInst) + { + switch (pInst->m_ShaderInstType) + { + case SIT_CF_GENERIC: + { + R700ControlFlowGenericClause* pCFgeneric = (R700ControlFlowGenericClause*)pInst; + *pCurrPos++ = pCFgeneric->m_Word0.val; + *pCurrPos++ = pCFgeneric->m_Word1.val; + } + break; + case SIT_CF_ALU: + { + R700ControlFlowALUClause* pCFalu = (R700ControlFlowALUClause*)pInst; + *pCurrPos++ = pCFalu->m_Word0.val; + *pCurrPos++ = pCFalu->m_Word1.val; + } + break; + case SIT_CF_ALL_EXP_SX: + { + R700ControlFlowSXClause* pCFsx = (R700ControlFlowSXClause*)pInst; + *pCurrPos++ = pCFsx->m_Word0.val; + *pCurrPos++ = (pCFsx->m_Word1.val | pCFsx->m_Word1_SWIZ.val); + } + break; + case SIT_CF_ALL_EXP_SMX: + { + R700ControlFlowSMXClause* pCFsmx = (R700ControlFlowSMXClause*)pInst; + *pCurrPos++ = pCFsmx->m_Word0.val; + *pCurrPos++ = (pCFsmx->m_Word1.val | pCFsmx->m_Word1_BUF.val); + } + break; + default: + break; + } + + pInst = pInst->pNextInst; + }; + + number_of_alu_dwords = 0; + pInst = pShader->lstALUInstructions.pHead; + while(NULL != pInst) + { + switch (pInst->m_ShaderInstType) + { + case SIT_ALU: + { + R700ALUInstruction* pALU = (R700ALUInstruction*)pInst; + + *pCurrPos++ = pALU->m_Word0.val; + *pCurrPos++ = (pALU->m_Word1.val | pALU->m_Word1_OP2.val | pALU->m_Word1_OP3.val); + + number_of_alu_dwords += 2; + } + break; + case SIT_ALU_HALF_LIT: + { + R700ALUInstructionHalfLiteral* pALUhalf = (R700ALUInstructionHalfLiteral*)pInst; + + *pCurrPos++ = pALUhalf->m_Word0.val; + *pCurrPos++ = (pALUhalf->m_Word1.val | pALUhalf->m_Word1_OP2.val | pALUhalf->m_Word1_OP3.val); + *pCurrPos++ = *((GLuint*)&(pALUhalf->m_fLiteralX)); + *pCurrPos++ = *((GLuint*)&(pALUhalf->m_fLiteralY)); + + number_of_alu_dwords += 4; + } + break; + case SIT_ALU_FALL_LIT: + { + R700ALUInstructionFullLiteral* pALUfull = (R700ALUInstructionFullLiteral*)pInst; + + *pCurrPos++ = pALUfull->m_Word0.val; + *pCurrPos++ = (pALUfull->m_Word1.val | pALUfull->m_Word1_OP2.val | pALUfull->m_Word1_OP3.val); + + *pCurrPos++ = *((GLuint*)&(pALUfull->m_fLiteralX)); + *pCurrPos++ = *((GLuint*)&(pALUfull->m_fLiteralY)); + *pCurrPos++ = *((GLuint*)&(pALUfull->m_fLiteralZ)); + *pCurrPos++ = *((GLuint*)&(pALUfull->m_fLiteralW)); + + number_of_alu_dwords += 6; + } + break; + default: + break; + } + + pInst = pInst->pNextInst; + }; + + pInst = pShader->lstTEXInstructions.pHead; + while(NULL != pInst) + { + R700TextureInstruction* pTEX = (R700TextureInstruction*)pInst; + + *pCurrPos++ = pTEX->m_Word0.val; + *pCurrPos++ = pTEX->m_Word1.val; + *pCurrPos++ = pTEX->m_Word2.val; + *pCurrPos++ = 0x0beadeaf; + + pInst = pInst->pNextInst; + }; + + pInst = pShader->lstVTXInstructions.pHead; + while(NULL != pInst) + { + switch (pInst->m_ShaderInstType) + { + case SIT_VTX_SEM: // + { + R700VertexSemanticFetch* pVTXsem = (R700VertexSemanticFetch*)pInst; + + *pCurrPos++ = pVTXsem->m_Word0.val; + *pCurrPos++ = (pVTXsem->m_Word1.val | pVTXsem->m_Word1_SEM.val); + *pCurrPos++ = pVTXsem->m_Word2.val; + *pCurrPos++ = 0x0beadeaf; + } + break; + case SIT_VTX_GENERIC: // + { + R700VertexGenericFetch* pVTXgeneric = (R700VertexGenericFetch*)pInst; + + *pCurrPos++ = pVTXgeneric->m_Word0.val; + *pCurrPos++ = (pVTXgeneric->m_Word1.val | pVTXgeneric->m_Word1_GPR.val); + *pCurrPos++ = pVTXgeneric->m_Word2.val; + *pCurrPos++ = 0x0beadeaf; + } + break; + default: + break; + } + + pInst = pInst->pNextInst; + }; + + if(NULL != pShader->pProgram) + { + FREE(pShader->pProgram); + } + pShader->pProgram = (GLubyte*)pShaderBinary; + + end_of_cf_instructions = pShader->uCFOffset + pShader->lstCFInstructions.uNumOfNode * GetInstructionSize(SIT_CF); + + pShader->uEndOfCF = end_of_cf_instructions >> 1; + + pShader->uEndOfALU = (end_of_cf_instructions + number_of_alu_dwords) >> 1; + + pShader->uEndOfFetch = (pShader->uCFOffset + pShader->uShaderBinaryDWORDSize) >> 1; + + pShader->bNeedsAssembly = GL_FALSE; +} + +void LoadProgram(R700_Shader *pShader) //context +{ +} + +void UpdateShaderRegisters(R700_Shader *pShader) //context +{ +} + +void DeleteInstructions(R700_Shader *pShader) +{ +} + +void DebugPrint(void) +{ +} + +void Clean_Up_Shader(R700_Shader *pShader) +{ + FREE(pShader->pProgram); + + R700ShaderInstruction *pInst; + R700ShaderInstruction *pInstToFree; + + pInst = pShader->lstCFInstructions.pHead; + while(NULL != pInst) + { + pInstToFree = pInst; + pInst = pInst->pNextInst; + FREE(pInstToFree); + }; + pInst = pShader->lstALUInstructions.pHead; + while(NULL != pInst) + { + pInstToFree = pInst; + pInst = pInst->pNextInst; + FREE(pInstToFree); + }; + pInst = pShader->lstTEXInstructions.pHead; + while(NULL != pInst) + { + pInstToFree = pInst; + pInst = pInst->pNextInst; + FREE(pInstToFree); + }; + pInst = pShader->lstVTXInstructions.pHead; + while(NULL != pInst) + { + pInstToFree = pInst; + pInst = pInst->pNextInst; + FREE(pInstToFree); + }; +} + diff --git a/src/mesa/drivers/dri/r600/r700_shader.h b/src/mesa/drivers/dri/r600/r700_shader.h new file mode 100644 index 0000000000..bfd01e1a93 --- /dev/null +++ b/src/mesa/drivers/dri/r600/r700_shader.h @@ -0,0 +1,150 @@ +/* + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Authors: + * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com> + */ + + +#ifndef __R700_SHADER_H__ +#define __R700_SHADER_H__ + +#include "main/mtypes.h" + +#include "r700_shaderinst.h" + + +void r700ShaderInit(GLcontext * ctx); + +typedef enum R700ShaderType +{ + R700_SHADER_FS = 0x0, + R700_SHADER_ES = 0x1, + R700_SHADER_GS = 0x2, + R700_SHADER_VS = 0x3, + R700_SHADER_PS = 0x4, + R700_SHADER_INVALID = 0x5, +} R700ShaderType; + +typedef struct TypedShaderList +{ + R700ShaderInstruction * pHead; + R700ShaderInstruction * pTail; + GLuint uNumOfNode; +} TypedShaderList; + +typedef struct RealRegister +{ + GLuint uAddr; + GLuint uValue; +} RealRegister; + +typedef struct InstDeps +{ + GLint nDstDep; + GLint nSrcDeps[3]; +} InstDeps; + +typedef struct R700_Shader +{ + R700ShaderType Type; + + GLubyte* pProgram; + + GLboolean bBinaryShader; + GLboolean bFetchShaderRequired; + GLboolean bNeedsAssembly; + GLboolean bLinksDirty; + + GLuint uShaderBinaryDWORDSize; // in DWORDS + GLuint nRegs; + GLuint nParamExports; // VS_ EXPORT_COUNT (1 based, the actual register is 0 based!) + GLuint nMemExports; + GLuint resource; // VS and PS _RESOURCE + GLuint exportMode; // VS and PS _EXPORT_MODE + + GLboolean depthIsImported; + + // Vertex program exports + GLboolean positionVectorIsExported; + + GLboolean miscVectorIsExported; + GLboolean renderTargetArrayIndexIsExported; + + GLboolean ccDist0VectorIsExported; + GLboolean ccDist1VectorIsExported; + + // Pixel program exports + GLboolean depthIsExported; + GLboolean stencilRefIsExported; + GLboolean coverageToMaskIsExported; + GLboolean maskIsExported; + + GLboolean killIsUsed; + + GLuint uStartAddr; + GLuint uCFOffset; + GLuint uEndOfCF; + GLuint uEndOfALU; + GLuint uEndOfFetch; + GLuint uStackSize; + GLuint uMaxCallDepth; + + TypedShaderList lstCFInstructions; + TypedShaderList lstALUInstructions; + TypedShaderList lstTEXInstructions; + TypedShaderList lstVTXInstructions; + + RealRegister RegStartAddr; + RealRegister RegCFOffset; + RealRegister RegEndCF; + RealRegister RegEndALU; + RealRegister egEndFetcg; + + // -------- constants + GLfloat ConstantArray[SQ_ALU_CONSTANT_PS_COUNT * 4]; + + GLboolean bSurfAllocated; +} R700_Shader; + +//Internal +void AddInstToList(TypedShaderList * plstCFInstructions, R700ShaderInstruction * pInst); +void ResolveLinks(R700_Shader *pShader); +void Assemble(R700_Shader *pShader); + + +//Interface +void Init_R700_Shader(R700_Shader * pShader); +void AddCFInstruction(R700_Shader *pShader, R700ControlFlowInstruction *pCFInst); +void AddVTXInstruction(R700_Shader *pShader, R700VertexInstruction *pVTXInst); +void AddTEXInstruction(R700_Shader *pShader, R700TextureInstruction *pTEXInst); +void AddALUInstruction(R700_Shader *pShader, R700ALUInstruction *pALUInst); + +void LoadProgram(R700_Shader *pShader); +void UpdateShaderRegisters(R700_Shader *pShader); +void DeleteInstructions(R700_Shader *pShader); +void DebugPrint(void); + +void Clean_Up_Shader(R700_Shader *pShader); + +#endif /*__R700_SHADER_H__*/ + diff --git a/src/mesa/drivers/dri/r600/r700_shaderinst.c b/src/mesa/drivers/dri/r600/r700_shaderinst.c new file mode 100644 index 0000000000..c1bffee91f --- /dev/null +++ b/src/mesa/drivers/dri/r600/r700_shaderinst.c @@ -0,0 +1,224 @@ +/* + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Authors: + * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com> + */ + + +#include "main/mtypes.h" + +#include "r700_debug.h" +#include "r700_shaderinst.h" + +void Init_R700ControlFlowGenericClause(R700ControlFlowGenericClause* pInst) +{ + pInst->m_Word0.val = 0x00000000; + pInst->m_Word1.val = 0x00000000; + + pInst->m_pLinkedVTXInstruction = 0; + pInst->m_pLinkedTEXInstruction = 0; + + pInst->useCount = 0; + + pInst->m_ShaderInstType = SIT_CF_GENERIC; +} + +void Init_R700ControlFlowALUClause(R700ControlFlowALUClause* pInst) +{ + pInst->m_Word0.val = 0x00000000; + pInst->m_Word1.val = 0x00000000; + + pInst->m_pLinkedALUInstruction = 0; + + pInst->useCount = 0; + + pInst->m_ShaderInstType = SIT_CF_ALU; +} + +void Init_R700ControlFlowSXClause(R700ControlFlowSXClause* pInst) +{ + pInst->m_Word0.val = 0x00000000; + pInst->m_Word1.val = 0x00000000; + pInst->m_Word1_SWIZ.val = 0x00000000; + + pInst->useCount = 0; + + pInst->m_ShaderInstType = SIT_CF_ALL_EXP_SX; +} + +void Init_R700ControlFlowSMXClause(R700ControlFlowSMXClause* pInst) +{ + pInst->m_Word0.val = 0x00000000; + pInst->m_Word1.val = 0x00000000; + pInst->m_Word1_BUF.val = 0x00000000; + + pInst->useCount = 0; + + pInst->m_ShaderInstType = SIT_CF_ALL_EXP_SMX; +} + +void Init_R700ALUInstruction(R700ALUInstruction* pInst) +{ + pInst->m_Word0.val = 0x00000000; + pInst->m_Word1.val = 0x00000000; + pInst->m_Word1_OP2.val = 0x00000000; + pInst->m_Word1_OP3.val = 0x00000000; + + pInst->m_pLinkedALUClause = 0; + + pInst->useCount = 0; + + pInst->m_ShaderInstType = SIT_ALU; +} + +void Init_R700ALUInstructionHalfLiteral(R700ALUInstructionHalfLiteral* pInst, GLfloat x, GLfloat y) +{ + pInst->m_Word0.val = 0x00000000; + pInst->m_Word1.val = 0x00000000; + pInst->m_Word1_OP2.val = 0x00000000; + pInst->m_Word1_OP3.val = 0x00000000; + + pInst->m_pLinkedALUClause = 0; + + pInst->m_fLiteralX = x; + pInst->m_fLiteralY = y; + + pInst->useCount = 0; + + pInst->m_ShaderInstType = SIT_ALU_HALF_LIT; +} + +void Init_R700ALUInstructionFullLiteral(R700ALUInstructionFullLiteral* pInst, GLfloat x, GLfloat y, GLfloat z, GLfloat w) +{ + pInst->m_Word0.val = 0x00000000; + pInst->m_Word1.val = 0x00000000; + pInst->m_Word1_OP2.val = 0x00000000; + pInst->m_Word1_OP3.val = 0x00000000; + + pInst->m_pLinkedALUClause = 0; + + pInst->m_fLiteralX = x; + pInst->m_fLiteralY = y; + pInst->m_fLiteralZ = z; + pInst->m_fLiteralW = w; + + pInst->useCount = 0; + + pInst->m_ShaderInstType = SIT_ALU_FALL_LIT; +} + +void Init_R700TextureInstruction(R700TextureInstruction* pInst) +{ + pInst->m_Word0.val = 0x00000000; + pInst->m_Word1.val = 0x00000000; + pInst->m_Word2.val = 0x00000000; + + pInst->m_pLinkedGenericClause = 0; + + pInst->useCount = 0; + + pInst->m_ShaderInstType = SIT_TEX; +} + +void Init_R700VertexSemanticFetch(R700VertexSemanticFetch* pInst) +{ + pInst->m_Word0.val = 0x00000000; + pInst->m_Word1.val = 0x00000000; + pInst->m_Word1_SEM.val = 0x00000000; + pInst->m_Word2.val = 0x00000000; + + pInst->m_pLinkedGenericClause = 0; + + pInst->useCount = 0; + + pInst->m_ShaderInstType = SIT_VTX_SEM; +} + +void Init_R700VertexGenericFetch(R700VertexGenericFetch* pInst) +{ + pInst->m_Word0.val = 0x00000000; + pInst->m_Word1.val = 0x00000000; + pInst->m_Word1_GPR.val = 0x00000000; + pInst->m_Word2.val = 0x00000000; + + pInst->m_pLinkedGenericClause = 0; + + pInst->useCount = 0; + + pInst->m_ShaderInstType = SIT_VTX_GENERIC; +} + +unsigned int GetInstructionSize(ShaderInstType instType) +{ + switch(instType) + { + case SIT_ALU_HALF_LIT: + case SIT_TEX: + case SIT_VTX: + case SIT_VTX_GENERIC: + case SIT_VTX_SEM: + return 4; + case SIT_ALU_FALL_LIT: + return 6; + default: + break; + } + + return 2; +} + +unsigned int GetCFMaxInstructions(ShaderInstType instType) +{ + switch (instType) + { + case SIT_CF_ALL_EXP: + case SIT_CF_ALL_EXP_SX: + case SIT_CF_ALL_EXP_SMX: + return 0x10; + case SIT_CF_GENERIC: + return 0x8; //For tex and vtx + case SIT_CF_ALU: + return 0x80; + default: + break; + } + return 0x10; +} + +GLboolean LinkVertexInstruction(R700ControlFlowGenericClause *pCFGeneric, + R700VertexInstruction *pVTXInstruction) +{ + if (pCFGeneric->m_pLinkedTEXInstruction != 0) + { + r700_error(ERROR_ASM_VTX_CLAUSE, "This instruction is already linked to a texture instruction"); + return GL_FALSE; + } + + pCFGeneric->m_pLinkedVTXInstruction = pVTXInstruction; + pVTXInstruction->m_pLinkedGenericClause = pCFGeneric; + + return GL_TRUE; +} + + + diff --git a/src/mesa/drivers/dri/r600/r700_shaderinst.h b/src/mesa/drivers/dri/r600/r700_shaderinst.h new file mode 100644 index 0000000000..b829cb11ad --- /dev/null +++ b/src/mesa/drivers/dri/r600/r700_shaderinst.h @@ -0,0 +1,321 @@ +/* + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Authors: + * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com> + */ + + +#ifndef _R700_SHADERINST_H_ +#define _R700_SHADERINST_H_ + +#include "main/glheader.h" + +#include "defaultendian.h" +#include "sq_micro_reg.h" + +#define SQ_ALU_CONSTANT_PS_OFFSET 0x00000000
+#define SQ_ALU_CONSTANT_PS_COUNT 0x00000100
+#define SQ_ALU_CONSTANT_VS_OFFSET 0x00000100
+#define SQ_ALU_CONSTANT_VS_COUNT 0x00000100 +#define SQ_FETCH_RESOURCE_PS_OFFSET 0x00000000
+#define SQ_FETCH_RESOURCE_PS_COUNT 0x000000a0
+#define SQ_FETCH_RESOURCE_VS_OFFSET 0x000000a0
+#define SQ_FETCH_RESOURCE_VS_COUNT 0x000000b0 + +#define SHADERINST_TYPEMASK_CF 0x10 +#define SHADERINST_TYPEMASK_ALU 0x20 +#define SHADERINST_TYPEMASK_TEX 0x40 +#define SHADERINST_TYPEMASK_VTX 0x80 + +typedef enum ShaderInstType +{ + SIT_CF = 0x10, /*SIZE = 0x2*/ + SIT_CF_ALL_EXP = 0x14, /*SIZE = 0x2, MAX_INSTRUCTIONS = 0x10;*/ + SIT_CF_ALL_EXP_SX = 0x15, /*SIZE = 0x2, MAX_INSTRUCTIONS = 0x10;*/ + SIT_CF_ALL_EXP_SMX= 0x16, /*SIZE = 0x2, MAX_INSTRUCTIONS = 0x10;*/ + SIT_CF_GENERIC = 0x18, /*SIZE = 0x2, MAX_INSTRUCTIONS = 0x8; //For tex and vtx*/ + SIT_CF_ALU = 0x19, /*SIZE = 0x2, MAX_INSTRUCTIONS = 0x80;*/ + SIT_ALU = 0x20, /*SIZE = 0x2,*/ + SIT_ALU_HALF_LIT = 0x21, /*SIZE = 0x4,*/ + SIT_ALU_FALL_LIT = 0x22, /*SIZE = 0x6,*/ + SIT_TEX = 0x40, /*SIZE = 0x4,*/ + SIT_VTX = 0x80, /*SIZE = 0x4, MEGA_FETCH_BYTES = 0x20*/ + SIT_VTX_GENERIC = 0x81, /*SIZE = 0x4, MEGA_FETCH_BYTES = 0x20*/ + SIT_VTX_SEM = 0x82 /*SIZE = 0x4, MEGA_FETCH_BYTES = 0x20*/ +} ShaderInstType; + +typedef struct R700ShaderInstruction +{ + ShaderInstType m_ShaderInstType; + struct R700ShaderInstruction *pNextInst; + GLuint m_uIndex; + GLuint useCount; +} R700ShaderInstruction; + +// ------------------ CF insts --------------------------- + +typedef R700ShaderInstruction R700ControlFlowInstruction; + +typedef struct R700ControlFlowAllocExportClause +{ + ShaderInstType m_ShaderInstType; + R700ShaderInstruction * pNextInst; + GLuint m_uIndex; + GLuint useCount; + + sq_cf_alloc_export_word0_u m_Word0; + sq_cf_alloc_export_word1_u m_Word1; +} R700ControlFlowAllocExportClause; + +typedef struct R700ControlFlowSXClause +{ + ShaderInstType m_ShaderInstType; + R700ShaderInstruction * pNextInst; + //R700ControlFlowAllocExportClause + //R700ControlFlowInstruction + //R700ShaderInstruction + GLuint m_uIndex; + GLuint useCount; + //--------------------- + //--------------------------- + sq_cf_alloc_export_word0_u m_Word0; + sq_cf_alloc_export_word1_u m_Word1; + //------------------------------------- + + sq_cf_alloc_export_word1_swiz_u m_Word1_SWIZ; +} R700ControlFlowSXClause; + +typedef struct R700ControlFlowSMXClause +{ + ShaderInstType m_ShaderInstType; + R700ShaderInstruction * pNextInst; + //R700ControlFlowAllocExportClause + //R700ControlFlowInstruction + //R700ShaderInstruction + GLuint m_uIndex; + GLuint useCount; + //--------------------- + //--------------------------- + sq_cf_alloc_export_word0_u m_Word0; + sq_cf_alloc_export_word1_u m_Word1; + //------------------------------- + + sq_cf_alloc_export_word1_buf_u m_Word1_BUF; +} R700ControlFlowSMXClause; + +typedef struct R700ControlFlowGenericClause +{ + ShaderInstType m_ShaderInstType; + R700ShaderInstruction * pNextInst; + //R700ControlFlowInstruction + //R700ShaderInstruction + GLuint m_uIndex; + GLuint useCount; + //--------------------- + //--------------------- + + sq_cf_word0_u m_Word0; + sq_cf_word1_u m_Word1; + + struct R700VertexInstruction *m_pLinkedVTXInstruction; + struct R700TextureInstruction *m_pLinkedTEXInstruction; +} R700ControlFlowGenericClause; + +typedef struct R700ControlFlowALUClause +{ + ShaderInstType m_ShaderInstType; + R700ShaderInstruction * pNextInst; + //R700ControlFlowInstruction + //R700ShaderInstruction + GLuint m_uIndex; + GLuint useCount; + //--------------------- + //--------------------- + + sq_cf_alu_word0_u m_Word0; + sq_cf_alu_word1_u m_Word1; + + struct R700ALUInstruction *m_pLinkedALUInstruction; +} R700ControlFlowALUClause; + +// ------------------- End of CF Inst ------------------------ + +// ------------------- ALU Inst ------------------------------ +typedef struct R700ALUInstruction +{ + ShaderInstType m_ShaderInstType; + R700ShaderInstruction * pNextInst; + //R700ShaderInstruction + GLuint m_uIndex; + GLuint useCount; + //--------------------- + + sq_alu_word0_u m_Word0; + sq_alu_word1_u m_Word1; + sq_alu_word1_op2_v2_u m_Word1_OP2; + sq_alu_word1_op3_u m_Word1_OP3; + + struct R700ControlFlowALUClause *m_pLinkedALUClause; +} R700ALUInstruction; + +typedef struct R700ALUInstructionHalfLiteral +{ + ShaderInstType m_ShaderInstType; + R700ShaderInstruction * pNextInst; + //R700ALUInstruction + //R700ShaderInstruction + GLuint m_uIndex; + GLuint useCount; + //--------------------- + + sq_alu_word0_u m_Word0; + sq_alu_word1_u m_Word1; + sq_alu_word1_op2_v2_u m_Word1_OP2; + sq_alu_word1_op3_u m_Word1_OP3; + + struct R700ControlFlowALUClause *m_pLinkedALUClause; + //------------------- + + GLfloat m_fLiteralX, + m_fLiteralY; +} R700ALUInstructionHalfLiteral; + +typedef struct R700ALUInstructionFullLiteral +{ + ShaderInstType m_ShaderInstType; + R700ShaderInstruction * pNextInst; + //R700ALUInstruction + //R700ShaderInstruction + GLuint m_uIndex; + GLuint useCount; + //--------------------- + + sq_alu_word0_u m_Word0; + sq_alu_word1_u m_Word1; + sq_alu_word1_op2_v2_u m_Word1_OP2; + sq_alu_word1_op3_u m_Word1_OP3; + + struct R700ControlFlowALUClause *m_pLinkedALUClause; + //------------------- + + GLfloat m_fLiteralX, + m_fLiteralY, + m_fLiteralZ, + m_fLiteralW; +} R700ALUInstructionFullLiteral; +// ------------------- End of ALU Inst ----------------------- + +// ------------------- Textuer/Vertex Instruction -------------------- + +typedef struct R700TextureInstruction +{ + ShaderInstType m_ShaderInstType; + R700ShaderInstruction * pNextInst; + //R700ShaderInstruction + GLuint m_uIndex; + GLuint useCount; + //--------------------- + + sq_tex_word0_u m_Word0; + sq_tex_word1_u m_Word1; + sq_tex_word2_u m_Word2; + + struct R700ControlFlowGenericClause *m_pLinkedGenericClause; +} R700TextureInstruction; + +typedef struct R700VertexInstruction +{ + ShaderInstType m_ShaderInstType; + R700ShaderInstruction * pNextInst; + //R700ShaderInstruction + GLuint m_uIndex; + GLuint useCount; + //--------------------- + + sq_vtx_word0_u m_Word0; + sq_vtx_word1_u m_Word1; + sq_vtx_word2_u m_Word2; + + struct R700ControlFlowGenericClause *m_pLinkedGenericClause; +} R700VertexInstruction; +// +typedef struct R700VertexSemanticFetch +{ + ShaderInstType m_ShaderInstType; + R700ShaderInstruction * pNextInst; + //R700VertexInstruction + //R700ShaderInstruction + GLuint m_uIndex; + GLuint useCount; + //--------------------- + + sq_vtx_word0_u m_Word0; + sq_vtx_word1_u m_Word1; + sq_vtx_word2_u m_Word2; + + struct R700ControlFlowGenericClause *m_pLinkedGenericClause; + //--------------------------- + + sq_vtx_word1_sem_u m_Word1_SEM; +} R700VertexSemanticFetch; +// +typedef struct R700VertexGenericFetch +{ + ShaderInstType m_ShaderInstType; + R700ShaderInstruction * pNextInst; + //R700VertexInstruction + //R700ShaderInstruction + GLuint m_uIndex; + GLuint useCount; + //--------------------- + + sq_vtx_word0_u m_Word0; + sq_vtx_word1_u m_Word1; + sq_vtx_word2_u m_Word2; + + struct R700ControlFlowGenericClause *m_pLinkedGenericClause; + //--------------------------- + + sq_vtx_word1_gpr_u m_Word1_GPR; +} R700VertexGenericFetch; + +// ------------------- End of Texture Vertex Instruction -------------------- + +void Init_R700ControlFlowGenericClause(R700ControlFlowGenericClause* pInst); +void Init_R700ControlFlowALUClause(R700ControlFlowALUClause* pInst); +void Init_R700ControlFlowSXClause(R700ControlFlowSXClause* pInst); +void Init_R700ControlFlowSMXClause(R700ControlFlowSMXClause* pInst); +void Init_R700ALUInstruction(R700ALUInstruction* pInst); +void Init_R700ALUInstructionHalfLiteral(R700ALUInstructionHalfLiteral* pInst, GLfloat x, GLfloat y); +void Init_R700ALUInstructionFullLiteral(R700ALUInstructionFullLiteral* pInst, GLfloat x, GLfloat y, GLfloat z, GLfloat w); +void Init_R700TextureInstruction(R700TextureInstruction* pInst); +void Init_R700VertexSemanticFetch(R700VertexSemanticFetch* pInst); +void Init_R700VertexGenericFetch(R700VertexGenericFetch* pInst); + +unsigned int GetInstructionSize(ShaderInstType instType); +unsigned int GetCFMaxInstructions(ShaderInstType instType); + +GLboolean LinkVertexInstruction(R700ControlFlowGenericClause *pCFGeneric, + R700VertexInstruction *pVTXInstruction); + +#endif //_R700_SHADERINST_H_ diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c new file mode 100644 index 0000000000..786dcd15d8 --- /dev/null +++ b/src/mesa/drivers/dri/r600/r700_state.c @@ -0,0 +1,912 @@ +/* + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Authors: + * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com> + */ + +#include "main/glheader.h" +#include "main/mtypes.h" +#include "state.h" +#include "imports.h" +#include "enums.h" +#include "macros.h" +#include "dd.h" +#include "simple_list.h" + +#include "tnl/tnl.h" +#include "tnl/t_pipeline.h" +#include "tnl/t_vp_build.h" +#include "swrast/swrast.h" +#include "swrast_setup/swrast_setup.h" +#include "main/api_arrayelt.h" +#include "main/state.h" +#include "main/framebuffer.h" + +#include "shader/prog_parameter.h" +#include "shader/prog_statevars.h" +#include "vbo/vbo.h" +#include "texformat.h" + +#include "r700_interface.h" + +#include "r700_chip.h" +#include "r700_state.h" +#include "r700_fragprog.h" +#include "r700_vertprog.h" + + +void r700SetDefaultStates(context_t *context) +{ + +} + +void r700UpdateShaders (GLcontext * ctx) +{ + context_t *context = R700_CONTEXT(ctx); + + GLvector4f dummy_attrib[_TNL_ATTRIB_MAX]; + GLvector4f *temp_attrib[_TNL_ATTRIB_MAX]; + + struct r700_vertex_program *vp; + int i; + + if (context->NewGLState) + { + context->NewGLState = 0; + + for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) + { + /* mat states from state var not array for sw */ + dummy_attrib[i].stride = 0; + + temp_attrib[i] = TNL_CONTEXT(ctx)->vb.AttribPtr[i]; + TNL_CONTEXT(ctx)->vb.AttribPtr[i] = &(dummy_attrib[i]); + } + + _tnl_UpdateFixedFunctionProgram(ctx); + + for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) + { + TNL_CONTEXT(ctx)->vb.AttribPtr[i] = temp_attrib[i]; + } + + r700SelectVertexShader(ctx); + vp = (struct r700_vertex_program *)ctx->VertexProgram._Current; + + if (vp->translated == GL_FALSE) + { + // TODO + //fprintf(stderr, "Failing back to sw-tcl\n"); + //hw_tcl_on = future_hw_tcl_on = 0; + //r300ResetHwState(rmesa); + // + r700UpdateStateParameters(ctx, _NEW_PROGRAM); + return; + } + } + + r700UpdateStateParameters(ctx, _NEW_PROGRAM); +} + +/* + * To correctly position primitives: + */ +void r700UpdateViewportOffset(GLcontext * ctx) +{ +} + +/** + * Tell the card where to render (offset, pitch). + * Effected by glDrawBuffer, etc + */ +void r700UpdateDrawBuffer(GLcontext * ctx) /* TODO */ +{ + context_t *context = R700_CONTEXT(ctx); + + switch (ctx->DrawBuffer->_ColorDrawBufferIndexes[0]) + { + case BUFFER_FRONT_LEFT: + context->target.rt = context->screen->frontBuffer; + break; + case BUFFER_BACK_LEFT: + context->target.rt = context->screen->backBuffer; + break; + default: + memset (&context->target.rt, sizeof(context->target.rt), 0); + } +} + +static void r700FetchStateParameter(GLcontext * ctx, + const gl_state_index state[STATE_LENGTH], + GLfloat * value) +{ + context_t *context = R700_CONTEXT(ctx); + + /* TODO */ +} + +void r700UpdateStateParameters(GLcontext * ctx, GLuint new_state) +{ + struct r700_fragment_program *fp; + struct gl_program_parameter_list *paramList; + GLuint i; + + if (!(new_state & (_NEW_BUFFERS | _NEW_PROGRAM))) + return; + + fp = (struct r700_fragment_program *)ctx->FragmentProgram._Current; + if (!fp) + { + return; + } + + paramList = fp->mesa_program.Base.Parameters; + + if (!paramList) + { + return; + } + + for (i = 0; i < paramList->NumParameters; i++) + { + if (paramList->Parameters[i].Type == PROGRAM_STATE_VAR) + { + r700FetchStateParameter(ctx, + paramList->Parameters[i]. + StateIndexes, + paramList->ParameterValues[i]); + } + } +} + +/** + * Called by Mesa after an internal state update. + */ +static void r700InvalidateState(GLcontext * ctx, GLuint new_state) +{ + context_t *context = R700_CONTEXT(ctx); + + R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(context->chipobj.pvChipObj); + + _swrast_InvalidateState(ctx, new_state); + _swsetup_InvalidateState(ctx, new_state); + _vbo_InvalidateState(ctx, new_state); + _tnl_InvalidateState(ctx, new_state); + _ae_invalidate_state(ctx, new_state); + + if (new_state & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) + { + r700UpdateDrawBuffer(ctx); + } + + r700UpdateStateParameters(ctx, new_state); + + if(GL_TRUE == context->bEnablePerspective) + { + /* Do scale XY and Z by 1/W0 for perspective correction on pos. For orthogonal case, set both to one. */ + CLEARbit(r700->PA_CL_VTE_CNTL.u32All, VTX_XY_FMT_bit); + CLEARbit(r700->PA_CL_VTE_CNTL.u32All, VTX_Z_FMT_bit); + + SETbit(r700->PA_CL_VTE_CNTL.u32All, VTX_W0_FMT_bit); + + CLEARbit(r700->SPI_PS_IN_CONTROL_0.u32All, PERSP_GRADIENT_ENA_bit); + SETbit(r700->SPI_PS_IN_CONTROL_0.u32All, LINEAR_GRADIENT_ENA_bit); + } + else + { + /* For orthogonal case. */ + SETbit(r700->PA_CL_VTE_CNTL.u32All, VTX_XY_FMT_bit); + SETbit(r700->PA_CL_VTE_CNTL.u32All, VTX_Z_FMT_bit); + + SETbit(r700->PA_CL_VTE_CNTL.u32All, VTX_W0_FMT_bit); + + SETbit(r700->SPI_PS_IN_CONTROL_0.u32All, PERSP_GRADIENT_ENA_bit); + CLEARbit(r700->SPI_PS_IN_CONTROL_0.u32All, LINEAR_GRADIENT_ENA_bit); + } + + context->NewGLState |= new_state; +} + +static void r700SetDepthState(GLcontext * ctx) +{ + context_t *context = R700_CONTEXT(ctx); + + R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(context->chipobj.pvChipObj); + + if (ctx->Depth.Test) + { + SETbit(r700->DB_DEPTH_CONTROL.u32All, Z_ENABLE_bit); + if (ctx->Depth.Mask) + { + SETbit(r700->DB_DEPTH_CONTROL.u32All, Z_WRITE_ENABLE_bit); + } + else + { + CLEARbit(r700->DB_DEPTH_CONTROL.u32All, Z_WRITE_ENABLE_bit); + } + + switch (ctx->Depth.Func) + { + case GL_NEVER: + SETfield(r700->DB_DEPTH_CONTROL.u32All, FRAG_NEVER, + ZFUNC_shift, ZFUNC_mask); + break; + case GL_LESS: + SETfield(r700->DB_DEPTH_CONTROL.u32All, FRAG_LESS, + ZFUNC_shift, ZFUNC_mask); + break; + case GL_EQUAL: + SETfield(r700->DB_DEPTH_CONTROL.u32All, FRAG_EQUAL, + ZFUNC_shift, ZFUNC_mask); + break; + case GL_LEQUAL: + SETfield(r700->DB_DEPTH_CONTROL.u32All, FRAG_LEQUAL, + ZFUNC_shift, ZFUNC_mask); + break; + case GL_GREATER: + SETfield(r700->DB_DEPTH_CONTROL.u32All, FRAG_GREATER, + ZFUNC_shift, ZFUNC_mask); + break; + case GL_NOTEQUAL: + SETfield(r700->DB_DEPTH_CONTROL.u32All, FRAG_NOTEQUAL, + ZFUNC_shift, ZFUNC_mask); + break; + case GL_GEQUAL: + SETfield(r700->DB_DEPTH_CONTROL.u32All, FRAG_GEQUAL, + ZFUNC_shift, ZFUNC_mask); + break; + case GL_ALWAYS: + SETfield(r700->DB_DEPTH_CONTROL.u32All, FRAG_ALWAYS, + ZFUNC_shift, ZFUNC_mask); + break; + default: + SETfield(r700->DB_DEPTH_CONTROL.u32All, FRAG_ALWAYS, + ZFUNC_shift, ZFUNC_mask); + break; + } + } + else + { + CLEARbit(r700->DB_DEPTH_CONTROL.u32All, Z_ENABLE_bit); + CLEARbit(r700->DB_DEPTH_CONTROL.u32All, Z_WRITE_ENABLE_bit); + } +} + +static void r700AlphaFunc(GLcontext * ctx, GLenum func, GLfloat ref) +{ +} + + +static void r700BlendColor(GLcontext * ctx, const GLfloat cf[4]) +{ +} + +static void r700BlendEquationSeparate(GLcontext * ctx, + GLenum modeRGB, GLenum modeA) +{ +} + +static void r700BlendFuncSeparate(GLcontext * ctx, + GLenum sfactorRGB, GLenum dfactorRGB, + GLenum sfactorA, GLenum dfactorA) +{ +} + +static void r700UpdateCulling(GLcontext * ctx) +{ + R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(R700_CONTEXT(ctx)->chipobj.pvChipObj); + + CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, FACE_bit); + CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, CULL_FRONT_bit); + CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, CULL_BACK_bit); + + if (ctx->Polygon.CullFlag) + { + switch (ctx->Polygon.CullFaceMode) + { + case GL_FRONT: + SETbit(r700->PA_SU_SC_MODE_CNTL.u32All, CULL_FRONT_bit); + CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, CULL_BACK_bit); + break; + case GL_BACK: + CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, CULL_FRONT_bit); + SETbit(r700->PA_SU_SC_MODE_CNTL.u32All, CULL_BACK_bit); + break; + case GL_FRONT_AND_BACK: + SETbit(r700->PA_SU_SC_MODE_CNTL.u32All, CULL_FRONT_bit); + SETbit(r700->PA_SU_SC_MODE_CNTL.u32All, CULL_BACK_bit); + break; + default: + CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, CULL_FRONT_bit); + CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, CULL_BACK_bit); + break; + } + } + + switch (ctx->Polygon.FrontFace) + { + case GL_CW: + SETbit(r700->PA_SU_SC_MODE_CNTL.u32All, FACE_bit); + break; + case GL_CCW: + CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, FACE_bit); + break; + default: + CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, FACE_bit); /* default: ccw */ + break; + } +} + +static void r700Enable(GLcontext * ctx, GLenum cap, GLboolean state) +{ + switch (cap) + { + case GL_TEXTURE_1D: + case GL_TEXTURE_2D: + case GL_TEXTURE_3D: + break; + case GL_FOG: + break; + case GL_ALPHA_TEST: + break; + case GL_COLOR_LOGIC_OP: + case GL_BLEND: + break; + case GL_CLIP_PLANE0: + case GL_CLIP_PLANE1: + case GL_CLIP_PLANE2: + case GL_CLIP_PLANE3: + case GL_CLIP_PLANE4: + case GL_CLIP_PLANE5: + break; + case GL_DEPTH_TEST: + r700SetDepthState(ctx); + break; + case GL_STENCIL_TEST: + break; + case GL_CULL_FACE: + r700UpdateCulling(ctx); + break; + case GL_POLYGON_OFFSET_POINT: + case GL_POLYGON_OFFSET_LINE: + case GL_POLYGON_OFFSET_FILL: + break; + default: + break; + } +} + +/** + * Handle glColorMask() + */ +static void r700ColorMask(GLcontext * ctx, + GLboolean r, GLboolean g, GLboolean b, GLboolean a) +{ +} + +/** + * Change the depth testing function. + * + * \note Mesa already filters redundant calls to this function. + */ +static void r700DepthFunc(GLcontext * ctx, GLenum func) +{ + r700SetDepthState(ctx); +} + +/** + * Enable/Disable depth writing. + * + * \note Mesa already filters redundant calls to this function. + */ +static void r700DepthMask(GLcontext * ctx, GLboolean mask) +{ + r700SetDepthState(ctx); +} + +/** + * Change the culling mode. + * + * \note Mesa already filters redundant calls to this function. + */ +static void r700CullFace(GLcontext * ctx, GLenum mode) +{ + r700UpdateCulling(ctx); +} + +/* ============================================================= + * Fog + */ +static void r700Fogfv(GLcontext * ctx, GLenum pname, const GLfloat * param) +{ +} + +/** + * Change the polygon orientation. + * + * \note Mesa already filters redundant calls to this function. + */ +static void r700FrontFace(GLcontext * ctx, GLenum mode) +{ + r700UpdateCulling(ctx); +} + +static void r700ShadeModel(GLcontext * ctx, GLenum mode) +{ +} + +static void r700PointParameter(GLcontext * ctx, GLenum pname, const GLfloat * param) +{ +} + +static void r700StencilFuncSeparate(GLcontext * ctx, GLenum face, + GLenum func, GLint ref, GLuint mask) +{ +} + + +static void r700StencilMaskSeparate(GLcontext * ctx, GLenum face, GLuint mask) +{ +} + +static void r700StencilOpSeparate(GLcontext * ctx, GLenum face, + GLenum fail, GLenum zfail, GLenum zpass) +{ +} + +#define SUBPIXEL_X 0.125 +#define SUBPIXEL_Y 0.125 + +static void r700Viewport(GLcontext * ctx, + GLint x, + GLint y, + GLsizei width, + GLsizei height) +{ + context_t *context = R700_CONTEXT(ctx); + + R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(context->chipobj.pvChipObj); + + context->vport_x = x; + context->vport_y = y; + context->vport_width = width; + context->vport_height= height; + + __DRIdrawablePrivate *dPriv = context->currentDraw; + + GLfloat xoffset = dPriv ? (GLfloat) dPriv->x : 0; + GLfloat yoffset = dPriv ? (GLfloat) dPriv->y + dPriv->h : 0; + + const GLfloat *v = ctx->Viewport._WindowMap.m; + + GLfloat sx, tx, sy, ty, sz, tz; + GLfloat scale; + + switch (ctx->Visual.depthBits) + { + case 16: + scale = 1.0 / (GLfloat) 0xffff; + break; + case 24: + scale = 1.0 / (GLfloat) 0xffffff; + break; + default: + fprintf(stderr, "Error: Unsupported depth %d... exiting\n", + ctx->Visual.depthBits); + _mesa_exit(-1); + } + + sx = v[MAT_SX]; + tx = v[MAT_TX] + xoffset + SUBPIXEL_X; + sy = -v[MAT_SY]; + ty = (-v[MAT_TY]) + yoffset + SUBPIXEL_Y; + sz = v[MAT_SZ] * scale; + tz = v[MAT_TZ] * scale; + + /* TODO : Need DMA flush as well. */ + + if(context->cmdbuf.count_used > 0) + { + (context->chipobj.FlushCmdBuffer)(context); + } + + r700->PA_CL_VPORT_XSCALE.u32All = *((unsigned int*)(&sx)); + r700->PA_CL_VPORT_XOFFSET.u32All = *((unsigned int*)(&tx)); + + r700->PA_CL_VPORT_YSCALE.u32All = *((unsigned int*)(&sy)); + r700->PA_CL_VPORT_YOFFSET.u32All = *((unsigned int*)(&ty)); + + r700->PA_CL_VPORT_ZSCALE.u32All = *((unsigned int*)(&sz)); + r700->PA_CL_VPORT_ZOFFSET.u32All = *((unsigned int*)(&tz)); +} + + +static void r700DepthRange(GLcontext * ctx, GLclampd nearval, GLclampd farval) +{ +} + +static void r700PointSize(GLcontext * ctx, GLfloat size) +{ +} + +static void r700LineWidth(GLcontext * ctx, GLfloat widthf) +{ +} + +static void r700PolygonOffset(GLcontext * ctx, GLfloat factor, GLfloat units) +{ +} + + +static void r700PolygonMode(GLcontext * ctx, GLenum face, GLenum mode) +{ +} + +static void r700RenderMode(GLcontext * ctx, GLenum mode) +{ +} + +static void r700ClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq ) +{ +} + +static void r700Scissor(GLcontext* ctx, GLint x, GLint y, GLsizei w, GLsizei h) +{ + if (ctx->Scissor.Enabled) + { + /* We don't pipeline cliprect changes */ + /* r700Flush(ctx); */ + + //__DRIdrawablePrivate *dPriv = radeon->dri.drawable; + //int x1 = dPriv->x + ctx->Scissor.X; + //int y1 = dPriv->y + dPriv->h - (ctx->Scissor.Y + ctx->Scissor.Height); + + //radeon->state.scissor.rect.x1 = x1; + //radeon->state.scissor.rect.y1 = y1; + //radeon->state.scissor.rect.x2 = x1 + ctx->Scissor.Width; + //radeon->state.scissor.rect.y2 = y1 + ctx->Scissor.Height; + /* radeonRecalcScissorRects(radeon); */ + } +} + + +/** + * Calculate initial hardware state and register state functions. + * Assumes that the command buffer and state atoms have been + * initialized already. + */ +void r700InitState(GLcontext * ctx) +{ + context_t *context = R700_CONTEXT(ctx); + + R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(context->chipobj.pvChipObj); + + if(context->ctx->Visual.doubleBufferMode && context->sarea->pfCurrentPage == 0) + { + context->target.rt = context->screen->backBuffer; + } + else + { + context->target.rt = context->screen->frontBuffer; + } + + SETfield(r700->CB_TARGET_MASK.u32All, 0xF, TARGET0_ENABLE_shift, TARGET0_ENABLE_mask); + SETfield(r700->CB_SHADER_MASK.u32All, 0xF, OUTPUT0_ENABLE_shift, OUTPUT0_ENABLE_mask); + + /* screen */ + r700->PA_SC_SCREEN_SCISSOR_TL.u32All = 0x0; + SETfield(r700->PA_SC_SCREEN_SCISSOR_BR.u32All, context->screen->width, + PA_SC_SCREEN_SCISSOR_BR__BR_X_shift, PA_SC_SCREEN_SCISSOR_BR__BR_X_mask); + SETfield(r700->PA_SC_SCREEN_SCISSOR_BR.u32All, context->screen->height, + PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift, PA_SC_SCREEN_SCISSOR_BR__BR_Y_mask); + + /* window */ + SETbit(r700->PA_SC_WINDOW_SCISSOR_TL.u32All, WINDOW_OFFSET_DISABLE_bit); + SETfield(r700->PA_SC_WINDOW_SCISSOR_TL.u32All, context->currentDraw->x, + PA_SC_WINDOW_SCISSOR_TL__TL_X_shift, PA_SC_WINDOW_SCISSOR_TL__TL_X_mask); + SETfield(r700->PA_SC_WINDOW_SCISSOR_TL.u32All, context->currentDraw->y, + PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift, PA_SC_WINDOW_SCISSOR_TL__TL_Y_mask); + + SETfield(r700->PA_SC_WINDOW_SCISSOR_BR.u32All, context->currentDraw->x + context->currentDraw->w, + PA_SC_WINDOW_SCISSOR_BR__BR_X_shift, PA_SC_WINDOW_SCISSOR_BR__BR_X_mask); + SETfield(r700->PA_SC_WINDOW_SCISSOR_BR.u32All, context->currentDraw->y + context->currentDraw->h, + PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift, PA_SC_WINDOW_SCISSOR_BR__BR_Y_mask); + + /* 4 clip rectangles */ /* TODO : set these clip rects according to context->currentDraw->numClipRects */ + r700->PA_SC_CLIPRECT_RULE.u32All = 0x0000FFFF; + + SETfield(r700->PA_SC_CLIPRECT_0_TL.u32All, context->currentDraw->x, + PA_SC_CLIPRECT_0_TL__TL_X_shift, PA_SC_CLIPRECT_0_TL__TL_X_mask); + SETfield(r700->PA_SC_CLIPRECT_0_TL.u32All, context->currentDraw->y, + PA_SC_CLIPRECT_0_TL__TL_Y_shift, PA_SC_CLIPRECT_0_TL__TL_Y_mask); + SETfield(r700->PA_SC_CLIPRECT_0_BR.u32All, context->currentDraw->x + context->currentDraw->w, + PA_SC_CLIPRECT_0_BR__BR_X_shift, PA_SC_CLIPRECT_0_BR__BR_X_mask); + SETfield(r700->PA_SC_CLIPRECT_0_BR.u32All, context->currentDraw->y + context->currentDraw->h, + PA_SC_CLIPRECT_0_BR__BR_Y_shift, PA_SC_CLIPRECT_0_BR__BR_Y_mask); + + r700->PA_SC_CLIPRECT_1_TL.u32All = r700->PA_SC_CLIPRECT_0_TL.u32All; + r700->PA_SC_CLIPRECT_1_BR.u32All = r700->PA_SC_CLIPRECT_0_BR.u32All; + r700->PA_SC_CLIPRECT_2_TL.u32All = r700->PA_SC_CLIPRECT_0_TL.u32All; + r700->PA_SC_CLIPRECT_2_BR.u32All = r700->PA_SC_CLIPRECT_0_BR.u32All; + r700->PA_SC_CLIPRECT_3_TL.u32All = r700->PA_SC_CLIPRECT_0_TL.u32All; + r700->PA_SC_CLIPRECT_3_BR.u32All = r700->PA_SC_CLIPRECT_0_BR.u32All; + + /* more....2d clip */ + SETbit(r700->PA_SC_GENERIC_SCISSOR_TL.u32All, WINDOW_OFFSET_DISABLE_bit); + SETfield(r700->PA_SC_GENERIC_SCISSOR_TL.u32All, context->currentDraw->x, + PA_SC_GENERIC_SCISSOR_TL__TL_X_shift, PA_SC_GENERIC_SCISSOR_TL__TL_X_mask); + SETfield(r700->PA_SC_GENERIC_SCISSOR_TL.u32All, context->currentDraw->y, + PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift, PA_SC_GENERIC_SCISSOR_TL__TL_Y_mask); + SETfield(r700->PA_SC_GENERIC_SCISSOR_BR.u32All, context->currentDraw->x + context->currentDraw->w, + PA_SC_GENERIC_SCISSOR_BR__BR_X_shift, PA_SC_GENERIC_SCISSOR_BR__BR_X_mask); + SETfield(r700->PA_SC_GENERIC_SCISSOR_BR.u32All, context->currentDraw->y + context->currentDraw->h, + PA_SC_GENERIC_SCISSOR_BR__BR_Y_shift, PA_SC_GENERIC_SCISSOR_BR__BR_Y_mask); + + SETbit(r700->PA_SC_VPORT_SCISSOR_0_TL.u32All, WINDOW_OFFSET_DISABLE_bit); + SETfield(r700->PA_SC_VPORT_SCISSOR_0_TL.u32All, context->currentDraw->x, + PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift, PA_SC_VPORT_SCISSOR_0_TL__TL_X_mask); + SETfield(r700->PA_SC_VPORT_SCISSOR_0_TL.u32All, context->currentDraw->y, + PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift, PA_SC_VPORT_SCISSOR_0_TL__TL_Y_mask); + SETfield(r700->PA_SC_VPORT_SCISSOR_0_BR.u32All, context->currentDraw->x + context->currentDraw->w, + PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift, PA_SC_VPORT_SCISSOR_0_BR__BR_X_mask); + SETfield(r700->PA_SC_VPORT_SCISSOR_0_BR.u32All, context->currentDraw->y + context->currentDraw->h, + PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift, PA_SC_VPORT_SCISSOR_0_BR__BR_Y_mask); + + SETbit(r700->PA_SC_VPORT_SCISSOR_1_TL.u32All, WINDOW_OFFSET_DISABLE_bit); + SETfield(r700->PA_SC_VPORT_SCISSOR_1_TL.u32All, context->currentDraw->x, + PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift, PA_SC_VPORT_SCISSOR_0_TL__TL_X_mask); + SETfield(r700->PA_SC_VPORT_SCISSOR_1_TL.u32All, context->currentDraw->y, + PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift, PA_SC_VPORT_SCISSOR_0_TL__TL_Y_mask); + SETfield(r700->PA_SC_VPORT_SCISSOR_1_BR.u32All, context->currentDraw->x + context->currentDraw->w, + PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift, PA_SC_VPORT_SCISSOR_0_BR__BR_X_mask); + SETfield(r700->PA_SC_VPORT_SCISSOR_1_BR.u32All, context->currentDraw->y + context->currentDraw->h, + PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift, PA_SC_VPORT_SCISSOR_0_BR__BR_Y_mask); + + /* setup viewport */ + r700Viewport(ctx, + 0, + 0, + context->currentDraw->w, + context->currentDraw->h); + + /* Turn off vgt reuse */ + r700->VGT_REUSE_OFF.u32All = 0; + SETbit(r700->VGT_REUSE_OFF.u32All, REUSE_OFF_bit); + + /* Specify offsetting and clamp values for vertices */ + r700->VGT_MAX_VTX_INDX.u32All = 0xFFFFFF; + r700->VGT_MIN_VTX_INDX.u32All = 0; + r700->VGT_INDX_OFFSET.u32All = 0; + + /* Specify the number of instances */ + r700->VGT_DMA_NUM_INSTANCES.u32All = 1; + + /* not alpha blend */ + CLEARfield(r700->SX_ALPHA_TEST_CONTROL.u32All, ALPHA_FUNC_mask); + CLEARbit(r700->SX_ALPHA_TEST_CONTROL.u32All, ALPHA_TEST_ENABLE_bit); + + /* defualt shader connections. */ + r700->SPI_VS_OUT_ID_0.u32All = 0x03020100; + r700->SPI_VS_OUT_ID_1.u32All = 0x07060504; + + r700->SPI_PS_INPUT_CNTL_0.u32All = 0x00000800; + r700->SPI_PS_INPUT_CNTL_1.u32All = 0x00000801; + r700->SPI_PS_INPUT_CNTL_2.u32All = 0x00000802; + + SETfield(r700->CB_COLOR_CONTROL.u32All, 0xCC, ROP3_shift, ROP3_mask); + CLEARbit(r700->CB_COLOR_CONTROL.u32All, PER_MRT_BLEND_bit); + CLEARfield(r700->CB_BLEND0_CONTROL.u32All, COLOR_SRCBLEND_mask); /* no dst blend */ + CLEARfield(r700->CB_BLEND0_CONTROL.u32All, ALPHA_SRCBLEND_mask); /* no dst blend */ + + r700->DB_SHADER_CONTROL.u32All = 0; + SETbit(r700->DB_SHADER_CONTROL.u32All, DUAL_EXPORT_ENABLE_bit); + + /* Set up the culling control register */ + SETfield(r700->PA_SU_SC_MODE_CNTL.u32All, X_DRAW_TRIANGLES, + POLYMODE_FRONT_PTYPE_shift, POLYMODE_FRONT_PTYPE_mask); + SETfield(r700->PA_SU_SC_MODE_CNTL.u32All, X_DRAW_TRIANGLES, + POLYMODE_BACK_PTYPE_shift, POLYMODE_BACK_PTYPE_mask); + + /* Do scale XY and Z by 1/W0. */ + context->bEnablePerspective = GL_TRUE; + CLEARbit(r700->PA_CL_VTE_CNTL.u32All, VTX_XY_FMT_bit); + CLEARbit(r700->PA_CL_VTE_CNTL.u32All, VTX_Z_FMT_bit); + SETbit(r700->PA_CL_VTE_CNTL.u32All, VTX_W0_FMT_bit); + + /* Enable viewport scaling for all three axis */ + SETbit(r700->PA_CL_VTE_CNTL.u32All, VPORT_X_SCALE_ENA_bit); + SETbit(r700->PA_CL_VTE_CNTL.u32All, VPORT_X_OFFSET_ENA_bit); + SETbit(r700->PA_CL_VTE_CNTL.u32All, VPORT_Y_SCALE_ENA_bit); + SETbit(r700->PA_CL_VTE_CNTL.u32All, VPORT_Y_OFFSET_ENA_bit); + SETbit(r700->PA_CL_VTE_CNTL.u32All, VPORT_Z_SCALE_ENA_bit); + SETbit(r700->PA_CL_VTE_CNTL.u32All, VPORT_Z_OFFSET_ENA_bit); + + /* Set up point sizes and min/max values */ + SETfield(r700->PA_SU_POINT_SIZE.u32All, 0x8, + PA_SU_POINT_SIZE__HEIGHT_shift, PA_SU_POINT_SIZE__HEIGHT_mask); + SETfield(r700->PA_SU_POINT_SIZE.u32All, 0x8, + PA_SU_POINT_SIZE__WIDTH_shift, PA_SU_POINT_SIZE__WIDTH_mask); + CLEARfield(r700->PA_SU_POINT_MINMAX.u32All, MIN_SIZE_mask); + SETfield(r700->PA_SU_POINT_MINMAX.u32All, 0x8000, MAX_SIZE_shift, MAX_SIZE_mask); + + /* Set up line control */ + SETfield(r700->PA_SU_LINE_CNTL.u32All, 0x8, + PA_SU_LINE_CNTL__WIDTH_shift, PA_SU_LINE_CNTL__WIDTH_mask); + + r700->PA_SC_LINE_CNTL.u32All = 0; + CLEARbit(r700->PA_SC_LINE_CNTL.u32All, EXPAND_LINE_WIDTH_bit); + SETbit(r700->PA_SC_LINE_CNTL.u32All, LAST_PIXEL_bit); + + /* Set up vertex control */ + r700->PA_SU_VTX_CNTL.u32All = 0; + CLEARfield(r700->PA_SU_VTX_CNTL.u32All, QUANT_MODE_mask); + SETbit(r700->PA_SU_VTX_CNTL.u32All, PIX_CENTER_bit); + SETfield(r700->PA_SU_VTX_CNTL.u32All, X_ROUND_TO_EVEN, + PA_SU_VTX_CNTL__ROUND_MODE_shift, PA_SU_VTX_CNTL__ROUND_MODE_mask); + + /* to 1.0 = no guard band */ + r700->PA_CL_GB_VERT_CLIP_ADJ.u32All = 0x3F800000; /* 1.0 */ + r700->PA_CL_GB_VERT_DISC_ADJ.u32All = 0x3F800000; + r700->PA_CL_GB_HORZ_CLIP_ADJ.u32All = 0x3F800000; + r700->PA_CL_GB_HORZ_DISC_ADJ.u32All = 0x3F800000; + + /* Disble color compares */ + SETfield(r700->CB_CLRCMP_CONTROL.u32All, CLRCMP_DRAW_ALWAYS, + CLRCMP_FCN_SRC_shift, CLRCMP_FCN_SRC_mask); + SETfield(r700->CB_CLRCMP_CONTROL.u32All, CLRCMP_DRAW_ALWAYS, + CLRCMP_FCN_DST_shift, CLRCMP_FCN_DST_mask); + SETfield(r700->CB_CLRCMP_CONTROL.u32All, CLRCMP_SEL_SRC, + CLRCMP_FCN_SEL_shift, CLRCMP_FCN_SEL_mask); + + /* Zero out source */ + r700->CB_CLRCMP_SRC.u32All = 0x00000000; + + /* Put a compare color in for error checking */ + r700->CB_CLRCMP_DST.u32All = 0x000000FF; + + /* Set up color compare mask */ + r700->CB_CLRCMP_MSK.u32All = 0xFFFFFFFF; + + /* Enable all samples for multi-sample anti-aliasing */ + r700->PA_SC_AA_MASK.u32All = 0xFFFFFFFF; + /* Turn off AA */ + r700->PA_SC_AA_CONFIG.u32All = 0; + + SETfield(r700->VGT_OUT_DEALLOC_CNTL.u32All, 16, DEALLOC_DIST_shift, DEALLOC_DIST_mask); + SETfield(r700->VGT_VERTEX_REUSE_BLOCK_CNTL.u32All, 14, VTX_REUSE_DEPTH_shift, VTX_REUSE_DEPTH_mask); + + r700->SX_MISC.u32All = 0; + + /* depth buf */ + r700->DB_DEPTH_SIZE.u32All = 0; + SETfield(r700->DB_DEPTH_SIZE.u32All, (context->screen->depthBuffer.pitch/8)-1, + PITCH_TILE_MAX_shift, PITCH_TILE_MAX_mask); + SETfield(r700->DB_DEPTH_SIZE.u32All, ( (context->screen->depthBuffer.size / context->screen->cpp)/64 )-1, + SLICE_TILE_MAX_shift, SLICE_TILE_MAX_mask); /* size in pixel / 64 - 1 */ + r700->DB_DEPTH_BASE.u32All = context->screen->depthBuffer.gpu >> 8; + r700->DB_DEPTH_INFO.u32All = 0; + if(4 == context->screen->cpp) /* TODO : in scrren create, gives z its own format alloc. */ + { + switch (ctx->Visual.depthBits) + { + case 16: + case 24: + SETfield(r700->DB_DEPTH_INFO.u32All, DEPTH_8_24, + DB_DEPTH_INFO__FORMAT_shift, DB_DEPTH_INFO__FORMAT_mask); + break; + default: + fprintf(stderr, "Error: Unsupported depth %d... exiting\n", + ctx->Visual.depthBits); + _mesa_exit(-1); + } + } + else + { + SETfield(r700->DB_DEPTH_INFO.u32All, DEPTH_16, + DB_DEPTH_INFO__FORMAT_shift, DB_DEPTH_INFO__FORMAT_mask); + } + SETfield(r700->DB_DEPTH_INFO.u32All, ARRAY_2D_TILED_THIN1, + DB_DEPTH_INFO__ARRAY_MODE_shift, DB_DEPTH_INFO__ARRAY_MODE_mask); + /* r700->DB_PREFETCH_LIMIT.bits.DEPTH_HEIGHT_TILE_MAX = (context->currentDraw->h >> 3) - 1; */ /* z buffer sie may much bigger than what need, so use actual used h. */ + r700->DB_DEPTH_CONTROL.u32All = 0; + r700->DB_DEPTH_CLEAR.u32All = 0x3F800000; + r700->DB_DEPTH_VIEW.u32All = 0; + r700->DB_RENDER_CONTROL.u32All = 0; + r700->DB_RENDER_OVERRIDE.u32All = 0; + SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIZ_ENABLE_shift, FORCE_HIZ_ENABLE_mask); + SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIS_ENABLE0_shift, FORCE_HIS_ENABLE0_mask); + SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIS_ENABLE1_shift, FORCE_HIS_ENABLE1_mask); + + /* color buffer */ + SETfield(r700->CB_COLOR0_SIZE.u32All, (context->screen->frontBuffer.pitch/8)-1, + PITCH_TILE_MAX_shift, PITCH_TILE_MAX_mask); + SETfield(r700->CB_COLOR0_SIZE.u32All, ( (context->screen->frontBuffer.size / context->screen->cpp)/64 )-1, + SLICE_TILE_MAX_shift, SLICE_TILE_MAX_mask); + r700->CB_COLOR0_BASE.u32All = context->screen->frontBuffer.gpu >> 8; + SETfield(r700->CB_COLOR0_INFO.u32All, ENDIAN_NONE, ENDIAN_shift, ENDIAN_mask); + SETfield(r700->CB_COLOR0_INFO.u32All, ARRAY_LINEAR_GENERAL, + CB_COLOR0_INFO__ARRAY_MODE_shift, CB_COLOR0_INFO__ARRAY_MODE_mask); + if(4 == context->screen->cpp) + { + SETfield(r700->CB_COLOR0_INFO.u32All, COLOR_8_8_8_8, + CB_COLOR0_INFO__FORMAT_shift, CB_COLOR0_INFO__FORMAT_mask); + SETfield(r700->CB_COLOR0_INFO.u32All, SWAP_ALT, COMP_SWAP_shift, COMP_SWAP_mask); + } + else + { + SETfield(r700->CB_COLOR0_INFO.u32All, COLOR_5_6_5, + CB_COLOR0_INFO__FORMAT_shift, CB_COLOR0_INFO__FORMAT_mask); + SETfield(r700->CB_COLOR0_INFO.u32All, SWAP_ALT_REV, + COMP_SWAP_shift, COMP_SWAP_mask); + } + SETbit(r700->CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit); + SETbit(r700->CB_COLOR0_INFO.u32All, BLEND_CLAMP_bit); + SETfield(r700->CB_COLOR0_INFO.u32All, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask); + r700->CB_COLOR0_VIEW.u32All = 0; + r700->CB_COLOR0_TILE.u32All = 0; + r700->CB_COLOR0_FRAG.u32All = 0; + r700->CB_COLOR0_MASK.u32All = 0; + + r700->PA_SC_VPORT_ZMAX_0.u32All = 0x3F800000; +} + +void r700InitStateFuncs(struct dd_function_table *functions) +{ + functions->UpdateState = r700InvalidateState; + functions->AlphaFunc = r700AlphaFunc; + functions->BlendColor = r700BlendColor; + functions->BlendEquationSeparate = r700BlendEquationSeparate; + functions->BlendFuncSeparate = r700BlendFuncSeparate; + functions->Enable = r700Enable; + functions->ColorMask = r700ColorMask; + functions->DepthFunc = r700DepthFunc; + functions->DepthMask = r700DepthMask; + functions->CullFace = r700CullFace; + functions->Fogfv = r700Fogfv; + functions->FrontFace = r700FrontFace; + functions->ShadeModel = r700ShadeModel; + + /* ARB_point_parameters */ + functions->PointParameterfv = r700PointParameter; + + /* Stencil related */ + functions->StencilFuncSeparate = r700StencilFuncSeparate; + functions->StencilMaskSeparate = r700StencilMaskSeparate; + functions->StencilOpSeparate = r700StencilOpSeparate; + + /* Viewport related */ + functions->Viewport = r700Viewport; + functions->DepthRange = r700DepthRange; + functions->PointSize = r700PointSize; + functions->LineWidth = r700LineWidth; + + functions->PolygonOffset = r700PolygonOffset; + functions->PolygonMode = r700PolygonMode; + + functions->RenderMode = r700RenderMode; + + functions->ClipPlane = r700ClipPlane; + + functions->Scissor = r700Scissor; +} + diff --git a/src/mesa/drivers/dri/r600/r700_state.h b/src/mesa/drivers/dri/r600/r700_state.h new file mode 100644 index 0000000000..ca59b8d119 --- /dev/null +++ b/src/mesa/drivers/dri/r600/r700_state.h @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Authors: + * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com> + */ + +#ifndef _R700_STATE_H +#define _R700_STATE_H + +#include "main/mtypes.h" + +#include "r600_common.h" + +#include "r700_chip.h" + +extern void r700UpdateStateParameters(GLcontext * ctx, GLuint new_state); +extern void r700UpdateShaders (GLcontext * ctx); + +extern void r700UpdateViewportOffset(GLcontext * ctx); +extern void r700UpdateDrawBuffer (GLcontext * ctx); + +extern void r700InitState (GLcontext * ctx); +extern void r700InitStateFuncs (struct dd_function_table *functions); + +extern void r700SetDefaultStates(context_t * context); + +#endif /* _R600_SCREEN_H */ diff --git a/src/mesa/drivers/dri/r600/r700_tex.c b/src/mesa/drivers/dri/r600/r700_tex.c new file mode 100644 index 0000000000..2ebb8b7f06 --- /dev/null +++ b/src/mesa/drivers/dri/r600/r700_tex.c @@ -0,0 +1,1743 @@ +/* + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Authors: + * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com> + */ + +#include "glheader.h" +#include "imports.h" +#include "colormac.h" +#include "context.h" +#include "simple_list.h" +#include "texformat.h" +#include "texstore.h" +#include "texmem.h" +#include "teximage.h" +#include "texobj.h" +#include "macros.h" +#include "xmlpool.h" + +#include "r700_interface.h" +#include "r700_chip.h" + +#include "r700_state.h" +#include "r700_tex.h" + +GLuint r700GetTexObjSize(void) +{ + return sizeof(r700TexObj); +} + +static GLboolean r700GetTexFormat(struct gl_texture_object *tObj, GLuint mesa_format) +{ + r700TexObjPtr t = (r700TexObjPtr) tObj->DriverData; + + t->texture_state.SQ_TEX_RESOURCE4.u32All &= ~( SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask + |SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask + |SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask + |SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask ); + + switch (mesa_format) /* This is mesa format. */ + { + case MESA_FORMAT_RGBA8888: + SETfield(t->texture_state.SQ_TEX_RESOURCE1.u32All, FMT_8_8_8_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + t->texture_state.SQ_TEX_RESOURCE4.u32All |= + (SQ_SEL_W << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) + |(SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) + |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) + |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + + break; + case MESA_FORMAT_RGBA8888_REV: + SETfield(t->texture_state.SQ_TEX_RESOURCE1.u32All, FMT_8_8_8_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + t->texture_state.SQ_TEX_RESOURCE4.u32All |= + (SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) + |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) + |(SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) + |(SQ_SEL_W << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + + break; + case MESA_FORMAT_ARGB8888: + SETfield(t->texture_state.SQ_TEX_RESOURCE1.u32All, FMT_8_8_8_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + t->texture_state.SQ_TEX_RESOURCE4.u32All |= + (SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) + |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) + |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) + |(SQ_SEL_W << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + break; + case MESA_FORMAT_ARGB8888_REV: + SETfield(t->texture_state.SQ_TEX_RESOURCE1.u32All, FMT_8_8_8_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + t->texture_state.SQ_TEX_RESOURCE4.u32All |= + (SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) + |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) + |(SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) + |(SQ_SEL_W << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + break; + + case MESA_FORMAT_RGB888: + SETfield(t->texture_state.SQ_TEX_RESOURCE1.u32All, FMT_8_8_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + t->texture_state.SQ_TEX_RESOURCE4.u32All |= + (SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) + |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) + |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) + |(SQ_SEL_1 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + break; + case MESA_FORMAT_RGB565: + SETfield(t->texture_state.SQ_TEX_RESOURCE1.u32All, FMT_5_6_5, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + t->texture_state.SQ_TEX_RESOURCE4.u32All |= + (SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) + |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) + |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) + |(SQ_SEL_1 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + break; + case MESA_FORMAT_RGB565_REV: + SETfield(t->texture_state.SQ_TEX_RESOURCE1.u32All, FMT_5_6_5, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + t->texture_state.SQ_TEX_RESOURCE4.u32All |= + (SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) + |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) + |(SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) + |(SQ_SEL_1 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + break; + case MESA_FORMAT_ARGB4444: + SETfield(t->texture_state.SQ_TEX_RESOURCE1.u32All, FMT_4_4_4_4, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + t->texture_state.SQ_TEX_RESOURCE4.u32All |= + (SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) + |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) + |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) + |(SQ_SEL_W << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + break; + case MESA_FORMAT_ARGB4444_REV: + SETfield(t->texture_state.SQ_TEX_RESOURCE1.u32All, FMT_4_4_4_4, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + t->texture_state.SQ_TEX_RESOURCE4.u32All |= + (SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) + |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) + |(SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) + |(SQ_SEL_W << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + break; + case MESA_FORMAT_ARGB1555: + SETfield(t->texture_state.SQ_TEX_RESOURCE1.u32All, FMT_1_5_5_5, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + t->texture_state.SQ_TEX_RESOURCE4.u32All |= + (SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) + |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) + |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) + |(SQ_SEL_W << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + break; + case MESA_FORMAT_ARGB1555_REV: + SETfield(t->texture_state.SQ_TEX_RESOURCE1.u32All, FMT_1_5_5_5, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + t->texture_state.SQ_TEX_RESOURCE4.u32All |= + (SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) + |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) + |(SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) + |(SQ_SEL_W << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + break; + case MESA_FORMAT_AL88: + case MESA_FORMAT_AL88_REV: /* TODO : Check this. */ + SETfield(t->texture_state.SQ_TEX_RESOURCE1.u32All, FMT_8_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + t->texture_state.SQ_TEX_RESOURCE4.u32All |= + (SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) + |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) + |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) + |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + break; + case MESA_FORMAT_RGB332: + SETfield(t->texture_state.SQ_TEX_RESOURCE1.u32All, FMT_3_3_2, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + t->texture_state.SQ_TEX_RESOURCE4.u32All |= + (SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) + |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) + |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) + |(SQ_SEL_1 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + break; + case MESA_FORMAT_A8: /* ZERO, ZERO, ZERO, X */ + SETfield(t->texture_state.SQ_TEX_RESOURCE1.u32All, FMT_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + t->texture_state.SQ_TEX_RESOURCE4.u32All |= + (SQ_SEL_0 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) + |(SQ_SEL_0 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) + |(SQ_SEL_0 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) + |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + break; + case MESA_FORMAT_L8: /* X, X, X, ONE */ + SETfield(t->texture_state.SQ_TEX_RESOURCE1.u32All, FMT_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + t->texture_state.SQ_TEX_RESOURCE4.u32All |= + (SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) + |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) + |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) + |(SQ_SEL_1 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + break; + case MESA_FORMAT_I8: /* X, X, X, X */ + case MESA_FORMAT_CI8: + SETfield(t->texture_state.SQ_TEX_RESOURCE1.u32All, FMT_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + t->texture_state.SQ_TEX_RESOURCE4.u32All |= + (SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) + |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) + |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) + |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + break; + /* YUV422 TODO conversion */ /* X, Y, Z, ONE, G8R8_G8B8 */ + /* + case MESA_FORMAT_YCBCR: + t->texture_state.SQ_TEX_RESOURCE1.bitfields.DATA_FORMAT = ; + break; + */ + /* VUY422 TODO conversion */ /* X, Y, Z, ONE, G8R8_G8B8 */ + /* + case MESA_FORMAT_YCBCR_REV: + t->texture_state.SQ_TEX_RESOURCE1.bitfields.DATA_FORMAT = ; + break; + */ + case MESA_FORMAT_RGB_DXT1: /* not supported yet */ + + break; + case MESA_FORMAT_RGBA_DXT1: /* not supported yet */ + + break; + case MESA_FORMAT_RGBA_DXT3: /* not supported yet */ + + break; + case MESA_FORMAT_RGBA_DXT5: /* not supported yet */ + + break; + case MESA_FORMAT_RGBA_FLOAT32: + SETfield(t->texture_state.SQ_TEX_RESOURCE1.u32All, FMT_32_32_32_32_FLOAT, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + t->texture_state.SQ_TEX_RESOURCE4.u32All |= + (SQ_SEL_W << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) + |(SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) + |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) + |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + break; + case MESA_FORMAT_RGBA_FLOAT16: + SETfield(t->texture_state.SQ_TEX_RESOURCE1.u32All, FMT_16_16_16_16_FLOAT, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + t->texture_state.SQ_TEX_RESOURCE4.u32All |= + (SQ_SEL_W << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) + |(SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) + |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) + |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + break; + case MESA_FORMAT_RGB_FLOAT32: /* X, Y, Z, ONE */ + SETfield(t->texture_state.SQ_TEX_RESOURCE1.u32All, FMT_32_32_32_FLOAT, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + t->texture_state.SQ_TEX_RESOURCE4.u32All |= + (SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) + |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) + |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) + |(SQ_SEL_1 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + break; + case MESA_FORMAT_RGB_FLOAT16: + SETfield(t->texture_state.SQ_TEX_RESOURCE1.u32All, FMT_16_16_16_FLOAT, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + t->texture_state.SQ_TEX_RESOURCE4.u32All |= + (SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) + |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) + |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) + |(SQ_SEL_1 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + break; + case MESA_FORMAT_ALPHA_FLOAT32: /* ZERO, ZERO, ZERO, X */ + SETfield(t->texture_state.SQ_TEX_RESOURCE1.u32All, FMT_32_FLOAT, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + t->texture_state.SQ_TEX_RESOURCE4.u32All |= + (SQ_SEL_0 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) + |(SQ_SEL_0 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) + |(SQ_SEL_0 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) + |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + break; + case MESA_FORMAT_ALPHA_FLOAT16: /* ZERO, ZERO, ZERO, X */ + SETfield(t->texture_state.SQ_TEX_RESOURCE1.u32All, FMT_16_FLOAT, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + t->texture_state.SQ_TEX_RESOURCE4.u32All |= + (SQ_SEL_0 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) + |(SQ_SEL_0 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) + |(SQ_SEL_0 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) + |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + break; + case MESA_FORMAT_LUMINANCE_FLOAT32: /* X, X, X, ONE */ + SETfield(t->texture_state.SQ_TEX_RESOURCE1.u32All, FMT_32_FLOAT, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + t->texture_state.SQ_TEX_RESOURCE4.u32All |= + (SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) + |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) + |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) + |(SQ_SEL_1 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + break; + case MESA_FORMAT_LUMINANCE_FLOAT16: /* X, X, X, ONE */ + SETfield(t->texture_state.SQ_TEX_RESOURCE1.u32All, FMT_16_FLOAT, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + t->texture_state.SQ_TEX_RESOURCE4.u32All |= + (SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) + |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) + |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) + |(SQ_SEL_1 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + break; + case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT32: + SETfield(t->texture_state.SQ_TEX_RESOURCE1.u32All, FMT_32_32_FLOAT, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + t->texture_state.SQ_TEX_RESOURCE4.u32All |= + (SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) + |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) + |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) + |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + break; + case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT16: + SETfield(t->texture_state.SQ_TEX_RESOURCE1.u32All, FMT_16_16_FLOAT, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + t->texture_state.SQ_TEX_RESOURCE4.u32All |= + (SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) + |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) + |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) + |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + break; + case MESA_FORMAT_INTENSITY_FLOAT32: /* X, X, X, X */ + SETfield(t->texture_state.SQ_TEX_RESOURCE1.u32All, FMT_32_FLOAT, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + t->texture_state.SQ_TEX_RESOURCE4.u32All |= + (SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) + |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) + |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) + |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + break; + case MESA_FORMAT_INTENSITY_FLOAT16: /* X, X, X, X */ + SETfield(t->texture_state.SQ_TEX_RESOURCE1.u32All, FMT_16_FLOAT, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + t->texture_state.SQ_TEX_RESOURCE4.u32All |= + (SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) + |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) + |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) + |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + break; + case MESA_FORMAT_Z16: + case MESA_FORMAT_Z24_S8: + case MESA_FORMAT_Z32: + switch (mesa_format) + { + case MESA_FORMAT_Z16: + SETfield(t->texture_state.SQ_TEX_RESOURCE1.u32All, FMT_16, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + break; + case MESA_FORMAT_Z24_S8: + SETfield(t->texture_state.SQ_TEX_RESOURCE1.u32All, FMT_24_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + break; + case MESA_FORMAT_Z32: + SETfield(t->texture_state.SQ_TEX_RESOURCE1.u32All, FMT_32, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + }; + switch (tObj->DepthMode) + { + case GL_LUMINANCE: /* X, X, X, ONE */ + + t->texture_state.SQ_TEX_RESOURCE4.u32All |= + (SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) + |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) + |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) + |(SQ_SEL_1 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + break; + case GL_INTENSITY: /* X, X, X, X */ + + t->texture_state.SQ_TEX_RESOURCE4.u32All |= + (SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) + |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) + |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) + |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + break; + case GL_ALPHA: /* ZERO, ZERO, ZERO, X */ + t->texture_state.SQ_TEX_RESOURCE4.u32All |= + (SQ_SEL_0 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) + |(SQ_SEL_0 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) + |(SQ_SEL_0 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) + |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + break; + default: + return GL_FALSE; + } + break; + default: + /* Not supported format */ + return GL_FALSE; + }; + + return GL_TRUE; +} + +static void compute_tex_image_offset( + struct gl_texture_object *tObj, + GLuint face, + GLint level, + GLint* curOffset) +{ + r700TexObjPtr t = (r700TexObjPtr) tObj->DriverData; + const struct gl_texture_image* texImage; + GLuint blitWidth = R700_BLIT_WIDTH_BYTES; + GLuint texelBytes; + GLuint size; + GLuint pitch; + + texImage = tObj->Image[0][level + t->base.firstLevel]; + if (!texImage) + { + return; + } + + texelBytes = texImage->TexFormat->TexelBytes; + + pitch = (texImage->Width + R700_TEXEL_PITCH_ALIGNMENT_MASK) & ~R700_TEXEL_PITCH_ALIGNMENT_MASK; + + /* find image size in bytes */ + if (texImage->IsCompressed) + { + /* not supported yet */ + } + else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) + { + if( (ARRAY_LINEAR_ALIGNED << SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift) + == (t->texture_state.SQ_TEX_RESOURCE0.u32All & SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask) ) + { + pitch = (texImage->Width * texelBytes + 255) & ~255; + } + else + { + if(0 == level) + { + pitch = (pitch * texelBytes + 63) & ~63; + } + else + { + pitch = texImage->Width * texelBytes; + } + } + size = pitch * texImage->Height; + blitWidth = 64 / texelBytes; + pitch /= texelBytes; + } + else + { + if( (ARRAY_LINEAR_ALIGNED << SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift) + == (t->texture_state.SQ_TEX_RESOURCE0.u32All & SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask) ) + { + pitch = (texImage->Width * texelBytes + 255) & ~255; + } + else + { + if(0 == level) + { + pitch = (pitch * texelBytes + 31) & ~31; + } + else + { + pitch = texImage->Width * texelBytes; + } + } + size = pitch * texImage->Height * texImage->Depth; + blitWidth = MAX2(texImage->Width, 64 / texelBytes); + pitch /= texelBytes; + } + assert(size > 0); + + if( (0 == level) || (1 == level) ) /* 0 for BASE_ADDRESS, 1 for MIP_ADDRESS */ + { + *curOffset = (*curOffset + R700_TEXTURE_ALIGNMENT_MASK) & ~R700_TEXTURE_ALIGNMENT_MASK; + } + + if (texelBytes) + { + /* fix x and y coords up later together with offset */ + t->texel_pitch[face][level] = pitch; + t->level_offset[face][level] = *curOffset; + t->byte_per_texel = texelBytes; + t->src_width_in_pexel[face][level] = texImage->Width; + t->src_hight_in_pexel[face][level] = texImage->Height; + } + else + { + /* Do it like one byte texel. */ + pitch = (size + R700_TEXEL_PITCH_ALIGNMENT_MASK) & ~R700_TEXEL_PITCH_ALIGNMENT_MASK; + t->texel_pitch[face][level] = pitch; + t->level_offset[face][level] = *curOffset; + t->byte_per_texel = 1; + t->src_width_in_pexel[face][level] = size; + t->src_hight_in_pexel[face][level] = 1; + } + + *curOffset += size; +} + +void r700DestroyTexObj(context_t context, r700TexObjPtr t) +{ + /* TODO : nuke r700 chip texture and sampler pointer. */ + //int i; + + //for (i = 0; i < rmesa->ctx->Const.MaxTextureUnits; i++) + //{ + //if (rmesa->state.texture.unit[i].texobj == t) { + // rmesa->state.texture.unit[i].texobj = NULL; + //} + //} +} + +static void r700SetTexImages(context_t *context, struct gl_texture_object *tObj) +{ + r700TexObjPtr t = (r700TexObjPtr) tObj->DriverData; + const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel]; + GLint curOffset; + GLint i, texelBytes; + GLint numLevels; + GLint log2Width, log2Height, log2Depth; + GLuint uTexelPitch; + + if (!t->image_override) + { + if(GL_FALSE == r700GetTexFormat(tObj, baseImage->TexFormat->MesaFormat) ) + { + _mesa_problem(NULL, "unexpected texture format in %s", __FUNCTION__); + return; + } + } + + texelBytes = baseImage->TexFormat->TexelBytes; + + switch (tObj->Target) + { + case GL_TEXTURE_1D: + SETfield(t->texture_state.SQ_TEX_RESOURCE0.u32All, SQ_TEX_DIM_1D, DIM_shift, DIM_mask); + SETfield(t->texture_state.SQ_TEX_RESOURCE1.u32All, 0, TEX_DEPTH_shift, TEX_DEPTH_mask); + break; + case GL_TEXTURE_2D: + case GL_TEXTURE_RECTANGLE_NV: + SETfield(t->texture_state.SQ_TEX_RESOURCE0.u32All, SQ_TEX_DIM_2D, DIM_shift, DIM_mask); + SETfield(t->texture_state.SQ_TEX_RESOURCE1.u32All, 0, TEX_DEPTH_shift, TEX_DEPTH_mask); + break; + case GL_TEXTURE_3D: + SETfield(t->texture_state.SQ_TEX_RESOURCE0.u32All, SQ_TEX_DIM_3D, DIM_shift, DIM_mask); + SETfield(t->texture_state.SQ_TEX_RESOURCE1.u32All, tObj->Image[0][t->base.firstLevel]->Depth - 1, + TEX_DEPTH_shift, TEX_DEPTH_mask); + break; + case GL_TEXTURE_CUBE_MAP: + SETfield(t->texture_state.SQ_TEX_RESOURCE0.u32All, SQ_TEX_DIM_CUBEMAP, DIM_shift, DIM_mask); + SETfield(t->texture_state.SQ_TEX_RESOURCE1.u32All, 0, TEX_DEPTH_shift, TEX_DEPTH_mask); + break; + default: + _mesa_problem(NULL, "unexpected texture target type in %s", __FUNCTION__); + return; + } + + /* Compute which mipmap levels we really want to send to the hardware. + */ + driCalculateTextureFirstLastLevel((driTextureObject *) t); + log2Width = tObj->Image[0][t->base.firstLevel]->WidthLog2; + log2Height = tObj->Image[0][t->base.firstLevel]->HeightLog2; + log2Depth = tObj->Image[0][t->base.firstLevel]->DepthLog2; + + numLevels = t->base.lastLevel - t->base.firstLevel + 1; + + assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS); + + /* Calculate mipmap offsets and dimensions for blitting (uploading) + * The idea is that we lay out the mipmap levels within a block of + * memory organized as a rectangle of width BLIT_WIDTH_BYTES. + */ + t->tile_bits = 0; + + curOffset = 0; + + if (tObj->Target == GL_TEXTURE_CUBE_MAP) + { + ASSERT(log2Width == log2Height); + + for(i = 0; i < numLevels; i++) + { + /* i is hw level */ + GLuint face; + for(face = 0; face < 6; face++) + { + compute_tex_image_offset(tObj, face, i, &curOffset); + } + } + } + else + { + for (i = 0; i < numLevels; i++) + { + /* i is hw level */ + compute_tex_image_offset(tObj, 0, i, &curOffset); + } + } + + /* Align the total size of texture memory block. + */ + t->base.totalSize = (curOffset + R700_TEXTURE_ALIGNMENT_MASK) & ~R700_TEXTURE_ALIGNMENT_MASK; + + t->pitch = 0; + + /* TODO : baseImage->IsCompressed, tObj->Target == GL_TEXTURE_RECTANGLE_NV */ + + uTexelPitch = (tObj->Image[0][t->base.firstLevel]->Width + R700_TEXEL_PITCH_ALIGNMENT_MASK) + & ~R700_TEXEL_PITCH_ALIGNMENT_MASK; + + SETfield(t->texture_state.SQ_TEX_RESOURCE0.u32All, (uTexelPitch/8)-1, PITCH_shift, PITCH_mask); + SETfield(t->texture_state.SQ_TEX_RESOURCE0.u32All, tObj->Image[0][t->base.firstLevel]->Width - 1, + TEX_WIDTH_shift, TEX_WIDTH_mask); + SETfield(t->texture_state.SQ_TEX_RESOURCE1.u32All, tObj->Image[0][t->base.firstLevel]->Height - 1, + TEX_HEIGHT_shift, TEX_HEIGHT_mask); +} + +static void r700UploadSubImage(context_t *context, + r700TexObjPtr t, + GLint hwlevel, /* relative level to first real level. */ + GLint x, + GLint y, + GLuint face) +{ + struct gl_texture_image *texImage = NULL; + GLuint offset; + GLint imageWidth, imageHeight; + GLint ret; + const int level = hwlevel + t->base.firstLevel; + + unsigned char *pSrc; + + ASSERT(face < 6); + + /* Ensure we have a valid texture to upload */ + if ((hwlevel < 0) || (hwlevel >= RADEON_MAX_TEXTURE_LEVELS)) + { + _mesa_problem(NULL, "bad texture level in %s", __FUNCTION__); + return; + } + + texImage = t->base.tObj->Image[face][level]; + + if (!texImage) + { + return; + } + if (!texImage->Data) + { + return; + } + + if (t->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) + { + /* TODO : + assert(level == 0); + assert(hwlevel == 0); + + r300UploadRectSubImage(rmesa, t, texImage, x, y, width, height); + */ + return; + } + else if (texImage->IsClientData) + { + /* TODO : + r300UploadGARTClientSubImage(rmesa, t, texImage, hwlevel, x, y, + width, height); + */ + return; + } + + imageWidth = texImage->Width; + imageHeight = texImage->Height; + + /* use hwlevel for hwsurf. */ + offset = t->bufAddr + t->level_offset[face][hwlevel]; + + pSrc = (unsigned char*)(texImage->Data); + + (context->chipobj.LoadMemSurf)(context, + offset, /* gpu addr */ + t->texel_pitch[face][hwlevel], /* dst_pitch_in_pixel */ + t->src_width_in_pexel[face][hwlevel], /*src_width_in_pixel */ + t->src_hight_in_pexel[face][hwlevel], /* height */ + t->byte_per_texel, /* byte_per_pixel */ + pSrc); /* source data */ +} + +int r700UploadTexImages(GLcontext * ctx, struct gl_texture_object *tObj, GLuint face) +{ + context_t *context = R700_CONTEXT(ctx); + r700TexObjPtr t = (r700TexObjPtr) tObj->DriverData; + + int heap; + const int numLevels = t->base.lastLevel - t->base.firstLevel + 1; + + if (t->image_override) + { + return 0; + } + + if (t->base.totalSize == 0) + { + return 0; + } + /* TODO */ + /*LOCK_HARDWARE(&rmesa->radeon);*/ + + if (t->base.memBlock == NULL) + { + heap = RADEON_LOCAL_TEX_HEAP; + if( GL_FALSE == (context->chipobj.AllocMemSurf)(context, + &(t->base.memBlock), + &(t->base.heap), + &heap, /* prefered_heap, also return the actual heap used. */ + t->base.totalSize) ) + { + /* TODO */ + /* UNLOCK_HARDWARE(&rmesa->radeon); */ + return -1; + } + + /* Set the base offset of the texture image */ + t->bufAddr = context->screen->texOffset[heap] + t->base.memBlock->ofs; + t->offset = t->bufAddr; + + /* + if (!(t->base.tObj->Image[0][0]->IsClientData)) + { + t->offset |= t->tile_bits; + } + */ + } + + /* Let the world know we've used this memory recently. + */ + driUpdateTextureLRU((driTextureObject *) t); + + /* TODO */ + /* UNLOCK_HARDWARE(&rmesa->radeon); */ + + /* Upload any images that are new */ + if (t->my_dirty_images[face]) + { + int i; + for(i = 0; i < numLevels; i++) + { + if( (t->my_dirty_images[face] & (1 << (i + t->base.firstLevel))) !=0) + { + r700UploadSubImage(context, + t, + i, /* i is hw level */ + 0, + 0, + face); + } + } + t->base.dirty_images[face] = 0; + t->my_dirty_images[face] = 0; + } + + /* TODO : 3D, CUBE */ + t->texture_state.SQ_TEX_RESOURCE2.u32All = t->bufAddr / 256; + if( (t->base.lastLevel - t->base.firstLevel) > 0 ) + { + t->texture_state.SQ_TEX_RESOURCE3.u32All = (t->bufAddr + t->level_offset[0][1]) / 256; /* MIP_ADDRESS */ + + SETfield(t->texture_state.SQ_TEX_RESOURCE4.u32All, t->base.firstLevel, BASE_LEVEL_shift, BASE_LEVEL_mask); + SETfield(t->texture_state.SQ_TEX_RESOURCE5.u32All, t->base.lastLevel, LAST_LEVEL_shift, LAST_LEVEL_mask); + } + + return 0; +} + +static GLboolean r700EnableTexture2D(GLcontext * ctx, int unit) +{ + context_t *context = R700_CONTEXT(ctx); + struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; + struct gl_texture_object *tObj = texUnit->_Current; + r700TexObjPtr t = (r700TexObjPtr) tObj->DriverData; + + ASSERT(tObj->Target == GL_TEXTURE_2D || tObj->Target == GL_TEXTURE_1D); + + if (t->base.dirty_images[0]) + { + r700SetTexImages(context, tObj); + r700UploadTexImages(ctx, tObj, 0); + if (!t->base.memBlock && !t->image_override) + { + return GL_FALSE; + } + } + return GL_TRUE; +} + +/* try to find a format which will only need a memcopy */ +static const struct gl_texture_format *r700Choose8888TexFormat(GLenum srcFormat, + GLenum srcType) +{ + struct gl_texture_format * gtfRet; + + const GLuint ui = 1; + const GLubyte littleEndian = *((const GLubyte *)&ui); + + if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8) || + (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && !littleEndian) || + (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) || + (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && littleEndian)) + { + gtfRet = &_mesa_texformat_rgba8888; + } + else if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) || + (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && littleEndian) || + (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8) || + (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && !littleEndian)) + { + gtfRet = &_mesa_texformat_rgba8888_rev; + } + else if (srcFormat == GL_BGRA && ((srcType == GL_UNSIGNED_BYTE && !littleEndian) || + srcType == GL_UNSIGNED_INT_8_8_8_8)) + { + gtfRet = &_mesa_texformat_argb8888_rev; + } + else if (srcFormat == GL_BGRA && ((srcType == GL_UNSIGNED_BYTE && littleEndian) || + srcType == GL_UNSIGNED_INT_8_8_8_8_REV)) + { + gtfRet = &_mesa_texformat_argb8888; + } + else + { + gtfRet = _dri_texformat_argb8888; + } + + return gtfRet; +} + +static const struct gl_texture_format *r700ChooseTextureFormat(GLcontext * ctx, + GLint + internalFormat, + GLenum format, + GLenum type) +{ + struct gl_texture_format * gtfRet = NULL; + + context_t *context = R700_CONTEXT(ctx); + + const GLboolean do32bpt = (context->texture_depth == DRI_CONF_TEXTURE_DEPTH_32); + + const GLboolean force16bpt = (context->texture_depth == DRI_CONF_TEXTURE_DEPTH_FORCE_16); + + (void)format; + + switch (internalFormat) + { + case 4: + case GL_RGBA: + case GL_COMPRESSED_RGBA: + switch (type) + { + case GL_UNSIGNED_INT_10_10_10_2: + case GL_UNSIGNED_INT_2_10_10_10_REV: + gtfRet = do32bpt ? _dri_texformat_argb8888 : + _dri_texformat_argb1555; + break; + case GL_UNSIGNED_SHORT_4_4_4_4: + case GL_UNSIGNED_SHORT_4_4_4_4_REV: + gtfRet = _dri_texformat_argb4444; + break; + case GL_UNSIGNED_SHORT_5_5_5_1: + case GL_UNSIGNED_SHORT_1_5_5_5_REV: + gtfRet = _dri_texformat_argb1555; + break; + default: + gtfRet = do32bpt ? r700Choose8888TexFormat(format, type) : + _dri_texformat_argb4444; + break; + } + break; + + case 3: + case GL_RGB: + case GL_COMPRESSED_RGB: + switch (type) + { + case GL_UNSIGNED_SHORT_4_4_4_4: + case GL_UNSIGNED_SHORT_4_4_4_4_REV: + gtfRet = _dri_texformat_argb4444; + break; + case GL_UNSIGNED_SHORT_5_5_5_1: + case GL_UNSIGNED_SHORT_1_5_5_5_REV: + gtfRet = _dri_texformat_argb1555; + break; + case GL_UNSIGNED_SHORT_5_6_5: + case GL_UNSIGNED_SHORT_5_6_5_REV: + gtfRet = _dri_texformat_rgb565; + break; + default: + gtfRet = do32bpt ? _dri_texformat_argb8888 : + _dri_texformat_rgb565; + break; + } + + case GL_RGBA8: + case GL_RGB10_A2: + case GL_RGBA12: + case GL_RGBA16: + gtfRet = !force16bpt ? r700Choose8888TexFormat(format, type) : + _dri_texformat_argb4444; + break; + + case GL_RGBA4: + case GL_RGBA2: + gtfRet = _dri_texformat_argb4444; + break; + + case GL_RGB5_A1: + gtfRet = _dri_texformat_argb1555; + break; + + case GL_RGB8: + case GL_RGB10: + case GL_RGB12: + case GL_RGB16: + gtfRet = !force16bpt ? _dri_texformat_argb8888 : + _dri_texformat_rgb565; + break; + + case GL_RGB5: + case GL_RGB4: + case GL_R3_G3_B2: + gtfRet = _dri_texformat_rgb565; + break; + + case GL_ALPHA: + case GL_ALPHA4: + case GL_ALPHA8: + case GL_ALPHA12: + case GL_ALPHA16: + case GL_COMPRESSED_ALPHA: + gtfRet = _dri_texformat_a8; + break; + + case 1: + case GL_LUMINANCE: + case GL_LUMINANCE4: + case GL_LUMINANCE8: + case GL_LUMINANCE12: + case GL_LUMINANCE16: + case GL_COMPRESSED_LUMINANCE: + gtfRet = _dri_texformat_l8; + break; + + case 2: + case GL_LUMINANCE_ALPHA: + case GL_LUMINANCE4_ALPHA4: + case GL_LUMINANCE6_ALPHA2: + case GL_LUMINANCE8_ALPHA8: + case GL_LUMINANCE12_ALPHA4: + case GL_LUMINANCE12_ALPHA12: + case GL_LUMINANCE16_ALPHA16: + case GL_COMPRESSED_LUMINANCE_ALPHA: + gtfRet = _dri_texformat_al88; + break; + + case GL_INTENSITY: + case GL_INTENSITY4: + case GL_INTENSITY8: + case GL_INTENSITY12: + case GL_INTENSITY16: + case GL_COMPRESSED_INTENSITY: + gtfRet = _dri_texformat_i8; + break; + + case GL_YCBCR_MESA: + if (type == GL_UNSIGNED_SHORT_8_8_APPLE || + type == GL_UNSIGNED_BYTE) + { + gtfRet = &_mesa_texformat_ycbcr; + } + else + { + gtfRet = &_mesa_texformat_ycbcr_rev; + } + break; + + case GL_RGB_S3TC: + case GL_RGB4_S3TC: + case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: + gtfRet = &_mesa_texformat_rgb_dxt1; + break; + + case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: + gtfRet = &_mesa_texformat_rgba_dxt1; + break; + + case GL_RGBA_S3TC: + case GL_RGBA4_S3TC: + case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: + gtfRet = &_mesa_texformat_rgba_dxt3; + break; + + case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: + gtfRet = &_mesa_texformat_rgba_dxt5; + break; + + case GL_ALPHA16F_ARB: + gtfRet = &_mesa_texformat_alpha_float16; + break; + case GL_ALPHA32F_ARB: + gtfRet = &_mesa_texformat_alpha_float32; + break; + case GL_LUMINANCE16F_ARB: + gtfRet = &_mesa_texformat_luminance_float16; + break; + case GL_LUMINANCE32F_ARB: + gtfRet = &_mesa_texformat_luminance_float32; + break; + case GL_LUMINANCE_ALPHA16F_ARB: + gtfRet = &_mesa_texformat_luminance_alpha_float16; + break; + case GL_LUMINANCE_ALPHA32F_ARB: + gtfRet = &_mesa_texformat_luminance_alpha_float32; + break; + case GL_INTENSITY16F_ARB: + gtfRet = &_mesa_texformat_intensity_float16; + break; + case GL_INTENSITY32F_ARB: + gtfRet = &_mesa_texformat_intensity_float32; + break; + case GL_RGB16F_ARB: + gtfRet = &_mesa_texformat_rgba_float16; + break; + case GL_RGB32F_ARB: + gtfRet = &_mesa_texformat_rgba_float32; + break; + case GL_RGBA16F_ARB: + gtfRet = &_mesa_texformat_rgba_float16; + break; + case GL_RGBA32F_ARB: + gtfRet = &_mesa_texformat_rgba_float32; + break; + + case GL_DEPTH_COMPONENT: + case GL_DEPTH_COMPONENT16: + case GL_DEPTH_COMPONENT24: + case GL_DEPTH_COMPONENT32: + gtfRet = &_mesa_texformat_z16; + break; + default: + _mesa_problem(ctx, + "unexpected internalFormat 0x%x in r700ChooseTextureFormat", + (int)internalFormat); + gtfRet = NULL; + break; + } + + return gtfRet; +} + +static r700TexObjPtr r700AllocTexObj(struct gl_texture_object *texObj) +{ + r700TexObjPtr t; + + t = CALLOC_STRUCT(r700_tex_obj); + texObj->DriverData = t; + if (t != NULL) + { + /* Initialize non-image-dependent parts of the state: + */ + t->base.tObj = texObj; + t->border_fallback = GL_FALSE; + + make_empty_list(&t->base); + + /* Init text object to default states. */ + t->texture_state.SQ_TEX_RESOURCE0.u32All = 0; + SETfield(t->texture_state.SQ_TEX_RESOURCE0.u32All, SQ_TEX_DIM_2D, DIM_shift, DIM_mask); + SETfield(t->texture_state.SQ_TEX_RESOURCE0.u32All, ARRAY_LINEAR_GENERAL, + SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift, SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask); + CLEARbit(t->texture_state.SQ_TEX_RESOURCE0.u32All, TILE_TYPE_bit); + + t->texture_state.SQ_TEX_RESOURCE1.u32All = 0; + SETfield(t->texture_state.SQ_TEX_RESOURCE1.u32All, FMT_8_8_8_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + t->texture_state.SQ_TEX_RESOURCE2.u32All = 0; + t->texture_state.SQ_TEX_RESOURCE3.u32All = 0; + + t->texture_state.SQ_TEX_RESOURCE4.u32All = 0; + SETfield(t->texture_state.SQ_TEX_RESOURCE4.u32All, SQ_FORMAT_COMP_UNSIGNED, + FORMAT_COMP_X_shift, FORMAT_COMP_X_mask); + SETfield(t->texture_state.SQ_TEX_RESOURCE4.u32All, SQ_FORMAT_COMP_UNSIGNED, + FORMAT_COMP_Y_shift, FORMAT_COMP_Y_mask); + SETfield(t->texture_state.SQ_TEX_RESOURCE4.u32All, SQ_FORMAT_COMP_UNSIGNED, + FORMAT_COMP_Z_shift, FORMAT_COMP_Z_mask); + SETfield(t->texture_state.SQ_TEX_RESOURCE4.u32All, SQ_FORMAT_COMP_UNSIGNED, + FORMAT_COMP_W_shift, FORMAT_COMP_W_mask); + SETfield(t->texture_state.SQ_TEX_RESOURCE4.u32All, SQ_NUM_FORMAT_NORM, + SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift, SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_mask); + CLEARbit(t->texture_state.SQ_TEX_RESOURCE4.u32All, SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit); + CLEARbit(t->texture_state.SQ_TEX_RESOURCE4.u32All, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit); + SETfield(t->texture_state.SQ_TEX_RESOURCE4.u32All, SQ_ENDIAN_NONE, + SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift, SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_mask); + SETfield(t->texture_state.SQ_TEX_RESOURCE4.u32All, 1, REQUEST_SIZE_shift, REQUEST_SIZE_mask); + t->texture_state.SQ_TEX_RESOURCE4.u32All |= SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift + |SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift + |SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift + |SQ_SEL_W << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift; + SETfield(t->texture_state.SQ_TEX_RESOURCE4.u32All, 0, BASE_LEVEL_shift, BASE_LEVEL_mask); /* mip-maps */ + + t->texture_state.SQ_TEX_RESOURCE5.u32All = 0; + + t->texture_state.SQ_TEX_RESOURCE6.u32All = 0; + + SETfield(t->texture_state.SQ_TEX_RESOURCE6.u32All, SQ_TEX_VTX_VALID_TEXTURE, + SQ_TEX_RESOURCE_WORD6_0__TYPE_shift, SQ_TEX_RESOURCE_WORD6_0__TYPE_mask); + + /* Initialize sampler registers */ + t->sampler_state.SQ_TEX_SAMPLER0.u32All = 0; + t->sampler_state.SQ_TEX_SAMPLER0.u32All |= + SQ_TEX_WRAP << SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift + |SQ_TEX_WRAP << CLAMP_Y_shift + |SQ_TEX_WRAP << CLAMP_Z_shift + |SQ_TEX_XY_FILTER_POINT << XY_MAG_FILTER_shift + |SQ_TEX_XY_FILTER_POINT << XY_MIN_FILTER_shift + |SQ_TEX_Z_FILTER_NONE << Z_FILTER_shift + |SQ_TEX_Z_FILTER_NONE << MIP_FILTER_shift + |SQ_TEX_BORDER_COLOR_TRANS_BLACK << BORDER_COLOR_TYPE_shift; + + t->sampler_state.SQ_TEX_SAMPLER1.u32All = 0x7FF << MAX_LOD_shift; + + t->sampler_state.SQ_TEX_SAMPLER2.u32All = 0; + SETbit(t->sampler_state.SQ_TEX_SAMPLER2.u32All, SQ_TEX_SAMPLER_WORD2_0__TYPE_bit); + } + + return t; +} + +static GLboolean +r700ValidateClientStorage(GLcontext * ctx, GLenum target, + GLint internalFormat, + GLint srcWidth, GLint srcHeight, + GLenum format, GLenum type, const void *pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) +{ + if (!ctx->Unpack.ClientStorage) + { + return 0; + } + + if (ctx->_ImageTransferState || + texImage->IsCompressed || texObj->GenerateMipmap) + { + return 0; + } + + /* This list is incomplete, may be different on ppc??? + */ + switch (internalFormat) + { + case GL_RGBA: + if (format == GL_BGRA && type == GL_UNSIGNED_INT_8_8_8_8_REV) + { + texImage->TexFormat = _dri_texformat_argb8888; + } + else + { + return 0; + } + break; + + case GL_RGB: + if (format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5) + { + texImage->TexFormat = _dri_texformat_rgb565; + } + else + { + return 0; + } + break; + + case GL_YCBCR_MESA: + if (format == GL_YCBCR_MESA && + type == GL_UNSIGNED_SHORT_8_8_REV_APPLE) + { + texImage->TexFormat = &_mesa_texformat_ycbcr_rev; + } + else if( format == GL_YCBCR_MESA && + (type == GL_UNSIGNED_SHORT_8_8_APPLE || type == GL_UNSIGNED_BYTE)) + { + texImage->TexFormat = &_mesa_texformat_ycbcr; + } + else + { + return 0; + } + break; + + default: + return 0; + } + + /* Could deal with these packing issues, but currently don't: + */ + if (packing->SkipPixels || + packing->SkipRows || packing->SwapBytes || packing->LsbFirst) + { + return 0; + } + + GLint srcRowStride = _mesa_image_row_stride(packing, srcWidth, format, type); + + /* Have validated that _mesa_transfer_teximage would be a straight + * memcpy at this point. NOTE: future calls to TexSubImage will + * overwrite the client data. This is explicitly mentioned in the + * extension spec. + */ + texImage->Data = (void *)pixels; + texImage->IsClientData = GL_TRUE; + texImage->RowStride = srcRowStride / texImage->TexFormat->TexelBytes; + + return 1; +} + +static void r700TexImage1D(GLcontext * ctx, GLenum target, GLint level, + GLint internalFormat, + GLint width, GLint border, + GLenum format, GLenum type, const GLvoid * pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) +{ +} + +static void r700TexImage2D(GLcontext * ctx, GLenum target, GLint level, + GLint internalFormat, + GLint width, GLint height, GLint border, + GLenum format, GLenum type, const GLvoid * pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) +{ + r700TexObjPtr r700t = (r700TexObjPtr) texObj->DriverData; + + driTextureObject *t = (driTextureObject *) texObj->DriverData; + GLuint face; + + /* which cube face or ordinary 2D image */ + switch (target) + { + case GL_TEXTURE_CUBE_MAP_POSITIVE_X: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: + face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X; + ASSERT(face < 6); + break; + default: + face = 0; + } + + if (t != NULL) + { + driSwapOutTextureObject(t); + } + else + { + t = (driTextureObject *) r700AllocTexObj(texObj); + if (!t) + { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D"); + return; + } + } + + texImage->IsClientData = GL_FALSE; + + if (r700ValidateClientStorage(ctx, target, + internalFormat, + width, height, + format, type, pixels, + packing, texObj, texImage)) + { + /* client maintained surface */ + } + else + { + /* Normal path: copy (to cached memory) and eventually upload + * via another copy to GART memory and then a blit... Could + * eliminate one copy by going straight to (permanent) GART. + * + * Note, this will call r700ChooseTextureFormat. + */ + _mesa_store_teximage2d(ctx, target, level, internalFormat, + width, height, border, format, type, + pixels, &ctx->Unpack, texObj, texImage); + + t->dirty_images[face] |= (1 << level); + + /* mesa dirty_images is not correct, so use own one for now, review it later. */ + r700t->my_dirty_images[face] |= (1 << level); + } +} + +static void r700TexImage3D(GLcontext * ctx, GLenum target, GLint level, + GLint internalFormat, + GLint width, GLint height, GLint depth, + GLint border, + GLenum format, GLenum type, const GLvoid * pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) +{ +} + +static void r700TexSubImage1D(GLcontext * ctx, GLenum target, GLint level, + GLint xoffset, + GLsizei width, + GLenum format, GLenum type, + const GLvoid * pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) +{ + +} + +static void r700TexSubImage2D(GLcontext * ctx, GLenum target, GLint level, + GLint xoffset, GLint yoffset, + GLsizei width, GLsizei height, + GLenum format, GLenum type, + const GLvoid * pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) +{ +} + +static void r700TexSubImage3D(GLcontext * ctx, GLenum target, GLint level, + GLint xoffset, GLint yoffset, GLint zoffset, + GLsizei width, GLsizei height, GLsizei depth, + GLenum format, GLenum type, + const GLvoid * pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) +{ +} + +/** + * Allocate a new texture object. + * Called via ctx->Driver.NewTextureObject. + * Note: this function will be called during context creation to + * allocate the default texture objects. + * Note: we could use containment here to 'derive' the driver-specific + * texture object from the core mesa gl_texture_object. Not done at this time. + */ +static struct gl_texture_object *r700NewTextureObject(GLcontext * ctx, + GLuint name, + GLenum target) +{ + context_t *context = R700_CONTEXT(ctx); + + struct gl_texture_object *obj; + + obj = _mesa_new_texture_object(ctx, name, target); + if (!obj) + { + return NULL; + } + + //obj->MaxAnisotropy = context->initialMaxAnisotropy; + + r700AllocTexObj(obj); + + return obj; +} + +static void r700BindTexture(GLcontext * ctx, GLenum target, + struct gl_texture_object *texObj) +{ + if ((target == GL_TEXTURE_1D) + || (target == GL_TEXTURE_2D) + || (target == GL_TEXTURE_3D) + || (target == GL_TEXTURE_CUBE_MAP) + || (target == GL_TEXTURE_RECTANGLE_NV)) + { + assert(texObj->DriverData != NULL); + } +} + +static void r700DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj) +{ +} + +static void r700SetTexMinFilter(r700TexObjPtr t, GLenum minf) +{ + switch (minf) + { + case GL_NEAREST: + + SETfield(t->sampler_state.SQ_TEX_SAMPLER0.u32All, TEX_XYFilter_Point, + XY_MIN_FILTER_shift, XY_MIN_FILTER_mask); + + SETfield(t->sampler_state.SQ_TEX_SAMPLER0.u32All, TEX_MipFilter_None, + MIP_FILTER_shift, MIP_FILTER_mask); + break; + case GL_LINEAR: + + SETfield(t->sampler_state.SQ_TEX_SAMPLER0.u32All, TEX_XYFilter_Linear, + XY_MIN_FILTER_shift, XY_MIN_FILTER_mask); + + SETfield(t->sampler_state.SQ_TEX_SAMPLER0.u32All, TEX_MipFilter_None, + MIP_FILTER_shift, MIP_FILTER_mask); + break; + case GL_NEAREST_MIPMAP_NEAREST: + + SETfield(t->sampler_state.SQ_TEX_SAMPLER0.u32All, TEX_XYFilter_Point, + XY_MIN_FILTER_shift, XY_MIN_FILTER_mask); + + SETfield(t->sampler_state.SQ_TEX_SAMPLER0.u32All, TEX_MipFilter_Point, + MIP_FILTER_shift, MIP_FILTER_mask); + break; + case GL_LINEAR_MIPMAP_NEAREST: + + SETfield(t->sampler_state.SQ_TEX_SAMPLER0.u32All, TEX_XYFilter_Linear, + XY_MIN_FILTER_shift, XY_MIN_FILTER_mask); + + SETfield(t->sampler_state.SQ_TEX_SAMPLER0.u32All, TEX_MipFilter_Point, + MIP_FILTER_shift, MIP_FILTER_mask); + break; + case GL_NEAREST_MIPMAP_LINEAR: + + SETfield(t->sampler_state.SQ_TEX_SAMPLER0.u32All, TEX_XYFilter_Point, + XY_MIN_FILTER_shift, XY_MIN_FILTER_mask); + + SETfield(t->sampler_state.SQ_TEX_SAMPLER0.u32All, TEX_MipFilter_Linear, + MIP_FILTER_shift, MIP_FILTER_mask); + break; + case GL_LINEAR_MIPMAP_LINEAR: + + SETfield(t->sampler_state.SQ_TEX_SAMPLER0.u32All, TEX_XYFilter_Linear, + XY_MIN_FILTER_shift, XY_MIN_FILTER_mask); + + SETfield(t->sampler_state.SQ_TEX_SAMPLER0.u32All, TEX_MipFilter_Linear, + MIP_FILTER_shift, MIP_FILTER_mask); + break; + default: + /* no case */ + break; + } +} + +static void r700SetTexMagFilter(r700TexObjPtr t, GLenum magf) +{ + switch(magf) + { + case GL_NEAREST: + + SETfield(t->sampler_state.SQ_TEX_SAMPLER0.u32All, TEX_XYFilter_Point, + XY_MAG_FILTER_shift, XY_MAG_FILTER_mask); + + break; + case GL_LINEAR: + + SETfield(t->sampler_state.SQ_TEX_SAMPLER0.u32All, TEX_XYFilter_Linear, + XY_MAG_FILTER_shift, XY_MAG_FILTER_mask); + + break; + default: + break; + } +} + +static unsigned int r700GetWrapMode(GLenum wrapmode) +{ + switch(wrapmode) + { + case GL_REPEAT: + return SQ_TEX_WRAP; + case GL_CLAMP: + return SQ_TEX_CLAMP_HALF_BORDER; + case GL_CLAMP_TO_EDGE: + return SQ_TEX_CLAMP_LAST_TEXEL; + case GL_CLAMP_TO_BORDER: + return SQ_TEX_CLAMP_BORDER; + case GL_MIRRORED_REPEAT: + return SQ_TEX_MIRROR_ONCE_HALF_BORDER; + case GL_MIRROR_CLAMP_EXT: + return SQ_TEX_MIRROR; + case GL_MIRROR_CLAMP_TO_EDGE_EXT: + return SQ_TEX_MIRROR_ONCE_BORDER; + case GL_MIRROR_CLAMP_TO_BORDER_EXT: + return SQ_TEX_MIRROR_ONCE_LAST_TEXEL; + default: + _mesa_problem(NULL, "bad wrap mode in %s", __FUNCTION__); + return 0; + } +} + +static void r700TexParameter(GLcontext * ctx, GLenum target, + struct gl_texture_object *texObj, + GLenum pname, const GLfloat * params) +{ + r700TexObjPtr t = (r700TexObjPtr) texObj->DriverData; + + switch (pname) + { + case GL_TEXTURE_MIN_FILTER: + r700SetTexMinFilter(t, texObj->MinFilter); + break; + case GL_TEXTURE_MAG_FILTER: + r700SetTexMagFilter(t, texObj->MagFilter); + break; + case GL_TEXTURE_MAX_ANISOTROPY_EXT: + + r700SetTexMinFilter(t, texObj->MinFilter); + r700SetTexMagFilter(t, texObj->MagFilter); + + break; + + case GL_TEXTURE_WRAP_S: + SETfield(t->sampler_state.SQ_TEX_SAMPLER0.u32All, r700GetWrapMode(texObj->WrapS), + SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift, SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_mask); + break; + case GL_TEXTURE_WRAP_T: + SETfield(t->sampler_state.SQ_TEX_SAMPLER0.u32All, r700GetWrapMode(texObj->WrapT), + CLAMP_Y_shift, CLAMP_Y_mask); + break; + case GL_TEXTURE_WRAP_R: + SETfield(t->sampler_state.SQ_TEX_SAMPLER0.u32All, r700GetWrapMode(texObj->WrapR), + CLAMP_Z_shift, CLAMP_Z_mask); + break; + + case GL_TEXTURE_BORDER_COLOR: + /* TODO : set border color regs before rendering. */ + SETfield(t->sampler_state.SQ_TEX_SAMPLER0.u32All, SQ_TEX_BORDER_COLOR_REGISTER, + BORDER_COLOR_TYPE_shift, BORDER_COLOR_TYPE_mask); + break; + + case GL_TEXTURE_BASE_LEVEL: + case GL_TEXTURE_MAX_LEVEL: + case GL_TEXTURE_MIN_LOD: + case GL_TEXTURE_MAX_LOD: + /* TODO : we do support this, add it later. */ + driSwapOutTextureObject((driTextureObject *) t); + break; + + case GL_DEPTH_TEXTURE_MODE: + if (!texObj->Image[0][texObj->BaseLevel]) + { + return; + } + if (texObj->Image[0][texObj->BaseLevel]->TexFormat->BaseFormat + == GL_DEPTH_COMPONENT) + { + /* TODO : r700SetDepthTexMode(texObj); */ + break; + } + else + { + /* If not depth texture, just return. */ + return; + } + + default: + return; + } +} + +static void r700CompressedTexImage2D(GLcontext * ctx, GLenum target, + GLint level, GLint internalFormat, + GLint width, GLint height, GLint border, + GLsizei imageSize, const GLvoid * data, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) +{ +} + +static void r700CompressedTexSubImage2D(GLcontext * ctx, GLenum target, + GLint level, GLint xoffset, + GLint yoffset, GLsizei width, + GLsizei height, GLenum format, + GLsizei imageSize, const GLvoid * data, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) +{ +} + +void r700SetTexOffset(__DRIcontext * pDRICtx, GLint texname, + unsigned long long offset, GLint depth, GLuint pitch) +{ +} + +static GLboolean r700UpdateTextureUnit(GLcontext * ctx, int unit) +{ + return GL_TRUE; +} + +static GLboolean r700EnableTextureRect(GLcontext * ctx, int unit) +{ + return GL_TRUE; +} + +static GLboolean r700EnableTexture3D(GLcontext * ctx, int unit) +{ + return GL_TRUE; +} + +static GLboolean r700EnableTextureCube(GLcontext * ctx, int unit) +{ + return GL_TRUE; +} + +static GLboolean r700UpdateTexture(GLcontext * ctx, int unit) +{ + context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(context->chipobj.pvChipObj); + + struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; + struct gl_texture_object *tObj = texUnit->_Current; + r700TexObjPtr t = (r700TexObjPtr) tObj->DriverData; + + if( r700->texture_states.textures[unit] != &(t->texture_state) ) + { + if(NULL != r700->texture_states.textures[unit]) + { /* there is an old one. */ + } + + r700->texture_states.textures[unit] = &(t->texture_state); + r700->texture_states.samplers[unit] = &(t->sampler_state); + driUpdateTextureLRU((driTextureObject *) t); /* XXX: should be locked! */ + } + + return GL_TRUE; +} + +void r700UpdateTextureState(context_t * context) +{ + GLboolean bRet; + GLuint unit; + GLcontext * ctx = context->ctx; + struct gl_texture_unit *texUnit; + + for (unit = 0; unit < 8; unit++) + { + texUnit = &ctx->Texture.Unit[unit]; + + if (texUnit->_ReallyEnabled & (TEXTURE_RECT_BIT)) + { + bRet = (r700EnableTextureRect(ctx, unit) && + r700UpdateTexture(ctx, unit)); + } + else if (texUnit->_ReallyEnabled & (TEXTURE_1D_BIT | TEXTURE_2D_BIT)) + { + bRet = (r700EnableTexture2D(ctx, unit) && + r700UpdateTexture(ctx, unit)); + } + else if (texUnit->_ReallyEnabled & (TEXTURE_3D_BIT)) + { + bRet = (r700EnableTexture3D(ctx, unit) && + r700UpdateTexture(ctx, unit)); + } + else if (texUnit->_ReallyEnabled & (TEXTURE_CUBE_BIT)) + { + bRet = (r700EnableTextureCube(ctx, unit) && + r700UpdateTexture(ctx, unit)); + } + else if (texUnit->_ReallyEnabled) + { + bRet = GL_FALSE; + } + else + { + bRet = GL_TRUE; + } + + if (!bRet) + { + _mesa_warning(ctx, "failed to update texture state for unit %d.\n", unit); + } + } +} + +void r700InitTextureFuncs(struct dd_function_table *functions) +{ + /* Note: we only plug in the functions we implement in the driver + * since _mesa_init_driver_functions() was already called. + */ + functions->ChooseTextureFormat = r700ChooseTextureFormat; + functions->TexImage1D = r700TexImage1D; + functions->TexImage2D = r700TexImage2D; + functions->TexImage3D = r700TexImage3D; + functions->TexSubImage1D = r700TexSubImage1D; + functions->TexSubImage2D = r700TexSubImage2D; + functions->TexSubImage3D = r700TexSubImage3D; + functions->NewTextureObject = r700NewTextureObject; + functions->BindTexture = r700BindTexture; + functions->DeleteTexture = r700DeleteTexture; + functions->IsTextureResident = driIsTextureResident; + + functions->TexParameter = r700TexParameter; + + functions->CompressedTexImage2D = r700CompressedTexImage2D; + functions->CompressedTexSubImage2D = r700CompressedTexSubImage2D; + + driInitTextureFormats(); +} + + diff --git a/src/mesa/drivers/dri/r600/r700_tex.h b/src/mesa/drivers/dri/r600/r700_tex.h new file mode 100644 index 0000000000..2350029530 --- /dev/null +++ b/src/mesa/drivers/dri/r600/r700_tex.h @@ -0,0 +1,92 @@ +/* + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Authors: + * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com> + */ + +#ifndef __r700_TEX_H__ +#define __r700_TEX_H__ + +#include "texmem.h" + +#include "r700_chip.h" + +/* TODO : review this after texture load code. */ +#define R700_BLIT_WIDTH_BYTES 1024 +/* The BASE_ADDRESS and MIP_ADDRESS fields are 256-byte-aligned */ +#define R700_TEXTURE_ALIGNMENT_MASK 0x255 +/* Texel pitch is 8 alignment. */ +#define R700_TEXEL_PITCH_ALIGNMENT_MASK 0x7 + +#define R700_MAX_TEXTURE_UNITS 8 /* TODO : should be 16, lets make it work, review later */ + +typedef struct r700_tex_obj r700TexObj, *r700TexObjPtr; + +/* Texture object in locally shared texture space. + */ +struct r700_tex_obj +{ + driTextureObject base; + + /* r300 tex obj */ + GLuint bufAddr; + GLboolean image_override; + GLuint pitch; + GLuint filter; + GLuint filter_1; + GLuint pitch_reg; + GLuint size; + GLuint format; + GLuint offset; + GLuint unknown4; + GLuint unknown5; + GLboolean border_fallback; + GLuint tile_bits; + + /* r700 texture states */ + TEXTURE_STATE_STRUCT texture_state; + SAMPLER_STATE_STRUCT sampler_state; + + GLuint texel_pitch[6][RADEON_MAX_TEXTURE_LEVELS]; + GLuint level_offset[6][RADEON_MAX_TEXTURE_LEVELS]; + GLuint byte_per_texel; + GLuint src_width_in_pexel[6][RADEON_MAX_TEXTURE_LEVELS]; + GLuint src_hight_in_pexel[6][RADEON_MAX_TEXTURE_LEVELS]; + + GLuint my_dirty_images[6]; /* TODO : review */ +}; + +extern GLuint r700GetTexObjSize(void); +extern void r700UpdateTextureState(context_t * context); + +extern void r700SetTexOffset(__DRIcontext *pDRICtx, + GLint texname, + unsigned long long offset, + GLint depth, + GLuint pitch); + +extern void r700DestroyTexObj(context_t rmesa, r700TexObjPtr t); + +extern void r700InitTextureFuncs(struct dd_function_table *functions); + +#endif /* __r700_TEX_H__ */ diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c new file mode 100644 index 0000000000..bfcfce5950 --- /dev/null +++ b/src/mesa/drivers/dri/r600/r700_vertprog.c @@ -0,0 +1,459 @@ +/* + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Authors: + * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com> + */ + + +#include <stdio.h> +#include <stdarg.h> +#include <stdlib.h> +#include <string.h> +#include <math.h> + +#include "main/imports.h" +#include "main/mtypes.h" + +#include "tnl/t_context.h" +#include "shader/prog_parameter.h" +#include "shader/prog_statevars.h" + +#include "r700_interface.h" + +#include "r700_chip.h" +#include "r700_debug.h" +#include "r700_vertprog.h" +#include "r700_emit.h" + +unsigned int Map_Vertex_Output(r700_AssemblerBase *pAsm, + struct gl_vertex_program *mesa_vp, + unsigned int unStart) +{ + unsigned int i; + unsigned int unBit; + unsigned int unTotal = unStart; + + //!!!!!!! THE ORDER MATCH FS INPUT + + unBit = 1 << VERT_RESULT_HPOS; + if(mesa_vp->Base.OutputsWritten & unBit) + { + pAsm->ucVP_OutputMap[VERT_RESULT_HPOS] = unTotal++; + } + + unBit = 1 << VERT_RESULT_COL0; + if(mesa_vp->Base.OutputsWritten & unBit) + { + pAsm->ucVP_OutputMap[VERT_RESULT_COL0] = unTotal++; + } + + unBit = 1 << VERT_RESULT_COL1; + if(mesa_vp->Base.OutputsWritten & unBit) + { + pAsm->ucVP_OutputMap[VERT_RESULT_COL1] = unTotal++; + } + + //TODO : dealing back face. + //unBit = 1 << VERT_RESULT_BFC0; + //if(mesa_vp->Base.OutputsWritten & unBit) + //{ + // pAsm->ucVP_OutputMap[VERT_RESULT_COL0] = unTotal++; + //} + + //unBit = 1 << VERT_RESULT_BFC1; + //if(mesa_vp->Base.OutputsWritten & unBit) + //{ + // pAsm->ucVP_OutputMap[VERT_RESULT_COL1] = unTotal++; + //} + + //TODO : dealing fog. + //unBit = 1 << VERT_RESULT_FOGC; + //if(mesa_vp->Base.OutputsWritten & unBit) + //{ + // pAsm->ucVP_OutputMap[VERT_RESULT_FOGC] = unTotal++; + //} + + //TODO : dealing point size. + //unBit = 1 << VERT_RESULT_PSIZ; + //if(mesa_vp->Base.OutputsWritten & unBit) + //{ + // pAsm->ucVP_OutputMap[VERT_RESULT_PSIZ] = unTotal++; + //} + + for(i=0; i<8; i++) + { + unBit = 1 << (VERT_RESULT_TEX0 + i); + if(mesa_vp->Base.OutputsWritten & unBit) + { + pAsm->ucVP_OutputMap[VERT_RESULT_TEX0 + i] = unTotal++; + } + } + + return (unTotal - unStart); +} + +unsigned int Map_Vertex_Input(r700_AssemblerBase *pAsm, + struct gl_vertex_program *mesa_vp, + unsigned int unStart) +{ + int i; + unsigned int unBit; + unsigned int unTotal = unStart; + for(i=0; i<VERT_ATTRIB_MAX; i++) + { + unBit = 1 << i; + if(mesa_vp->Base.InputsRead & unBit) + { + pAsm->ucVP_AttributeMap[i] = unTotal++; + } + } + return (unTotal - unStart); +} + +GLboolean Process_Vertex_Program_Vfetch_Instructions( + struct r700_vertex_program *vp, + struct gl_vertex_program *mesa_vp) +{ + int i; + unsigned int unBit; + VTX_FETCH_METHOD vtxFetchMethod; + vtxFetchMethod.bEnableMini = GL_FALSE; + vtxFetchMethod.mega_fetch_remainder = 0; + + for(i=0; i<VERT_ATTRIB_MAX; i++) + { + unBit = 1 << i; + if(mesa_vp->Base.InputsRead & unBit) + { + assemble_vfetch_instruction(&vp->r700AsmCode, + i, + vp->r700AsmCode.ucVP_AttributeMap[i], + vp->aos_desc[i].size, + vp->aos_desc[i].type, + &vtxFetchMethod); + } + } + + return GL_TRUE; +} + +void Map_Vertex_Program(struct r700_vertex_program *vp, + struct gl_vertex_program *mesa_vp) +{ + GLuint ui; + r700_AssemblerBase *pAsm = &(vp->r700AsmCode); + unsigned int num_inputs; + + // R0 will always be used for index into vertex buffer + pAsm->number_used_registers = 1; + pAsm->starting_vfetch_register_number = pAsm->number_used_registers; + + // Map Inputs: Add 1 to mapping since R0 is used for index + num_inputs = Map_Vertex_Input(pAsm, mesa_vp, pAsm->number_used_registers); + pAsm->number_used_registers += num_inputs; + + // Create VFETCH instructions for inputs + if (GL_TRUE != Process_Vertex_Program_Vfetch_Instructions(vp, mesa_vp) ) + { + r700_error(ERROR_ASM_VTX_CLAUSE, "Calling Process_Vertex_Program_Vfetch_Instructions return error. \n"); + return; //error + } + + // Map Outputs + pAsm->number_of_exports = Map_Vertex_Output(pAsm, mesa_vp, pAsm->number_used_registers); + + pAsm->starting_export_register_number = pAsm->number_used_registers; + + pAsm->number_used_registers += pAsm->number_of_exports; + + pAsm->pucOutMask = (unsigned char*) MALLOC(pAsm->number_of_exports); + + for(ui=0; ui<pAsm->number_of_exports; ui++) + { + pAsm->pucOutMask[ui] = 0x0; + } + + /* Map temporary registers (GPRs) */ + pAsm->starting_temp_register_number = pAsm->number_used_registers; + + if(mesa_vp->Base.NumNativeTemporaries >= mesa_vp->Base.NumTemporaries) + { /* arb uses NumNativeTemporaries */ + pAsm->number_used_registers += mesa_vp->Base.NumNativeTemporaries; + } + else + { /* fix func t_vp uses NumTemporaries */ + pAsm->number_used_registers += mesa_vp->Base.NumTemporaries; + } + + pAsm->uFirstHelpReg = pAsm->number_used_registers; +} + +GLboolean Find_Instruction_Dependencies_vp(struct r700_vertex_program *vp, + struct gl_vertex_program *mesa_vp) +{ + GLuint i, j; + GLint * puiTEMPwrites; + struct prog_instruction *pILInst; + InstDeps *pInstDeps; + + puiTEMPwrites = (GLint*) MALLOC(sizeof(GLuint)*mesa_vp->Base.NumTemporaries); + for(i=0; i<mesa_vp->Base.NumTemporaries; i++) + { + puiTEMPwrites[i] = -1; + } + + pInstDeps = (InstDeps*)MALLOC(sizeof(InstDeps)*mesa_vp->Base.NumInstructions); + + for(i=0; i<mesa_vp->Base.NumInstructions; i++) + { + pInstDeps[i].nDstDep = -1; + pILInst = &(mesa_vp->Base.Instructions[i]); + + //Dst + if(pILInst->DstReg.File == PROGRAM_TEMPORARY) + { + //Set lastwrite for the temp + puiTEMPwrites[pILInst->DstReg.Index] = i; + } + + //Src + for(j=0; j<3; j++) + { + if(pILInst->SrcReg[j].File == PROGRAM_TEMPORARY) + { + //Set dep. + pInstDeps[i].nSrcDeps[j] = puiTEMPwrites[pILInst->SrcReg[j].Index]; + } + else + { + pInstDeps[i].nSrcDeps[j] = -1; + } + } + } + + vp->r700AsmCode.pInstDeps = pInstDeps; + + FREE(puiTEMPwrites); + + return GL_TRUE; +} + +GLboolean r700TranslateVertexShader(struct r700_vertex_program *vp, + struct gl_vertex_program *mesa_vp) +{ + //Init_Program + Init_r700_AssemblerBase(SPT_VP, &(vp->r700AsmCode), &(vp->r700Shader) ); + Map_Vertex_Program( vp, mesa_vp ); + + if(GL_FALSE == Find_Instruction_Dependencies_vp(vp, mesa_vp)) + { + return GL_FALSE; + } + + if(GL_FALSE == AssembleInstr(mesa_vp->Base.NumInstructions, + &(mesa_vp->Base.Instructions[0]), + &(vp->r700AsmCode)) ) + { + return GL_FALSE; + } + + if(GL_FALSE == Process_Vertex_Exports(&(vp->r700AsmCode), mesa_vp->Base.OutputsWritten) ) + { + return GL_FALSE; + } + + vp->r700Shader.nRegs = (vp->r700AsmCode.number_used_registers == 0) ? 0 + : (vp->r700AsmCode.number_used_registers - 1); + + vp->r700Shader.nParamExports = vp->r700AsmCode.number_of_exports; + + vp->translated = GL_TRUE; + + return GL_TRUE; +} + +void r700SelectVertexShader(GLcontext *ctx) +{ + context_t *context = R700_CONTEXT(ctx); + struct r700_vertex_program *vpc + = (struct r700_vertex_program *)ctx->VertexProgram._Current; + if (context->screen->chip.type <= CHIP_TYPE_RV670) + { + vpc->r700AsmCode.bR6xx = 1; + } + + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *vb = &tnl->vb; + + unsigned int unBit; + unsigned int i; + for(i=0; i<VERT_ATTRIB_MAX; i++) + { + unBit = 1 << i; + if(vpc->mesa_program.Base.InputsRead & unBit) /* ctx->Array.ArrayObj->xxxxxxx */ + { + vpc->aos_desc[i].size = vb->AttribPtr[i]->size; + vpc->aos_desc[i].stride = vb->AttribPtr[i]->size * sizeof(GL_FLOAT);/* when emit array, data is packed. vb->AttribPtr[i]->stride;*/ + vpc->aos_desc[i].type = GL_FLOAT; + } + } + + if(GL_FALSE == vpc->translated) + { + r700TranslateVertexShader(vpc, + &(vpc->mesa_program) ); + } +} + +void r700SetupVTXConstans(GLcontext * ctx, + unsigned int nStreamID, + unsigned int aos_offset, + unsigned int size, /* number of elements in vector */ + unsigned int stride, + unsigned int count) /* number of vectors in stream */ +{ + context_t *context = R700_CONTEXT(ctx); + uint32_t *dest; + + unsigned int uSQ_VTX_CONSTANT_WORD0_0; + unsigned int uSQ_VTX_CONSTANT_WORD1_0; + unsigned int uSQ_VTX_CONSTANT_WORD2_0 = 0; + unsigned int uSQ_VTX_CONSTANT_WORD3_0 = 0; + unsigned int uSQ_VTX_CONSTANT_WORD6_0 = 0; + + uSQ_VTX_CONSTANT_WORD0_0 = aos_offset; + uSQ_VTX_CONSTANT_WORD1_0 = count * stride - 1; + + uSQ_VTX_CONSTANT_WORD2_0 |= 0 << BASE_ADDRESS_HI_shift /* TODO */ + |stride << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift + |GetSurfaceFormat(GL_FLOAT, size, NULL) << SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift /* TODO : trace back api for initial data type, not only GL_FLOAT */ + |SQ_NUM_FORMAT_SCALED << SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift + |SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit; + + uSQ_VTX_CONSTANT_WORD3_0 |= 1 << MEM_REQUEST_SIZE_shift; + + uSQ_VTX_CONSTANT_WORD6_0 |= SQ_TEX_VTX_VALID_BUFFER << SQ_TEX_RESOURCE_WORD6_0__TYPE_shift; + + R700_CMDBUF_CHECK_SPACE(9); + R700EP3 (context, IT_SET_RESOURCE, 7); + R700E32 (context, (nStreamID + SQ_FETCH_RESOURCE_VS_OFFSET) * FETCH_RESOURCE_STRIDE); + + R700E32 (context, uSQ_VTX_CONSTANT_WORD0_0); + R700E32 (context, uSQ_VTX_CONSTANT_WORD1_0); + R700E32 (context, uSQ_VTX_CONSTANT_WORD2_0); + R700E32 (context, uSQ_VTX_CONSTANT_WORD3_0); + R700E32 (context, 0); + R700E32 (context, 0); + R700E32 (context, uSQ_VTX_CONSTANT_WORD6_0); +} + +GLboolean r700SetupVertexProgram(GLcontext * ctx) +{ + context_t *context = R700_CONTEXT(ctx); + + R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(context->chipobj.pvChipObj); + + struct r700_vertex_program *vp + = (struct r700_vertex_program *)ctx->VertexProgram._Current; + + struct gl_program_parameter_list *paramList; + unsigned int unNumParamData; + + unsigned int ui; + + if(GL_FALSE == vp->loaded) + { + if(vp->r700Shader.bNeedsAssembly == GL_TRUE) + { + Assemble( &(vp->r700Shader) ); + } + + /* Load vp to gpu */ + (context->chipobj.EmitShader)(ctx, + &(vp->shadercode), + (GLvoid *)(vp->r700Shader.pProgram), + vp->r700Shader.uShaderBinaryDWORDSize); + + vp->loaded = GL_TRUE; + } + + DumpHwBinary(DUMP_VERTEX_SHADER, (GLvoid *)(vp->r700Shader.pProgram), + vp->r700Shader.uShaderBinaryDWORDSize); + + /* TODO : enable this after MemUse fixed *= + (context->chipobj.MemUse)(context, vp->shadercode.buf->id); + */ + + r700->SQ_PGM_START_VS.u32All = (vp->shadercode.aos_offset >> 8) & 0x00FFFFFF; + + SETfield(r700->SQ_PGM_RESOURCES_VS.u32All, vp->r700Shader.nRegs + 1, + NUM_GPRS_shift, NUM_GPRS_mask); + + if(vp->r700Shader.uStackSize) /* we don't use branch for now, it should be zero. */ + { + SETfield(r700->SQ_PGM_RESOURCES_VS.u32All, vp->r700Shader.uStackSize, + STACK_SIZE_shift, STACK_SIZE_mask); + } + + SETfield(r700->SPI_VS_OUT_CONFIG.u32All, vp->r700Shader.nParamExports - 1, + VS_EXPORT_COUNT_shift, VS_EXPORT_COUNT_mask); + SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, vp->r700Shader.nParamExports, + NUM_INTERP_shift, NUM_INTERP_mask); + + /* + SETbit(r700->SPI_PS_IN_CONTROL_0.u32All, PERSP_GRADIENT_ENA_bit); + CLEARbit(r700->SPI_PS_IN_CONTROL_0.u32All, LINEAR_GRADIENT_ENA_bit); + */ + + /* sent out shader constants. */ + + paramList = vp->mesa_program.Base.Parameters; + + if(NULL != paramList) + { + _mesa_load_state_parameters(ctx, paramList); + + unNumParamData = paramList->NumParameters * 4; + + R700_CMDBUF_CHECK_SPACE(unNumParamData + 2); + R700EP3 (context, IT_SET_ALU_CONST, unNumParamData); + /* assembler map const from very beginning. */ + R700E32 (context, SQ_ALU_CONSTANT_VS_OFFSET * 4); + + unNumParamData = paramList->NumParameters; + + for(ui=0; ui<unNumParamData; ui++) + { + R700E32 (context, *((unsigned int*)&(paramList->ParameterValues[ui][0]))); + R700E32 (context, *((unsigned int*)&(paramList->ParameterValues[ui][1]))); + R700E32 (context, *((unsigned int*)&(paramList->ParameterValues[ui][2]))); + R700E32 (context, *((unsigned int*)&(paramList->ParameterValues[ui][3]))); + } + } + + return GL_TRUE; +} + + + + diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.h b/src/mesa/drivers/dri/r600/r700_vertprog.h new file mode 100644 index 0000000000..0822335c10 --- /dev/null +++ b/src/mesa/drivers/dri/r600/r700_vertprog.h @@ -0,0 +1,92 @@ +/* + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Authors: + * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com> + */ + + +#ifndef _R700_VERTPROG_H_ +#define _R700_VERTPROG_H_ + +#include "main/glheader.h" +#include "main/mtypes.h" + +#include "r700_shader.h" +#include "r700_assembler.h" + +typedef struct ArrayDesc //TEMP +{ + GLint size; //number of data element + GLenum type; //data element type + GLsizei stride; +} ArrayDesc; + +struct r700_vertex_program +{ + struct gl_vertex_program mesa_program; /* Must be first */ + + struct r700_vertex_program *next; + + r700_AssemblerBase r700AsmCode; + R700_Shader r700Shader; + + GLboolean translated; + GLboolean loaded; + + /* ... */ + + struct r600_dma_region shadercode; + ArrayDesc aos_desc[VERT_ATTRIB_MAX]; +}; + +//Internal +unsigned int Map_Vertex_Output(r700_AssemblerBase *pAsm, + struct gl_vertex_program *mesa_vp, + unsigned int unStart); +unsigned int Map_Vertex_Input(r700_AssemblerBase *pAsm, + struct gl_vertex_program *mesa_vp, + unsigned int unStart); +GLboolean Process_Vertex_Program_Vfetch_Instructions( + struct r700_vertex_program *vp, + struct gl_vertex_program *mesa_vp); +void Map_Vertex_Program(struct r700_vertex_program *vp, + struct gl_vertex_program *mesa_vp); +GLboolean Find_Instruction_Dependencies_vp(struct r700_vertex_program *vp, + struct gl_vertex_program *mesa_vp); + +/* Interface */ +extern GLboolean r700TranslateVertexShader(struct r700_vertex_program *vp, + struct gl_vertex_program *mesa_vp); + +extern void r700SelectVertexShader(GLcontext *ctx); + +extern GLboolean r700SetupVertexProgram(GLcontext * ctx); + +extern void r700SetupVTXConstans(GLcontext * ctx, + unsigned int nStreamID, + unsigned int aos_offset, + unsigned int size, /* number of elements in vector */ + unsigned int stride, + unsigned int Count); /* number of vectors in stream */ + +#endif /* _R700_VERTPROG_H_ */ diff --git a/src/mesa/drivers/dri/r600/sq_micro_reg.h b/src/mesa/drivers/dri/r600/sq_micro_reg.h new file mode 100644 index 0000000000..bfd21cef62 --- /dev/null +++ b/src/mesa/drivers/dri/r600/sq_micro_reg.h @@ -0,0 +1,2008 @@ +/* + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Contacts: + * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com> + */ + +#if !defined (_SQ_MICRO_REG_H) +#define _SQ_MICRO_REG_H + +#if defined(LITTLEENDIAN_CPU) +#elif defined(BIGENDIAN_CPU) +#else +#error "BIGENDIAN_CPU or LITTLEENDIAN_CPU must be defined" +#endif + +/* + * SQ_ALU_SRC_GPR_BASE value + */ + +#define SQ_ALU_SRC_GPR_BASE 0x00000000 + +/* + * SQ_ALU_SRC_GPR_SIZE value + */ + +#define SQ_ALU_SRC_GPR_SIZE 0x00000080 + +/* + * SQ_ALU_SRC_KCACHE0_BASE value + */ + +#define SQ_ALU_SRC_KCACHE0_BASE 0x00000080 + +/* + * SQ_ALU_SRC_KCACHE0_SIZE value + */ + +#define SQ_ALU_SRC_KCACHE0_SIZE 0x00000020 + +/* + * SQ_ALU_SRC_KCACHE1_BASE value + */ + +#define SQ_ALU_SRC_KCACHE1_BASE 0x000000a0 + +/* + * SQ_ALU_SRC_KCACHE1_SIZE value + */ + +#define SQ_ALU_SRC_KCACHE1_SIZE 0x00000020 + +/* + * SQ_ALU_SRC_CFILE_BASE value + */ + +#define SQ_ALU_SRC_CFILE_BASE 0x00000100 + +/* + * SQ_ALU_SRC_CFILE_SIZE value + */ + +#define SQ_ALU_SRC_CFILE_SIZE 0x00000100 + +/* + * SQ_SP_OP_REDUC_BEGIN value + */ + +#define SQ_SP_OP_REDUC_BEGIN 0x00000050 + +/* + * SQ_SP_OP_REDUC_END value + */ + +#define SQ_SP_OP_REDUC_END 0x00000053 + +/* + * SQ_SP_OP_TRANS_BEGIN value + */ + +#define SQ_SP_OP_TRANS_BEGIN 0x00000060 + +/* + * SQ_SP_OP_TRANS_END value + */ + +#define SQ_SP_OP_TRANS_END 0x0000007f + +/* + * SQ_CF_WORD0 struct + */ + +#define SQ_CF_WORD0_ADDR_SIZE 32 + +#define SQ_CF_WORD0_ADDR_SHIFT 0 + +#define SQ_CF_WORD0_ADDR_MASK 0xffffffff + +#define SQ_CF_WORD0_MASK \ + (SQ_CF_WORD0_ADDR_MASK) + +#define SQ_CF_WORD0_DEFAULT 0xcdcdcdcd + +#define SQ_CF_WORD0_GET_ADDR(sq_cf_word0) \ + ((sq_cf_word0 & SQ_CF_WORD0_ADDR_MASK) >> SQ_CF_WORD0_ADDR_SHIFT) + +#define SQ_CF_WORD0_SET_ADDR(sq_cf_word0_reg, addr) \ + sq_cf_word0_reg = (sq_cf_word0_reg & ~SQ_CF_WORD0_ADDR_MASK) | (addr << SQ_CF_WORD0_ADDR_SHIFT) + +#if defined(LITTLEENDIAN_CPU) + + typedef struct _sq_cf_word0_t { + unsigned int addr : SQ_CF_WORD0_ADDR_SIZE; + } sq_cf_word0_t; + +#elif defined(BIGENDIAN_CPU) + + typedef struct _sq_cf_word0_t { + unsigned int addr : SQ_CF_WORD0_ADDR_SIZE; + } sq_cf_word0_t; + +#endif + +typedef union { + unsigned int val : 32; + sq_cf_word0_t f; +} sq_cf_word0_u; + + +/* + * SQ_CF_WORD1 struct + */ + +#define SQ_CF_WORD1_POP_COUNT_SIZE 3 +#define SQ_CF_WORD1_CF_CONST_SIZE 5 +#define SQ_CF_WORD1_COND_SIZE 2 +#define SQ_CF_WORD1_COUNT_SIZE 3 +#define SQ_CF_WORD1_CALL_COUNT_SIZE 6 +#define SQ_CF_WORD1_COUNT_3_SIZE 1 +#define SQ_CF_WORD1_END_OF_PROGRAM_SIZE 1 +#define SQ_CF_WORD1_VALID_PIXEL_MODE_SIZE 1 +#define SQ_CF_WORD1_CF_INST_SIZE 7 +#define SQ_CF_WORD1_WHOLE_QUAD_MODE_SIZE 1 +#define SQ_CF_WORD1_BARRIER_SIZE 1 + +#define SQ_CF_WORD1_POP_COUNT_SHIFT 0 +#define SQ_CF_WORD1_CF_CONST_SHIFT 3 +#define SQ_CF_WORD1_COND_SHIFT 8 +#define SQ_CF_WORD1_COUNT_SHIFT 10 +#define SQ_CF_WORD1_CALL_COUNT_SHIFT 13 +#define SQ_CF_WORD1_COUNT_3_SHIFT 19 +#define SQ_CF_WORD1_END_OF_PROGRAM_SHIFT 21 +#define SQ_CF_WORD1_VALID_PIXEL_MODE_SHIFT 22 +#define SQ_CF_WORD1_CF_INST_SHIFT 23 +#define SQ_CF_WORD1_WHOLE_QUAD_MODE_SHIFT 30 +#define SQ_CF_WORD1_BARRIER_SHIFT 31 + +#define SQ_CF_WORD1_POP_COUNT_MASK 0x00000007 +#define SQ_CF_WORD1_CF_CONST_MASK 0x000000f8 +#define SQ_CF_WORD1_COND_MASK 0x00000300 +#define SQ_CF_WORD1_COUNT_MASK 0x00001c00 +#define SQ_CF_WORD1_CALL_COUNT_MASK 0x0007e000 +#define SQ_CF_WORD1_COUNT_3_MASK 0x00080000 +#define SQ_CF_WORD1_END_OF_PROGRAM_MASK 0x00200000 +#define SQ_CF_WORD1_VALID_PIXEL_MODE_MASK 0x00400000 +#define SQ_CF_WORD1_CF_INST_MASK 0x3f800000 +#define SQ_CF_WORD1_WHOLE_QUAD_MODE_MASK 0x40000000 +#define SQ_CF_WORD1_BARRIER_MASK 0x80000000 + +#define SQ_CF_WORD1_MASK \ + (SQ_CF_WORD1_POP_COUNT_MASK | \ + SQ_CF_WORD1_CF_CONST_MASK | \ + SQ_CF_WORD1_COND_MASK | \ + SQ_CF_WORD1_COUNT_MASK | \ + SQ_CF_WORD1_CALL_COUNT_MASK | \ + SQ_CF_WORD1_COUNT_3_MASK | \ + SQ_CF_WORD1_END_OF_PROGRAM_MASK | \ + SQ_CF_WORD1_VALID_PIXEL_MODE_MASK | \ + SQ_CF_WORD1_CF_INST_MASK | \ + SQ_CF_WORD1_WHOLE_QUAD_MODE_MASK | \ + SQ_CF_WORD1_BARRIER_MASK) + +#define SQ_CF_WORD1_DEFAULT 0xcdcdcdcd + +#define SQ_CF_WORD1_GET_POP_COUNT(sq_cf_word1) \ + ((sq_cf_word1 & SQ_CF_WORD1_POP_COUNT_MASK) >> SQ_CF_WORD1_POP_COUNT_SHIFT) +#define SQ_CF_WORD1_GET_CF_CONST(sq_cf_word1) \ + ((sq_cf_word1 & SQ_CF_WORD1_CF_CONST_MASK) >> SQ_CF_WORD1_CF_CONST_SHIFT) +#define SQ_CF_WORD1_GET_COND(sq_cf_word1) \ + ((sq_cf_word1 & SQ_CF_WORD1_COND_MASK) >> SQ_CF_WORD1_COND_SHIFT) +#define SQ_CF_WORD1_GET_COUNT(sq_cf_word1) \ + ((sq_cf_word1 & SQ_CF_WORD1_COUNT_MASK) >> SQ_CF_WORD1_COUNT_SHIFT) +#define SQ_CF_WORD1_GET_CALL_COUNT(sq_cf_word1) \ + ((sq_cf_word1 & SQ_CF_WORD1_CALL_COUNT_MASK) >> SQ_CF_WORD1_CALL_COUNT_SHIFT) +#define SQ_CF_WORD1_GET_COUNT_3(sq_cf_word1) \ + ((sq_cf_word1 & SQ_CF_WORD1_COUNT_3_MASK) >> SQ_CF_WORD1_COUNT_3_SHIFT) +#define SQ_CF_WORD1_GET_END_OF_PROGRAM(sq_cf_word1) \ + ((sq_cf_word1 & SQ_CF_WORD1_END_OF_PROGRAM_MASK) >> SQ_CF_WORD1_END_OF_PROGRAM_SHIFT) +#define SQ_CF_WORD1_GET_VALID_PIXEL_MODE(sq_cf_word1) \ + ((sq_cf_word1 & SQ_CF_WORD1_VALID_PIXEL_MODE_MASK) >> SQ_CF_WORD1_VALID_PIXEL_MODE_SHIFT) +#define SQ_CF_WORD1_GET_CF_INST(sq_cf_word1) \ + ((sq_cf_word1 & SQ_CF_WORD1_CF_INST_MASK) >> SQ_CF_WORD1_CF_INST_SHIFT) +#define SQ_CF_WORD1_GET_WHOLE_QUAD_MODE(sq_cf_word1) \ + ((sq_cf_word1 & SQ_CF_WORD1_WHOLE_QUAD_MODE_MASK) >> SQ_CF_WORD1_WHOLE_QUAD_MODE_SHIFT) +#define SQ_CF_WORD1_GET_BARRIER(sq_cf_word1) \ + ((sq_cf_word1 & SQ_CF_WORD1_BARRIER_MASK) >> SQ_CF_WORD1_BARRIER_SHIFT) + +#define SQ_CF_WORD1_SET_POP_COUNT(sq_cf_word1_reg, pop_count) \ + sq_cf_word1_reg = (sq_cf_word1_reg & ~SQ_CF_WORD1_POP_COUNT_MASK) | (pop_count << SQ_CF_WORD1_POP_COUNT_SHIFT) +#define SQ_CF_WORD1_SET_CF_CONST(sq_cf_word1_reg, cf_const) \ + sq_cf_word1_reg = (sq_cf_word1_reg & ~SQ_CF_WORD1_CF_CONST_MASK) | (cf_const << SQ_CF_WORD1_CF_CONST_SHIFT) +#define SQ_CF_WORD1_SET_COND(sq_cf_word1_reg, cond) \ + sq_cf_word1_reg = (sq_cf_word1_reg & ~SQ_CF_WORD1_COND_MASK) | (cond << SQ_CF_WORD1_COND_SHIFT) +#define SQ_CF_WORD1_SET_COUNT(sq_cf_word1_reg, count) \ + sq_cf_word1_reg = (sq_cf_word1_reg & ~SQ_CF_WORD1_COUNT_MASK) | (count << SQ_CF_WORD1_COUNT_SHIFT) +#define SQ_CF_WORD1_SET_CALL_COUNT(sq_cf_word1_reg, call_count) \ + sq_cf_word1_reg = (sq_cf_word1_reg & ~SQ_CF_WORD1_CALL_COUNT_MASK) | (call_count << SQ_CF_WORD1_CALL_COUNT_SHIFT) +#define SQ_CF_WORD1_SET_COUNT_3(sq_cf_word1_reg, count_3) \ + sq_cf_word1_reg = (sq_cf_word1_reg & ~SQ_CF_WORD1_COUNT_3_MASK) | (count_3 << SQ_CF_WORD1_COUNT_3_SHIFT) +#define SQ_CF_WORD1_SET_END_OF_PROGRAM(sq_cf_word1_reg, end_of_program) \ + sq_cf_word1_reg = (sq_cf_word1_reg & ~SQ_CF_WORD1_END_OF_PROGRAM_MASK) | (end_of_program << SQ_CF_WORD1_END_OF_PROGRAM_SHIFT) +#define SQ_CF_WORD1_SET_VALID_PIXEL_MODE(sq_cf_word1_reg, valid_pixel_mode) \ + sq_cf_word1_reg = (sq_cf_word1_reg & ~SQ_CF_WORD1_VALID_PIXEL_MODE_MASK) | (valid_pixel_mode << SQ_CF_WORD1_VALID_PIXEL_MODE_SHIFT) +#define SQ_CF_WORD1_SET_CF_INST(sq_cf_word1_reg, cf_inst) \ + sq_cf_word1_reg = (sq_cf_word1_reg & ~SQ_CF_WORD1_CF_INST_MASK) | (cf_inst << SQ_CF_WORD1_CF_INST_SHIFT) +#define SQ_CF_WORD1_SET_WHOLE_QUAD_MODE(sq_cf_word1_reg, whole_quad_mode) \ + sq_cf_word1_reg = (sq_cf_word1_reg & ~SQ_CF_WORD1_WHOLE_QUAD_MODE_MASK) | (whole_quad_mode << SQ_CF_WORD1_WHOLE_QUAD_MODE_SHIFT) +#define SQ_CF_WORD1_SET_BARRIER(sq_cf_word1_reg, barrier) \ + sq_cf_word1_reg = (sq_cf_word1_reg & ~SQ_CF_WORD1_BARRIER_MASK) | (barrier << SQ_CF_WORD1_BARRIER_SHIFT) + +#if defined(LITTLEENDIAN_CPU) + + typedef struct _sq_cf_word1_t { + unsigned int pop_count : SQ_CF_WORD1_POP_COUNT_SIZE; + unsigned int cf_const : SQ_CF_WORD1_CF_CONST_SIZE; + unsigned int cond : SQ_CF_WORD1_COND_SIZE; + unsigned int count : SQ_CF_WORD1_COUNT_SIZE; + unsigned int call_count : SQ_CF_WORD1_CALL_COUNT_SIZE; + unsigned int count_3 : SQ_CF_WORD1_COUNT_3_SIZE; + unsigned int : 1; + unsigned int end_of_program : SQ_CF_WORD1_END_OF_PROGRAM_SIZE; + unsigned int valid_pixel_mode : SQ_CF_WORD1_VALID_PIXEL_MODE_SIZE; + unsigned int cf_inst : SQ_CF_WORD1_CF_INST_SIZE; + unsigned int whole_quad_mode : SQ_CF_WORD1_WHOLE_QUAD_MODE_SIZE; + unsigned int barrier : SQ_CF_WORD1_BARRIER_SIZE; + } sq_cf_word1_t; + +#elif defined(BIGENDIAN_CPU) + + typedef struct _sq_cf_word1_t { + unsigned int barrier : SQ_CF_WORD1_BARRIER_SIZE; + unsigned int whole_quad_mode : SQ_CF_WORD1_WHOLE_QUAD_MODE_SIZE; + unsigned int cf_inst : SQ_CF_WORD1_CF_INST_SIZE; + unsigned int valid_pixel_mode : SQ_CF_WORD1_VALID_PIXEL_MODE_SIZE; + unsigned int end_of_program : SQ_CF_WORD1_END_OF_PROGRAM_SIZE; + unsigned int : 1; + unsigned int count_3 : SQ_CF_WORD1_COUNT_3_SIZE; + unsigned int call_count : SQ_CF_WORD1_CALL_COUNT_SIZE; + unsigned int count : SQ_CF_WORD1_COUNT_SIZE; + unsigned int cond : SQ_CF_WORD1_COND_SIZE; + unsigned int cf_const : SQ_CF_WORD1_CF_CONST_SIZE; + unsigned int pop_count : SQ_CF_WORD1_POP_COUNT_SIZE; + } sq_cf_word1_t; + +#endif + +typedef union { + unsigned int val : 32; + sq_cf_word1_t f; +} sq_cf_word1_u; + + +/* + * SQ_CF_ALU_WORD0 struct + */ + +#define SQ_CF_ALU_WORD0_ADDR_SIZE 22 +#define SQ_CF_ALU_WORD0_KCACHE_BANK0_SIZE 4 +#define SQ_CF_ALU_WORD0_KCACHE_BANK1_SIZE 4 +#define SQ_CF_ALU_WORD0_KCACHE_MODE0_SIZE 2 + +#define SQ_CF_ALU_WORD0_ADDR_SHIFT 0 +#define SQ_CF_ALU_WORD0_KCACHE_BANK0_SHIFT 22 +#define SQ_CF_ALU_WORD0_KCACHE_BANK1_SHIFT 26 +#define SQ_CF_ALU_WORD0_KCACHE_MODE0_SHIFT 30 + +#define SQ_CF_ALU_WORD0_ADDR_MASK 0x003fffff +#define SQ_CF_ALU_WORD0_KCACHE_BANK0_MASK 0x03c00000 +#define SQ_CF_ALU_WORD0_KCACHE_BANK1_MASK 0x3c000000 +#define SQ_CF_ALU_WORD0_KCACHE_MODE0_MASK 0xc0000000 + +#define SQ_CF_ALU_WORD0_MASK \ + (SQ_CF_ALU_WORD0_ADDR_MASK | \ + SQ_CF_ALU_WORD0_KCACHE_BANK0_MASK | \ + SQ_CF_ALU_WORD0_KCACHE_BANK1_MASK | \ + SQ_CF_ALU_WORD0_KCACHE_MODE0_MASK) + +#define SQ_CF_ALU_WORD0_DEFAULT 0xcdcdcdcd + +#define SQ_CF_ALU_WORD0_GET_ADDR(sq_cf_alu_word0) \ + ((sq_cf_alu_word0 & SQ_CF_ALU_WORD0_ADDR_MASK) >> SQ_CF_ALU_WORD0_ADDR_SHIFT) +#define SQ_CF_ALU_WORD0_GET_KCACHE_BANK0(sq_cf_alu_word0) \ + ((sq_cf_alu_word0 & SQ_CF_ALU_WORD0_KCACHE_BANK0_MASK) >> SQ_CF_ALU_WORD0_KCACHE_BANK0_SHIFT) +#define SQ_CF_ALU_WORD0_GET_KCACHE_BANK1(sq_cf_alu_word0) \ + ((sq_cf_alu_word0 & SQ_CF_ALU_WORD0_KCACHE_BANK1_MASK) >> SQ_CF_ALU_WORD0_KCACHE_BANK1_SHIFT) +#define SQ_CF_ALU_WORD0_GET_KCACHE_MODE0(sq_cf_alu_word0) \ + ((sq_cf_alu_word0 & SQ_CF_ALU_WORD0_KCACHE_MODE0_MASK) >> SQ_CF_ALU_WORD0_KCACHE_MODE0_SHIFT) + +#define SQ_CF_ALU_WORD0_SET_ADDR(sq_cf_alu_word0_reg, addr) \ + sq_cf_alu_word0_reg = (sq_cf_alu_word0_reg & ~SQ_CF_ALU_WORD0_ADDR_MASK) | (addr << SQ_CF_ALU_WORD0_ADDR_SHIFT) +#define SQ_CF_ALU_WORD0_SET_KCACHE_BANK0(sq_cf_alu_word0_reg, kcache_bank0) \ + sq_cf_alu_word0_reg = (sq_cf_alu_word0_reg & ~SQ_CF_ALU_WORD0_KCACHE_BANK0_MASK) | (kcache_bank0 << SQ_CF_ALU_WORD0_KCACHE_BANK0_SHIFT) +#define SQ_CF_ALU_WORD0_SET_KCACHE_BANK1(sq_cf_alu_word0_reg, kcache_bank1) \ + sq_cf_alu_word0_reg = (sq_cf_alu_word0_reg & ~SQ_CF_ALU_WORD0_KCACHE_BANK1_MASK) | (kcache_bank1 << SQ_CF_ALU_WORD0_KCACHE_BANK1_SHIFT) +#define SQ_CF_ALU_WORD0_SET_KCACHE_MODE0(sq_cf_alu_word0_reg, kcache_mode0) \ + sq_cf_alu_word0_reg = (sq_cf_alu_word0_reg & ~SQ_CF_ALU_WORD0_KCACHE_MODE0_MASK) | (kcache_mode0 << SQ_CF_ALU_WORD0_KCACHE_MODE0_SHIFT) + +#if defined(LITTLEENDIAN_CPU) + + typedef struct _sq_cf_alu_word0_t { + unsigned int addr : SQ_CF_ALU_WORD0_ADDR_SIZE; + unsigned int kcache_bank0 : SQ_CF_ALU_WORD0_KCACHE_BANK0_SIZE; + unsigned int kcache_bank1 : SQ_CF_ALU_WORD0_KCACHE_BANK1_SIZE; + unsigned int kcache_mode0 : SQ_CF_ALU_WORD0_KCACHE_MODE0_SIZE; + } sq_cf_alu_word0_t; + +#elif defined(BIGENDIAN_CPU) + + typedef struct _sq_cf_alu_word0_t { + unsigned int kcache_mode0 : SQ_CF_ALU_WORD0_KCACHE_MODE0_SIZE; + unsigned int kcache_bank1 : SQ_CF_ALU_WORD0_KCACHE_BANK1_SIZE; + unsigned int kcache_bank0 : SQ_CF_ALU_WORD0_KCACHE_BANK0_SIZE; + unsigned int addr : SQ_CF_ALU_WORD0_ADDR_SIZE; + } sq_cf_alu_word0_t; + +#endif + +typedef union { + unsigned int val : 32; + sq_cf_alu_word0_t f; +} sq_cf_alu_word0_u; + + +/* + * SQ_CF_ALU_WORD1 struct + */ + +#define SQ_CF_ALU_WORD1_KCACHE_MODE1_SIZE 2 +#define SQ_CF_ALU_WORD1_KCACHE_ADDR0_SIZE 8 +#define SQ_CF_ALU_WORD1_KCACHE_ADDR1_SIZE 8 +#define SQ_CF_ALU_WORD1_COUNT_SIZE 7 +#define SQ_CF_ALU_WORD1_ALT_CONST_SIZE 1 +#define SQ_CF_ALU_WORD1_CF_INST_SIZE 4 +#define SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE_SIZE 1 +#define SQ_CF_ALU_WORD1_BARRIER_SIZE 1 + +#define SQ_CF_ALU_WORD1_KCACHE_MODE1_SHIFT 0 +#define SQ_CF_ALU_WORD1_KCACHE_ADDR0_SHIFT 2 +#define SQ_CF_ALU_WORD1_KCACHE_ADDR1_SHIFT 10 +#define SQ_CF_ALU_WORD1_COUNT_SHIFT 18 +#define SQ_CF_ALU_WORD1_ALT_CONST_SHIFT 25 +#define SQ_CF_ALU_WORD1_CF_INST_SHIFT 26 +#define SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE_SHIFT 30 +#define SQ_CF_ALU_WORD1_BARRIER_SHIFT 31 + +#define SQ_CF_ALU_WORD1_KCACHE_MODE1_MASK 0x00000003 +#define SQ_CF_ALU_WORD1_KCACHE_ADDR0_MASK 0x000003fc +#define SQ_CF_ALU_WORD1_KCACHE_ADDR1_MASK 0x0003fc00 +#define SQ_CF_ALU_WORD1_COUNT_MASK 0x01fc0000 +#define SQ_CF_ALU_WORD1_ALT_CONST_MASK 0x02000000 +#define SQ_CF_ALU_WORD1_CF_INST_MASK 0x3c000000 +#define SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE_MASK 0x40000000 +#define SQ_CF_ALU_WORD1_BARRIER_MASK 0x80000000 + +#define SQ_CF_ALU_WORD1_MASK \ + (SQ_CF_ALU_WORD1_KCACHE_MODE1_MASK | \ + SQ_CF_ALU_WORD1_KCACHE_ADDR0_MASK | \ + SQ_CF_ALU_WORD1_KCACHE_ADDR1_MASK | \ + SQ_CF_ALU_WORD1_COUNT_MASK | \ + SQ_CF_ALU_WORD1_ALT_CONST_MASK | \ + SQ_CF_ALU_WORD1_CF_INST_MASK | \ + SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE_MASK | \ + SQ_CF_ALU_WORD1_BARRIER_MASK) + +#define SQ_CF_ALU_WORD1_DEFAULT 0xcdcdcdcd + +#define SQ_CF_ALU_WORD1_GET_KCACHE_MODE1(sq_cf_alu_word1) \ + ((sq_cf_alu_word1 & SQ_CF_ALU_WORD1_KCACHE_MODE1_MASK) >> SQ_CF_ALU_WORD1_KCACHE_MODE1_SHIFT) +#define SQ_CF_ALU_WORD1_GET_KCACHE_ADDR0(sq_cf_alu_word1) \ + ((sq_cf_alu_word1 & SQ_CF_ALU_WORD1_KCACHE_ADDR0_MASK) >> SQ_CF_ALU_WORD1_KCACHE_ADDR0_SHIFT) +#define SQ_CF_ALU_WORD1_GET_KCACHE_ADDR1(sq_cf_alu_word1) \ + ((sq_cf_alu_word1 & SQ_CF_ALU_WORD1_KCACHE_ADDR1_MASK) >> SQ_CF_ALU_WORD1_KCACHE_ADDR1_SHIFT) +#define SQ_CF_ALU_WORD1_GET_COUNT(sq_cf_alu_word1) \ + ((sq_cf_alu_word1 & SQ_CF_ALU_WORD1_COUNT_MASK) >> SQ_CF_ALU_WORD1_COUNT_SHIFT) +#define SQ_CF_ALU_WORD1_GET_ALT_CONST(sq_cf_alu_word1) \ + ((sq_cf_alu_word1 & SQ_CF_ALU_WORD1_ALT_CONST_MASK) >> SQ_CF_ALU_WORD1_ALT_CONST_SHIFT) +#define SQ_CF_ALU_WORD1_GET_CF_INST(sq_cf_alu_word1) \ + ((sq_cf_alu_word1 & SQ_CF_ALU_WORD1_CF_INST_MASK) >> SQ_CF_ALU_WORD1_CF_INST_SHIFT) +#define SQ_CF_ALU_WORD1_GET_WHOLE_QUAD_MODE(sq_cf_alu_word1) \ + ((sq_cf_alu_word1 & SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE_MASK) >> SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE_SHIFT) +#define SQ_CF_ALU_WORD1_GET_BARRIER(sq_cf_alu_word1) \ + ((sq_cf_alu_word1 & SQ_CF_ALU_WORD1_BARRIER_MASK) >> SQ_CF_ALU_WORD1_BARRIER_SHIFT) + +#define SQ_CF_ALU_WORD1_SET_KCACHE_MODE1(sq_cf_alu_word1_reg, kcache_mode1) \ + sq_cf_alu_word1_reg = (sq_cf_alu_word1_reg & ~SQ_CF_ALU_WORD1_KCACHE_MODE1_MASK) | (kcache_mode1 << SQ_CF_ALU_WORD1_KCACHE_MODE1_SHIFT) +#define SQ_CF_ALU_WORD1_SET_KCACHE_ADDR0(sq_cf_alu_word1_reg, kcache_addr0) \ + sq_cf_alu_word1_reg = (sq_cf_alu_word1_reg & ~SQ_CF_ALU_WORD1_KCACHE_ADDR0_MASK) | (kcache_addr0 << SQ_CF_ALU_WORD1_KCACHE_ADDR0_SHIFT) +#define SQ_CF_ALU_WORD1_SET_KCACHE_ADDR1(sq_cf_alu_word1_reg, kcache_addr1) \ + sq_cf_alu_word1_reg = (sq_cf_alu_word1_reg & ~SQ_CF_ALU_WORD1_KCACHE_ADDR1_MASK) | (kcache_addr1 << SQ_CF_ALU_WORD1_KCACHE_ADDR1_SHIFT) +#define SQ_CF_ALU_WORD1_SET_COUNT(sq_cf_alu_word1_reg, count) \ + sq_cf_alu_word1_reg = (sq_cf_alu_word1_reg & ~SQ_CF_ALU_WORD1_COUNT_MASK) | (count << SQ_CF_ALU_WORD1_COUNT_SHIFT) +#define SQ_CF_ALU_WORD1_SET_ALT_CONST(sq_cf_alu_word1_reg, alt_const) \ + sq_cf_alu_word1_reg = (sq_cf_alu_word1_reg & ~SQ_CF_ALU_WORD1_ALT_CONST_MASK) | (alt_const << SQ_CF_ALU_WORD1_ALT_CONST_SHIFT) +#define SQ_CF_ALU_WORD1_SET_CF_INST(sq_cf_alu_word1_reg, cf_inst) \ + sq_cf_alu_word1_reg = (sq_cf_alu_word1_reg & ~SQ_CF_ALU_WORD1_CF_INST_MASK) | (cf_inst << SQ_CF_ALU_WORD1_CF_INST_SHIFT) +#define SQ_CF_ALU_WORD1_SET_WHOLE_QUAD_MODE(sq_cf_alu_word1_reg, whole_quad_mode) \ + sq_cf_alu_word1_reg = (sq_cf_alu_word1_reg & ~SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE_MASK) | (whole_quad_mode << SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE_SHIFT) +#define SQ_CF_ALU_WORD1_SET_BARRIER(sq_cf_alu_word1_reg, barrier) \ + sq_cf_alu_word1_reg = (sq_cf_alu_word1_reg & ~SQ_CF_ALU_WORD1_BARRIER_MASK) | (barrier << SQ_CF_ALU_WORD1_BARRIER_SHIFT) + +#if defined(LITTLEENDIAN_CPU) + + typedef struct _sq_cf_alu_word1_t { + unsigned int kcache_mode1 : SQ_CF_ALU_WORD1_KCACHE_MODE1_SIZE; + unsigned int kcache_addr0 : SQ_CF_ALU_WORD1_KCACHE_ADDR0_SIZE; + unsigned int kcache_addr1 : SQ_CF_ALU_WORD1_KCACHE_ADDR1_SIZE; + unsigned int count : SQ_CF_ALU_WORD1_COUNT_SIZE; + unsigned int alt_const : SQ_CF_ALU_WORD1_ALT_CONST_SIZE; + unsigned int cf_inst : SQ_CF_ALU_WORD1_CF_INST_SIZE; + unsigned int whole_quad_mode : SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE_SIZE; + unsigned int barrier : SQ_CF_ALU_WORD1_BARRIER_SIZE; + } sq_cf_alu_word1_t; + +#elif defined(BIGENDIAN_CPU) + + typedef struct _sq_cf_alu_word1_t { + unsigned int barrier : SQ_CF_ALU_WORD1_BARRIER_SIZE; + unsigned int whole_quad_mode : SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE_SIZE; + unsigned int cf_inst : SQ_CF_ALU_WORD1_CF_INST_SIZE; + unsigned int alt_const : SQ_CF_ALU_WORD1_ALT_CONST_SIZE; + unsigned int count : SQ_CF_ALU_WORD1_COUNT_SIZE; + unsigned int kcache_addr1 : SQ_CF_ALU_WORD1_KCACHE_ADDR1_SIZE; + unsigned int kcache_addr0 : SQ_CF_ALU_WORD1_KCACHE_ADDR0_SIZE; + unsigned int kcache_mode1 : SQ_CF_ALU_WORD1_KCACHE_MODE1_SIZE; + } sq_cf_alu_word1_t; + +#endif + +typedef union { + unsigned int val : 32; + sq_cf_alu_word1_t f; +} sq_cf_alu_word1_u; + + +/* + * SQ_CF_ALLOC_EXPORT_WORD0 struct + */ + +#define SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE_SIZE 13 +#define SQ_CF_ALLOC_EXPORT_WORD0_TYPE_SIZE 2 +#define SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR_SIZE 7 +#define SQ_CF_ALLOC_EXPORT_WORD0_RW_REL_SIZE 1 +#define SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR_SIZE 7 +#define SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE_SIZE 2 + +#define SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE_SHIFT 0 +#define SQ_CF_ALLOC_EXPORT_WORD0_TYPE_SHIFT 13 +#define SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR_SHIFT 15 +#define SQ_CF_ALLOC_EXPORT_WORD0_RW_REL_SHIFT 22 +#define SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR_SHIFT 23 +#define SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE_SHIFT 30 + +#define SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE_MASK 0x00001fff +#define SQ_CF_ALLOC_EXPORT_WORD0_TYPE_MASK 0x00006000 +#define SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR_MASK 0x003f8000 +#define SQ_CF_ALLOC_EXPORT_WORD0_RW_REL_MASK 0x00400000 +#define SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR_MASK 0x3f800000 +#define SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE_MASK 0xc0000000 + +#define SQ_CF_ALLOC_EXPORT_WORD0_MASK \ + (SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE_MASK | \ + SQ_CF_ALLOC_EXPORT_WORD0_TYPE_MASK | \ + SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR_MASK | \ + SQ_CF_ALLOC_EXPORT_WORD0_RW_REL_MASK | \ + SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR_MASK | \ + SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE_MASK) + +#define SQ_CF_ALLOC_EXPORT_WORD0_DEFAULT 0xcdcdcdcd + +#define SQ_CF_ALLOC_EXPORT_WORD0_GET_ARRAY_BASE(sq_cf_alloc_export_word0) \ + ((sq_cf_alloc_export_word0 & SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE_MASK) >> SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE_SHIFT) +#define SQ_CF_ALLOC_EXPORT_WORD0_GET_TYPE(sq_cf_alloc_export_word0) \ + ((sq_cf_alloc_export_word0 & SQ_CF_ALLOC_EXPORT_WORD0_TYPE_MASK) >> SQ_CF_ALLOC_EXPORT_WORD0_TYPE_SHIFT) +#define SQ_CF_ALLOC_EXPORT_WORD0_GET_RW_GPR(sq_cf_alloc_export_word0) \ + ((sq_cf_alloc_export_word0 & SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR_MASK) >> SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR_SHIFT) +#define SQ_CF_ALLOC_EXPORT_WORD0_GET_RW_REL(sq_cf_alloc_export_word0) \ + ((sq_cf_alloc_export_word0 & SQ_CF_ALLOC_EXPORT_WORD0_RW_REL_MASK) >> SQ_CF_ALLOC_EXPORT_WORD0_RW_REL_SHIFT) +#define SQ_CF_ALLOC_EXPORT_WORD0_GET_INDEX_GPR(sq_cf_alloc_export_word0) \ + ((sq_cf_alloc_export_word0 & SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR_MASK) >> SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR_SHIFT) +#define SQ_CF_ALLOC_EXPORT_WORD0_GET_ELEM_SIZE(sq_cf_alloc_export_word0) \ + ((sq_cf_alloc_export_word0 & SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE_MASK) >> SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE_SHIFT) + +#define SQ_CF_ALLOC_EXPORT_WORD0_SET_ARRAY_BASE(sq_cf_alloc_export_word0_reg, array_base) \ + sq_cf_alloc_export_word0_reg = (sq_cf_alloc_export_word0_reg & ~SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE_MASK) | (array_base << SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE_SHIFT) +#define SQ_CF_ALLOC_EXPORT_WORD0_SET_TYPE(sq_cf_alloc_export_word0_reg, type) \ + sq_cf_alloc_export_word0_reg = (sq_cf_alloc_export_word0_reg & ~SQ_CF_ALLOC_EXPORT_WORD0_TYPE_MASK) | (type << SQ_CF_ALLOC_EXPORT_WORD0_TYPE_SHIFT) +#define SQ_CF_ALLOC_EXPORT_WORD0_SET_RW_GPR(sq_cf_alloc_export_word0_reg, rw_gpr) \ + sq_cf_alloc_export_word0_reg = (sq_cf_alloc_export_word0_reg & ~SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR_MASK) | (rw_gpr << SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR_SHIFT) +#define SQ_CF_ALLOC_EXPORT_WORD0_SET_RW_REL(sq_cf_alloc_export_word0_reg, rw_rel) \ + sq_cf_alloc_export_word0_reg = (sq_cf_alloc_export_word0_reg & ~SQ_CF_ALLOC_EXPORT_WORD0_RW_REL_MASK) | (rw_rel << SQ_CF_ALLOC_EXPORT_WORD0_RW_REL_SHIFT) +#define SQ_CF_ALLOC_EXPORT_WORD0_SET_INDEX_GPR(sq_cf_alloc_export_word0_reg, index_gpr) \ + sq_cf_alloc_export_word0_reg = (sq_cf_alloc_export_word0_reg & ~SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR_MASK) | (index_gpr << SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR_SHIFT) +#define SQ_CF_ALLOC_EXPORT_WORD0_SET_ELEM_SIZE(sq_cf_alloc_export_word0_reg, elem_size) \ + sq_cf_alloc_export_word0_reg = (sq_cf_alloc_export_word0_reg & ~SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE_MASK) | (elem_size << SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE_SHIFT) + +#if defined(LITTLEENDIAN_CPU) + + typedef struct _sq_cf_alloc_export_word0_t { + unsigned int array_base : SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE_SIZE; + unsigned int type : SQ_CF_ALLOC_EXPORT_WORD0_TYPE_SIZE; + unsigned int rw_gpr : SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR_SIZE; + unsigned int rw_rel : SQ_CF_ALLOC_EXPORT_WORD0_RW_REL_SIZE; + unsigned int index_gpr : SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR_SIZE; + unsigned int elem_size : SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE_SIZE; + } sq_cf_alloc_export_word0_t; + +#elif defined(BIGENDIAN_CPU) + + typedef struct _sq_cf_alloc_export_word0_t { + unsigned int elem_size : SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE_SIZE; + unsigned int index_gpr : SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR_SIZE; + unsigned int rw_rel : SQ_CF_ALLOC_EXPORT_WORD0_RW_REL_SIZE; + unsigned int rw_gpr : SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR_SIZE; + unsigned int type : SQ_CF_ALLOC_EXPORT_WORD0_TYPE_SIZE; + unsigned int array_base : SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE_SIZE; + } sq_cf_alloc_export_word0_t; + +#endif + +typedef union { + unsigned int val : 32; + sq_cf_alloc_export_word0_t f; +} sq_cf_alloc_export_word0_u; + + +/* + * SQ_CF_ALLOC_EXPORT_WORD1 struct + */ + +#define SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT_SIZE 4 +#define SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM_SIZE 1 +#define SQ_CF_ALLOC_EXPORT_WORD1_VALID_PIXEL_MODE_SIZE 1 +#define SQ_CF_ALLOC_EXPORT_WORD1_CF_INST_SIZE 7 +#define SQ_CF_ALLOC_EXPORT_WORD1_WHOLE_QUAD_MODE_SIZE 1 +#define SQ_CF_ALLOC_EXPORT_WORD1_BARRIER_SIZE 1 + +#define SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT_SHIFT 17 +#define SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM_SHIFT 21 +#define SQ_CF_ALLOC_EXPORT_WORD1_VALID_PIXEL_MODE_SHIFT 22 +#define SQ_CF_ALLOC_EXPORT_WORD1_CF_INST_SHIFT 23 +#define SQ_CF_ALLOC_EXPORT_WORD1_WHOLE_QUAD_MODE_SHIFT 30 +#define SQ_CF_ALLOC_EXPORT_WORD1_BARRIER_SHIFT 31 + +#define SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT_MASK 0x001e0000 +#define SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM_MASK 0x00200000 +#define SQ_CF_ALLOC_EXPORT_WORD1_VALID_PIXEL_MODE_MASK 0x00400000 +#define SQ_CF_ALLOC_EXPORT_WORD1_CF_INST_MASK 0x3f800000 +#define SQ_CF_ALLOC_EXPORT_WORD1_WHOLE_QUAD_MODE_MASK 0x40000000 +#define SQ_CF_ALLOC_EXPORT_WORD1_BARRIER_MASK 0x80000000 + +#define SQ_CF_ALLOC_EXPORT_WORD1_MASK \ + (SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT_MASK | \ + SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM_MASK | \ + SQ_CF_ALLOC_EXPORT_WORD1_VALID_PIXEL_MODE_MASK | \ + SQ_CF_ALLOC_EXPORT_WORD1_CF_INST_MASK | \ + SQ_CF_ALLOC_EXPORT_WORD1_WHOLE_QUAD_MODE_MASK | \ + SQ_CF_ALLOC_EXPORT_WORD1_BARRIER_MASK) + +#define SQ_CF_ALLOC_EXPORT_WORD1_DEFAULT 0xcdcc0000 + +#define SQ_CF_ALLOC_EXPORT_WORD1_GET_BURST_COUNT(sq_cf_alloc_export_word1) \ + ((sq_cf_alloc_export_word1 & SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT_MASK) >> SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT_SHIFT) +#define SQ_CF_ALLOC_EXPORT_WORD1_GET_END_OF_PROGRAM(sq_cf_alloc_export_word1) \ + ((sq_cf_alloc_export_word1 & SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM_MASK) >> SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM_SHIFT) +#define SQ_CF_ALLOC_EXPORT_WORD1_GET_VALID_PIXEL_MODE(sq_cf_alloc_export_word1) \ + ((sq_cf_alloc_export_word1 & SQ_CF_ALLOC_EXPORT_WORD1_VALID_PIXEL_MODE_MASK) >> SQ_CF_ALLOC_EXPORT_WORD1_VALID_PIXEL_MODE_SHIFT) +#define SQ_CF_ALLOC_EXPORT_WORD1_GET_CF_INST(sq_cf_alloc_export_word1) \ + ((sq_cf_alloc_export_word1 & SQ_CF_ALLOC_EXPORT_WORD1_CF_INST_MASK) >> SQ_CF_ALLOC_EXPORT_WORD1_CF_INST_SHIFT) +#define SQ_CF_ALLOC_EXPORT_WORD1_GET_WHOLE_QUAD_MODE(sq_cf_alloc_export_word1) \ + ((sq_cf_alloc_export_word1 & SQ_CF_ALLOC_EXPORT_WORD1_WHOLE_QUAD_MODE_MASK) >> SQ_CF_ALLOC_EXPORT_WORD1_WHOLE_QUAD_MODE_SHIFT) +#define SQ_CF_ALLOC_EXPORT_WORD1_GET_BARRIER(sq_cf_alloc_export_word1) \ + ((sq_cf_alloc_export_word1 & SQ_CF_ALLOC_EXPORT_WORD1_BARRIER_MASK) >> SQ_CF_ALLOC_EXPORT_WORD1_BARRIER_SHIFT) + +#define SQ_CF_ALLOC_EXPORT_WORD1_SET_BURST_COUNT(sq_cf_alloc_export_word1_reg, burst_count) \ + sq_cf_alloc_export_word1_reg = (sq_cf_alloc_export_word1_reg & ~SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT_MASK) | (burst_count << SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT_SHIFT) +#define SQ_CF_ALLOC_EXPORT_WORD1_SET_END_OF_PROGRAM(sq_cf_alloc_export_word1_reg, end_of_program) \ + sq_cf_alloc_export_word1_reg = (sq_cf_alloc_export_word1_reg & ~SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM_MASK) | (end_of_program << SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM_SHIFT) +#define SQ_CF_ALLOC_EXPORT_WORD1_SET_VALID_PIXEL_MODE(sq_cf_alloc_export_word1_reg, valid_pixel_mode) \ + sq_cf_alloc_export_word1_reg = (sq_cf_alloc_export_word1_reg & ~SQ_CF_ALLOC_EXPORT_WORD1_VALID_PIXEL_MODE_MASK) | (valid_pixel_mode << SQ_CF_ALLOC_EXPORT_WORD1_VALID_PIXEL_MODE_SHIFT) +#define SQ_CF_ALLOC_EXPORT_WORD1_SET_CF_INST(sq_cf_alloc_export_word1_reg, cf_inst) \ + sq_cf_alloc_export_word1_reg = (sq_cf_alloc_export_word1_reg & ~SQ_CF_ALLOC_EXPORT_WORD1_CF_INST_MASK) | (cf_inst << SQ_CF_ALLOC_EXPORT_WORD1_CF_INST_SHIFT) +#define SQ_CF_ALLOC_EXPORT_WORD1_SET_WHOLE_QUAD_MODE(sq_cf_alloc_export_word1_reg, whole_quad_mode) \ + sq_cf_alloc_export_word1_reg = (sq_cf_alloc_export_word1_reg & ~SQ_CF_ALLOC_EXPORT_WORD1_WHOLE_QUAD_MODE_MASK) | (whole_quad_mode << SQ_CF_ALLOC_EXPORT_WORD1_WHOLE_QUAD_MODE_SHIFT) +#define SQ_CF_ALLOC_EXPORT_WORD1_SET_BARRIER(sq_cf_alloc_export_word1_reg, barrier) \ + sq_cf_alloc_export_word1_reg = (sq_cf_alloc_export_word1_reg & ~SQ_CF_ALLOC_EXPORT_WORD1_BARRIER_MASK) | (barrier << SQ_CF_ALLOC_EXPORT_WORD1_BARRIER_SHIFT) + +#if defined(LITTLEENDIAN_CPU) + + typedef struct _sq_cf_alloc_export_word1_t { + unsigned int : 17; + unsigned int burst_count : SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT_SIZE; + unsigned int end_of_program : SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM_SIZE; + unsigned int valid_pixel_mode : SQ_CF_ALLOC_EXPORT_WORD1_VALID_PIXEL_MODE_SIZE; + unsigned int cf_inst : SQ_CF_ALLOC_EXPORT_WORD1_CF_INST_SIZE; + unsigned int whole_quad_mode : SQ_CF_ALLOC_EXPORT_WORD1_WHOLE_QUAD_MODE_SIZE; + unsigned int barrier : SQ_CF_ALLOC_EXPORT_WORD1_BARRIER_SIZE; + } sq_cf_alloc_export_word1_t; + +#elif defined(BIGENDIAN_CPU) + + typedef struct _sq_cf_alloc_export_word1_t { + unsigned int barrier : SQ_CF_ALLOC_EXPORT_WORD1_BARRIER_SIZE; + unsigned int whole_quad_mode : SQ_CF_ALLOC_EXPORT_WORD1_WHOLE_QUAD_MODE_SIZE; + unsigned int cf_inst : SQ_CF_ALLOC_EXPORT_WORD1_CF_INST_SIZE; + unsigned int valid_pixel_mode : SQ_CF_ALLOC_EXPORT_WORD1_VALID_PIXEL_MODE_SIZE; + unsigned int end_of_program : SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM_SIZE; + unsigned int burst_count : SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT_SIZE; + unsigned int : 17; + } sq_cf_alloc_export_word1_t; + +#endif + +typedef union { + unsigned int val : 32; + sq_cf_alloc_export_word1_t f; +} sq_cf_alloc_export_word1_u; + + +/* + * SQ_CF_ALLOC_EXPORT_WORD1_BUF struct + */ + +#define SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE_SIZE 12 +#define SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK_SIZE 4 + +#define SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE_SHIFT 0 +#define SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK_SHIFT 12 + +#define SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE_MASK 0x00000fff +#define SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK_MASK 0x0000f000 + +#define SQ_CF_ALLOC_EXPORT_WORD1_BUF_MASK \ + (SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE_MASK | \ + SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK_MASK) + +#define SQ_CF_ALLOC_EXPORT_WORD1_BUF_DEFAULT 0x0000cdcd + +#define SQ_CF_ALLOC_EXPORT_WORD1_BUF_GET_ARRAY_SIZE(sq_cf_alloc_export_word1_buf) \ + ((sq_cf_alloc_export_word1_buf & SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE_MASK) >> SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE_SHIFT) +#define SQ_CF_ALLOC_EXPORT_WORD1_BUF_GET_COMP_MASK(sq_cf_alloc_export_word1_buf) \ + ((sq_cf_alloc_export_word1_buf & SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK_MASK) >> SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK_SHIFT) + +#define SQ_CF_ALLOC_EXPORT_WORD1_BUF_SET_ARRAY_SIZE(sq_cf_alloc_export_word1_buf_reg, array_size) \ + sq_cf_alloc_export_word1_buf_reg = (sq_cf_alloc_export_word1_buf_reg & ~SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE_MASK) | (array_size << SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE_SHIFT) +#define SQ_CF_ALLOC_EXPORT_WORD1_BUF_SET_COMP_MASK(sq_cf_alloc_export_word1_buf_reg, comp_mask) \ + sq_cf_alloc_export_word1_buf_reg = (sq_cf_alloc_export_word1_buf_reg & ~SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK_MASK) | (comp_mask << SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK_SHIFT) + +#if defined(LITTLEENDIAN_CPU) + + typedef struct _sq_cf_alloc_export_word1_buf_t { + unsigned int array_size : SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE_SIZE; + unsigned int comp_mask : SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK_SIZE; + unsigned int : 16; + } sq_cf_alloc_export_word1_buf_t; + +#elif defined(BIGENDIAN_CPU) + + typedef struct _sq_cf_alloc_export_word1_buf_t { + unsigned int : 16; + unsigned int comp_mask : SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK_SIZE; + unsigned int array_size : SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE_SIZE; + } sq_cf_alloc_export_word1_buf_t; + +#endif + +typedef union { + unsigned int val : 32; + sq_cf_alloc_export_word1_buf_t f; +} sq_cf_alloc_export_word1_buf_u; + + +/* + * SQ_CF_ALLOC_EXPORT_WORD1_SWIZ struct + */ + +#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X_SIZE 3 +#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y_SIZE 3 +#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z_SIZE 3 +#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W_SIZE 3 + +#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X_SHIFT 0 +#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y_SHIFT 3 +#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z_SHIFT 6 +#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W_SHIFT 9 + +#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X_MASK 0x00000007 +#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y_MASK 0x00000038 +#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z_MASK 0x000001c0 +#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W_MASK 0x00000e00 + +#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_MASK \ + (SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X_MASK | \ + SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y_MASK | \ + SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z_MASK | \ + SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W_MASK) + +#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_DEFAULT 0x00000dcd + +#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_GET_SEL_X(sq_cf_alloc_export_word1_swiz) \ + ((sq_cf_alloc_export_word1_swiz & SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X_MASK) >> SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X_SHIFT) +#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_GET_SEL_Y(sq_cf_alloc_export_word1_swiz) \ + ((sq_cf_alloc_export_word1_swiz & SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y_MASK) >> SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y_SHIFT) +#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_GET_SEL_Z(sq_cf_alloc_export_word1_swiz) \ + ((sq_cf_alloc_export_word1_swiz & SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z_MASK) >> SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z_SHIFT) +#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_GET_SEL_W(sq_cf_alloc_export_word1_swiz) \ + ((sq_cf_alloc_export_word1_swiz & SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W_MASK) >> SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W_SHIFT) + +#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SET_SEL_X(sq_cf_alloc_export_word1_swiz_reg, sel_x) \ + sq_cf_alloc_export_word1_swiz_reg = (sq_cf_alloc_export_word1_swiz_reg & ~SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X_MASK) | (sel_x << SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X_SHIFT) +#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SET_SEL_Y(sq_cf_alloc_export_word1_swiz_reg, sel_y) \ + sq_cf_alloc_export_word1_swiz_reg = (sq_cf_alloc_export_word1_swiz_reg & ~SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y_MASK) | (sel_y << SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y_SHIFT) +#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SET_SEL_Z(sq_cf_alloc_export_word1_swiz_reg, sel_z) \ + sq_cf_alloc_export_word1_swiz_reg = (sq_cf_alloc_export_word1_swiz_reg & ~SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z_MASK) | (sel_z << SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z_SHIFT) +#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SET_SEL_W(sq_cf_alloc_export_word1_swiz_reg, sel_w) \ + sq_cf_alloc_export_word1_swiz_reg = (sq_cf_alloc_export_word1_swiz_reg & ~SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W_MASK) | (sel_w << SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W_SHIFT) + +#if defined(LITTLEENDIAN_CPU) + + typedef struct _sq_cf_alloc_export_word1_swiz_t { + unsigned int sel_x : SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X_SIZE; + unsigned int sel_y : SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y_SIZE; + unsigned int sel_z : SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z_SIZE; + unsigned int sel_w : SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W_SIZE; + unsigned int : 20; + } sq_cf_alloc_export_word1_swiz_t; + +#elif defined(BIGENDIAN_CPU) + + typedef struct _sq_cf_alloc_export_word1_swiz_t { + unsigned int : 20; + unsigned int sel_w : SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W_SIZE; + unsigned int sel_z : SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z_SIZE; + unsigned int sel_y : SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y_SIZE; + unsigned int sel_x : SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X_SIZE; + } sq_cf_alloc_export_word1_swiz_t; + +#endif + +typedef union { + unsigned int val : 32; + sq_cf_alloc_export_word1_swiz_t f; +} sq_cf_alloc_export_word1_swiz_u; + + +/* + * SQ_ALU_WORD0 struct + */ + +#define SQ_ALU_WORD0_SRC0_SEL_SIZE 9 +#define SQ_ALU_WORD0_SRC0_REL_SIZE 1 +#define SQ_ALU_WORD0_SRC0_CHAN_SIZE 2 +#define SQ_ALU_WORD0_SRC0_NEG_SIZE 1 +#define SQ_ALU_WORD0_SRC1_SEL_SIZE 9 +#define SQ_ALU_WORD0_SRC1_REL_SIZE 1 +#define SQ_ALU_WORD0_SRC1_CHAN_SIZE 2 +#define SQ_ALU_WORD0_SRC1_NEG_SIZE 1 +#define SQ_ALU_WORD0_INDEX_MODE_SIZE 3 +#define SQ_ALU_WORD0_PRED_SEL_SIZE 2 +#define SQ_ALU_WORD0_LAST_SIZE 1 + +#define SQ_ALU_WORD0_SRC0_SEL_SHIFT 0 +#define SQ_ALU_WORD0_SRC0_REL_SHIFT 9 +#define SQ_ALU_WORD0_SRC0_CHAN_SHIFT 10 +#define SQ_ALU_WORD0_SRC0_NEG_SHIFT 12 +#define SQ_ALU_WORD0_SRC1_SEL_SHIFT 13 +#define SQ_ALU_WORD0_SRC1_REL_SHIFT 22 +#define SQ_ALU_WORD0_SRC1_CHAN_SHIFT 23 +#define SQ_ALU_WORD0_SRC1_NEG_SHIFT 25 +#define SQ_ALU_WORD0_INDEX_MODE_SHIFT 26 +#define SQ_ALU_WORD0_PRED_SEL_SHIFT 29 +#define SQ_ALU_WORD0_LAST_SHIFT 31 + +#define SQ_ALU_WORD0_SRC0_SEL_MASK 0x000001ff +#define SQ_ALU_WORD0_SRC0_REL_MASK 0x00000200 +#define SQ_ALU_WORD0_SRC0_CHAN_MASK 0x00000c00 +#define SQ_ALU_WORD0_SRC0_NEG_MASK 0x00001000 +#define SQ_ALU_WORD0_SRC1_SEL_MASK 0x003fe000 +#define SQ_ALU_WORD0_SRC1_REL_MASK 0x00400000 +#define SQ_ALU_WORD0_SRC1_CHAN_MASK 0x01800000 +#define SQ_ALU_WORD0_SRC1_NEG_MASK 0x02000000 +#define SQ_ALU_WORD0_INDEX_MODE_MASK 0x1c000000 +#define SQ_ALU_WORD0_PRED_SEL_MASK 0x60000000 +#define SQ_ALU_WORD0_LAST_MASK 0x80000000 + +#define SQ_ALU_WORD0_MASK \ + (SQ_ALU_WORD0_SRC0_SEL_MASK | \ + SQ_ALU_WORD0_SRC0_REL_MASK | \ + SQ_ALU_WORD0_SRC0_CHAN_MASK | \ + SQ_ALU_WORD0_SRC0_NEG_MASK | \ + SQ_ALU_WORD0_SRC1_SEL_MASK | \ + SQ_ALU_WORD0_SRC1_REL_MASK | \ + SQ_ALU_WORD0_SRC1_CHAN_MASK | \ + SQ_ALU_WORD0_SRC1_NEG_MASK | \ + SQ_ALU_WORD0_INDEX_MODE_MASK | \ + SQ_ALU_WORD0_PRED_SEL_MASK | \ + SQ_ALU_WORD0_LAST_MASK) + +#define SQ_ALU_WORD0_DEFAULT 0xcdcdcdcd + +#define SQ_ALU_WORD0_GET_SRC0_SEL(sq_alu_word0) \ + ((sq_alu_word0 & SQ_ALU_WORD0_SRC0_SEL_MASK) >> SQ_ALU_WORD0_SRC0_SEL_SHIFT) +#define SQ_ALU_WORD0_GET_SRC0_REL(sq_alu_word0) \ + ((sq_alu_word0 & SQ_ALU_WORD0_SRC0_REL_MASK) >> SQ_ALU_WORD0_SRC0_REL_SHIFT) +#define SQ_ALU_WORD0_GET_SRC0_CHAN(sq_alu_word0) \ + ((sq_alu_word0 & SQ_ALU_WORD0_SRC0_CHAN_MASK) >> SQ_ALU_WORD0_SRC0_CHAN_SHIFT) +#define SQ_ALU_WORD0_GET_SRC0_NEG(sq_alu_word0) \ + ((sq_alu_word0 & SQ_ALU_WORD0_SRC0_NEG_MASK) >> SQ_ALU_WORD0_SRC0_NEG_SHIFT) +#define SQ_ALU_WORD0_GET_SRC1_SEL(sq_alu_word0) \ + ((sq_alu_word0 & SQ_ALU_WORD0_SRC1_SEL_MASK) >> SQ_ALU_WORD0_SRC1_SEL_SHIFT) +#define SQ_ALU_WORD0_GET_SRC1_REL(sq_alu_word0) \ + ((sq_alu_word0 & SQ_ALU_WORD0_SRC1_REL_MASK) >> SQ_ALU_WORD0_SRC1_REL_SHIFT) +#define SQ_ALU_WORD0_GET_SRC1_CHAN(sq_alu_word0) \ + ((sq_alu_word0 & SQ_ALU_WORD0_SRC1_CHAN_MASK) >> SQ_ALU_WORD0_SRC1_CHAN_SHIFT) +#define SQ_ALU_WORD0_GET_SRC1_NEG(sq_alu_word0) \ + ((sq_alu_word0 & SQ_ALU_WORD0_SRC1_NEG_MASK) >> SQ_ALU_WORD0_SRC1_NEG_SHIFT) +#define SQ_ALU_WORD0_GET_INDEX_MODE(sq_alu_word0) \ + ((sq_alu_word0 & SQ_ALU_WORD0_INDEX_MODE_MASK) >> SQ_ALU_WORD0_INDEX_MODE_SHIFT) +#define SQ_ALU_WORD0_GET_PRED_SEL(sq_alu_word0) \ + ((sq_alu_word0 & SQ_ALU_WORD0_PRED_SEL_MASK) >> SQ_ALU_WORD0_PRED_SEL_SHIFT) +#define SQ_ALU_WORD0_GET_LAST(sq_alu_word0) \ + ((sq_alu_word0 & SQ_ALU_WORD0_LAST_MASK) >> SQ_ALU_WORD0_LAST_SHIFT) + +#define SQ_ALU_WORD0_SET_SRC0_SEL(sq_alu_word0_reg, src0_sel) \ + sq_alu_word0_reg = (sq_alu_word0_reg & ~SQ_ALU_WORD0_SRC0_SEL_MASK) | (src0_sel << SQ_ALU_WORD0_SRC0_SEL_SHIFT) +#define SQ_ALU_WORD0_SET_SRC0_REL(sq_alu_word0_reg, src0_rel) \ + sq_alu_word0_reg = (sq_alu_word0_reg & ~SQ_ALU_WORD0_SRC0_REL_MASK) | (src0_rel << SQ_ALU_WORD0_SRC0_REL_SHIFT) +#define SQ_ALU_WORD0_SET_SRC0_CHAN(sq_alu_word0_reg, src0_chan) \ + sq_alu_word0_reg = (sq_alu_word0_reg & ~SQ_ALU_WORD0_SRC0_CHAN_MASK) | (src0_chan << SQ_ALU_WORD0_SRC0_CHAN_SHIFT) +#define SQ_ALU_WORD0_SET_SRC0_NEG(sq_alu_word0_reg, src0_neg) \ + sq_alu_word0_reg = (sq_alu_word0_reg & ~SQ_ALU_WORD0_SRC0_NEG_MASK) | (src0_neg << SQ_ALU_WORD0_SRC0_NEG_SHIFT) +#define SQ_ALU_WORD0_SET_SRC1_SEL(sq_alu_word0_reg, src1_sel) \ + sq_alu_word0_reg = (sq_alu_word0_reg & ~SQ_ALU_WORD0_SRC1_SEL_MASK) | (src1_sel << SQ_ALU_WORD0_SRC1_SEL_SHIFT) +#define SQ_ALU_WORD0_SET_SRC1_REL(sq_alu_word0_reg, src1_rel) \ + sq_alu_word0_reg = (sq_alu_word0_reg & ~SQ_ALU_WORD0_SRC1_REL_MASK) | (src1_rel << SQ_ALU_WORD0_SRC1_REL_SHIFT) +#define SQ_ALU_WORD0_SET_SRC1_CHAN(sq_alu_word0_reg, src1_chan) \ + sq_alu_word0_reg = (sq_alu_word0_reg & ~SQ_ALU_WORD0_SRC1_CHAN_MASK) | (src1_chan << SQ_ALU_WORD0_SRC1_CHAN_SHIFT) +#define SQ_ALU_WORD0_SET_SRC1_NEG(sq_alu_word0_reg, src1_neg) \ + sq_alu_word0_reg = (sq_alu_word0_reg & ~SQ_ALU_WORD0_SRC1_NEG_MASK) | (src1_neg << SQ_ALU_WORD0_SRC1_NEG_SHIFT) +#define SQ_ALU_WORD0_SET_INDEX_MODE(sq_alu_word0_reg, index_mode) \ + sq_alu_word0_reg = (sq_alu_word0_reg & ~SQ_ALU_WORD0_INDEX_MODE_MASK) | (index_mode << SQ_ALU_WORD0_INDEX_MODE_SHIFT) +#define SQ_ALU_WORD0_SET_PRED_SEL(sq_alu_word0_reg, pred_sel) \ + sq_alu_word0_reg = (sq_alu_word0_reg & ~SQ_ALU_WORD0_PRED_SEL_MASK) | (pred_sel << SQ_ALU_WORD0_PRED_SEL_SHIFT) +#define SQ_ALU_WORD0_SET_LAST(sq_alu_word0_reg, last) \ + sq_alu_word0_reg = (sq_alu_word0_reg & ~SQ_ALU_WORD0_LAST_MASK) | (last << SQ_ALU_WORD0_LAST_SHIFT) + +#if defined(LITTLEENDIAN_CPU) + + typedef struct _sq_alu_word0_t { + unsigned int src0_sel : SQ_ALU_WORD0_SRC0_SEL_SIZE; + unsigned int src0_rel : SQ_ALU_WORD0_SRC0_REL_SIZE; + unsigned int src0_chan : SQ_ALU_WORD0_SRC0_CHAN_SIZE; + unsigned int src0_neg : SQ_ALU_WORD0_SRC0_NEG_SIZE; + unsigned int src1_sel : SQ_ALU_WORD0_SRC1_SEL_SIZE; + unsigned int src1_rel : SQ_ALU_WORD0_SRC1_REL_SIZE; + unsigned int src1_chan : SQ_ALU_WORD0_SRC1_CHAN_SIZE; + unsigned int src1_neg : SQ_ALU_WORD0_SRC1_NEG_SIZE; + unsigned int index_mode : SQ_ALU_WORD0_INDEX_MODE_SIZE; + unsigned int pred_sel : SQ_ALU_WORD0_PRED_SEL_SIZE; + unsigned int last : SQ_ALU_WORD0_LAST_SIZE; + } sq_alu_word0_t; + +#elif defined(BIGENDIAN_CPU) + + typedef struct _sq_alu_word0_t { + unsigned int last : SQ_ALU_WORD0_LAST_SIZE; + unsigned int pred_sel : SQ_ALU_WORD0_PRED_SEL_SIZE; + unsigned int index_mode : SQ_ALU_WORD0_INDEX_MODE_SIZE; + unsigned int src1_neg : SQ_ALU_WORD0_SRC1_NEG_SIZE; + unsigned int src1_chan : SQ_ALU_WORD0_SRC1_CHAN_SIZE; + unsigned int src1_rel : SQ_ALU_WORD0_SRC1_REL_SIZE; + unsigned int src1_sel : SQ_ALU_WORD0_SRC1_SEL_SIZE; + unsigned int src0_neg : SQ_ALU_WORD0_SRC0_NEG_SIZE; + unsigned int src0_chan : SQ_ALU_WORD0_SRC0_CHAN_SIZE; + unsigned int src0_rel : SQ_ALU_WORD0_SRC0_REL_SIZE; + unsigned int src0_sel : SQ_ALU_WORD0_SRC0_SEL_SIZE; + } sq_alu_word0_t; + +#endif + +typedef union { + unsigned int val : 32; + sq_alu_word0_t f; +} sq_alu_word0_u; + + +/* + * SQ_ALU_WORD1 struct + */ + +#define SQ_ALU_WORD1_ENCODING_SIZE 3 +#define SQ_ALU_WORD1_BANK_SWIZZLE_SIZE 3 +#define SQ_ALU_WORD1_DST_GPR_SIZE 7 +#define SQ_ALU_WORD1_DST_REL_SIZE 1 +#define SQ_ALU_WORD1_DST_CHAN_SIZE 2 +#define SQ_ALU_WORD1_CLAMP_SIZE 1 + +#define SQ_ALU_WORD1_ENCODING_SHIFT 15 +#define SQ_ALU_WORD1_BANK_SWIZZLE_SHIFT 18 +#define SQ_ALU_WORD1_DST_GPR_SHIFT 21 +#define SQ_ALU_WORD1_DST_REL_SHIFT 28 +#define SQ_ALU_WORD1_DST_CHAN_SHIFT 29 +#define SQ_ALU_WORD1_CLAMP_SHIFT 31 + +#define SQ_ALU_WORD1_ENCODING_MASK 0x00038000 +#define SQ_ALU_WORD1_BANK_SWIZZLE_MASK 0x001c0000 +#define SQ_ALU_WORD1_DST_GPR_MASK 0x0fe00000 +#define SQ_ALU_WORD1_DST_REL_MASK 0x10000000 +#define SQ_ALU_WORD1_DST_CHAN_MASK 0x60000000 +#define SQ_ALU_WORD1_CLAMP_MASK 0x80000000 + +#define SQ_ALU_WORD1_MASK \ + (SQ_ALU_WORD1_ENCODING_MASK | \ + SQ_ALU_WORD1_BANK_SWIZZLE_MASK | \ + SQ_ALU_WORD1_DST_GPR_MASK | \ + SQ_ALU_WORD1_DST_REL_MASK | \ + SQ_ALU_WORD1_DST_CHAN_MASK | \ + SQ_ALU_WORD1_CLAMP_MASK) + +#define SQ_ALU_WORD1_DEFAULT 0xcdcd8000 + +#define SQ_ALU_WORD1_GET_ENCODING(sq_alu_word1) \ + ((sq_alu_word1 & SQ_ALU_WORD1_ENCODING_MASK) >> SQ_ALU_WORD1_ENCODING_SHIFT) +#define SQ_ALU_WORD1_GET_BANK_SWIZZLE(sq_alu_word1) \ + ((sq_alu_word1 & SQ_ALU_WORD1_BANK_SWIZZLE_MASK) >> SQ_ALU_WORD1_BANK_SWIZZLE_SHIFT) +#define SQ_ALU_WORD1_GET_DST_GPR(sq_alu_word1) \ + ((sq_alu_word1 & SQ_ALU_WORD1_DST_GPR_MASK) >> SQ_ALU_WORD1_DST_GPR_SHIFT) +#define SQ_ALU_WORD1_GET_DST_REL(sq_alu_word1) \ + ((sq_alu_word1 & SQ_ALU_WORD1_DST_REL_MASK) >> SQ_ALU_WORD1_DST_REL_SHIFT) +#define SQ_ALU_WORD1_GET_DST_CHAN(sq_alu_word1) \ + ((sq_alu_word1 & SQ_ALU_WORD1_DST_CHAN_MASK) >> SQ_ALU_WORD1_DST_CHAN_SHIFT) +#define SQ_ALU_WORD1_GET_CLAMP(sq_alu_word1) \ + ((sq_alu_word1 & SQ_ALU_WORD1_CLAMP_MASK) >> SQ_ALU_WORD1_CLAMP_SHIFT) + +#define SQ_ALU_WORD1_SET_ENCODING(sq_alu_word1_reg, encoding) \ + sq_alu_word1_reg = (sq_alu_word1_reg & ~SQ_ALU_WORD1_ENCODING_MASK) | (encoding << SQ_ALU_WORD1_ENCODING_SHIFT) +#define SQ_ALU_WORD1_SET_BANK_SWIZZLE(sq_alu_word1_reg, bank_swizzle) \ + sq_alu_word1_reg = (sq_alu_word1_reg & ~SQ_ALU_WORD1_BANK_SWIZZLE_MASK) | (bank_swizzle << SQ_ALU_WORD1_BANK_SWIZZLE_SHIFT) +#define SQ_ALU_WORD1_SET_DST_GPR(sq_alu_word1_reg, dst_gpr) \ + sq_alu_word1_reg = (sq_alu_word1_reg & ~SQ_ALU_WORD1_DST_GPR_MASK) | (dst_gpr << SQ_ALU_WORD1_DST_GPR_SHIFT) +#define SQ_ALU_WORD1_SET_DST_REL(sq_alu_word1_reg, dst_rel) \ + sq_alu_word1_reg = (sq_alu_word1_reg & ~SQ_ALU_WORD1_DST_REL_MASK) | (dst_rel << SQ_ALU_WORD1_DST_REL_SHIFT) +#define SQ_ALU_WORD1_SET_DST_CHAN(sq_alu_word1_reg, dst_chan) \ + sq_alu_word1_reg = (sq_alu_word1_reg & ~SQ_ALU_WORD1_DST_CHAN_MASK) | (dst_chan << SQ_ALU_WORD1_DST_CHAN_SHIFT) +#define SQ_ALU_WORD1_SET_CLAMP(sq_alu_word1_reg, clamp) \ + sq_alu_word1_reg = (sq_alu_word1_reg & ~SQ_ALU_WORD1_CLAMP_MASK) | (clamp << SQ_ALU_WORD1_CLAMP_SHIFT) + +#if defined(LITTLEENDIAN_CPU) + + typedef struct _sq_alu_word1_t { + unsigned int : 15; + unsigned int encoding : SQ_ALU_WORD1_ENCODING_SIZE; + unsigned int bank_swizzle : SQ_ALU_WORD1_BANK_SWIZZLE_SIZE; + unsigned int dst_gpr : SQ_ALU_WORD1_DST_GPR_SIZE; + unsigned int dst_rel : SQ_ALU_WORD1_DST_REL_SIZE; + unsigned int dst_chan : SQ_ALU_WORD1_DST_CHAN_SIZE; + unsigned int clamp : SQ_ALU_WORD1_CLAMP_SIZE; + } sq_alu_word1_t; + +#elif defined(BIGENDIAN_CPU) + + typedef struct _sq_alu_word1_t { + unsigned int clamp : SQ_ALU_WORD1_CLAMP_SIZE; + unsigned int dst_chan : SQ_ALU_WORD1_DST_CHAN_SIZE; + unsigned int dst_rel : SQ_ALU_WORD1_DST_REL_SIZE; + unsigned int dst_gpr : SQ_ALU_WORD1_DST_GPR_SIZE; + unsigned int bank_swizzle : SQ_ALU_WORD1_BANK_SWIZZLE_SIZE; + unsigned int encoding : SQ_ALU_WORD1_ENCODING_SIZE; + unsigned int : 15; + } sq_alu_word1_t; + +#endif + +typedef union { + unsigned int val : 32; + sq_alu_word1_t f; +} sq_alu_word1_u; + + +/* + * SQ_ALU_WORD1_OP2_V2 struct + */ + +#define SQ_ALU_WORD1_OP2_V2_SRC0_ABS_SIZE 1 +#define SQ_ALU_WORD1_OP2_V2_SRC1_ABS_SIZE 1 +#define SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK_SIZE 1 +#define SQ_ALU_WORD1_OP2_V2_UPDATE_PRED_SIZE 1 +#define SQ_ALU_WORD1_OP2_V2_WRITE_MASK_SIZE 1 +#define SQ_ALU_WORD1_OP2_V2_OMOD_SIZE 2 +#define SQ_ALU_WORD1_OP2_V2_ALU_INST_SIZE 11 + +#define SQ_ALU_WORD1_OP2_V2_SRC0_ABS_SHIFT 0 +#define SQ_ALU_WORD1_OP2_V2_SRC1_ABS_SHIFT 1 +#define SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK_SHIFT 2 +#define SQ_ALU_WORD1_OP2_V2_UPDATE_PRED_SHIFT 3 +#define SQ_ALU_WORD1_OP2_V2_WRITE_MASK_SHIFT 4 +#define SQ_ALU_WORD1_OP2_V2_OMOD_SHIFT 5 +#define SQ_ALU_WORD1_OP2_V2_ALU_INST_SHIFT 7 + +#define SQ_ALU_WORD1_OP2_V2_SRC0_ABS_MASK 0x00000001 +#define SQ_ALU_WORD1_OP2_V2_SRC1_ABS_MASK 0x00000002 +#define SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK_MASK 0x00000004 +#define SQ_ALU_WORD1_OP2_V2_UPDATE_PRED_MASK 0x00000008 +#define SQ_ALU_WORD1_OP2_V2_WRITE_MASK_MASK 0x00000010 +#define SQ_ALU_WORD1_OP2_V2_OMOD_MASK 0x00000060 +#define SQ_ALU_WORD1_OP2_V2_ALU_INST_MASK 0x0003ff80 + +#define SQ_ALU_WORD1_OP2_V2_MASK \ + (SQ_ALU_WORD1_OP2_V2_SRC0_ABS_MASK | \ + SQ_ALU_WORD1_OP2_V2_SRC1_ABS_MASK | \ + SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK_MASK | \ + SQ_ALU_WORD1_OP2_V2_UPDATE_PRED_MASK | \ + SQ_ALU_WORD1_OP2_V2_WRITE_MASK_MASK | \ + SQ_ALU_WORD1_OP2_V2_OMOD_MASK | \ + SQ_ALU_WORD1_OP2_V2_ALU_INST_MASK) + +#define SQ_ALU_WORD1_OP2_V2_DEFAULT 0x0001cdcd + +#define SQ_ALU_WORD1_OP2_V2_GET_SRC0_ABS(sq_alu_word1_op2_v2) \ + ((sq_alu_word1_op2_v2 & SQ_ALU_WORD1_OP2_V2_SRC0_ABS_MASK) >> SQ_ALU_WORD1_OP2_V2_SRC0_ABS_SHIFT) +#define SQ_ALU_WORD1_OP2_V2_GET_SRC1_ABS(sq_alu_word1_op2_v2) \ + ((sq_alu_word1_op2_v2 & SQ_ALU_WORD1_OP2_V2_SRC1_ABS_MASK) >> SQ_ALU_WORD1_OP2_V2_SRC1_ABS_SHIFT) +#define SQ_ALU_WORD1_OP2_V2_GET_UPDATE_EXECUTE_MASK(sq_alu_word1_op2_v2) \ + ((sq_alu_word1_op2_v2 & SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK_MASK) >> SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK_SHIFT) +#define SQ_ALU_WORD1_OP2_V2_GET_UPDATE_PRED(sq_alu_word1_op2_v2) \ + ((sq_alu_word1_op2_v2 & SQ_ALU_WORD1_OP2_V2_UPDATE_PRED_MASK) >> SQ_ALU_WORD1_OP2_V2_UPDATE_PRED_SHIFT) +#define SQ_ALU_WORD1_OP2_V2_GET_WRITE_MASK(sq_alu_word1_op2_v2) \ + ((sq_alu_word1_op2_v2 & SQ_ALU_WORD1_OP2_V2_WRITE_MASK_MASK) >> SQ_ALU_WORD1_OP2_V2_WRITE_MASK_SHIFT) +#define SQ_ALU_WORD1_OP2_V2_GET_OMOD(sq_alu_word1_op2_v2) \ + ((sq_alu_word1_op2_v2 & SQ_ALU_WORD1_OP2_V2_OMOD_MASK) >> SQ_ALU_WORD1_OP2_V2_OMOD_SHIFT) +#define SQ_ALU_WORD1_OP2_V2_GET_ALU_INST(sq_alu_word1_op2_v2) \ + ((sq_alu_word1_op2_v2 & SQ_ALU_WORD1_OP2_V2_ALU_INST_MASK) >> SQ_ALU_WORD1_OP2_V2_ALU_INST_SHIFT) + +#define SQ_ALU_WORD1_OP2_V2_SET_SRC0_ABS(sq_alu_word1_op2_v2_reg, src0_abs) \ + sq_alu_word1_op2_v2_reg = (sq_alu_word1_op2_v2_reg & ~SQ_ALU_WORD1_OP2_V2_SRC0_ABS_MASK) | (src0_abs << SQ_ALU_WORD1_OP2_V2_SRC0_ABS_SHIFT) +#define SQ_ALU_WORD1_OP2_V2_SET_SRC1_ABS(sq_alu_word1_op2_v2_reg, src1_abs) \ + sq_alu_word1_op2_v2_reg = (sq_alu_word1_op2_v2_reg & ~SQ_ALU_WORD1_OP2_V2_SRC1_ABS_MASK) | (src1_abs << SQ_ALU_WORD1_OP2_V2_SRC1_ABS_SHIFT) +#define SQ_ALU_WORD1_OP2_V2_SET_UPDATE_EXECUTE_MASK(sq_alu_word1_op2_v2_reg, update_execute_mask) \ + sq_alu_word1_op2_v2_reg = (sq_alu_word1_op2_v2_reg & ~SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK_MASK) | (update_execute_mask << SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK_SHIFT) +#define SQ_ALU_WORD1_OP2_V2_SET_UPDATE_PRED(sq_alu_word1_op2_v2_reg, update_pred) \ + sq_alu_word1_op2_v2_reg = (sq_alu_word1_op2_v2_reg & ~SQ_ALU_WORD1_OP2_V2_UPDATE_PRED_MASK) | (update_pred << SQ_ALU_WORD1_OP2_V2_UPDATE_PRED_SHIFT) +#define SQ_ALU_WORD1_OP2_V2_SET_WRITE_MASK(sq_alu_word1_op2_v2_reg, write_mask) \ + sq_alu_word1_op2_v2_reg = (sq_alu_word1_op2_v2_reg & ~SQ_ALU_WORD1_OP2_V2_WRITE_MASK_MASK) | (write_mask << SQ_ALU_WORD1_OP2_V2_WRITE_MASK_SHIFT) +#define SQ_ALU_WORD1_OP2_V2_SET_OMOD(sq_alu_word1_op2_v2_reg, omod) \ + sq_alu_word1_op2_v2_reg = (sq_alu_word1_op2_v2_reg & ~SQ_ALU_WORD1_OP2_V2_OMOD_MASK) | (omod << SQ_ALU_WORD1_OP2_V2_OMOD_SHIFT) +#define SQ_ALU_WORD1_OP2_V2_SET_ALU_INST(sq_alu_word1_op2_v2_reg, alu_inst) \ + sq_alu_word1_op2_v2_reg = (sq_alu_word1_op2_v2_reg & ~SQ_ALU_WORD1_OP2_V2_ALU_INST_MASK) | (alu_inst << SQ_ALU_WORD1_OP2_V2_ALU_INST_SHIFT) + +#if defined(LITTLEENDIAN_CPU) + + typedef struct _sq_alu_word1_op2_v2_t { + unsigned int src0_abs : SQ_ALU_WORD1_OP2_V2_SRC0_ABS_SIZE; + unsigned int src1_abs : SQ_ALU_WORD1_OP2_V2_SRC1_ABS_SIZE; + unsigned int update_execute_mask : SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK_SIZE; + unsigned int update_pred : SQ_ALU_WORD1_OP2_V2_UPDATE_PRED_SIZE; + unsigned int write_mask : SQ_ALU_WORD1_OP2_V2_WRITE_MASK_SIZE; + unsigned int omod : SQ_ALU_WORD1_OP2_V2_OMOD_SIZE; + unsigned int alu_inst : SQ_ALU_WORD1_OP2_V2_ALU_INST_SIZE; + unsigned int : 14; + } sq_alu_word1_op2_v2_t; + +#elif defined(BIGENDIAN_CPU) + + typedef struct _sq_alu_word1_op2_v2_t { + unsigned int : 14; + unsigned int alu_inst : SQ_ALU_WORD1_OP2_V2_ALU_INST_SIZE; + unsigned int omod : SQ_ALU_WORD1_OP2_V2_OMOD_SIZE; + unsigned int write_mask : SQ_ALU_WORD1_OP2_V2_WRITE_MASK_SIZE; + unsigned int update_pred : SQ_ALU_WORD1_OP2_V2_UPDATE_PRED_SIZE; + unsigned int update_execute_mask : SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK_SIZE; + unsigned int src1_abs : SQ_ALU_WORD1_OP2_V2_SRC1_ABS_SIZE; + unsigned int src0_abs : SQ_ALU_WORD1_OP2_V2_SRC0_ABS_SIZE; + } sq_alu_word1_op2_v2_t; + +#endif + +#if defined(LITTLEENDIAN_CPU) + + typedef struct _sq_alu_word1_op2_r6xx_t { + unsigned int src0_abs : SQ_ALU_WORD1_OP2_V2_SRC0_ABS_SIZE; + unsigned int src1_abs : SQ_ALU_WORD1_OP2_V2_SRC1_ABS_SIZE; + unsigned int update_execute_mask : SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK_SIZE; + unsigned int update_pred : SQ_ALU_WORD1_OP2_V2_UPDATE_PRED_SIZE; + unsigned int write_mask : SQ_ALU_WORD1_OP2_V2_WRITE_MASK_SIZE; + unsigned int fog_export : 1; + unsigned int omod : SQ_ALU_WORD1_OP2_V2_OMOD_SIZE; + unsigned int alu_inst : 10; + unsigned int : 14; + } sq_alu_word1_op2_v1_t; + +#elif defined(BIGENDIAN_CPU) + + typedef struct _sq_alu_word1_op2_r6xx_t { + unsigned int : 14; + unsigned int alu_inst : 10; + unsigned int omod : SQ_ALU_WORD1_OP2_V2_OMOD_SIZE; + unsigned int fog_export : 1; + unsigned int write_mask : SQ_ALU_WORD1_OP2_V2_WRITE_MASK_SIZE; + unsigned int update_pred : SQ_ALU_WORD1_OP2_V2_UPDATE_PRED_SIZE; + unsigned int update_execute_mask : SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK_SIZE; + unsigned int src1_abs : SQ_ALU_WORD1_OP2_V2_SRC1_ABS_SIZE; + unsigned int src0_abs : SQ_ALU_WORD1_OP2_V2_SRC0_ABS_SIZE; + } sq_alu_word1_op2_v1_t; + +#endif + +typedef union { + unsigned int val : 32; + sq_alu_word1_op2_v2_t f; + sq_alu_word1_op2_v1_t f6; +} sq_alu_word1_op2_v2_u; + + +/* + * SQ_ALU_WORD1_OP3 struct + */ + +#define SQ_ALU_WORD1_OP3_SRC2_SEL_SIZE 9 +#define SQ_ALU_WORD1_OP3_SRC2_REL_SIZE 1 +#define SQ_ALU_WORD1_OP3_SRC2_CHAN_SIZE 2 +#define SQ_ALU_WORD1_OP3_SRC2_NEG_SIZE 1 +#define SQ_ALU_WORD1_OP3_ALU_INST_SIZE 5 + +#define SQ_ALU_WORD1_OP3_SRC2_SEL_SHIFT 0 +#define SQ_ALU_WORD1_OP3_SRC2_REL_SHIFT 9 +#define SQ_ALU_WORD1_OP3_SRC2_CHAN_SHIFT 10 +#define SQ_ALU_WORD1_OP3_SRC2_NEG_SHIFT 12 +#define SQ_ALU_WORD1_OP3_ALU_INST_SHIFT 13 + +#define SQ_ALU_WORD1_OP3_SRC2_SEL_MASK 0x000001ff +#define SQ_ALU_WORD1_OP3_SRC2_REL_MASK 0x00000200 +#define SQ_ALU_WORD1_OP3_SRC2_CHAN_MASK 0x00000c00 +#define SQ_ALU_WORD1_OP3_SRC2_NEG_MASK 0x00001000 +#define SQ_ALU_WORD1_OP3_ALU_INST_MASK 0x0003e000 + +#define SQ_ALU_WORD1_OP3_MASK \ + (SQ_ALU_WORD1_OP3_SRC2_SEL_MASK | \ + SQ_ALU_WORD1_OP3_SRC2_REL_MASK | \ + SQ_ALU_WORD1_OP3_SRC2_CHAN_MASK | \ + SQ_ALU_WORD1_OP3_SRC2_NEG_MASK | \ + SQ_ALU_WORD1_OP3_ALU_INST_MASK) + +#define SQ_ALU_WORD1_OP3_DEFAULT 0x0001cdcd + +#define SQ_ALU_WORD1_OP3_GET_SRC2_SEL(sq_alu_word1_op3) \ + ((sq_alu_word1_op3 & SQ_ALU_WORD1_OP3_SRC2_SEL_MASK) >> SQ_ALU_WORD1_OP3_SRC2_SEL_SHIFT) +#define SQ_ALU_WORD1_OP3_GET_SRC2_REL(sq_alu_word1_op3) \ + ((sq_alu_word1_op3 & SQ_ALU_WORD1_OP3_SRC2_REL_MASK) >> SQ_ALU_WORD1_OP3_SRC2_REL_SHIFT) +#define SQ_ALU_WORD1_OP3_GET_SRC2_CHAN(sq_alu_word1_op3) \ + ((sq_alu_word1_op3 & SQ_ALU_WORD1_OP3_SRC2_CHAN_MASK) >> SQ_ALU_WORD1_OP3_SRC2_CHAN_SHIFT) +#define SQ_ALU_WORD1_OP3_GET_SRC2_NEG(sq_alu_word1_op3) \ + ((sq_alu_word1_op3 & SQ_ALU_WORD1_OP3_SRC2_NEG_MASK) >> SQ_ALU_WORD1_OP3_SRC2_NEG_SHIFT) +#define SQ_ALU_WORD1_OP3_GET_ALU_INST(sq_alu_word1_op3) \ + ((sq_alu_word1_op3 & SQ_ALU_WORD1_OP3_ALU_INST_MASK) >> SQ_ALU_WORD1_OP3_ALU_INST_SHIFT) + +#define SQ_ALU_WORD1_OP3_SET_SRC2_SEL(sq_alu_word1_op3_reg, src2_sel) \ + sq_alu_word1_op3_reg = (sq_alu_word1_op3_reg & ~SQ_ALU_WORD1_OP3_SRC2_SEL_MASK) | (src2_sel << SQ_ALU_WORD1_OP3_SRC2_SEL_SHIFT) +#define SQ_ALU_WORD1_OP3_SET_SRC2_REL(sq_alu_word1_op3_reg, src2_rel) \ + sq_alu_word1_op3_reg = (sq_alu_word1_op3_reg & ~SQ_ALU_WORD1_OP3_SRC2_REL_MASK) | (src2_rel << SQ_ALU_WORD1_OP3_SRC2_REL_SHIFT) +#define SQ_ALU_WORD1_OP3_SET_SRC2_CHAN(sq_alu_word1_op3_reg, src2_chan) \ + sq_alu_word1_op3_reg = (sq_alu_word1_op3_reg & ~SQ_ALU_WORD1_OP3_SRC2_CHAN_MASK) | (src2_chan << SQ_ALU_WORD1_OP3_SRC2_CHAN_SHIFT) +#define SQ_ALU_WORD1_OP3_SET_SRC2_NEG(sq_alu_word1_op3_reg, src2_neg) \ + sq_alu_word1_op3_reg = (sq_alu_word1_op3_reg & ~SQ_ALU_WORD1_OP3_SRC2_NEG_MASK) | (src2_neg << SQ_ALU_WORD1_OP3_SRC2_NEG_SHIFT) +#define SQ_ALU_WORD1_OP3_SET_ALU_INST(sq_alu_word1_op3_reg, alu_inst) \ + sq_alu_word1_op3_reg = (sq_alu_word1_op3_reg & ~SQ_ALU_WORD1_OP3_ALU_INST_MASK) | (alu_inst << SQ_ALU_WORD1_OP3_ALU_INST_SHIFT) + +#if defined(LITTLEENDIAN_CPU) + + typedef struct _sq_alu_word1_op3_t { + unsigned int src2_sel : SQ_ALU_WORD1_OP3_SRC2_SEL_SIZE; + unsigned int src2_rel : SQ_ALU_WORD1_OP3_SRC2_REL_SIZE; + unsigned int src2_chan : SQ_ALU_WORD1_OP3_SRC2_CHAN_SIZE; + unsigned int src2_neg : SQ_ALU_WORD1_OP3_SRC2_NEG_SIZE; + unsigned int alu_inst : SQ_ALU_WORD1_OP3_ALU_INST_SIZE; + unsigned int : 14; + } sq_alu_word1_op3_t; + +#elif defined(BIGENDIAN_CPU) + + typedef struct _sq_alu_word1_op3_t { + unsigned int : 14; + unsigned int alu_inst : SQ_ALU_WORD1_OP3_ALU_INST_SIZE; + unsigned int src2_neg : SQ_ALU_WORD1_OP3_SRC2_NEG_SIZE; + unsigned int src2_chan : SQ_ALU_WORD1_OP3_SRC2_CHAN_SIZE; + unsigned int src2_rel : SQ_ALU_WORD1_OP3_SRC2_REL_SIZE; + unsigned int src2_sel : SQ_ALU_WORD1_OP3_SRC2_SEL_SIZE; + } sq_alu_word1_op3_t; + +#endif + +typedef union { + unsigned int val : 32; + sq_alu_word1_op3_t f; +} sq_alu_word1_op3_u; + + +/* + * SQ_TEX_WORD0 struct + */ + +#define SQ_TEX_WORD0_TEX_INST_SIZE 5 +#define SQ_TEX_WORD0_BC_FRAC_MODE_SIZE 1 +#define SQ_TEX_WORD0_FETCH_WHOLE_QUAD_SIZE 1 +#define SQ_TEX_WORD0_RESOURCE_ID_SIZE 8 +#define SQ_TEX_WORD0_SRC_GPR_SIZE 7 +#define SQ_TEX_WORD0_SRC_REL_SIZE 1 +#define SQ_TEX_WORD0_ALT_CONST_SIZE 1 + +#define SQ_TEX_WORD0_TEX_INST_SHIFT 0 +#define SQ_TEX_WORD0_BC_FRAC_MODE_SHIFT 5 +#define SQ_TEX_WORD0_FETCH_WHOLE_QUAD_SHIFT 7 +#define SQ_TEX_WORD0_RESOURCE_ID_SHIFT 8 +#define SQ_TEX_WORD0_SRC_GPR_SHIFT 16 +#define SQ_TEX_WORD0_SRC_REL_SHIFT 23 +#define SQ_TEX_WORD0_ALT_CONST_SHIFT 24 + +#define SQ_TEX_WORD0_TEX_INST_MASK 0x0000001f +#define SQ_TEX_WORD0_BC_FRAC_MODE_MASK 0x00000020 +#define SQ_TEX_WORD0_FETCH_WHOLE_QUAD_MASK 0x00000080 +#define SQ_TEX_WORD0_RESOURCE_ID_MASK 0x0000ff00 +#define SQ_TEX_WORD0_SRC_GPR_MASK 0x007f0000 +#define SQ_TEX_WORD0_SRC_REL_MASK 0x00800000 +#define SQ_TEX_WORD0_ALT_CONST_MASK 0x01000000 + +#define SQ_TEX_WORD0_MASK \ + (SQ_TEX_WORD0_TEX_INST_MASK | \ + SQ_TEX_WORD0_BC_FRAC_MODE_MASK | \ + SQ_TEX_WORD0_FETCH_WHOLE_QUAD_MASK | \ + SQ_TEX_WORD0_RESOURCE_ID_MASK | \ + SQ_TEX_WORD0_SRC_GPR_MASK | \ + SQ_TEX_WORD0_SRC_REL_MASK | \ + SQ_TEX_WORD0_ALT_CONST_MASK) + +#define SQ_TEX_WORD0_DEFAULT 0x01cdcd8d + +#define SQ_TEX_WORD0_GET_TEX_INST(sq_tex_word0) \ + ((sq_tex_word0 & SQ_TEX_WORD0_TEX_INST_MASK) >> SQ_TEX_WORD0_TEX_INST_SHIFT) +#define SQ_TEX_WORD0_GET_BC_FRAC_MODE(sq_tex_word0) \ + ((sq_tex_word0 & SQ_TEX_WORD0_BC_FRAC_MODE_MASK) >> SQ_TEX_WORD0_BC_FRAC_MODE_SHIFT) +#define SQ_TEX_WORD0_GET_FETCH_WHOLE_QUAD(sq_tex_word0) \ + ((sq_tex_word0 & SQ_TEX_WORD0_FETCH_WHOLE_QUAD_MASK) >> SQ_TEX_WORD0_FETCH_WHOLE_QUAD_SHIFT) +#define SQ_TEX_WORD0_GET_RESOURCE_ID(sq_tex_word0) \ + ((sq_tex_word0 & SQ_TEX_WORD0_RESOURCE_ID_MASK) >> SQ_TEX_WORD0_RESOURCE_ID_SHIFT) +#define SQ_TEX_WORD0_GET_SRC_GPR(sq_tex_word0) \ + ((sq_tex_word0 & SQ_TEX_WORD0_SRC_GPR_MASK) >> SQ_TEX_WORD0_SRC_GPR_SHIFT) +#define SQ_TEX_WORD0_GET_SRC_REL(sq_tex_word0) \ + ((sq_tex_word0 & SQ_TEX_WORD0_SRC_REL_MASK) >> SQ_TEX_WORD0_SRC_REL_SHIFT) +#define SQ_TEX_WORD0_GET_ALT_CONST(sq_tex_word0) \ + ((sq_tex_word0 & SQ_TEX_WORD0_ALT_CONST_MASK) >> SQ_TEX_WORD0_ALT_CONST_SHIFT) + +#define SQ_TEX_WORD0_SET_TEX_INST(sq_tex_word0_reg, tex_inst) \ + sq_tex_word0_reg = (sq_tex_word0_reg & ~SQ_TEX_WORD0_TEX_INST_MASK) | (tex_inst << SQ_TEX_WORD0_TEX_INST_SHIFT) +#define SQ_TEX_WORD0_SET_BC_FRAC_MODE(sq_tex_word0_reg, bc_frac_mode) \ + sq_tex_word0_reg = (sq_tex_word0_reg & ~SQ_TEX_WORD0_BC_FRAC_MODE_MASK) | (bc_frac_mode << SQ_TEX_WORD0_BC_FRAC_MODE_SHIFT) +#define SQ_TEX_WORD0_SET_FETCH_WHOLE_QUAD(sq_tex_word0_reg, fetch_whole_quad) \ + sq_tex_word0_reg = (sq_tex_word0_reg & ~SQ_TEX_WORD0_FETCH_WHOLE_QUAD_MASK) | (fetch_whole_quad << SQ_TEX_WORD0_FETCH_WHOLE_QUAD_SHIFT) +#define SQ_TEX_WORD0_SET_RESOURCE_ID(sq_tex_word0_reg, resource_id) \ + sq_tex_word0_reg = (sq_tex_word0_reg & ~SQ_TEX_WORD0_RESOURCE_ID_MASK) | (resource_id << SQ_TEX_WORD0_RESOURCE_ID_SHIFT) +#define SQ_TEX_WORD0_SET_SRC_GPR(sq_tex_word0_reg, src_gpr) \ + sq_tex_word0_reg = (sq_tex_word0_reg & ~SQ_TEX_WORD0_SRC_GPR_MASK) | (src_gpr << SQ_TEX_WORD0_SRC_GPR_SHIFT) +#define SQ_TEX_WORD0_SET_SRC_REL(sq_tex_word0_reg, src_rel) \ + sq_tex_word0_reg = (sq_tex_word0_reg & ~SQ_TEX_WORD0_SRC_REL_MASK) | (src_rel << SQ_TEX_WORD0_SRC_REL_SHIFT) +#define SQ_TEX_WORD0_SET_ALT_CONST(sq_tex_word0_reg, alt_const) \ + sq_tex_word0_reg = (sq_tex_word0_reg & ~SQ_TEX_WORD0_ALT_CONST_MASK) | (alt_const << SQ_TEX_WORD0_ALT_CONST_SHIFT) + +#if defined(LITTLEENDIAN_CPU) + + typedef struct _sq_tex_word0_t { + unsigned int tex_inst : SQ_TEX_WORD0_TEX_INST_SIZE; + unsigned int bc_frac_mode : SQ_TEX_WORD0_BC_FRAC_MODE_SIZE; + unsigned int : 1; + unsigned int fetch_whole_quad : SQ_TEX_WORD0_FETCH_WHOLE_QUAD_SIZE; + unsigned int resource_id : SQ_TEX_WORD0_RESOURCE_ID_SIZE; + unsigned int src_gpr : SQ_TEX_WORD0_SRC_GPR_SIZE; + unsigned int src_rel : SQ_TEX_WORD0_SRC_REL_SIZE; + unsigned int alt_const : SQ_TEX_WORD0_ALT_CONST_SIZE; + unsigned int : 7; + } sq_tex_word0_t; + +#elif defined(BIGENDIAN_CPU) + + typedef struct _sq_tex_word0_t { + unsigned int : 7; + unsigned int alt_const : SQ_TEX_WORD0_ALT_CONST_SIZE; + unsigned int src_rel : SQ_TEX_WORD0_SRC_REL_SIZE; + unsigned int src_gpr : SQ_TEX_WORD0_SRC_GPR_SIZE; + unsigned int resource_id : SQ_TEX_WORD0_RESOURCE_ID_SIZE; + unsigned int fetch_whole_quad : SQ_TEX_WORD0_FETCH_WHOLE_QUAD_SIZE; + unsigned int : 1; + unsigned int bc_frac_mode : SQ_TEX_WORD0_BC_FRAC_MODE_SIZE; + unsigned int tex_inst : SQ_TEX_WORD0_TEX_INST_SIZE; + } sq_tex_word0_t; + +#endif + +typedef union { + unsigned int val : 32; + sq_tex_word0_t f; +} sq_tex_word0_u; + + +/* + * SQ_TEX_WORD1 struct + */ + +#define SQ_TEX_WORD1_DST_GPR_SIZE 7 +#define SQ_TEX_WORD1_DST_REL_SIZE 1 +#define SQ_TEX_WORD1_DST_SEL_X_SIZE 3 +#define SQ_TEX_WORD1_DST_SEL_Y_SIZE 3 +#define SQ_TEX_WORD1_DST_SEL_Z_SIZE 3 +#define SQ_TEX_WORD1_DST_SEL_W_SIZE 3 +#define SQ_TEX_WORD1_LOD_BIAS_SIZE 7 +#define SQ_TEX_WORD1_COORD_TYPE_X_SIZE 1 +#define SQ_TEX_WORD1_COORD_TYPE_Y_SIZE 1 +#define SQ_TEX_WORD1_COORD_TYPE_Z_SIZE 1 +#define SQ_TEX_WORD1_COORD_TYPE_W_SIZE 1 + +#define SQ_TEX_WORD1_DST_GPR_SHIFT 0 +#define SQ_TEX_WORD1_DST_REL_SHIFT 7 +#define SQ_TEX_WORD1_DST_SEL_X_SHIFT 9 +#define SQ_TEX_WORD1_DST_SEL_Y_SHIFT 12 +#define SQ_TEX_WORD1_DST_SEL_Z_SHIFT 15 +#define SQ_TEX_WORD1_DST_SEL_W_SHIFT 18 +#define SQ_TEX_WORD1_LOD_BIAS_SHIFT 21 +#define SQ_TEX_WORD1_COORD_TYPE_X_SHIFT 28 +#define SQ_TEX_WORD1_COORD_TYPE_Y_SHIFT 29 +#define SQ_TEX_WORD1_COORD_TYPE_Z_SHIFT 30 +#define SQ_TEX_WORD1_COORD_TYPE_W_SHIFT 31 + +#define SQ_TEX_WORD1_DST_GPR_MASK 0x0000007f +#define SQ_TEX_WORD1_DST_REL_MASK 0x00000080 +#define SQ_TEX_WORD1_DST_SEL_X_MASK 0x00000e00 +#define SQ_TEX_WORD1_DST_SEL_Y_MASK 0x00007000 +#define SQ_TEX_WORD1_DST_SEL_Z_MASK 0x00038000 +#define SQ_TEX_WORD1_DST_SEL_W_MASK 0x001c0000 +#define SQ_TEX_WORD1_LOD_BIAS_MASK 0x0fe00000 +#define SQ_TEX_WORD1_COORD_TYPE_X_MASK 0x10000000 +#define SQ_TEX_WORD1_COORD_TYPE_Y_MASK 0x20000000 +#define SQ_TEX_WORD1_COORD_TYPE_Z_MASK 0x40000000 +#define SQ_TEX_WORD1_COORD_TYPE_W_MASK 0x80000000 + +#define SQ_TEX_WORD1_MASK \ + (SQ_TEX_WORD1_DST_GPR_MASK | \ + SQ_TEX_WORD1_DST_REL_MASK | \ + SQ_TEX_WORD1_DST_SEL_X_MASK | \ + SQ_TEX_WORD1_DST_SEL_Y_MASK | \ + SQ_TEX_WORD1_DST_SEL_Z_MASK | \ + SQ_TEX_WORD1_DST_SEL_W_MASK | \ + SQ_TEX_WORD1_LOD_BIAS_MASK | \ + SQ_TEX_WORD1_COORD_TYPE_X_MASK | \ + SQ_TEX_WORD1_COORD_TYPE_Y_MASK | \ + SQ_TEX_WORD1_COORD_TYPE_Z_MASK | \ + SQ_TEX_WORD1_COORD_TYPE_W_MASK) + +#define SQ_TEX_WORD1_DEFAULT 0xcdcdcccd + +#define SQ_TEX_WORD1_GET_DST_GPR(sq_tex_word1) \ + ((sq_tex_word1 & SQ_TEX_WORD1_DST_GPR_MASK) >> SQ_TEX_WORD1_DST_GPR_SHIFT) +#define SQ_TEX_WORD1_GET_DST_REL(sq_tex_word1) \ + ((sq_tex_word1 & SQ_TEX_WORD1_DST_REL_MASK) >> SQ_TEX_WORD1_DST_REL_SHIFT) +#define SQ_TEX_WORD1_GET_DST_SEL_X(sq_tex_word1) \ + ((sq_tex_word1 & SQ_TEX_WORD1_DST_SEL_X_MASK) >> SQ_TEX_WORD1_DST_SEL_X_SHIFT) +#define SQ_TEX_WORD1_GET_DST_SEL_Y(sq_tex_word1) \ + ((sq_tex_word1 & SQ_TEX_WORD1_DST_SEL_Y_MASK) >> SQ_TEX_WORD1_DST_SEL_Y_SHIFT) +#define SQ_TEX_WORD1_GET_DST_SEL_Z(sq_tex_word1) \ + ((sq_tex_word1 & SQ_TEX_WORD1_DST_SEL_Z_MASK) >> SQ_TEX_WORD1_DST_SEL_Z_SHIFT) +#define SQ_TEX_WORD1_GET_DST_SEL_W(sq_tex_word1) \ + ((sq_tex_word1 & SQ_TEX_WORD1_DST_SEL_W_MASK) >> SQ_TEX_WORD1_DST_SEL_W_SHIFT) +#define SQ_TEX_WORD1_GET_LOD_BIAS(sq_tex_word1) \ + ((sq_tex_word1 & SQ_TEX_WORD1_LOD_BIAS_MASK) >> SQ_TEX_WORD1_LOD_BIAS_SHIFT) +#define SQ_TEX_WORD1_GET_COORD_TYPE_X(sq_tex_word1) \ + ((sq_tex_word1 & SQ_TEX_WORD1_COORD_TYPE_X_MASK) >> SQ_TEX_WORD1_COORD_TYPE_X_SHIFT) +#define SQ_TEX_WORD1_GET_COORD_TYPE_Y(sq_tex_word1) \ + ((sq_tex_word1 & SQ_TEX_WORD1_COORD_TYPE_Y_MASK) >> SQ_TEX_WORD1_COORD_TYPE_Y_SHIFT) +#define SQ_TEX_WORD1_GET_COORD_TYPE_Z(sq_tex_word1) \ + ((sq_tex_word1 & SQ_TEX_WORD1_COORD_TYPE_Z_MASK) >> SQ_TEX_WORD1_COORD_TYPE_Z_SHIFT) +#define SQ_TEX_WORD1_GET_COORD_TYPE_W(sq_tex_word1) \ + ((sq_tex_word1 & SQ_TEX_WORD1_COORD_TYPE_W_MASK) >> SQ_TEX_WORD1_COORD_TYPE_W_SHIFT) + +#define SQ_TEX_WORD1_SET_DST_GPR(sq_tex_word1_reg, dst_gpr) \ + sq_tex_word1_reg = (sq_tex_word1_reg & ~SQ_TEX_WORD1_DST_GPR_MASK) | (dst_gpr << SQ_TEX_WORD1_DST_GPR_SHIFT) +#define SQ_TEX_WORD1_SET_DST_REL(sq_tex_word1_reg, dst_rel) \ + sq_tex_word1_reg = (sq_tex_word1_reg & ~SQ_TEX_WORD1_DST_REL_MASK) | (dst_rel << SQ_TEX_WORD1_DST_REL_SHIFT) +#define SQ_TEX_WORD1_SET_DST_SEL_X(sq_tex_word1_reg, dst_sel_x) \ + sq_tex_word1_reg = (sq_tex_word1_reg & ~SQ_TEX_WORD1_DST_SEL_X_MASK) | (dst_sel_x << SQ_TEX_WORD1_DST_SEL_X_SHIFT) +#define SQ_TEX_WORD1_SET_DST_SEL_Y(sq_tex_word1_reg, dst_sel_y) \ + sq_tex_word1_reg = (sq_tex_word1_reg & ~SQ_TEX_WORD1_DST_SEL_Y_MASK) | (dst_sel_y << SQ_TEX_WORD1_DST_SEL_Y_SHIFT) +#define SQ_TEX_WORD1_SET_DST_SEL_Z(sq_tex_word1_reg, dst_sel_z) \ + sq_tex_word1_reg = (sq_tex_word1_reg & ~SQ_TEX_WORD1_DST_SEL_Z_MASK) | (dst_sel_z << SQ_TEX_WORD1_DST_SEL_Z_SHIFT) +#define SQ_TEX_WORD1_SET_DST_SEL_W(sq_tex_word1_reg, dst_sel_w) \ + sq_tex_word1_reg = (sq_tex_word1_reg & ~SQ_TEX_WORD1_DST_SEL_W_MASK) | (dst_sel_w << SQ_TEX_WORD1_DST_SEL_W_SHIFT) +#define SQ_TEX_WORD1_SET_LOD_BIAS(sq_tex_word1_reg, lod_bias) \ + sq_tex_word1_reg = (sq_tex_word1_reg & ~SQ_TEX_WORD1_LOD_BIAS_MASK) | (lod_bias << SQ_TEX_WORD1_LOD_BIAS_SHIFT) +#define SQ_TEX_WORD1_SET_COORD_TYPE_X(sq_tex_word1_reg, coord_type_x) \ + sq_tex_word1_reg = (sq_tex_word1_reg & ~SQ_TEX_WORD1_COORD_TYPE_X_MASK) | (coord_type_x << SQ_TEX_WORD1_COORD_TYPE_X_SHIFT) +#define SQ_TEX_WORD1_SET_COORD_TYPE_Y(sq_tex_word1_reg, coord_type_y) \ + sq_tex_word1_reg = (sq_tex_word1_reg & ~SQ_TEX_WORD1_COORD_TYPE_Y_MASK) | (coord_type_y << SQ_TEX_WORD1_COORD_TYPE_Y_SHIFT) +#define SQ_TEX_WORD1_SET_COORD_TYPE_Z(sq_tex_word1_reg, coord_type_z) \ + sq_tex_word1_reg = (sq_tex_word1_reg & ~SQ_TEX_WORD1_COORD_TYPE_Z_MASK) | (coord_type_z << SQ_TEX_WORD1_COORD_TYPE_Z_SHIFT) +#define SQ_TEX_WORD1_SET_COORD_TYPE_W(sq_tex_word1_reg, coord_type_w) \ + sq_tex_word1_reg = (sq_tex_word1_reg & ~SQ_TEX_WORD1_COORD_TYPE_W_MASK) | (coord_type_w << SQ_TEX_WORD1_COORD_TYPE_W_SHIFT) + +#if defined(LITTLEENDIAN_CPU) + + typedef struct _sq_tex_word1_t { + unsigned int dst_gpr : SQ_TEX_WORD1_DST_GPR_SIZE; + unsigned int dst_rel : SQ_TEX_WORD1_DST_REL_SIZE; + unsigned int : 1; + unsigned int dst_sel_x : SQ_TEX_WORD1_DST_SEL_X_SIZE; + unsigned int dst_sel_y : SQ_TEX_WORD1_DST_SEL_Y_SIZE; + unsigned int dst_sel_z : SQ_TEX_WORD1_DST_SEL_Z_SIZE; + unsigned int dst_sel_w : SQ_TEX_WORD1_DST_SEL_W_SIZE; + unsigned int lod_bias : SQ_TEX_WORD1_LOD_BIAS_SIZE; + unsigned int coord_type_x : SQ_TEX_WORD1_COORD_TYPE_X_SIZE; + unsigned int coord_type_y : SQ_TEX_WORD1_COORD_TYPE_Y_SIZE; + unsigned int coord_type_z : SQ_TEX_WORD1_COORD_TYPE_Z_SIZE; + unsigned int coord_type_w : SQ_TEX_WORD1_COORD_TYPE_W_SIZE; + } sq_tex_word1_t; + +#elif defined(BIGENDIAN_CPU) + + typedef struct _sq_tex_word1_t { + unsigned int coord_type_w : SQ_TEX_WORD1_COORD_TYPE_W_SIZE; + unsigned int coord_type_z : SQ_TEX_WORD1_COORD_TYPE_Z_SIZE; + unsigned int coord_type_y : SQ_TEX_WORD1_COORD_TYPE_Y_SIZE; + unsigned int coord_type_x : SQ_TEX_WORD1_COORD_TYPE_X_SIZE; + unsigned int lod_bias : SQ_TEX_WORD1_LOD_BIAS_SIZE; + unsigned int dst_sel_w : SQ_TEX_WORD1_DST_SEL_W_SIZE; + unsigned int dst_sel_z : SQ_TEX_WORD1_DST_SEL_Z_SIZE; + unsigned int dst_sel_y : SQ_TEX_WORD1_DST_SEL_Y_SIZE; + unsigned int dst_sel_x : SQ_TEX_WORD1_DST_SEL_X_SIZE; + unsigned int : 1; + unsigned int dst_rel : SQ_TEX_WORD1_DST_REL_SIZE; + unsigned int dst_gpr : SQ_TEX_WORD1_DST_GPR_SIZE; + } sq_tex_word1_t; + +#endif + +typedef union { + unsigned int val : 32; + sq_tex_word1_t f; +} sq_tex_word1_u; + + +/* + * SQ_TEX_WORD2 struct + */ + +#define SQ_TEX_WORD2_OFFSET_X_SIZE 5 +#define SQ_TEX_WORD2_OFFSET_Y_SIZE 5 +#define SQ_TEX_WORD2_OFFSET_Z_SIZE 5 +#define SQ_TEX_WORD2_SAMPLER_ID_SIZE 5 +#define SQ_TEX_WORD2_SRC_SEL_X_SIZE 3 +#define SQ_TEX_WORD2_SRC_SEL_Y_SIZE 3 +#define SQ_TEX_WORD2_SRC_SEL_Z_SIZE 3 +#define SQ_TEX_WORD2_SRC_SEL_W_SIZE 3 + +#define SQ_TEX_WORD2_OFFSET_X_SHIFT 0 +#define SQ_TEX_WORD2_OFFSET_Y_SHIFT 5 +#define SQ_TEX_WORD2_OFFSET_Z_SHIFT 10 +#define SQ_TEX_WORD2_SAMPLER_ID_SHIFT 15 +#define SQ_TEX_WORD2_SRC_SEL_X_SHIFT 20 +#define SQ_TEX_WORD2_SRC_SEL_Y_SHIFT 23 +#define SQ_TEX_WORD2_SRC_SEL_Z_SHIFT 26 +#define SQ_TEX_WORD2_SRC_SEL_W_SHIFT 29 + +#define SQ_TEX_WORD2_OFFSET_X_MASK 0x0000001f +#define SQ_TEX_WORD2_OFFSET_Y_MASK 0x000003e0 +#define SQ_TEX_WORD2_OFFSET_Z_MASK 0x00007c00 +#define SQ_TEX_WORD2_SAMPLER_ID_MASK 0x000f8000 +#define SQ_TEX_WORD2_SRC_SEL_X_MASK 0x00700000 +#define SQ_TEX_WORD2_SRC_SEL_Y_MASK 0x03800000 +#define SQ_TEX_WORD2_SRC_SEL_Z_MASK 0x1c000000 +#define SQ_TEX_WORD2_SRC_SEL_W_MASK 0xe0000000 + +#define SQ_TEX_WORD2_MASK \ + (SQ_TEX_WORD2_OFFSET_X_MASK | \ + SQ_TEX_WORD2_OFFSET_Y_MASK | \ + SQ_TEX_WORD2_OFFSET_Z_MASK | \ + SQ_TEX_WORD2_SAMPLER_ID_MASK | \ + SQ_TEX_WORD2_SRC_SEL_X_MASK | \ + SQ_TEX_WORD2_SRC_SEL_Y_MASK | \ + SQ_TEX_WORD2_SRC_SEL_Z_MASK | \ + SQ_TEX_WORD2_SRC_SEL_W_MASK) + +#define SQ_TEX_WORD2_DEFAULT 0xcdcdcdcd + +#define SQ_TEX_WORD2_GET_OFFSET_X(sq_tex_word2) \ + ((sq_tex_word2 & SQ_TEX_WORD2_OFFSET_X_MASK) >> SQ_TEX_WORD2_OFFSET_X_SHIFT) +#define SQ_TEX_WORD2_GET_OFFSET_Y(sq_tex_word2) \ + ((sq_tex_word2 & SQ_TEX_WORD2_OFFSET_Y_MASK) >> SQ_TEX_WORD2_OFFSET_Y_SHIFT) +#define SQ_TEX_WORD2_GET_OFFSET_Z(sq_tex_word2) \ + ((sq_tex_word2 & SQ_TEX_WORD2_OFFSET_Z_MASK) >> SQ_TEX_WORD2_OFFSET_Z_SHIFT) +#define SQ_TEX_WORD2_GET_SAMPLER_ID(sq_tex_word2) \ + ((sq_tex_word2 & SQ_TEX_WORD2_SAMPLER_ID_MASK) >> SQ_TEX_WORD2_SAMPLER_ID_SHIFT) +#define SQ_TEX_WORD2_GET_SRC_SEL_X(sq_tex_word2) \ + ((sq_tex_word2 & SQ_TEX_WORD2_SRC_SEL_X_MASK) >> SQ_TEX_WORD2_SRC_SEL_X_SHIFT) +#define SQ_TEX_WORD2_GET_SRC_SEL_Y(sq_tex_word2) \ + ((sq_tex_word2 & SQ_TEX_WORD2_SRC_SEL_Y_MASK) >> SQ_TEX_WORD2_SRC_SEL_Y_SHIFT) +#define SQ_TEX_WORD2_GET_SRC_SEL_Z(sq_tex_word2) \ + ((sq_tex_word2 & SQ_TEX_WORD2_SRC_SEL_Z_MASK) >> SQ_TEX_WORD2_SRC_SEL_Z_SHIFT) +#define SQ_TEX_WORD2_GET_SRC_SEL_W(sq_tex_word2) \ + ((sq_tex_word2 & SQ_TEX_WORD2_SRC_SEL_W_MASK) >> SQ_TEX_WORD2_SRC_SEL_W_SHIFT) + +#define SQ_TEX_WORD2_SET_OFFSET_X(sq_tex_word2_reg, offset_x) \ + sq_tex_word2_reg = (sq_tex_word2_reg & ~SQ_TEX_WORD2_OFFSET_X_MASK) | (offset_x << SQ_TEX_WORD2_OFFSET_X_SHIFT) +#define SQ_TEX_WORD2_SET_OFFSET_Y(sq_tex_word2_reg, offset_y) \ + sq_tex_word2_reg = (sq_tex_word2_reg & ~SQ_TEX_WORD2_OFFSET_Y_MASK) | (offset_y << SQ_TEX_WORD2_OFFSET_Y_SHIFT) +#define SQ_TEX_WORD2_SET_OFFSET_Z(sq_tex_word2_reg, offset_z) \ + sq_tex_word2_reg = (sq_tex_word2_reg & ~SQ_TEX_WORD2_OFFSET_Z_MASK) | (offset_z << SQ_TEX_WORD2_OFFSET_Z_SHIFT) +#define SQ_TEX_WORD2_SET_SAMPLER_ID(sq_tex_word2_reg, sampler_id) \ + sq_tex_word2_reg = (sq_tex_word2_reg & ~SQ_TEX_WORD2_SAMPLER_ID_MASK) | (sampler_id << SQ_TEX_WORD2_SAMPLER_ID_SHIFT) +#define SQ_TEX_WORD2_SET_SRC_SEL_X(sq_tex_word2_reg, src_sel_x) \ + sq_tex_word2_reg = (sq_tex_word2_reg & ~SQ_TEX_WORD2_SRC_SEL_X_MASK) | (src_sel_x << SQ_TEX_WORD2_SRC_SEL_X_SHIFT) +#define SQ_TEX_WORD2_SET_SRC_SEL_Y(sq_tex_word2_reg, src_sel_y) \ + sq_tex_word2_reg = (sq_tex_word2_reg & ~SQ_TEX_WORD2_SRC_SEL_Y_MASK) | (src_sel_y << SQ_TEX_WORD2_SRC_SEL_Y_SHIFT) +#define SQ_TEX_WORD2_SET_SRC_SEL_Z(sq_tex_word2_reg, src_sel_z) \ + sq_tex_word2_reg = (sq_tex_word2_reg & ~SQ_TEX_WORD2_SRC_SEL_Z_MASK) | (src_sel_z << SQ_TEX_WORD2_SRC_SEL_Z_SHIFT) +#define SQ_TEX_WORD2_SET_SRC_SEL_W(sq_tex_word2_reg, src_sel_w) \ + sq_tex_word2_reg = (sq_tex_word2_reg & ~SQ_TEX_WORD2_SRC_SEL_W_MASK) | (src_sel_w << SQ_TEX_WORD2_SRC_SEL_W_SHIFT) + +#if defined(LITTLEENDIAN_CPU) + + typedef struct _sq_tex_word2_t { + unsigned int offset_x : SQ_TEX_WORD2_OFFSET_X_SIZE; + unsigned int offset_y : SQ_TEX_WORD2_OFFSET_Y_SIZE; + unsigned int offset_z : SQ_TEX_WORD2_OFFSET_Z_SIZE; + unsigned int sampler_id : SQ_TEX_WORD2_SAMPLER_ID_SIZE; + unsigned int src_sel_x : SQ_TEX_WORD2_SRC_SEL_X_SIZE; + unsigned int src_sel_y : SQ_TEX_WORD2_SRC_SEL_Y_SIZE; + unsigned int src_sel_z : SQ_TEX_WORD2_SRC_SEL_Z_SIZE; + unsigned int src_sel_w : SQ_TEX_WORD2_SRC_SEL_W_SIZE; + } sq_tex_word2_t; + +#elif defined(BIGENDIAN_CPU) + + typedef struct _sq_tex_word2_t { + unsigned int src_sel_w : SQ_TEX_WORD2_SRC_SEL_W_SIZE; + unsigned int src_sel_z : SQ_TEX_WORD2_SRC_SEL_Z_SIZE; + unsigned int src_sel_y : SQ_TEX_WORD2_SRC_SEL_Y_SIZE; + unsigned int src_sel_x : SQ_TEX_WORD2_SRC_SEL_X_SIZE; + unsigned int sampler_id : SQ_TEX_WORD2_SAMPLER_ID_SIZE; + unsigned int offset_z : SQ_TEX_WORD2_OFFSET_Z_SIZE; + unsigned int offset_y : SQ_TEX_WORD2_OFFSET_Y_SIZE; + unsigned int offset_x : SQ_TEX_WORD2_OFFSET_X_SIZE; + } sq_tex_word2_t; + +#endif + +typedef union { + unsigned int val : 32; + sq_tex_word2_t f; +} sq_tex_word2_u; + + +/* + * SQ_VTX_WORD0 struct + */ + +#define SQ_VTX_WORD0_VTX_INST_SIZE 5 +#define SQ_VTX_WORD0_FETCH_TYPE_SIZE 2 +#define SQ_VTX_WORD0_FETCH_WHOLE_QUAD_SIZE 1 +#define SQ_VTX_WORD0_BUFFER_ID_SIZE 8 +#define SQ_VTX_WORD0_SRC_GPR_SIZE 7 +#define SQ_VTX_WORD0_SRC_REL_SIZE 1 +#define SQ_VTX_WORD0_SRC_SEL_X_SIZE 2 +#define SQ_VTX_WORD0_MEGA_FETCH_COUNT_SIZE 6 + +#define SQ_VTX_WORD0_VTX_INST_SHIFT 0 +#define SQ_VTX_WORD0_FETCH_TYPE_SHIFT 5 +#define SQ_VTX_WORD0_FETCH_WHOLE_QUAD_SHIFT 7 +#define SQ_VTX_WORD0_BUFFER_ID_SHIFT 8 +#define SQ_VTX_WORD0_SRC_GPR_SHIFT 16 +#define SQ_VTX_WORD0_SRC_REL_SHIFT 23 +#define SQ_VTX_WORD0_SRC_SEL_X_SHIFT 24 +#define SQ_VTX_WORD0_MEGA_FETCH_COUNT_SHIFT 26 + +#define SQ_VTX_WORD0_VTX_INST_MASK 0x0000001f +#define SQ_VTX_WORD0_FETCH_TYPE_MASK 0x00000060 +#define SQ_VTX_WORD0_FETCH_WHOLE_QUAD_MASK 0x00000080 +#define SQ_VTX_WORD0_BUFFER_ID_MASK 0x0000ff00 +#define SQ_VTX_WORD0_SRC_GPR_MASK 0x007f0000 +#define SQ_VTX_WORD0_SRC_REL_MASK 0x00800000 +#define SQ_VTX_WORD0_SRC_SEL_X_MASK 0x03000000 +#define SQ_VTX_WORD0_MEGA_FETCH_COUNT_MASK 0xfc000000 + +#define SQ_VTX_WORD0_MASK \ + (SQ_VTX_WORD0_VTX_INST_MASK | \ + SQ_VTX_WORD0_FETCH_TYPE_MASK | \ + SQ_VTX_WORD0_FETCH_WHOLE_QUAD_MASK | \ + SQ_VTX_WORD0_BUFFER_ID_MASK | \ + SQ_VTX_WORD0_SRC_GPR_MASK | \ + SQ_VTX_WORD0_SRC_REL_MASK | \ + SQ_VTX_WORD0_SRC_SEL_X_MASK | \ + SQ_VTX_WORD0_MEGA_FETCH_COUNT_MASK) + +#define SQ_VTX_WORD0_DEFAULT 0xcdcdcdcd + +#define SQ_VTX_WORD0_GET_VTX_INST(sq_vtx_word0) \ + ((sq_vtx_word0 & SQ_VTX_WORD0_VTX_INST_MASK) >> SQ_VTX_WORD0_VTX_INST_SHIFT) +#define SQ_VTX_WORD0_GET_FETCH_TYPE(sq_vtx_word0) \ + ((sq_vtx_word0 & SQ_VTX_WORD0_FETCH_TYPE_MASK) >> SQ_VTX_WORD0_FETCH_TYPE_SHIFT) +#define SQ_VTX_WORD0_GET_FETCH_WHOLE_QUAD(sq_vtx_word0) \ + ((sq_vtx_word0 & SQ_VTX_WORD0_FETCH_WHOLE_QUAD_MASK) >> SQ_VTX_WORD0_FETCH_WHOLE_QUAD_SHIFT) +#define SQ_VTX_WORD0_GET_BUFFER_ID(sq_vtx_word0) \ + ((sq_vtx_word0 & SQ_VTX_WORD0_BUFFER_ID_MASK) >> SQ_VTX_WORD0_BUFFER_ID_SHIFT) +#define SQ_VTX_WORD0_GET_SRC_GPR(sq_vtx_word0) \ + ((sq_vtx_word0 & SQ_VTX_WORD0_SRC_GPR_MASK) >> SQ_VTX_WORD0_SRC_GPR_SHIFT) +#define SQ_VTX_WORD0_GET_SRC_REL(sq_vtx_word0) \ + ((sq_vtx_word0 & SQ_VTX_WORD0_SRC_REL_MASK) >> SQ_VTX_WORD0_SRC_REL_SHIFT) +#define SQ_VTX_WORD0_GET_SRC_SEL_X(sq_vtx_word0) \ + ((sq_vtx_word0 & SQ_VTX_WORD0_SRC_SEL_X_MASK) >> SQ_VTX_WORD0_SRC_SEL_X_SHIFT) +#define SQ_VTX_WORD0_GET_MEGA_FETCH_COUNT(sq_vtx_word0) \ + ((sq_vtx_word0 & SQ_VTX_WORD0_MEGA_FETCH_COUNT_MASK) >> SQ_VTX_WORD0_MEGA_FETCH_COUNT_SHIFT) + +#define SQ_VTX_WORD0_SET_VTX_INST(sq_vtx_word0_reg, vtx_inst) \ + sq_vtx_word0_reg = (sq_vtx_word0_reg & ~SQ_VTX_WORD0_VTX_INST_MASK) | (vtx_inst << SQ_VTX_WORD0_VTX_INST_SHIFT) +#define SQ_VTX_WORD0_SET_FETCH_TYPE(sq_vtx_word0_reg, fetch_type) \ + sq_vtx_word0_reg = (sq_vtx_word0_reg & ~SQ_VTX_WORD0_FETCH_TYPE_MASK) | (fetch_type << SQ_VTX_WORD0_FETCH_TYPE_SHIFT) +#define SQ_VTX_WORD0_SET_FETCH_WHOLE_QUAD(sq_vtx_word0_reg, fetch_whole_quad) \ + sq_vtx_word0_reg = (sq_vtx_word0_reg & ~SQ_VTX_WORD0_FETCH_WHOLE_QUAD_MASK) | (fetch_whole_quad << SQ_VTX_WORD0_FETCH_WHOLE_QUAD_SHIFT) +#define SQ_VTX_WORD0_SET_BUFFER_ID(sq_vtx_word0_reg, buffer_id) \ + sq_vtx_word0_reg = (sq_vtx_word0_reg & ~SQ_VTX_WORD0_BUFFER_ID_MASK) | (buffer_id << SQ_VTX_WORD0_BUFFER_ID_SHIFT) +#define SQ_VTX_WORD0_SET_SRC_GPR(sq_vtx_word0_reg, src_gpr) \ + sq_vtx_word0_reg = (sq_vtx_word0_reg & ~SQ_VTX_WORD0_SRC_GPR_MASK) | (src_gpr << SQ_VTX_WORD0_SRC_GPR_SHIFT) +#define SQ_VTX_WORD0_SET_SRC_REL(sq_vtx_word0_reg, src_rel) \ + sq_vtx_word0_reg = (sq_vtx_word0_reg & ~SQ_VTX_WORD0_SRC_REL_MASK) | (src_rel << SQ_VTX_WORD0_SRC_REL_SHIFT) +#define SQ_VTX_WORD0_SET_SRC_SEL_X(sq_vtx_word0_reg, src_sel_x) \ + sq_vtx_word0_reg = (sq_vtx_word0_reg & ~SQ_VTX_WORD0_SRC_SEL_X_MASK) | (src_sel_x << SQ_VTX_WORD0_SRC_SEL_X_SHIFT) +#define SQ_VTX_WORD0_SET_MEGA_FETCH_COUNT(sq_vtx_word0_reg, mega_fetch_count) \ + sq_vtx_word0_reg = (sq_vtx_word0_reg & ~SQ_VTX_WORD0_MEGA_FETCH_COUNT_MASK) | (mega_fetch_count << SQ_VTX_WORD0_MEGA_FETCH_COUNT_SHIFT) + +#if defined(LITTLEENDIAN_CPU) + + typedef struct _sq_vtx_word0_t { + unsigned int vtx_inst : SQ_VTX_WORD0_VTX_INST_SIZE; + unsigned int fetch_type : SQ_VTX_WORD0_FETCH_TYPE_SIZE; + unsigned int fetch_whole_quad : SQ_VTX_WORD0_FETCH_WHOLE_QUAD_SIZE; + unsigned int buffer_id : SQ_VTX_WORD0_BUFFER_ID_SIZE; + unsigned int src_gpr : SQ_VTX_WORD0_SRC_GPR_SIZE; + unsigned int src_rel : SQ_VTX_WORD0_SRC_REL_SIZE; + unsigned int src_sel_x : SQ_VTX_WORD0_SRC_SEL_X_SIZE; + unsigned int mega_fetch_count : SQ_VTX_WORD0_MEGA_FETCH_COUNT_SIZE; + } sq_vtx_word0_t; + +#elif defined(BIGENDIAN_CPU) + + typedef struct _sq_vtx_word0_t { + unsigned int mega_fetch_count : SQ_VTX_WORD0_MEGA_FETCH_COUNT_SIZE; + unsigned int src_sel_x : SQ_VTX_WORD0_SRC_SEL_X_SIZE; + unsigned int src_rel : SQ_VTX_WORD0_SRC_REL_SIZE; + unsigned int src_gpr : SQ_VTX_WORD0_SRC_GPR_SIZE; + unsigned int buffer_id : SQ_VTX_WORD0_BUFFER_ID_SIZE; + unsigned int fetch_whole_quad : SQ_VTX_WORD0_FETCH_WHOLE_QUAD_SIZE; + unsigned int fetch_type : SQ_VTX_WORD0_FETCH_TYPE_SIZE; + unsigned int vtx_inst : SQ_VTX_WORD0_VTX_INST_SIZE; + } sq_vtx_word0_t; + +#endif + +typedef union { + unsigned int val : 32; + sq_vtx_word0_t f; +} sq_vtx_word0_u; + + +/* + * SQ_VTX_WORD1 struct + */ + +#define SQ_VTX_WORD1_DST_SEL_X_SIZE 3 +#define SQ_VTX_WORD1_DST_SEL_Y_SIZE 3 +#define SQ_VTX_WORD1_DST_SEL_Z_SIZE 3 +#define SQ_VTX_WORD1_DST_SEL_W_SIZE 3 +#define SQ_VTX_WORD1_USE_CONST_FIELDS_SIZE 1 +#define SQ_VTX_WORD1_DATA_FORMAT_SIZE 6 +#define SQ_VTX_WORD1_NUM_FORMAT_ALL_SIZE 2 +#define SQ_VTX_WORD1_FORMAT_COMP_ALL_SIZE 1 +#define SQ_VTX_WORD1_SRF_MODE_ALL_SIZE 1 + +#define SQ_VTX_WORD1_DST_SEL_X_SHIFT 9 +#define SQ_VTX_WORD1_DST_SEL_Y_SHIFT 12 +#define SQ_VTX_WORD1_DST_SEL_Z_SHIFT 15 +#define SQ_VTX_WORD1_DST_SEL_W_SHIFT 18 +#define SQ_VTX_WORD1_USE_CONST_FIELDS_SHIFT 21 +#define SQ_VTX_WORD1_DATA_FORMAT_SHIFT 22 +#define SQ_VTX_WORD1_NUM_FORMAT_ALL_SHIFT 28 +#define SQ_VTX_WORD1_FORMAT_COMP_ALL_SHIFT 30 +#define SQ_VTX_WORD1_SRF_MODE_ALL_SHIFT 31 + +#define SQ_VTX_WORD1_DST_SEL_X_MASK 0x00000e00 +#define SQ_VTX_WORD1_DST_SEL_Y_MASK 0x00007000 +#define SQ_VTX_WORD1_DST_SEL_Z_MASK 0x00038000 +#define SQ_VTX_WORD1_DST_SEL_W_MASK 0x001c0000 +#define SQ_VTX_WORD1_USE_CONST_FIELDS_MASK 0x00200000 +#define SQ_VTX_WORD1_DATA_FORMAT_MASK 0x0fc00000 +#define SQ_VTX_WORD1_NUM_FORMAT_ALL_MASK 0x30000000 +#define SQ_VTX_WORD1_FORMAT_COMP_ALL_MASK 0x40000000 +#define SQ_VTX_WORD1_SRF_MODE_ALL_MASK 0x80000000 + +#define SQ_VTX_WORD1_MASK \ + (SQ_VTX_WORD1_DST_SEL_X_MASK | \ + SQ_VTX_WORD1_DST_SEL_Y_MASK | \ + SQ_VTX_WORD1_DST_SEL_Z_MASK | \ + SQ_VTX_WORD1_DST_SEL_W_MASK | \ + SQ_VTX_WORD1_USE_CONST_FIELDS_MASK | \ + SQ_VTX_WORD1_DATA_FORMAT_MASK | \ + SQ_VTX_WORD1_NUM_FORMAT_ALL_MASK | \ + SQ_VTX_WORD1_FORMAT_COMP_ALL_MASK | \ + SQ_VTX_WORD1_SRF_MODE_ALL_MASK) + +#define SQ_VTX_WORD1_DEFAULT 0xcdcdcc00 + +#define SQ_VTX_WORD1_GET_DST_SEL_X(sq_vtx_word1) \ + ((sq_vtx_word1 & SQ_VTX_WORD1_DST_SEL_X_MASK) >> SQ_VTX_WORD1_DST_SEL_X_SHIFT) +#define SQ_VTX_WORD1_GET_DST_SEL_Y(sq_vtx_word1) \ + ((sq_vtx_word1 & SQ_VTX_WORD1_DST_SEL_Y_MASK) >> SQ_VTX_WORD1_DST_SEL_Y_SHIFT) +#define SQ_VTX_WORD1_GET_DST_SEL_Z(sq_vtx_word1) \ + ((sq_vtx_word1 & SQ_VTX_WORD1_DST_SEL_Z_MASK) >> SQ_VTX_WORD1_DST_SEL_Z_SHIFT) +#define SQ_VTX_WORD1_GET_DST_SEL_W(sq_vtx_word1) \ + ((sq_vtx_word1 & SQ_VTX_WORD1_DST_SEL_W_MASK) >> SQ_VTX_WORD1_DST_SEL_W_SHIFT) +#define SQ_VTX_WORD1_GET_USE_CONST_FIELDS(sq_vtx_word1) \ + ((sq_vtx_word1 & SQ_VTX_WORD1_USE_CONST_FIELDS_MASK) >> SQ_VTX_WORD1_USE_CONST_FIELDS_SHIFT) +#define SQ_VTX_WORD1_GET_DATA_FORMAT(sq_vtx_word1) \ + ((sq_vtx_word1 & SQ_VTX_WORD1_DATA_FORMAT_MASK) >> SQ_VTX_WORD1_DATA_FORMAT_SHIFT) +#define SQ_VTX_WORD1_GET_NUM_FORMAT_ALL(sq_vtx_word1) \ + ((sq_vtx_word1 & SQ_VTX_WORD1_NUM_FORMAT_ALL_MASK) >> SQ_VTX_WORD1_NUM_FORMAT_ALL_SHIFT) +#define SQ_VTX_WORD1_GET_FORMAT_COMP_ALL(sq_vtx_word1) \ + ((sq_vtx_word1 & SQ_VTX_WORD1_FORMAT_COMP_ALL_MASK) >> SQ_VTX_WORD1_FORMAT_COMP_ALL_SHIFT) +#define SQ_VTX_WORD1_GET_SRF_MODE_ALL(sq_vtx_word1) \ + ((sq_vtx_word1 & SQ_VTX_WORD1_SRF_MODE_ALL_MASK) >> SQ_VTX_WORD1_SRF_MODE_ALL_SHIFT) + +#define SQ_VTX_WORD1_SET_DST_SEL_X(sq_vtx_word1_reg, dst_sel_x) \ + sq_vtx_word1_reg = (sq_vtx_word1_reg & ~SQ_VTX_WORD1_DST_SEL_X_MASK) | (dst_sel_x << SQ_VTX_WORD1_DST_SEL_X_SHIFT) +#define SQ_VTX_WORD1_SET_DST_SEL_Y(sq_vtx_word1_reg, dst_sel_y) \ + sq_vtx_word1_reg = (sq_vtx_word1_reg & ~SQ_VTX_WORD1_DST_SEL_Y_MASK) | (dst_sel_y << SQ_VTX_WORD1_DST_SEL_Y_SHIFT) +#define SQ_VTX_WORD1_SET_DST_SEL_Z(sq_vtx_word1_reg, dst_sel_z) \ + sq_vtx_word1_reg = (sq_vtx_word1_reg & ~SQ_VTX_WORD1_DST_SEL_Z_MASK) | (dst_sel_z << SQ_VTX_WORD1_DST_SEL_Z_SHIFT) +#define SQ_VTX_WORD1_SET_DST_SEL_W(sq_vtx_word1_reg, dst_sel_w) \ + sq_vtx_word1_reg = (sq_vtx_word1_reg & ~SQ_VTX_WORD1_DST_SEL_W_MASK) | (dst_sel_w << SQ_VTX_WORD1_DST_SEL_W_SHIFT) +#define SQ_VTX_WORD1_SET_USE_CONST_FIELDS(sq_vtx_word1_reg, use_const_fields) \ + sq_vtx_word1_reg = (sq_vtx_word1_reg & ~SQ_VTX_WORD1_USE_CONST_FIELDS_MASK) | (use_const_fields << SQ_VTX_WORD1_USE_CONST_FIELDS_SHIFT) +#define SQ_VTX_WORD1_SET_DATA_FORMAT(sq_vtx_word1_reg, data_format) \ + sq_vtx_word1_reg = (sq_vtx_word1_reg & ~SQ_VTX_WORD1_DATA_FORMAT_MASK) | (data_format << SQ_VTX_WORD1_DATA_FORMAT_SHIFT) +#define SQ_VTX_WORD1_SET_NUM_FORMAT_ALL(sq_vtx_word1_reg, num_format_all) \ + sq_vtx_word1_reg = (sq_vtx_word1_reg & ~SQ_VTX_WORD1_NUM_FORMAT_ALL_MASK) | (num_format_all << SQ_VTX_WORD1_NUM_FORMAT_ALL_SHIFT) +#define SQ_VTX_WORD1_SET_FORMAT_COMP_ALL(sq_vtx_word1_reg, format_comp_all) \ + sq_vtx_word1_reg = (sq_vtx_word1_reg & ~SQ_VTX_WORD1_FORMAT_COMP_ALL_MASK) | (format_comp_all << SQ_VTX_WORD1_FORMAT_COMP_ALL_SHIFT) +#define SQ_VTX_WORD1_SET_SRF_MODE_ALL(sq_vtx_word1_reg, srf_mode_all) \ + sq_vtx_word1_reg = (sq_vtx_word1_reg & ~SQ_VTX_WORD1_SRF_MODE_ALL_MASK) | (srf_mode_all << SQ_VTX_WORD1_SRF_MODE_ALL_SHIFT) + +#if defined(LITTLEENDIAN_CPU) + + typedef struct _sq_vtx_word1_t { + unsigned int : 9; + unsigned int dst_sel_x : SQ_VTX_WORD1_DST_SEL_X_SIZE; + unsigned int dst_sel_y : SQ_VTX_WORD1_DST_SEL_Y_SIZE; + unsigned int dst_sel_z : SQ_VTX_WORD1_DST_SEL_Z_SIZE; + unsigned int dst_sel_w : SQ_VTX_WORD1_DST_SEL_W_SIZE; + unsigned int use_const_fields : SQ_VTX_WORD1_USE_CONST_FIELDS_SIZE; + unsigned int data_format : SQ_VTX_WORD1_DATA_FORMAT_SIZE; + unsigned int num_format_all : SQ_VTX_WORD1_NUM_FORMAT_ALL_SIZE; + unsigned int format_comp_all : SQ_VTX_WORD1_FORMAT_COMP_ALL_SIZE; + unsigned int srf_mode_all : SQ_VTX_WORD1_SRF_MODE_ALL_SIZE; + } sq_vtx_word1_t; + +#elif defined(BIGENDIAN_CPU) + + typedef struct _sq_vtx_word1_t { + unsigned int srf_mode_all : SQ_VTX_WORD1_SRF_MODE_ALL_SIZE; + unsigned int format_comp_all : SQ_VTX_WORD1_FORMAT_COMP_ALL_SIZE; + unsigned int num_format_all : SQ_VTX_WORD1_NUM_FORMAT_ALL_SIZE; + unsigned int data_format : SQ_VTX_WORD1_DATA_FORMAT_SIZE; + unsigned int use_const_fields : SQ_VTX_WORD1_USE_CONST_FIELDS_SIZE; + unsigned int dst_sel_w : SQ_VTX_WORD1_DST_SEL_W_SIZE; + unsigned int dst_sel_z : SQ_VTX_WORD1_DST_SEL_Z_SIZE; + unsigned int dst_sel_y : SQ_VTX_WORD1_DST_SEL_Y_SIZE; + unsigned int dst_sel_x : SQ_VTX_WORD1_DST_SEL_X_SIZE; + unsigned int : 9; + } sq_vtx_word1_t; + +#endif + +typedef union { + unsigned int val : 32; + sq_vtx_word1_t f; +} sq_vtx_word1_u; + + +/* + * SQ_VTX_WORD1_GPR struct + */ + +#define SQ_VTX_WORD1_GPR_DST_GPR_SIZE 7 +#define SQ_VTX_WORD1_GPR_DST_REL_SIZE 1 + +#define SQ_VTX_WORD1_GPR_DST_GPR_SHIFT 0 +#define SQ_VTX_WORD1_GPR_DST_REL_SHIFT 7 + +#define SQ_VTX_WORD1_GPR_DST_GPR_MASK 0x0000007f +#define SQ_VTX_WORD1_GPR_DST_REL_MASK 0x00000080 + +#define SQ_VTX_WORD1_GPR_MASK \ + (SQ_VTX_WORD1_GPR_DST_GPR_MASK | \ + SQ_VTX_WORD1_GPR_DST_REL_MASK) + +#define SQ_VTX_WORD1_GPR_DEFAULT 0x000000cd + +#define SQ_VTX_WORD1_GPR_GET_DST_GPR(sq_vtx_word1_gpr) \ + ((sq_vtx_word1_gpr & SQ_VTX_WORD1_GPR_DST_GPR_MASK) >> SQ_VTX_WORD1_GPR_DST_GPR_SHIFT) +#define SQ_VTX_WORD1_GPR_GET_DST_REL(sq_vtx_word1_gpr) \ + ((sq_vtx_word1_gpr & SQ_VTX_WORD1_GPR_DST_REL_MASK) >> SQ_VTX_WORD1_GPR_DST_REL_SHIFT) + +#define SQ_VTX_WORD1_GPR_SET_DST_GPR(sq_vtx_word1_gpr_reg, dst_gpr) \ + sq_vtx_word1_gpr_reg = (sq_vtx_word1_gpr_reg & ~SQ_VTX_WORD1_GPR_DST_GPR_MASK) | (dst_gpr << SQ_VTX_WORD1_GPR_DST_GPR_SHIFT) +#define SQ_VTX_WORD1_GPR_SET_DST_REL(sq_vtx_word1_gpr_reg, dst_rel) \ + sq_vtx_word1_gpr_reg = (sq_vtx_word1_gpr_reg & ~SQ_VTX_WORD1_GPR_DST_REL_MASK) | (dst_rel << SQ_VTX_WORD1_GPR_DST_REL_SHIFT) + +#if defined(LITTLEENDIAN_CPU) + + typedef struct _sq_vtx_word1_gpr_t { + unsigned int dst_gpr : SQ_VTX_WORD1_GPR_DST_GPR_SIZE; + unsigned int dst_rel : SQ_VTX_WORD1_GPR_DST_REL_SIZE; + unsigned int : 24; + } sq_vtx_word1_gpr_t; + +#elif defined(BIGENDIAN_CPU) + + typedef struct _sq_vtx_word1_gpr_t { + unsigned int : 24; + unsigned int dst_rel : SQ_VTX_WORD1_GPR_DST_REL_SIZE; + unsigned int dst_gpr : SQ_VTX_WORD1_GPR_DST_GPR_SIZE; + } sq_vtx_word1_gpr_t; + +#endif + +typedef union { + unsigned int val : 32; + sq_vtx_word1_gpr_t f; +} sq_vtx_word1_gpr_u; + + +/* + * SQ_VTX_WORD1_SEM struct + */ + +#define SQ_VTX_WORD1_SEM_SEMANTIC_ID_SIZE 8 + +#define SQ_VTX_WORD1_SEM_SEMANTIC_ID_SHIFT 0 + +#define SQ_VTX_WORD1_SEM_SEMANTIC_ID_MASK 0x000000ff + +#define SQ_VTX_WORD1_SEM_MASK \ + (SQ_VTX_WORD1_SEM_SEMANTIC_ID_MASK) + +#define SQ_VTX_WORD1_SEM_DEFAULT 0x000000cd + +#define SQ_VTX_WORD1_SEM_GET_SEMANTIC_ID(sq_vtx_word1_sem) \ + ((sq_vtx_word1_sem & SQ_VTX_WORD1_SEM_SEMANTIC_ID_MASK) >> SQ_VTX_WORD1_SEM_SEMANTIC_ID_SHIFT) + +#define SQ_VTX_WORD1_SEM_SET_SEMANTIC_ID(sq_vtx_word1_sem_reg, semantic_id) \ + sq_vtx_word1_sem_reg = (sq_vtx_word1_sem_reg & ~SQ_VTX_WORD1_SEM_SEMANTIC_ID_MASK) | (semantic_id << SQ_VTX_WORD1_SEM_SEMANTIC_ID_SHIFT) + +#if defined(LITTLEENDIAN_CPU) + + typedef struct _sq_vtx_word1_sem_t { + unsigned int semantic_id : SQ_VTX_WORD1_SEM_SEMANTIC_ID_SIZE; + unsigned int : 24; + } sq_vtx_word1_sem_t; + +#elif defined(BIGENDIAN_CPU) + + typedef struct _sq_vtx_word1_sem_t { + unsigned int : 24; + unsigned int semantic_id : SQ_VTX_WORD1_SEM_SEMANTIC_ID_SIZE; + } sq_vtx_word1_sem_t; + +#endif + +typedef union { + unsigned int val : 32; + sq_vtx_word1_sem_t f; +} sq_vtx_word1_sem_u; + + +/* + * SQ_VTX_WORD2 struct + */ + +#define SQ_VTX_WORD2_OFFSET_SIZE 16 +#define SQ_VTX_WORD2_ENDIAN_SWAP_SIZE 2 +#define SQ_VTX_WORD2_CONST_BUF_NO_STRIDE_SIZE 1 +#define SQ_VTX_WORD2_MEGA_FETCH_SIZE 1 +#define SQ_VTX_WORD2_ALT_CONST_SIZE 1 + +#define SQ_VTX_WORD2_OFFSET_SHIFT 0 +#define SQ_VTX_WORD2_ENDIAN_SWAP_SHIFT 16 +#define SQ_VTX_WORD2_CONST_BUF_NO_STRIDE_SHIFT 18 +#define SQ_VTX_WORD2_MEGA_FETCH_SHIFT 19 +#define SQ_VTX_WORD2_ALT_CONST_SHIFT 20 + +#define SQ_VTX_WORD2_OFFSET_MASK 0x0000ffff +#define SQ_VTX_WORD2_ENDIAN_SWAP_MASK 0x00030000 +#define SQ_VTX_WORD2_CONST_BUF_NO_STRIDE_MASK 0x00040000 +#define SQ_VTX_WORD2_MEGA_FETCH_MASK 0x00080000 +#define SQ_VTX_WORD2_ALT_CONST_MASK 0x00100000 + +#define SQ_VTX_WORD2_MASK \ + (SQ_VTX_WORD2_OFFSET_MASK | \ + SQ_VTX_WORD2_ENDIAN_SWAP_MASK | \ + SQ_VTX_WORD2_CONST_BUF_NO_STRIDE_MASK | \ + SQ_VTX_WORD2_MEGA_FETCH_MASK | \ + SQ_VTX_WORD2_ALT_CONST_MASK) + +#define SQ_VTX_WORD2_DEFAULT 0x000dcdcd + +#define SQ_VTX_WORD2_GET_OFFSET(sq_vtx_word2) \ + ((sq_vtx_word2 & SQ_VTX_WORD2_OFFSET_MASK) >> SQ_VTX_WORD2_OFFSET_SHIFT) +#define SQ_VTX_WORD2_GET_ENDIAN_SWAP(sq_vtx_word2) \ + ((sq_vtx_word2 & SQ_VTX_WORD2_ENDIAN_SWAP_MASK) >> SQ_VTX_WORD2_ENDIAN_SWAP_SHIFT) +#define SQ_VTX_WORD2_GET_CONST_BUF_NO_STRIDE(sq_vtx_word2) \ + ((sq_vtx_word2 & SQ_VTX_WORD2_CONST_BUF_NO_STRIDE_MASK) >> SQ_VTX_WORD2_CONST_BUF_NO_STRIDE_SHIFT) +#define SQ_VTX_WORD2_GET_MEGA_FETCH(sq_vtx_word2) \ + ((sq_vtx_word2 & SQ_VTX_WORD2_MEGA_FETCH_MASK) >> SQ_VTX_WORD2_MEGA_FETCH_SHIFT) +#define SQ_VTX_WORD2_GET_ALT_CONST(sq_vtx_word2) \ + ((sq_vtx_word2 & SQ_VTX_WORD2_ALT_CONST_MASK) >> SQ_VTX_WORD2_ALT_CONST_SHIFT) + +#define SQ_VTX_WORD2_SET_OFFSET(sq_vtx_word2_reg, offset) \ + sq_vtx_word2_reg = (sq_vtx_word2_reg & ~SQ_VTX_WORD2_OFFSET_MASK) | (offset << SQ_VTX_WORD2_OFFSET_SHIFT) +#define SQ_VTX_WORD2_SET_ENDIAN_SWAP(sq_vtx_word2_reg, endian_swap) \ + sq_vtx_word2_reg = (sq_vtx_word2_reg & ~SQ_VTX_WORD2_ENDIAN_SWAP_MASK) | (endian_swap << SQ_VTX_WORD2_ENDIAN_SWAP_SHIFT) +#define SQ_VTX_WORD2_SET_CONST_BUF_NO_STRIDE(sq_vtx_word2_reg, const_buf_no_stride) \ + sq_vtx_word2_reg = (sq_vtx_word2_reg & ~SQ_VTX_WORD2_CONST_BUF_NO_STRIDE_MASK) | (const_buf_no_stride << SQ_VTX_WORD2_CONST_BUF_NO_STRIDE_SHIFT) +#define SQ_VTX_WORD2_SET_MEGA_FETCH(sq_vtx_word2_reg, mega_fetch) \ + sq_vtx_word2_reg = (sq_vtx_word2_reg & ~SQ_VTX_WORD2_MEGA_FETCH_MASK) | (mega_fetch << SQ_VTX_WORD2_MEGA_FETCH_SHIFT) +#define SQ_VTX_WORD2_SET_ALT_CONST(sq_vtx_word2_reg, alt_const) \ + sq_vtx_word2_reg = (sq_vtx_word2_reg & ~SQ_VTX_WORD2_ALT_CONST_MASK) | (alt_const << SQ_VTX_WORD2_ALT_CONST_SHIFT) + +#if defined(LITTLEENDIAN_CPU) + + typedef struct _sq_vtx_word2_t { + unsigned int offset : SQ_VTX_WORD2_OFFSET_SIZE; + unsigned int endian_swap : SQ_VTX_WORD2_ENDIAN_SWAP_SIZE; + unsigned int const_buf_no_stride : SQ_VTX_WORD2_CONST_BUF_NO_STRIDE_SIZE; + unsigned int mega_fetch : SQ_VTX_WORD2_MEGA_FETCH_SIZE; + unsigned int alt_const : SQ_VTX_WORD2_ALT_CONST_SIZE; + unsigned int : 11; + } sq_vtx_word2_t; + +#elif defined(BIGENDIAN_CPU) + + typedef struct _sq_vtx_word2_t { + unsigned int : 11; + unsigned int alt_const : SQ_VTX_WORD2_ALT_CONST_SIZE; + unsigned int mega_fetch : SQ_VTX_WORD2_MEGA_FETCH_SIZE; + unsigned int const_buf_no_stride : SQ_VTX_WORD2_CONST_BUF_NO_STRIDE_SIZE; + unsigned int endian_swap : SQ_VTX_WORD2_ENDIAN_SWAP_SIZE; + unsigned int offset : SQ_VTX_WORD2_OFFSET_SIZE; + } sq_vtx_word2_t; + +#endif + +typedef union { + unsigned int val : 32; + sq_vtx_word2_t f; +} sq_vtx_word2_u; + +#endif /* _SQ_MICRO_REG_H */ + + |