diff options
author | Dave Airlie <airlied@kvothe.(none)> | 2011-04-29 13:40:05 +1000 |
---|---|---|
committer | Dave Airlie <airlied@kvothe.(none)> | 2011-04-29 13:40:05 +1000 |
commit | cb0d9ded8613dd1abab5eb5a9aa15c4f7480b15a (patch) | |
tree | 067ddc1474904ea2cc562f4991a92a96a1de5777 |
initial
-rw-r--r-- | Makefile.am | 29 | ||||
-rwxr-xr-x | autogen.sh | 14 | ||||
-rw-r--r-- | configure.ac | 36 | ||||
-rw-r--r-- | evergreen_accel.c | 1243 | ||||
-rw-r--r-- | evergreen_ops.c | 153 | ||||
-rw-r--r-- | evergreen_reg.h | 250 | ||||
-rw-r--r-- | evergreen_reg_auto.h | 4039 | ||||
-rw-r--r-- | evergreen_shader.c | 3155 | ||||
-rw-r--r-- | evergreen_shader.h | 292 | ||||
-rw-r--r-- | evergreen_state.h | 329 | ||||
-rw-r--r-- | radeon_vbo.c | 204 | ||||
-rw-r--r-- | radeon_vbo.h | 45 | ||||
-rw-r--r-- | radeondemo.c | 251 | ||||
-rw-r--r-- | radeondemo.h | 202 | ||||
-rw-r--r-- | simple_list.h | 202 |
15 files changed, 10444 insertions, 0 deletions
diff --git a/Makefile.am b/Makefile.am
new file mode 100644
index 0000000..438298f
--- /dev/null
+++ b/Makefile.am
@@ -0,0 +1,29 @@
+# Copyright 2009 Dave Airlie
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# on the rights to use, copy, modify, merge, publish, distribute, sub
+# license, and/or sell copies of the Software, and to permit persons to whom
+# the Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+AUTOMAKE_OPTIONS = foreign
+
+bin_PROGRAMS = radeondemo
+
+AM_CFLAGS = $(LIBDRM_CFLAGS) $(LIBDRM_RADEON_CFLAGS)
+
+radeondemo_SOURCES = radeondemo.c evergreen_accel.c evergreen_shader.c radeon_vbo.c evergreen_ops.c
+radeondemo_LDADD = $(LIBDRM_LIBS) $(LIBDRM_RADEON_LIBS)
+
diff --git a/autogen.sh b/autogen.sh
new file mode 100755
index 0000000..f028c2c
--- /dev/null
+++ b/autogen.sh
@@ -0,0 +1,14 @@
+#! /bin/sh
+
+srcdir=`dirname "$0"`                # quoted: the checkout path may contain spaces
+test -z "$srcdir" && srcdir=.
+
+ORIGDIR=`pwd`
+cd "$srcdir" || exit $?              # never run autoreconf in the wrong directory
+
+autoreconf -v --install || exit 1
+cd "$ORIGDIR" || exit $?
+
+"$srcdir"/configure --enable-maintainer-mode "$@"
+
+
diff --git a/configure.ac b/configure.ac
new file mode 100644
index 0000000..1c40a96
--- /dev/null
+++ b/configure.ac
@@ -0,0 +1,36 @@
+# Copyright 2009 Dave Airlie
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# on the rights to use, copy, modify, merge, publish, distribute, sub
+# license, and/or sell copies of the Software, and to permit persons to whom
+# the Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+AC_PREREQ([2.60])
+AC_INIT([radeondemo], 1.6.2, [xorg-driver-ati@lists.x.org], radeondemo)
+AM_INIT_AUTOMAKE([dist-bzip2])
+
+#AM_CONFIG_HEADER([config.h])
+
+AC_DISABLE_STATIC dnl set the default before AC_PROG_LIBTOOL reads it
+AC_PROG_LIBTOOL
+
+AC_PROG_CC
+AC_PROG_INSTALL
+
+PKG_CHECK_MODULES(LIBDRM, libdrm)
+PKG_CHECK_MODULES(LIBDRM_RADEON, libdrm_radeon)
+
+AC_OUTPUT([Makefile])
diff --git a/evergreen_accel.c b/evergreen_accel.c
new file mode 100644
index 0000000..1d66172
--- /dev/null
+++ b/evergreen_accel.c
@@ -0,0 +1,1243 @@
+/*
+ * Copyright 2010 Advanced Micro Devices, Inc.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: Alex Deucher <alexander.deucher@amd.com> + * + */ +#include <errno.h> +#include <stdlib.h> +#include "radeondemo.h" +#include "evergreen_shader.h" +#include "evergreen_reg.h" +#include "evergreen_state.h" +#include "radeon_drm.h" + +static const uint32_t EVERGREEN_ROP[16] = { + RADEON_ROP3_ZERO, /* GXclear */ + RADEON_ROP3_DSa, /* Gxand */ + RADEON_ROP3_SDna, /* GXandReverse */ + RADEON_ROP3_S, /* GXcopy */ + RADEON_ROP3_DSna, /* GXandInverted */ + RADEON_ROP3_D, /* GXnoop */ + RADEON_ROP3_DSx, /* GXxor */ + RADEON_ROP3_DSo, /* GXor */ + RADEON_ROP3_DSon, /* GXnor */ + RADEON_ROP3_DSxn, /* GXequiv */ + RADEON_ROP3_Dn, /* GXinvert */ + RADEON_ROP3_SDno, /* GXorReverse */ + RADEON_ROP3_Sn, /* GXcopyInverted */ + RADEON_ROP3_DSno, /* GXorInverted */ + RADEON_ROP3_DSan, /* GXnand */ + RADEON_ROP3_ONE, /* GXset */ +}; + +void +evergreen_start_3d(struct radeon *radeon) +{ + + + BEGIN_BATCH(3); + PACK3(IT_CONTEXT_CONTROL, 2); + E32(0x80000000); + E32(0x80000000); + END_BATCH(); + +} + +/* + * Setup of functional groups + */ + +// asic stack/thread/gpr limits - need to query the drm +static void +evergreen_sq_setup(struct radeon *radeon, sq_config_t *sq_conf) +{ + uint32_t sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2, sq_gpr_resource_mgmt_3; + uint32_t sq_thread_resource_mgmt, sq_thread_resource_mgmt_2; + uint32_t sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2, sq_stack_resource_mgmt_3; + + + sq_config = 0; + + sq_config |= (EXPORT_SRC_C_bit | + (sq_conf->cs_prio << CS_PRIO_shift) | + (sq_conf->ls_prio << LS_PRIO_shift) | + (sq_conf->hs_prio << HS_PRIO_shift) | + (sq_conf->ps_prio << PS_PRIO_shift) | + (sq_conf->vs_prio << VS_PRIO_shift) | + (sq_conf->gs_prio << GS_PRIO_shift) | + (sq_conf->es_prio << ES_PRIO_shift)); + + sq_gpr_resource_mgmt_1 = ((sq_conf->num_ps_gprs << NUM_PS_GPRS_shift) | + (sq_conf->num_vs_gprs << NUM_VS_GPRS_shift) | + (sq_conf->num_temp_gprs << NUM_CLAUSE_TEMP_GPRS_shift)); + sq_gpr_resource_mgmt_2 = 
((sq_conf->num_gs_gprs << NUM_GS_GPRS_shift) | + (sq_conf->num_es_gprs << NUM_ES_GPRS_shift)); + sq_gpr_resource_mgmt_3 = ((sq_conf->num_hs_gprs << NUM_HS_GPRS_shift) | + (sq_conf->num_ls_gprs << NUM_LS_GPRS_shift)); + + sq_thread_resource_mgmt = ((sq_conf->num_ps_threads << NUM_PS_THREADS_shift) | + (sq_conf->num_vs_threads << NUM_VS_THREADS_shift) | + (sq_conf->num_gs_threads << NUM_GS_THREADS_shift) | + (sq_conf->num_es_threads << NUM_ES_THREADS_shift)); + sq_thread_resource_mgmt_2 = ((sq_conf->num_hs_threads << NUM_HS_THREADS_shift) | + (sq_conf->num_ls_threads << NUM_LS_THREADS_shift)); + + sq_stack_resource_mgmt_1 = ((sq_conf->num_ps_stack_entries << NUM_PS_STACK_ENTRIES_shift) | + (sq_conf->num_vs_stack_entries << NUM_VS_STACK_ENTRIES_shift)); + + sq_stack_resource_mgmt_2 = ((sq_conf->num_gs_stack_entries << NUM_GS_STACK_ENTRIES_shift) | + (sq_conf->num_es_stack_entries << NUM_ES_STACK_ENTRIES_shift)); + + sq_stack_resource_mgmt_3 = ((sq_conf->num_hs_stack_entries << NUM_HS_STACK_ENTRIES_shift) | + (sq_conf->num_ls_stack_entries << NUM_LS_STACK_ENTRIES_shift)); + + BEGIN_BATCH(16); + /* disable dyn gprs */ + EREG(SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0); + PACK0(SQ_CONFIG, 4); + E32(sq_config); + E32(sq_gpr_resource_mgmt_1); + E32(sq_gpr_resource_mgmt_2); + E32(sq_gpr_resource_mgmt_3); + PACK0(SQ_THREAD_RESOURCE_MGMT, 5); + E32(sq_thread_resource_mgmt); + E32(sq_thread_resource_mgmt_2); + E32(sq_stack_resource_mgmt_1); + E32(sq_stack_resource_mgmt_2); + E32(sq_stack_resource_mgmt_3); + END_BATCH(); +} + +void +evergreen_set_render_target(struct radeon *radeon, cb_config_t *cb_conf, uint32_t domain) +{ + uint32_t cb_color_info, cb_color_attrib = 0, cb_color_dim; + int pitch, slice, h; + + + cb_color_info = ((cb_conf->endian << ENDIAN_shift) | + (cb_conf->format << CB_COLOR0_INFO__FORMAT_shift) | + (cb_conf->array_mode << CB_COLOR0_INFO__ARRAY_MODE_shift) | + (cb_conf->number_type << NUMBER_TYPE_shift) | + (cb_conf->comp_swap << COMP_SWAP_shift) | + 
(cb_conf->source_format << SOURCE_FORMAT_shift) | + (cb_conf->resource_type << RESOURCE_TYPE_shift)); + if (cb_conf->blend_clamp) + cb_color_info |= BLEND_CLAMP_bit; + if (cb_conf->fast_clear) + cb_color_info |= FAST_CLEAR_bit; + if (cb_conf->compression) + cb_color_info |= COMPRESSION_bit; + if (cb_conf->blend_bypass) + cb_color_info |= BLEND_BYPASS_bit; + if (cb_conf->simple_float) + cb_color_info |= SIMPLE_FLOAT_bit; + if (cb_conf->round_mode) + cb_color_info |= CB_COLOR0_INFO__ROUND_MODE_bit; + if (cb_conf->tile_compact) + cb_color_info |= CB_COLOR0_INFO__TILE_COMPACT_bit; + if (cb_conf->rat) + cb_color_info |= RAT_bit; + + /* bit 4 needs to be set for linear and depth/stencil surfaces */ + if (cb_conf->non_disp_tiling) + cb_color_attrib |= CB_COLOR0_ATTRIB__NON_DISP_TILING_ORDER_bit; + + pitch = (cb_conf->w / 8) - 1; + h = RADEON_ALIGN(cb_conf->h, 8); + slice = ((cb_conf->w * h) / 64) - 1; + + switch (cb_conf->resource_type) { + case BUFFER: + /* number of elements in the surface */ + cb_color_dim = pitch * slice; + break; + default: + /* w/h of the surface */ + cb_color_dim = (((cb_conf->w - 1) << WIDTH_MAX_shift) | + ((cb_conf->h - 1) << HEIGHT_MAX_shift)); + break; + } + + BEGIN_BATCH(3 + 2); + EREG(CB_COLOR0_BASE + (0x3c * cb_conf->id), (cb_conf->base >> 8)); + RELOC_BATCH(cb_conf->bo, 0, domain); + END_BATCH(); + + /* Set CMASK & FMASK buffer to the offset of color buffer as + * we don't use those this shouldn't cause any issue and we + * then have a valid cmd stream + */ + BEGIN_BATCH(3 + 2); + EREG(CB_COLOR0_CMASK + (0x3c * cb_conf->id), (0 >> 8)); + RELOC_BATCH(cb_conf->bo, 0, domain); + END_BATCH(); + BEGIN_BATCH(3 + 2); + EREG(CB_COLOR0_FMASK + (0x3c * cb_conf->id), (0 >> 8)); + RELOC_BATCH(cb_conf->bo, 0, domain); + END_BATCH(); + + /* tiling config */ + BEGIN_BATCH(3 + 2); + EREG(CB_COLOR0_ATTRIB + (0x3c * cb_conf->id), cb_color_attrib); + RELOC_BATCH(cb_conf->bo, 0, domain); + END_BATCH(); + BEGIN_BATCH(3 + 2); + EREG(CB_COLOR0_INFO + (0x3c * 
cb_conf->id), cb_color_info); + RELOC_BATCH(cb_conf->bo, 0, domain); + END_BATCH(); + + BEGIN_BATCH(33); + EREG(CB_COLOR0_PITCH + (0x3c * cb_conf->id), pitch); + EREG(CB_COLOR0_SLICE + (0x3c * cb_conf->id), slice); + EREG(CB_COLOR0_VIEW + (0x3c * cb_conf->id), 0); + EREG(CB_COLOR0_DIM + (0x3c * cb_conf->id), cb_color_dim); + EREG(CB_COLOR0_CMASK_SLICE + (0x3c * cb_conf->id), 0); + EREG(CB_COLOR0_FMASK_SLICE + (0x3c * cb_conf->id), 0); + PACK0(CB_COLOR0_CLEAR_WORD0 + (0x3c * cb_conf->id), 4); + E32(0); + E32(0); + E32(0); + E32(0); + EREG(CB_TARGET_MASK, (cb_conf->pmask << TARGET0_ENABLE_shift)); + EREG(CB_COLOR_CONTROL, (EVERGREEN_ROP[cb_conf->rop] | + (CB_NORMAL << CB_COLOR_CONTROL__MODE_shift))); + EREG(CB_BLEND0_CONTROL, cb_conf->blendcntl); + END_BATCH(); + +} + +static void +evergreen_cp_set_surface_sync(struct radeon *radeon, uint32_t sync_type, + uint32_t size, uint64_t mc_addr, + struct radeon_bo *bo, uint32_t rdomains, uint32_t wdomain) +{ + + uint32_t cp_coher_size; + if (size == 0xffffffff) + cp_coher_size = 0xffffffff; + else + cp_coher_size = ((size + 255) >> 8); + + BEGIN_BATCH(5 + 2); + PACK3(IT_SURFACE_SYNC, 4); + E32(sync_type); + E32(cp_coher_size); + E32((mc_addr >> 8)); + E32(10); /* poll interval */ + RELOC_BATCH(bo, rdomains, wdomain); + END_BATCH(); +} + +void +evergreen_set_spi(struct radeon *radeon, int vs_export_count, int num_interp) +{ + + + BEGIN_BATCH(8); + /* Interpolator setup */ + EREG(SPI_VS_OUT_CONFIG, (vs_export_count << VS_EXPORT_COUNT_shift)); + PACK0(SPI_PS_IN_CONTROL_0, 3); + E32(((num_interp << NUM_INTERP_shift) | + LINEAR_GRADIENT_ENA_bit)); // SPI_PS_IN_CONTROL_0 + E32(0); // SPI_PS_IN_CONTROL_1 + E32(0); // SPI_INTERP_CONTROL_0 + END_BATCH(); +} + +void +evergreen_fs_setup(struct radeon *radeon, shader_config_t *fs_conf, uint32_t domain) +{ + + uint32_t sq_pgm_resources; + + sq_pgm_resources = ((fs_conf->num_gprs << NUM_GPRS_shift) | + (fs_conf->stack_size << STACK_SIZE_shift)); + + if (fs_conf->dx10_clamp) + 
sq_pgm_resources |= DX10_CLAMP_bit; + + BEGIN_BATCH(3 + 2); + EREG(SQ_PGM_START_FS, fs_conf->shader_addr >> 8); + RELOC_BATCH(fs_conf->bo, domain, 0); + END_BATCH(); + + BEGIN_BATCH(3); + EREG(SQ_PGM_RESOURCES_FS, sq_pgm_resources); + END_BATCH(); +} + +void +evergreen_vs_setup(struct radeon *radeon, shader_config_t *vs_conf, uint32_t domain) +{ + + uint32_t sq_pgm_resources, sq_pgm_resources_2; + + sq_pgm_resources = ((vs_conf->num_gprs << NUM_GPRS_shift) | + (vs_conf->stack_size << STACK_SIZE_shift)); + + if (vs_conf->dx10_clamp) + sq_pgm_resources |= DX10_CLAMP_bit; + if (vs_conf->uncached_first_inst) + sq_pgm_resources |= UNCACHED_FIRST_INST_bit; + + sq_pgm_resources_2 = ((vs_conf->single_round << SINGLE_ROUND_shift) | + (vs_conf->double_round << DOUBLE_ROUND_shift)); + + if (vs_conf->allow_sdi) + sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_IN_bit; + if (vs_conf->allow_sd0) + sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_OUT_bit; + if (vs_conf->allow_ddi) + sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_IN_bit; + if (vs_conf->allow_ddo) + sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_OUT_bit; + + /* flush SQ cache */ + evergreen_cp_set_surface_sync(radeon, SH_ACTION_ENA_bit, + vs_conf->shader_size, vs_conf->shader_addr, + vs_conf->bo, domain, 0); + + BEGIN_BATCH(3 + 2); + EREG(SQ_PGM_START_VS, vs_conf->shader_addr >> 8); + RELOC_BATCH(vs_conf->bo, domain, 0); + END_BATCH(); + + BEGIN_BATCH(4); + PACK0(SQ_PGM_RESOURCES_VS, 2); + E32(sq_pgm_resources); + E32(sq_pgm_resources_2); + END_BATCH(); +} + +void +evergreen_ps_setup(struct radeon *radeon, shader_config_t *ps_conf, uint32_t domain) +{ + + uint32_t sq_pgm_resources, sq_pgm_resources_2; + + sq_pgm_resources = ((ps_conf->num_gprs << NUM_GPRS_shift) | + (ps_conf->stack_size << STACK_SIZE_shift)); + + if (ps_conf->dx10_clamp) + sq_pgm_resources |= DX10_CLAMP_bit; + if (ps_conf->uncached_first_inst) + sq_pgm_resources |= UNCACHED_FIRST_INST_bit; + if (ps_conf->clamp_consts) + sq_pgm_resources |= CLAMP_CONSTS_bit; + + 
sq_pgm_resources_2 = ((ps_conf->single_round << SINGLE_ROUND_shift) | + (ps_conf->double_round << DOUBLE_ROUND_shift)); + + if (ps_conf->allow_sdi) + sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_IN_bit; + if (ps_conf->allow_sd0) + sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_OUT_bit; + if (ps_conf->allow_ddi) + sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_IN_bit; + if (ps_conf->allow_ddo) + sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_OUT_bit; + + /* flush SQ cache */ + evergreen_cp_set_surface_sync(radeon, SH_ACTION_ENA_bit, + ps_conf->shader_size, ps_conf->shader_addr, + ps_conf->bo, domain, 0); + + BEGIN_BATCH(3 + 2); + EREG(SQ_PGM_START_PS, ps_conf->shader_addr >> 8); + RELOC_BATCH(ps_conf->bo, domain, 0); + END_BATCH(); + + BEGIN_BATCH(5); + PACK0(SQ_PGM_RESOURCES_PS, 3); + E32(sq_pgm_resources); + E32(sq_pgm_resources_2); + E32(ps_conf->export_mode); + END_BATCH(); +} + +void +evergreen_set_alu_consts(struct radeon *radeon, const_config_t *const_conf, uint32_t domain) +{ + + /* size reg is units of 16 consts (4 dwords each) */ + uint32_t size = const_conf->size_bytes >> 8; + + if (size == 0) + size = 1; + + /* flush SQ cache */ + evergreen_cp_set_surface_sync(radeon, SH_ACTION_ENA_bit, + const_conf->size_bytes, const_conf->const_addr, + const_conf->bo, domain, 0); + + switch (const_conf->type) { + case SHADER_TYPE_VS: + BEGIN_BATCH(3); + EREG(SQ_ALU_CONST_BUFFER_SIZE_VS_0, size); + END_BATCH(); + BEGIN_BATCH(3 + 2); + EREG(SQ_ALU_CONST_CACHE_VS_0, const_conf->const_addr >> 8); + RELOC_BATCH(const_conf->bo, domain, 0); + END_BATCH(); + break; + case SHADER_TYPE_PS: + BEGIN_BATCH(3); + EREG(SQ_ALU_CONST_BUFFER_SIZE_PS_0, size); + END_BATCH(); + BEGIN_BATCH(3 + 2); + EREG(SQ_ALU_CONST_CACHE_PS_0, const_conf->const_addr >> 8); + RELOC_BATCH(const_conf->bo, domain, 0); + END_BATCH(); + break; + default: + ErrorF("Unsupported const type %d\n", const_conf->type); + break; + } + +} + +void +evergreen_set_bool_consts(struct radeon *radeon, int offset, uint32_t val) +{ + + /* bool 
register order is: ps, vs/es, gs, hs, ls, cs; one register each + * 1 bits per bool; 32 bools each for ps, vs/es, gs, hs, ls, cs. + */ + BEGIN_BATCH(3); + EREG(SQ_BOOL_CONST + offset * SQ_BOOL_CONST_offset, val); + END_BATCH(); +} + +static void +evergreen_set_vtx_resource(struct radeon *radeon, vtx_resource_t *res, uint32_t domain) +{ + + struct radeon_accel_state *accel_state = &radeon->accel_state; + uint32_t sq_vtx_constant_word2, sq_vtx_constant_word3, sq_vtx_constant_word4; + + sq_vtx_constant_word2 = ((((res->vb_addr) >> 32) & BASE_ADDRESS_HI_mask) | + ((res->vtx_size_dw << 2) << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift) | + (res->format << SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift) | + (res->num_format_all << SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift) | + (res->endian << SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift)); + if (res->clamp_x) + sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit; + + if (res->format_comp_all) + sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit; + + if (res->srf_mode_all) + sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit; + + sq_vtx_constant_word3 = ((res->dst_sel_x << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_X_shift) | + (res->dst_sel_y << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Y_shift) | + (res->dst_sel_z << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Z_shift) | + (res->dst_sel_w << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_W_shift)); + + if (res->uncached) + sq_vtx_constant_word3 |= SQ_VTX_CONSTANT_WORD3_0__UNCACHED_bit; + + /* XXX ??? 
*/ + sq_vtx_constant_word4 = 0; + + evergreen_cp_set_surface_sync(radeon, TC_ACTION_ENA_bit, + accel_state->vbo.vb_offset, 0, + res->bo, + domain, 0); + + BEGIN_BATCH(10 + 2); + PACK0(SQ_FETCH_RESOURCE + res->id * SQ_FETCH_RESOURCE_offset, 8); + E32(res->vb_addr & 0xffffffff); // 0: BASE_ADDRESS + E32((res->vtx_num_entries << 2) - 1); // 1: SIZE + E32(sq_vtx_constant_word2); // 2: BASE_HI, STRIDE, CLAMP, FORMAT, ENDIAN + E32(sq_vtx_constant_word3); // 3: swizzles + E32(sq_vtx_constant_word4); // 4: num elements + E32(0); // 5: n/a + E32(0); // 6: n/a + E32(SQ_TEX_VTX_VALID_BUFFER << SQ_VTX_CONSTANT_WORD7_0__TYPE_shift); // 7: TYPE + RELOC_BATCH(res->bo, domain, 0); + END_BATCH(); +} + +void +evergreen_set_tex_resource(struct radeon *radeon, tex_resource_t *tex_res, uint32_t domain) +{ + + uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4; + uint32_t sq_tex_resource_word5, sq_tex_resource_word6, sq_tex_resource_word7; + + sq_tex_resource_word0 = (tex_res->dim << DIM_shift); + + if (tex_res->w) + sq_tex_resource_word0 |= (((((tex_res->pitch + 7) >> 3) - 1) << PITCH_shift) | + ((tex_res->w - 1) << TEX_WIDTH_shift)); + + if (tex_res->tile_type) + sq_tex_resource_word0 |= SQ_TEX_RESOURCE_WORD0_0__NON_DISP_TILING_ORDER_bit; + + sq_tex_resource_word1 = (tex_res->array_mode << SQ_TEX_RESOURCE_WORD1_0__ARRAY_MODE_shift); + + if (tex_res->h) + sq_tex_resource_word1 |= ((tex_res->h - 1) << TEX_HEIGHT_shift); + if (tex_res->depth) + sq_tex_resource_word1 |= ((tex_res->depth - 1) << TEX_DEPTH_shift); + + sq_tex_resource_word4 = ((tex_res->format_comp_x << FORMAT_COMP_X_shift) | + (tex_res->format_comp_y << FORMAT_COMP_Y_shift) | + (tex_res->format_comp_z << FORMAT_COMP_Z_shift) | + (tex_res->format_comp_w << FORMAT_COMP_W_shift) | + (tex_res->num_format_all << SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift) | + (tex_res->endian << SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift) | + (tex_res->dst_sel_x << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) | + 
(tex_res->dst_sel_y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) | + (tex_res->dst_sel_z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) | + (tex_res->dst_sel_w << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift) | + (tex_res->base_level << BASE_LEVEL_shift)); + + if (tex_res->srf_mode_all) + sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit; + if (tex_res->force_degamma) + sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit; + + sq_tex_resource_word5 = ((tex_res->last_level << LAST_LEVEL_shift) | + (tex_res->base_array << BASE_ARRAY_shift) | + (tex_res->last_array << LAST_ARRAY_shift)); + + sq_tex_resource_word6 = ((tex_res->min_lod << SQ_TEX_RESOURCE_WORD6_0__MIN_LOD_shift) | + (tex_res->perf_modulation << PERF_MODULATION_shift)); + + if (tex_res->interlaced) + sq_tex_resource_word6 |= INTERLACED_bit; + + sq_tex_resource_word7 = ((tex_res->format << SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift) | + (SQ_TEX_VTX_VALID_TEXTURE << SQ_TEX_RESOURCE_WORD7_0__TYPE_shift)); + + /* flush texture cache */ + evergreen_cp_set_surface_sync(radeon, TC_ACTION_ENA_bit, + tex_res->size, tex_res->base, + tex_res->bo, domain, 0); + + BEGIN_BATCH(10 + 4); + PACK0(SQ_FETCH_RESOURCE + tex_res->id * SQ_FETCH_RESOURCE_offset, 8); + E32(sq_tex_resource_word0); + E32(sq_tex_resource_word1); + E32(((tex_res->base) >> 8)); + E32(((tex_res->mip_base) >> 8)); + E32(sq_tex_resource_word4); + E32(sq_tex_resource_word5); + E32(sq_tex_resource_word6); + E32(sq_tex_resource_word7); + RELOC_BATCH(tex_res->bo, domain, 0); + RELOC_BATCH(tex_res->mip_bo, domain, 0); + END_BATCH(); +} + +void +evergreen_set_tex_sampler (struct radeon *radeon, tex_sampler_t *s) +{ + + uint32_t sq_tex_sampler_word0, sq_tex_sampler_word1, sq_tex_sampler_word2; + + sq_tex_sampler_word0 = ((s->clamp_x << SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift) | + (s->clamp_y << CLAMP_Y_shift) | + (s->clamp_z << CLAMP_Z_shift) | + (s->xy_mag_filter << XY_MAG_FILTER_shift) | + (s->xy_min_filter << XY_MIN_FILTER_shift) | + 
(s->z_filter << Z_FILTER_shift) | + (s->mip_filter << MIP_FILTER_shift) | + (s->border_color << BORDER_COLOR_TYPE_shift) | + (s->depth_compare << DEPTH_COMPARE_FUNCTION_shift) | + (s->chroma_key << CHROMA_KEY_shift)); + + sq_tex_sampler_word1 = ((s->min_lod << SQ_TEX_SAMPLER_WORD1_0__MIN_LOD_shift) | + (s->max_lod << MAX_LOD_shift) | + (s->perf_mip << PERF_MIP_shift) | + (s->perf_z << PERF_Z_shift)); + + + sq_tex_sampler_word2 = ((s->lod_bias << SQ_TEX_SAMPLER_WORD2_0__LOD_BIAS_shift) | + (s->lod_bias2 << LOD_BIAS_SEC_shift)); + + if (s->mc_coord_truncate) + sq_tex_sampler_word2 |= MC_COORD_TRUNCATE_bit; + if (s->force_degamma) + sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit; + if (s->truncate_coord) + sq_tex_sampler_word2 |= TRUNCATE_COORD_bit; + if (s->disable_cube_wrap) + sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__DISABLE_CUBE_WRAP_bit; + if (s->type) + sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__TYPE_bit; + + BEGIN_BATCH(5); + PACK0(SQ_TEX_SAMPLER_WORD + s->id * SQ_TEX_SAMPLER_WORD_offset, 3); + E32(sq_tex_sampler_word0); + E32(sq_tex_sampler_word1); + E32(sq_tex_sampler_word2); + END_BATCH(); +} + +//XXX deal with clip offsets in clip setup +void +evergreen_set_screen_scissor(struct radeon *radeon, int x1, int y1, int x2, int y2) +{ + + + BEGIN_BATCH(4); + PACK0(PA_SC_SCREEN_SCISSOR_TL, 2); + E32(((x1 << PA_SC_SCREEN_SCISSOR_TL__TL_X_shift) | + (y1 << PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift))); + E32(((x2 << PA_SC_SCREEN_SCISSOR_BR__BR_X_shift) | + (y2 << PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift))); + END_BATCH(); +} + +void +evergreen_set_vport_scissor(struct radeon *radeon, int id, int x1, int y1, int x2, int y2) +{ + + + BEGIN_BATCH(4); + PACK0(PA_SC_VPORT_SCISSOR_0_TL + id * PA_SC_VPORT_SCISSOR_0_TL_offset, 2); + E32(((x1 << PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift) | + (y1 << PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift) | + WINDOW_OFFSET_DISABLE_bit)); + E32(((x2 << PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift) | + (y2 << 
PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift)));
+    END_BATCH();
+}
+
+void
+evergreen_set_generic_scissor(struct radeon *radeon, int x1, int y1, int x2, int y2)
+{
+    /* Emits the TL word (x1, y1) and then the BR word (x2, y2).  The BR word
+     * uses only the *_BR__BR_* shifts; its Y field used to take the TL macro. */
+    BEGIN_BATCH(4);
+    PACK0(PA_SC_GENERIC_SCISSOR_TL, 2);
+    E32(((x1 << PA_SC_GENERIC_SCISSOR_TL__TL_X_shift) |
+         (y1 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift) |
+         WINDOW_OFFSET_DISABLE_bit));
+    E32(((x2 << PA_SC_GENERIC_SCISSOR_BR__BR_X_shift) |
+         (y2 << PA_SC_GENERIC_SCISSOR_BR__BR_Y_shift)));
+    END_BATCH();
+}
+
+void
+evergreen_set_window_scissor(struct radeon *radeon, int x1, int y1, int x2, int y2)
+{
+
+
+    BEGIN_BATCH(4);
+    PACK0(PA_SC_WINDOW_SCISSOR_TL, 2);
+    E32(((x1 << PA_SC_WINDOW_SCISSOR_TL__TL_X_shift) |
+         (y1 << PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift) |
+         WINDOW_OFFSET_DISABLE_bit));
+    E32(((x2 << PA_SC_WINDOW_SCISSOR_BR__BR_X_shift) |
+         (y2 << PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift)));
+    END_BATCH();
+}
+
+void
+evergreen_set_clip_rect(struct radeon *radeon, int id, int x1, int y1, int x2, int y2)
+{
+
+
+    BEGIN_BATCH(4);
+    PACK0(PA_SC_CLIPRECT_0_TL + id * PA_SC_CLIPRECT_0_TL_offset, 2);
+    E32(((x1 << PA_SC_CLIPRECT_0_TL__TL_X_shift) |
+         (y1 << PA_SC_CLIPRECT_0_TL__TL_Y_shift)));
+    E32(((x2 << PA_SC_CLIPRECT_0_BR__BR_X_shift) |
+         (y2 << PA_SC_CLIPRECT_0_BR__BR_Y_shift)));
+    END_BATCH();
+}
+
+/*
+ * Setup of default state
+ */
+
+void
+evergreen_set_default_state(struct radeon *radeon)
+{
+    tex_resource_t tex_res;
+    shader_config_t fs_conf;
+    sq_config_t sq_conf;
+    int i;
+
+    struct radeon_accel_state *accel_state = &radeon->accel_state;
+
+    if (accel_state->XInited3D)
+        return;
+
+    memset(&tex_res, 0, sizeof(tex_resource_t));
+    memset(&fs_conf, 0, sizeof(shader_config_t));
+
+    accel_state->XInited3D = true;
+
+    evergreen_start_3d(radeon);
+
+    /* SQ */
+    sq_conf.ps_prio = 0;
+    sq_conf.vs_prio = 1;
+    sq_conf.gs_prio = 2;
+    sq_conf.es_prio = 3;
+    sq_conf.hs_prio = 0;
+    sq_conf.ls_prio = 0;
+    sq_conf.cs_prio = 0;
+
+    switch (radeon->ChipFamily) {
+    case CHIP_FAMILY_CEDAR:
+    default:
+        sq_conf.num_ps_gprs
= 93; + sq_conf.num_vs_gprs = 46; + sq_conf.num_temp_gprs = 4; + sq_conf.num_gs_gprs = 31; + sq_conf.num_es_gprs = 31; + sq_conf.num_hs_gprs = 23; + sq_conf.num_ls_gprs = 23; + sq_conf.num_ps_threads = 96; + sq_conf.num_vs_threads = 16; + sq_conf.num_gs_threads = 16; + sq_conf.num_es_threads = 16; + sq_conf.num_hs_threads = 16; + sq_conf.num_ls_threads = 16; + sq_conf.num_ps_stack_entries = 42; + sq_conf.num_vs_stack_entries = 42; + sq_conf.num_gs_stack_entries = 42; + sq_conf.num_es_stack_entries = 42; + sq_conf.num_hs_stack_entries = 42; + sq_conf.num_ls_stack_entries = 42; + break; + case CHIP_FAMILY_REDWOOD: + sq_conf.num_ps_gprs = 93; + sq_conf.num_vs_gprs = 46; + sq_conf.num_temp_gprs = 4; + sq_conf.num_gs_gprs = 31; + sq_conf.num_es_gprs = 31; + sq_conf.num_hs_gprs = 23; + sq_conf.num_ls_gprs = 23; + sq_conf.num_ps_threads = 128; + sq_conf.num_vs_threads = 20; + sq_conf.num_gs_threads = 20; + sq_conf.num_es_threads = 20; + sq_conf.num_hs_threads = 20; + sq_conf.num_ls_threads = 20; + sq_conf.num_ps_stack_entries = 42; + sq_conf.num_vs_stack_entries = 42; + sq_conf.num_gs_stack_entries = 42; + sq_conf.num_es_stack_entries = 42; + sq_conf.num_hs_stack_entries = 42; + sq_conf.num_ls_stack_entries = 42; + break; + case CHIP_FAMILY_JUNIPER: + sq_conf.num_ps_gprs = 93; + sq_conf.num_vs_gprs = 46; + sq_conf.num_temp_gprs = 4; + sq_conf.num_gs_gprs = 31; + sq_conf.num_es_gprs = 31; + sq_conf.num_hs_gprs = 23; + sq_conf.num_ls_gprs = 23; + sq_conf.num_ps_threads = 128; + sq_conf.num_vs_threads = 20; + sq_conf.num_gs_threads = 20; + sq_conf.num_es_threads = 20; + sq_conf.num_hs_threads = 20; + sq_conf.num_ls_threads = 20; + sq_conf.num_ps_stack_entries = 85; + sq_conf.num_vs_stack_entries = 85; + sq_conf.num_gs_stack_entries = 85; + sq_conf.num_es_stack_entries = 85; + sq_conf.num_hs_stack_entries = 85; + sq_conf.num_ls_stack_entries = 85; + break; + case CHIP_FAMILY_CYPRESS: + case CHIP_FAMILY_HEMLOCK: + sq_conf.num_ps_gprs = 93; + sq_conf.num_vs_gprs = 46; + 
sq_conf.num_temp_gprs = 4; + sq_conf.num_gs_gprs = 31; + sq_conf.num_es_gprs = 31; + sq_conf.num_hs_gprs = 23; + sq_conf.num_ls_gprs = 23; + sq_conf.num_ps_threads = 128; + sq_conf.num_vs_threads = 20; + sq_conf.num_gs_threads = 20; + sq_conf.num_es_threads = 20; + sq_conf.num_hs_threads = 20; + sq_conf.num_ls_threads = 20; + sq_conf.num_ps_stack_entries = 85; + sq_conf.num_vs_stack_entries = 85; + sq_conf.num_gs_stack_entries = 85; + sq_conf.num_es_stack_entries = 85; + sq_conf.num_hs_stack_entries = 85; + sq_conf.num_ls_stack_entries = 85; + break; + case CHIP_FAMILY_PALM: + sq_conf.num_ps_gprs = 93; + sq_conf.num_vs_gprs = 46; + sq_conf.num_temp_gprs = 4; + sq_conf.num_gs_gprs = 31; + sq_conf.num_es_gprs = 31; + sq_conf.num_hs_gprs = 23; + sq_conf.num_ls_gprs = 23; + sq_conf.num_ps_threads = 96; + sq_conf.num_vs_threads = 16; + sq_conf.num_gs_threads = 16; + sq_conf.num_es_threads = 16; + sq_conf.num_hs_threads = 16; + sq_conf.num_ls_threads = 16; + sq_conf.num_ps_stack_entries = 42; + sq_conf.num_vs_stack_entries = 42; + sq_conf.num_gs_stack_entries = 42; + sq_conf.num_es_stack_entries = 42; + sq_conf.num_hs_stack_entries = 42; + sq_conf.num_ls_stack_entries = 42; + break; + case CHIP_FAMILY_BARTS: + sq_conf.num_ps_gprs = 93; + sq_conf.num_vs_gprs = 46; + sq_conf.num_temp_gprs = 4; + sq_conf.num_gs_gprs = 31; + sq_conf.num_es_gprs = 31; + sq_conf.num_hs_gprs = 23; + sq_conf.num_ls_gprs = 23; + sq_conf.num_ps_threads = 128; + sq_conf.num_vs_threads = 20; + sq_conf.num_gs_threads = 20; + sq_conf.num_es_threads = 20; + sq_conf.num_hs_threads = 20; + sq_conf.num_ls_threads = 20; + sq_conf.num_ps_stack_entries = 85; + sq_conf.num_vs_stack_entries = 85; + sq_conf.num_gs_stack_entries = 85; + sq_conf.num_es_stack_entries = 85; + sq_conf.num_hs_stack_entries = 85; + sq_conf.num_ls_stack_entries = 85; + break; + case CHIP_FAMILY_TURKS: + sq_conf.num_ps_gprs = 93; + sq_conf.num_vs_gprs = 46; + sq_conf.num_temp_gprs = 4; + sq_conf.num_gs_gprs = 31; + sq_conf.num_es_gprs = 
31; + sq_conf.num_hs_gprs = 23; + sq_conf.num_ls_gprs = 23; + sq_conf.num_ps_threads = 128; + sq_conf.num_vs_threads = 20; + sq_conf.num_gs_threads = 20; + sq_conf.num_es_threads = 20; + sq_conf.num_hs_threads = 20; + sq_conf.num_ls_threads = 20; + sq_conf.num_ps_stack_entries = 42; + sq_conf.num_vs_stack_entries = 42; + sq_conf.num_gs_stack_entries = 42; + sq_conf.num_es_stack_entries = 42; + sq_conf.num_hs_stack_entries = 42; + sq_conf.num_ls_stack_entries = 42; + break; + case CHIP_FAMILY_CAICOS: + sq_conf.num_ps_gprs = 93; + sq_conf.num_vs_gprs = 46; + sq_conf.num_temp_gprs = 4; + sq_conf.num_gs_gprs = 31; + sq_conf.num_es_gprs = 31; + sq_conf.num_hs_gprs = 23; + sq_conf.num_ls_gprs = 23; + sq_conf.num_ps_threads = 128; + sq_conf.num_vs_threads = 10; + sq_conf.num_gs_threads = 10; + sq_conf.num_es_threads = 10; + sq_conf.num_hs_threads = 10; + sq_conf.num_ls_threads = 10; + sq_conf.num_ps_stack_entries = 42; + sq_conf.num_vs_stack_entries = 42; + sq_conf.num_gs_stack_entries = 42; + sq_conf.num_es_stack_entries = 42; + sq_conf.num_hs_stack_entries = 42; + sq_conf.num_ls_stack_entries = 42; + break; + } + + evergreen_sq_setup(radeon, &sq_conf); + + BEGIN_BATCH(24); + EREG(SQ_LDS_ALLOC_PS, 0); + EREG(SQ_DYN_GPR_RESOURCE_LIMIT_1, 0); + + PACK0(SQ_ESGS_RING_ITEMSIZE, 6); + E32(0); + E32(0); + E32(0); + E32(0); + E32(0); + E32(0); + + PACK0(SQ_GS_VERT_ITEMSIZE, 4); + E32(0); + E32(0); + E32(0); + E32(0); + + PACK0(SQ_VTX_BASE_VTX_LOC, 2); + E32(0); + E32(0); + END_BATCH(); + + /* DB */ + BEGIN_BATCH(3 + 2); + EREG(DB_Z_INFO, 0); + RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); + END_BATCH(); + + BEGIN_BATCH(3 + 2); + EREG(DB_STENCIL_INFO, 0); + RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); + END_BATCH(); + + BEGIN_BATCH(3 + 2); + EREG(DB_HTILE_DATA_BASE, 0); + RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); + END_BATCH(); + + BEGIN_BATCH(49); + EREG(DB_DEPTH_CONTROL, 0); + + PACK0(PA_SC_VPORT_ZMIN_0, 2); + 
EFLOAT(0.0); // PA_SC_VPORT_ZMIN_0 + EFLOAT(1.0); // PA_SC_VPORT_ZMAX_0 + + PACK0(DB_RENDER_CONTROL, 5); + E32(STENCIL_COMPRESS_DISABLE_bit | DEPTH_COMPRESS_DISABLE_bit); // DB_RENDER_CONTROL + E32(0); // DB_COUNT_CONTROL + E32(0); // DB_DEPTH_VIEW + E32(0x2a); // DB_RENDER_OVERRIDE + E32(0); // DB_RENDER_OVERRIDE2 + + PACK0(DB_STENCIL_CLEAR, 2); + E32(0); // DB_STENCIL_CLEAR + E32(0); // DB_DEPTH_CLEAR + + EREG(DB_ALPHA_TO_MASK, ((2 << ALPHA_TO_MASK_OFFSET0_shift) | + (2 << ALPHA_TO_MASK_OFFSET1_shift) | + (2 << ALPHA_TO_MASK_OFFSET2_shift) | + (2 << ALPHA_TO_MASK_OFFSET3_shift))); + + EREG(DB_SHADER_CONTROL, ((EARLY_Z_THEN_LATE_Z << Z_ORDER_shift) | + DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */ + + // SX + EREG(SX_MISC, 0); + + // CB + PACK0(SX_ALPHA_TEST_CONTROL, 5); + E32(0); // SX_ALPHA_TEST_CONTROL + E32(0x00000000); //CB_BLEND_RED + E32(0x00000000); //CB_BLEND_GREEN + E32(0x00000000); //CB_BLEND_BLUE + E32(0x00000000); //CB_BLEND_ALPHA + + EREG(CB_SHADER_MASK, OUTPUT0_ENABLE_mask); + + // SC + EREG(PA_SC_WINDOW_OFFSET, ((0 << WINDOW_X_OFFSET_shift) | + (0 << WINDOW_Y_OFFSET_shift))); + EREG(PA_SC_CLIPRECT_RULE, CLIP_RULE_mask); + EREG(PA_SC_EDGERULE, 0xAAAAAAAA); + EREG(PA_SU_HARDWARE_SCREEN_OFFSET, 0); + END_BATCH(); + + /* clip boolean is set to always visible -> doesn't matter */ + for (i = 0; i < PA_SC_CLIPRECT_0_TL_num; i++) + evergreen_set_clip_rect (radeon, i, 0, 0, 8192, 8192); + + for (i = 0; i < PA_SC_VPORT_SCISSOR_0_TL_num; i++) + evergreen_set_vport_scissor (radeon, i, 0, 0, 8192, 8192); + + BEGIN_BATCH(57); + PACK0(PA_SC_MODE_CNTL_0, 2); + E32(0); // PA_SC_MODE_CNTL_0 + E32(0); // PA_SC_MODE_CNTL_1 + + PACK0(PA_SC_LINE_CNTL, 16); + E32(0); // PA_SC_LINE_CNTL + E32(0); // PA_SC_AA_CONFIG + E32(((X_ROUND_TO_EVEN << PA_SU_VTX_CNTL__ROUND_MODE_shift) | + PIX_CENTER_bit)); // PA_SU_VTX_CNTL + EFLOAT(1.0); // PA_CL_GB_VERT_CLIP_ADJ + EFLOAT(1.0); // PA_CL_GB_VERT_DISC_ADJ + EFLOAT(1.0); // PA_CL_GB_HORZ_CLIP_ADJ + EFLOAT(1.0); // 
PA_CL_GB_HORZ_DISC_ADJ + E32(0); // PA_SC_AA_SAMPLE_LOCS_0 + E32(0); + E32(0); + E32(0); + E32(0); + E32(0); + E32(0); + E32(0); // PA_SC_AA_SAMPLE_LOCS_7 + E32(0xFFFFFFFF); // PA_SC_AA_MASK + + // CL + PACK0(PA_CL_CLIP_CNTL, 8); + E32(CLIP_DISABLE_bit); // PA_CL_CLIP_CNTL + E32(FACE_bit); // PA_SU_SC_MODE_CNTL + E32(VTX_XY_FMT_bit); // PA_CL_VTE_CNTL + E32(0); // PA_CL_VS_OUT_CNTL + E32(0); // PA_CL_NANINF_CNTL + E32(0); // PA_SU_LINE_STIPPLE_CNTL + E32(0); // PA_SU_LINE_STIPPLE_SCALE + E32(0); // PA_SU_PRIM_FILTER_CNTL + + // SU + PACK0(PA_SU_POLY_OFFSET_DB_FMT_CNTL, 6); + E32(0); + E32(0); + E32(0); + E32(0); + E32(0); + E32(0); + + /* src = semantic id 0; mask = semantic id 1 */ + EREG(SPI_VS_OUT_ID_0, ((0 << SEMANTIC_0_shift) | + (1 << SEMANTIC_1_shift))); + PACK0(SPI_PS_INPUT_CNTL_0 + (0 << 2), 2); + /* SPI_PS_INPUT_CNTL_0 maps to GPR[0] - load with semantic id 0 */ + E32(((0 << SEMANTIC_shift) | + (0x01 << DEFAULT_VAL_shift))); + /* SPI_PS_INPUT_CNTL_1 maps to GPR[1] - load with semantic id 1 */ + E32(((1 << SEMANTIC_shift) | + (0x01 << DEFAULT_VAL_shift))); + + PACK0(SPI_INPUT_Z, 8); + E32(0); // SPI_INPUT_Z + E32(0); // SPI_FOG_CNTL + E32(LINEAR_CENTROID_ENA__X_ON_AT_CENTROID << LINEAR_CENTROID_ENA_shift); // SPI_BARYC_CNTL + E32(0); // SPI_PS_IN_CONTROL_2 + E32(0); + E32(0); + E32(0); + E32(0); + END_BATCH(); + + // clear FS + fs_conf.bo = accel_state->shaders_bo; + evergreen_fs_setup(radeon, &fs_conf, RADEON_GEM_DOMAIN_VRAM); + + // VGT + BEGIN_BATCH(46); + + PACK0(VGT_MAX_VTX_INDX, 4); + E32(0xffffff); + E32(0); + E32(0); + E32(0); + + PACK0(VGT_INSTANCE_STEP_RATE_0, 2); + E32(0); + E32(0); + + PACK0(VGT_REUSE_OFF, 2); + E32(0); + E32(0); + + PACK0(PA_SU_POINT_SIZE, 17); + E32(0); // PA_SU_POINT_SIZE + E32(0); // PA_SU_POINT_MINMAX + E32((8 << PA_SU_LINE_CNTL__WIDTH_shift)); /* Line width 1 pixel */ // PA_SU_LINE_CNTL + E32(0); // PA_SC_LINE_STIPPLE + E32(0); // VGT_OUTPUT_PATH_CNTL + E32(0); // VGT_HOS_CNTL + E32(0); + E32(0); + E32(0); + E32(0); + 
E32(0); + E32(0); + E32(0); + E32(0); + E32(0); + E32(0); + E32(0); // VGT_GS_MODE + + EREG(VGT_PRIMITIVEID_EN, 0); + EREG(VGT_MULTI_PRIM_IB_RESET_EN, 0); + EREG(VGT_SHADER_STAGES_EN, 0); + + PACK0(VGT_STRMOUT_CONFIG, 2); + E32(0); + E32(0); + END_BATCH(); +} + + +/* + * Commands + * + * evergreen_draw_auto() emits one draw inside a BEGIN_BATCH(10)/END_BATCH() + * pair: it writes VGT_PRIMITIVE_TYPE, then the IT_INDEX_TYPE, + * IT_NUM_INSTANCES and IT_DRAW_INDEX_AUTO packets, taking every value from + * *draw_conf. On big-endian builds the index type is OR'ed with the + * ENDIAN_8IN32 swap mode. + */ + +void +evergreen_draw_auto(struct radeon *radeon, draw_config_t *draw_conf) +{ + + + BEGIN_BATCH(10); + EREG(VGT_PRIMITIVE_TYPE, draw_conf->prim_type); + PACK3(IT_INDEX_TYPE, 1); +#if X_BYTE_ORDER == X_BIG_ENDIAN + E32(IT_INDEX_TYPE_SWAP_MODE(ENDIAN_8IN32) | draw_conf->index_type); +#else + E32(draw_conf->index_type); +#endif + PACK3(IT_NUM_INSTANCES, 1); + E32(draw_conf->num_instances); + PACK3(IT_DRAW_INDEX_AUTO, 2); + E32(draw_conf->num_indices); + E32(draw_conf->vgt_draw_initiator); + END_BATCH(); +} + /* Finish the operation that started at vbo.vb_start_op. Does nothing when no operation is open (vb_start_op == -1). When no vertex data was added since the start, the IB is discarded and the CS flushed instead of drawing. Otherwise the new vertex range is bound as a VS fetch resource, drawn as a DI_PT_RECTLIST with DI_SRC_SEL_AUTO_INDEX, the destination surface is synced and the vbo/cbuf start markers are reset to -1. vtx_size is divided by 4 to obtain the vertex size in dwords, so it is presumably given in bytes and must be at least 4 (it is later used as a divisor). */ +void evergreen_finish_op(struct radeon *radeon, int vtx_size) +{ + + struct radeon_accel_state *accel_state = &radeon->accel_state; + draw_config_t draw_conf; + vtx_resource_t vtx_res; + + if (accel_state->vbo.vb_start_op == -1) + return; + + CLEAR (draw_conf); + CLEAR (vtx_res); + + if (accel_state->vbo.vb_offset == accel_state->vbo.vb_start_op) { + radeon_ib_discard(radeon); + radeon_cs_flush_indirect(radeon); + return; + } + + /* Vertex buffer setup */ + accel_state->vbo.vb_size = accel_state->vbo.vb_offset - accel_state->vbo.vb_start_op; + vtx_res.id = SQ_FETCH_RESOURCE_vs; + vtx_res.vtx_size_dw = vtx_size / 4; + vtx_res.vtx_num_entries = accel_state->vbo.vb_size / 4; + vtx_res.vb_addr = accel_state->vbo.vb_start_op; + vtx_res.bo = accel_state->vbo.vb_bo; + vtx_res.dst_sel_x = SQ_SEL_X; + vtx_res.dst_sel_y = SQ_SEL_Y; + vtx_res.dst_sel_z = SQ_SEL_Z; + vtx_res.dst_sel_w = SQ_SEL_W; +#if X_BYTE_ORDER == X_BIG_ENDIAN + vtx_res.endian = SQ_ENDIAN_8IN32; +#endif + evergreen_set_vtx_resource(radeon, &vtx_res, RADEON_GEM_DOMAIN_GTT); + + /* Draw */ + draw_conf.prim_type = DI_PT_RECTLIST; + draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX; + draw_conf.num_instances = 1; +
draw_conf.num_indices = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw; /* entries in this range divided by entries per vertex */ + draw_conf.index_type = DI_INDEX_SIZE_16_BIT; + + evergreen_draw_auto(radeon, &draw_conf); + + /* sync dst surface */ + evergreen_cp_set_surface_sync(radeon, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit), + accel_state->dst_size, accel_state->dst_obj.offset, + accel_state->dst_obj.bo, 0, accel_state->dst_obj.domain); + + /* mark both buffers as having no operation in progress */ accel_state->vbo.vb_start_op = -1; + accel_state->cbuf.vb_start_op = -1; + accel_state->ib_reset_op = 0; + +} + /* Open the buffer object that holds the shader programs (512 * 9 bytes, requested in RADEON_GEM_DOMAIN_VRAM) and store it in accel_state->shaders_bo. Returns true on success; logs through ErrorF and returns false when radeon_bo_open() returns NULL. */ +bool +EVERGREENAllocShaders(struct radeon *radeon) +{ + struct radeon_accel_state *accel_state = &radeon->accel_state; + + /* 512 bytes per shader for now */ + int size = 512 * 9; + + accel_state->shaders_bo = radeon_bo_open(radeon->bufmgr, 0, size, 0, + RADEON_GEM_DOMAIN_VRAM, 0); + if (accel_state->shaders_bo == NULL) { + ErrorF("Allocating shader failed\n"); + return false; + } + return true; +} + + /* Map accel_state->shaders_bo and write the solid and copy vertex/pixel shaders into it at byte offsets 0, 512, 1024 and 1536, recording each offset in accel_state; the comp and xv shaders are compiled out with #if 0. The buffer is unmapped before returning true. A mapping failure logs the error and calls exit(-1), so the return false that follows it is never reached. */ +bool +EVERGREENLoadShaders(struct radeon *radeon) +{ + struct radeon_accel_state *accel_state = &radeon->accel_state; + RADEONChipFamily ChipSet = radeon->ChipFamily; + uint32_t *shader; + int ret; + + ret = radeon_bo_map(accel_state->shaders_bo, 1); + if (ret) { + ErrorF("failed to map shader %d\n", ret); + exit(-1); + return false; + } + shader = accel_state->shaders_bo->ptr; + + /* solid vs --------------------------------------- */ + accel_state->solid_vs_offset = 0; + /* shader is a uint32_t pointer, so the byte offset is divided by 4 */ evergreen_solid_vs(ChipSet, shader + accel_state->solid_vs_offset / 4); + + /* solid ps --------------------------------------- */ + accel_state->solid_ps_offset = 512; + evergreen_solid_ps(ChipSet, shader + accel_state->solid_ps_offset / 4); + + /* copy vs --------------------------------------- */ + accel_state->copy_vs_offset = 1024; + evergreen_copy_vs(ChipSet, shader + accel_state->copy_vs_offset / 4); + + /* copy ps --------------------------------------- */ + accel_state->copy_ps_offset = 1536; + evergreen_copy_ps(ChipSet, shader + accel_state->copy_ps_offset / 4); + +#if 0 + /* comp vs
--------------------------------------- */ + accel_state->comp_vs_offset = 2048; + evergreen_comp_vs(ChipSet, shader + accel_state->comp_vs_offset / 4); + + /* comp ps --------------------------------------- */ + accel_state->comp_ps_offset = 2560; + evergreen_comp_ps(ChipSet, shader + accel_state->comp_ps_offset / 4); + + /* xv vs --------------------------------------- */ + accel_state->xv_vs_offset = 3072; + evergreen_xv_vs(ChipSet, shader + accel_state->xv_vs_offset / 4); + + /* xv ps --------------------------------------- */ + accel_state->xv_ps_offset = 3584; + evergreen_xv_ps(ChipSet, shader + accel_state->xv_ps_offset / 4); +#endif + radeon_bo_unmap(accel_state->shaders_bo); + + return true; +} diff --git a/evergreen_ops.c b/evergreen_ops.c new file mode 100644 index 0000000..3235dab --- /dev/null +++ b/evergreen_ops.c @@ -0,0 +1,153 @@ + +#include "radeondemo.h" +#include "evergreen_reg.h" +#include "evergreen_state.h" + /* Set up the state for solid fills of *obj with colour fg. Copies *obj into accel_state->dst_obj and zeroes both src_obj slots, registers the shader and destination buffer objects with the command stream and exits the process if radeon_cs_space_check() fails, then programs the default state, the scissors, the solid vertex/pixel shaders, the colour buffer and finally the fill colour as pixel-shader ALU constants. fg is unpacked according to dst_obj.bpp (16, 8, or anything else treated as 32-bit ARGB). */ +void do_solid_fill_prepare(struct radeon *radeon, struct r600_accel_object *obj, int fg) +{ + struct radeon_accel_state *accel_state = &radeon->accel_state; + cb_config_t cb_conf; + shader_config_t vs_conf, ps_conf; + uint32_t a, r, g, b; + const_config_t ps_const_conf; + float *ps_alu_consts; + int ret; + + CLEAR (cb_conf); + CLEAR (vs_conf); + CLEAR (ps_conf); + CLEAR (ps_const_conf); + + accel_state->dst_obj = *obj; + /* a solid fill has no source surfaces */ memset(&accel_state->src_obj[0], 0, sizeof(struct r600_accel_object)); + memset(&accel_state->src_obj[1], 0, sizeof(struct r600_accel_object)); + + radeon_cs_space_reset_bos(radeon->cs); + radeon_cs_space_add_persistent_bo(radeon->cs, accel_state->shaders_bo, + RADEON_GEM_DOMAIN_VRAM, 0); + radeon_cs_space_add_persistent_bo(radeon->cs, accel_state->dst_obj.bo, + 0, accel_state->dst_obj.domain); + ret = radeon_cs_space_check(radeon->cs); + if (ret) { + fprintf(stderr,"fail\n"); + exit(-1); + } + + radeon_vbo_check(radeon, &accel_state->vbo, 16); + radeon_vbo_check(radeon, &accel_state->cbuf, 256); +
radeon_cp_start(radeon); + + evergreen_set_default_state(radeon); + + evergreen_set_generic_scissor(radeon, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); + evergreen_set_screen_scissor(radeon, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); + evergreen_set_window_scissor(radeon, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); + + /* Shader */ + vs_conf.shader_addr = accel_state->solid_vs_offset; + vs_conf.shader_size = 512; + vs_conf.num_gprs = 2; + vs_conf.stack_size = 0; + vs_conf.bo = accel_state->shaders_bo; + evergreen_vs_setup(radeon, &vs_conf, RADEON_GEM_DOMAIN_VRAM); + + ps_conf.shader_addr = accel_state->solid_ps_offset; + ps_conf.shader_size = 512; + ps_conf.num_gprs = 1; + ps_conf.stack_size = 0; + ps_conf.clamp_consts = 0; + ps_conf.export_mode = 2; + ps_conf.bo = accel_state->shaders_bo; + evergreen_ps_setup(radeon, &ps_conf, RADEON_GEM_DOMAIN_VRAM); + + cb_conf.id = 0; + cb_conf.w = accel_state->dst_obj.pitch; + cb_conf.h = accel_state->dst_obj.height; + cb_conf.base = accel_state->dst_obj.offset; + cb_conf.bo = accel_state->dst_obj.bo; + + if (accel_state->dst_obj.bpp == 8) { + cb_conf.format = COLOR_8; + cb_conf.comp_swap = 3; /* A */ + } else if (accel_state->dst_obj.bpp == 16) { + cb_conf.format = COLOR_5_6_5; + cb_conf.comp_swap = 2; /* RGB */ +#if X_BYTE_ORDER == X_BIG_ENDIAN + cb_conf.endian = ENDIAN_8IN16; +#endif + } else { + cb_conf.format = COLOR_8_8_8_8; + cb_conf.comp_swap = 1; /* ARGB */ +#if X_BYTE_ORDER == X_BIG_ENDIAN + cb_conf.endian = ENDIAN_8IN32; +#endif + } + cb_conf.source_format = EXPORT_4C_16BPC; + cb_conf.blend_clamp = 1; + /* Render setup */ + cb_conf.pmask |= 4; /* B */ + cb_conf.pmask |= 2; /* G */ + cb_conf.pmask |= 1; /* R */ + cb_conf.pmask |= 8; /* A */ + cb_conf.rop = RADEON_ROP3_P; + if (accel_state->dst_obj.tiling_flags == 0) { + cb_conf.array_mode = 1; + cb_conf.non_disp_tiling = 1; + } + evergreen_set_render_target(radeon, &cb_conf, accel_state->dst_obj.domain); + 
+ evergreen_set_spi(radeon, 0, 0); + + /* PS alu constants: fg is unpacked according to the destination bpp into R, G, B, A floats scaled by each channel's maximum value */ + ps_const_conf.size_bytes = 256; + ps_const_conf.type = SHADER_TYPE_PS; + ps_alu_consts = radeon_vbo_space(radeon, &accel_state->cbuf, 256); + ps_const_conf.bo = accel_state->cbuf.vb_bo; + ps_const_conf.const_addr = accel_state->cbuf.vb_offset; + if (accel_state->dst_obj.bpp == 16) { + /* 5-6-5 layout, alpha forced to 1.0 */ r = (fg >> 11) & 0x1f; + g = (fg >> 5) & 0x3f; + b = (fg >> 0) & 0x1f; + ps_alu_consts[0] = (float)r / 31; /* R */ + ps_alu_consts[1] = (float)g / 63; /* G */ + ps_alu_consts[2] = (float)b / 31; /* B */ + ps_alu_consts[3] = 1.0; /* A */ + } else if (accel_state->dst_obj.bpp == 8) { + /* only the low byte is used, as alpha */ a = (fg >> 0) & 0xff; + ps_alu_consts[0] = 0.0; /* R */ + ps_alu_consts[1] = 0.0; /* G */ + ps_alu_consts[2] = 0.0; /* B */ + ps_alu_consts[3] = (float)a / 255; /* A */ + } else { + /* 8 bits per channel, A in the top byte */ a = (fg >> 24) & 0xff; + r = (fg >> 16) & 0xff; + g = (fg >> 8) & 0xff; + b = (fg >> 0) & 0xff; + ps_alu_consts[0] = (float)r / 255; /* R */ + ps_alu_consts[1] = (float)g / 255; /* G */ + ps_alu_consts[2] = (float)b / 255; /* B */ + ps_alu_consts[3] = (float)a / 255; /* A */ + } + radeon_vbo_commit(radeon, &accel_state->cbuf); + evergreen_set_alu_consts(radeon, &ps_const_conf, RADEON_GEM_DOMAIN_GTT); +} + /* Queue one rectangle in accel_state->vbo as three float (x, y) vertices: (x1, y1), (x1, y2) and (x2, y2). The space comes from radeon_vbo_space() and is committed with radeon_vbo_commit(); the returned pointer is used without a NULL check. Nothing is drawn here. */ +void evergreen_solid(struct radeon *radeon, + int x1, int y1, int x2, int y2) +{ + struct radeon_accel_state *accel_state = &radeon->accel_state; + float *vb; + vb = radeon_vbo_space(radeon, &accel_state->vbo, 8); + + vb[0] = (float)x1; + vb[1] = (float)y1; + + vb[2] = (float)x1; + vb[3] = (float)y2; + + vb[4] = (float)x2; + vb[5] = (float)y2; + + radeon_vbo_commit(radeon, &accel_state->vbo); +} + diff --git a/evergreen_reg.h b/evergreen_reg.h new file mode 100644 index 0000000..b08dbf9 --- /dev/null +++ b/evergreen_reg.h @@ -0,0 +1,250 @@ +/* + * Evergreen Register documentation + * + * Copyright (C) 2010 Advanced Micro Devices, Inc.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef _EVERGREEN_REG_H_ +#define _EVERGREEN_REG_H_ + +/* + * Register definitions + */ + +#include "evergreen_reg_auto.h" + +enum { + SHADER_TYPE_PS, + SHADER_TYPE_VS, + SHADER_TYPE_GS, + SHADER_TYPE_HS, + SHADER_TYPE_LS, + SHADER_TYPE_CS, + SHADER_TYPE_FS, +}; + + +/* SET_*_REG offsets + ends */ +enum { + SET_CONFIG_REG_offset = 0x00008000, + SET_CONFIG_REG_end = 0x0000ac00, + SET_CONTEXT_REG_offset = 0x00028000, + SET_CONTEXT_REG_end = 0x00029000, + SET_RESOURCE_offset = 0x00030000, + SET_RESOURCE_end = 0x00038000, + SET_SAMPLER_offset = 0x0003c000, + SET_SAMPLER_end = 0x0003c600, + SET_CTL_CONST_offset = 0x0003cff0, + SET_CTL_CONST_end = 0x0003ff0c, + SET_LOOP_CONST_offset = 0x0003a200, + SET_LOOP_CONST_end = 0x0003a500, + SET_BOOL_CONST_offset = 0x0003a500, + SET_BOOL_CONST_end = 0x0003a518, +}; + +/* Packet3 commands */ +enum { + IT_NOP = 0x10, + IT_INDIRECT_BUFFER_END = 0x17, + IT_SET_PREDICATION = 0x20, + IT_COND_EXEC = 0x22, + IT_PRED_EXEC = 0x23, + IT_DRAW_INDEX_2 = 0x27, + IT_CONTEXT_CONTROL = 0x28, + IT_DRAW_INDEX_OFFSET = 0x29, + IT_INDEX_TYPE = 0x2A, + IT_DRAW_INDEX = 0x2B, + IT_DRAW_INDEX_AUTO = 0x2D, + IT_DRAW_INDEX_IMMD = 0x2E, + IT_NUM_INSTANCES = 0x2F, + IT_INDIRECT_BUFFER = 0x32, + IT_STRMOUT_BUFFER_UPDATE = 0x34, + IT_MEM_SEMAPHORE = 0x39, + IT_MPEG_INDEX = 0x3A, + IT_WAIT_REG_MEM = 0x3C, + IT_MEM_WRITE = 0x3D, + IT_SURFACE_SYNC = 0x43, + IT_ME_INITIALIZE = 0x44, + IT_COND_WRITE = 0x45, + IT_EVENT_WRITE = 0x46, + IT_EVENT_WRITE_EOP = 0x47, + IT_EVENT_WRITE_EOS = 0x48, + IT_SET_CONFIG_REG = 0x68, + IT_SET_CONTEXT_REG = 0x69, + IT_SET_ALU_CONST = 0x6A, + IT_SET_BOOL_CONST = 0x6B, + IT_SET_LOOP_CONST = 0x6C, + IT_SET_RESOURCE = 0x6D, + IT_SET_SAMPLER = 0x6E, + IT_SET_CTL_CONST = 0x6F, +}; + +/* IT_WAIT_REG_MEM operation encoding */ + +#define IT_WAIT_ALWAYS (0 << 0) +#define IT_WAIT_LT (1 << 0) +#define IT_WAIT_LE (2 << 0) +#define IT_WAIT_EQ (3 << 0) +#define IT_WAIT_NE (4 << 0) +#define IT_WAIT_GE (5 << 0) +#define IT_WAIT_GT (6 << 0) 
+#define IT_WAIT_REG (0 << 4) +#define IT_WAIT_MEM (1 << 4) + +#define IT_WAIT_ADDR(x) ((x) >> 2) + +/* IT_INDEX_TYPE */ +#define IT_INDEX_TYPE_SWAP_MODE(x) ((x) << 2) + +enum { + + SQ_LDS_ALLOC_PS = 0x288ec, + SQ_DYN_GPR_RESOURCE_LIMIT_1 = 0x28838, + SQ_DYN_GPR_CNTL_PS_FLUSH_REQ = 0x8d8c, + + WAIT_UNTIL = 0x8040, + WAIT_CP_DMA_IDLE_bit = 1 << 8, + WAIT_CMDFIFO_bit = 1 << 10, + WAIT_3D_IDLE_bit = 1 << 15, + WAIT_3D_IDLECLEAN_bit = 1 << 17, + WAIT_EXTERN_SIG_bit = 1 << 19, + CMDFIFO_ENTRIES_mask = 0xf << 20, + CMDFIFO_ENTRIES_shift = 20, + + CP_COHER_CNTL = 0x85f0, + DEST_BASE_0_ENA_bit = 1 << 0, + DEST_BASE_1_ENA_bit = 1 << 1, + SO0_DEST_BASE_ENA_bit = 1 << 2, + SO1_DEST_BASE_ENA_bit = 1 << 3, + SO2_DEST_BASE_ENA_bit = 1 << 4, + SO3_DEST_BASE_ENA_bit = 1 << 5, + CB0_DEST_BASE_ENA_bit = 1 << 6, + CB1_DEST_BASE_ENA_bit = 1 << 7, + CB2_DEST_BASE_ENA_bit = 1 << 8, + CB3_DEST_BASE_ENA_bit = 1 << 9, + CB4_DEST_BASE_ENA_bit = 1 << 10, + CB5_DEST_BASE_ENA_bit = 1 << 11, + CB6_DEST_BASE_ENA_bit = 1 << 12, + CB7_DEST_BASE_ENA_bit = 1 << 13, + DB_DEST_BASE_ENA_bit = 1 << 14, + CB8_DEST_BASE_ENA_bit = 1 << 15, + CB9_DEST_BASE_ENA_bit = 1 << 16, + CB10_DEST_BASE_ENA_bit = 1 << 17, + CB11_DEST_BASE_ENA_bit = 1 << 18, + FULL_CACHE_ENA_bit = 1 << 20, + TC_ACTION_ENA_bit = 1 << 23, + VC_ACTION_ENA_bit = 1 << 24, + CB_ACTION_ENA_bit = 1 << 25, + DB_ACTION_ENA_bit = 1 << 26, + SH_ACTION_ENA_bit = 1 << 27, + SX_ACTION_ENA_bit = 1 << 28, + CP_COHER_SIZE = 0x85f4, + CP_COHER_BASE = 0x85f8, + CP_COHER_STATUS = 0x85fc, + MATCHING_GFX_CNTX_mask = 0xff << 0, + MATCHING_GFX_CNTX_shift = 0, + STATUS_bit = 1 << 31, + +// SQ_VTX_CONSTANT_WORD2_0 = 0x00030008, +// SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_mask = 0x3f << 20, + FMT_INVALID=0, FMT_8, FMT_4_4, FMT_3_3_2, + FMT_16=5, FMT_16_FLOAT, FMT_8_8, + FMT_5_6_5, FMT_6_5_5, FMT_1_5_5_5, FMT_4_4_4_4, + FMT_5_5_5_1, FMT_32, FMT_32_FLOAT, FMT_16_16, + FMT_16_16_FLOAT=16, FMT_8_24, FMT_8_24_FLOAT, FMT_24_8, + FMT_24_8_FLOAT, FMT_10_11_11, 
FMT_10_11_11_FLOAT, FMT_11_11_10, + FMT_11_11_10_FLOAT, FMT_2_10_10_10, FMT_8_8_8_8, FMT_10_10_10_2, + FMT_X24_8_32_FLOAT, FMT_32_32, FMT_32_32_FLOAT, FMT_16_16_16_16, + FMT_16_16_16_16_FLOAT=32, FMT_32_32_32_32=34, FMT_32_32_32_32_FLOAT, + FMT_1 = 37, FMT_GB_GR=39, + FMT_BG_RG, FMT_32_AS_8, FMT_32_AS_8_8, FMT_5_9_9_9_SHAREDEXP, + FMT_8_8_8, FMT_16_16_16, FMT_16_16_16_FLOAT, FMT_32_32_32, + FMT_32_32_32_FLOAT=48, + +// High level register file lengths + SQ_FETCH_RESOURCE = SQ_TEX_RESOURCE_WORD0_0, + SQ_FETCH_RESOURCE_ps_num = 176, + SQ_FETCH_RESOURCE_vs_num = 160, + SQ_FETCH_RESOURCE_gs_num = 160, + SQ_FETCH_RESOURCE_hs_num = 160, + SQ_FETCH_RESOURCE_ls_num = 160, + SQ_FETCH_RESOURCE_cs_num = 176, + SQ_FETCH_RESOURCE_fs_num = 32, + SQ_FETCH_RESOURCE_all_num = 1024, + SQ_FETCH_RESOURCE_offset = 32, + SQ_FETCH_RESOURCE_ps = 0, // 0...175 + SQ_FETCH_RESOURCE_vs = SQ_FETCH_RESOURCE_ps + SQ_FETCH_RESOURCE_ps_num, // 176...335 + SQ_FETCH_RESOURCE_gs = SQ_FETCH_RESOURCE_vs + SQ_FETCH_RESOURCE_vs_num, // 336...495 + SQ_FETCH_RESOURCE_hs = SQ_FETCH_RESOURCE_gs + SQ_FETCH_RESOURCE_gs_num, // 496...655 + SQ_FETCH_RESOURCE_ls = SQ_FETCH_RESOURCE_hs + SQ_FETCH_RESOURCE_hs_num, // 656...815 + SQ_FETCH_RESOURCE_cs = SQ_FETCH_RESOURCE_ls + SQ_FETCH_RESOURCE_ls_num, // 816...991 + SQ_FETCH_RESOURCE_fs = SQ_FETCH_RESOURCE_cs + SQ_FETCH_RESOURCE_cs_num, // 992...1023 + + SQ_TEX_SAMPLER_WORD = SQ_TEX_SAMPLER_WORD0_0, + SQ_TEX_SAMPLER_WORD_ps_num = 18, + SQ_TEX_SAMPLER_WORD_vs_num = 18, + SQ_TEX_SAMPLER_WORD_gs_num = 18, + SQ_TEX_SAMPLER_WORD_hs_num = 18, + SQ_TEX_SAMPLER_WORD_ls_num = 18, + SQ_TEX_SAMPLER_WORD_cs_num = 18, + SQ_TEX_SAMPLER_WORD_all_num = 108, + SQ_TEX_SAMPLER_WORD_offset = 12, + SQ_TEX_SAMPLER_WORD_ps = 0, // 0...17 + SQ_TEX_SAMPLER_WORD_vs = SQ_TEX_SAMPLER_WORD_ps + SQ_TEX_SAMPLER_WORD_ps_num, // 18...35 + SQ_TEX_SAMPLER_WORD_gs = SQ_TEX_SAMPLER_WORD_vs + SQ_TEX_SAMPLER_WORD_vs_num, // 36...53 + SQ_TEX_SAMPLER_WORD_hs = SQ_TEX_SAMPLER_WORD_gs + 
SQ_TEX_SAMPLER_WORD_gs_num, // 54...71 + SQ_TEX_SAMPLER_WORD_ls = SQ_TEX_SAMPLER_WORD_hs + SQ_TEX_SAMPLER_WORD_hs_num, // 72...89 + SQ_TEX_SAMPLER_WORD_cs = SQ_TEX_SAMPLER_WORD_ls + SQ_TEX_SAMPLER_WORD_ls_num, // 90...107 + + SQ_LOOP_CONST = SQ_LOOP_CONST_0, + SQ_LOOP_CONST_ps_num = 32, + SQ_LOOP_CONST_vs_num = 32, + SQ_LOOP_CONST_gs_num = 32, + SQ_LOOP_CONST_hs_num = 32, + SQ_LOOP_CONST_ls_num = 32, + SQ_LOOP_CONST_cs_num = 32, + SQ_LOOP_CONST_all_num = 192, + SQ_LOOP_CONST_offset = 4, + SQ_LOOP_CONST_ps = 0, // 0...31 + SQ_LOOP_CONST_vs = SQ_LOOP_CONST_ps + SQ_LOOP_CONST_ps_num, // 32...63 + SQ_LOOP_CONST_gs = SQ_LOOP_CONST_vs + SQ_LOOP_CONST_vs_num, // 64...95 + SQ_LOOP_CONST_hs = SQ_LOOP_CONST_gs + SQ_LOOP_CONST_gs_num, // 96...127 + SQ_LOOP_CONST_ls = SQ_LOOP_CONST_hs + SQ_LOOP_CONST_hs_num, // 128...159 + SQ_LOOP_CONST_cs = SQ_LOOP_CONST_ls + SQ_LOOP_CONST_ls_num, // 160...191 + + SQ_BOOL_CONST = SQ_BOOL_CONST_0, /* 32 bits each */ + SQ_BOOL_CONST_ps_num = 1, + SQ_BOOL_CONST_vs_num = 1, + SQ_BOOL_CONST_gs_num = 1, + SQ_BOOL_CONST_hs_num = 1, + SQ_BOOL_CONST_ls_num = 1, + SQ_BOOL_CONST_cs_num = 1, + SQ_BOOL_CONST_all_num = 6, + SQ_BOOL_CONST_offset = 4, + SQ_BOOL_CONST_ps = 0, + SQ_BOOL_CONST_vs = SQ_BOOL_CONST_ps + SQ_BOOL_CONST_ps_num, + SQ_BOOL_CONST_gs = SQ_BOOL_CONST_vs + SQ_BOOL_CONST_vs_num, + SQ_BOOL_CONST_hs = SQ_BOOL_CONST_gs + SQ_BOOL_CONST_gs_num, + SQ_BOOL_CONST_ls = SQ_BOOL_CONST_hs + SQ_BOOL_CONST_hs_num, + SQ_BOOL_CONST_cs = SQ_BOOL_CONST_ls + SQ_BOOL_CONST_ls_num, + +}; + +#endif diff --git a/evergreen_reg_auto.h b/evergreen_reg_auto.h new file mode 100644 index 0000000..5c61586 --- /dev/null +++ b/evergreen_reg_auto.h @@ -0,0 +1,4039 @@ +/* + * Evergreen Register documentation + * + * Copyright (C) 2010 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef _EVERGREEN_REG_AUTO_H +#define _EVERGREEN_REG_AUTO_H + +enum { + + VGT_VTX_VECT_EJECT_REG = 0x000088b0, + PRIM_COUNT_mask = 0x3ff << 0, + PRIM_COUNT_shift = 0, + VGT_LAST_COPY_STATE = 0x000088c0, + SRC_STATE_ID_mask = 0x07 << 0, + SRC_STATE_ID_shift = 0, + DST_STATE_ID_mask = 0x07 << 16, + DST_STATE_ID_shift = 16, + VGT_CACHE_INVALIDATION = 0x000088c4, + CACHE_INVALIDATION_mask = 0x03 << 0, + CACHE_INVALIDATION_shift = 0, + VC_ONLY = 0x00, + TC_ONLY = 0x01, + VC_AND_TC = 0x02, + VS_NO_EXTRA_BUFFER_bit = 1 << 5, + AUTO_INVLD_EN_mask = 0x03 << 6, + AUTO_INVLD_EN_shift = 6, + VGT_GS_VERTEX_REUSE = 0x000088d4, + VERT_REUSE_mask = 0x1f << 0, + VERT_REUSE_shift = 0, + VGT_CNTL_STATUS = 0x000088f0, + VGT_OUT_INDX_BUSY_bit = 1 << 0, + VGT_OUT_BUSY_bit = 1 << 1, + VGT_PT_BUSY_bit = 1 << 2, + VGT_TE_BUSY_bit = 1 << 3, + VGT_VR_BUSY_bit = 1 << 4, + VGT_GRP_BUSY_bit = 1 << 5, + VGT_DMA_REQ_BUSY_bit = 1 << 6, + VGT_DMA_BUSY_bit = 1 << 7, + VGT_GS_BUSY_bit = 1 << 8, + VGT_HS_BUSY_bit = 1 << 9, + VGT_TE11_BUSY_bit = 1 << 10, + VGT_BUSY_bit = 1 << 11, + VGT_PRIMITIVE_TYPE = 0x00008958, + VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask = 0x3f << 0, + VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift = 0, + DI_PT_NONE = 0x00, + DI_PT_POINTLIST = 0x01, + DI_PT_LINELIST = 0x02, + DI_PT_LINESTRIP = 0x03, + DI_PT_TRILIST = 0x04, + DI_PT_TRIFAN = 0x05, + DI_PT_TRISTRIP = 0x06, + DI_PT_UNUSED_0 = 0x07, + DI_PT_UNUSED_1 = 0x08, + DI_PT_PATCH = 0x09, + DI_PT_LINELIST_ADJ = 0x0a, + DI_PT_LINESTRIP_ADJ = 0x0b, + DI_PT_TRILIST_ADJ = 0x0c, + DI_PT_TRISTRIP_ADJ = 0x0d, + DI_PT_UNUSED_3 = 0x0e, + DI_PT_UNUSED_4 = 0x0f, + DI_PT_TRI_WITH_WFLAGS = 0x10, + DI_PT_RECTLIST = 0x11, + DI_PT_LINELOOP = 0x12, + DI_PT_QUADLIST = 0x13, + DI_PT_QUADSTRIP = 0x14, + DI_PT_POLYGON = 0x15, + DI_PT_2D_COPY_RECT_LIST_V0 = 0x16, + DI_PT_2D_COPY_RECT_LIST_V1 = 0x17, + DI_PT_2D_COPY_RECT_LIST_V2 = 0x18, + DI_PT_2D_COPY_RECT_LIST_V3 = 0x19, + DI_PT_2D_FILL_RECT_LIST = 0x1a, + DI_PT_2D_LINE_STRIP = 0x1b, + DI_PT_2D_TRI_STRIP = 
0x1c, + VGT_INDEX_TYPE = 0x0000895c, + INDEX_TYPE_mask = 0x03 << 0, + INDEX_TYPE_shift = 0, + DI_INDEX_SIZE_16_BIT = 0x00, + DI_INDEX_SIZE_32_BIT = 0x01, + VGT_STRMOUT_BUFFER_FILLED_SIZE_0 = 0x00008960, + VGT_STRMOUT_BUFFER_FILLED_SIZE_1 = 0x00008964, + VGT_STRMOUT_BUFFER_FILLED_SIZE_2 = 0x00008968, + VGT_STRMOUT_BUFFER_FILLED_SIZE_3 = 0x0000896c, + VGT_NUM_INDICES = 0x00008970, + VGT_NUM_INSTANCES = 0x00008974, + PA_CL_CNTL_STATUS = 0x00008a10, + CL_BUSY_bit = 1 << 31, + PA_CL_ENHANCE = 0x00008a14, + CLIP_VTX_REORDER_ENA_bit = 1 << 0, + NUM_CLIP_SEQ_mask = 0x03 << 1, + NUM_CLIP_SEQ_shift = 1, + CLIPPED_PRIM_SEQ_STALL_bit = 1 << 3, + VE_NAN_PROC_DISABLE_bit = 1 << 4, + PA_SU_CNTL_STATUS = 0x00008a50, + SU_BUSY_bit = 1 << 31, + PA_SU_LINE_STIPPLE_VALUE = 0x00008a60, + LINE_STIPPLE_VALUE_mask = 0xffffff << 0, + LINE_STIPPLE_VALUE_shift = 0, + PA_SC_LINE_STIPPLE_STATE = 0x00008b10, + CURRENT_PTR_mask = 0x0f << 0, + CURRENT_PTR_shift = 0, + CURRENT_COUNT_mask = 0xff << 8, + CURRENT_COUNT_shift = 8, + SQ_CONFIG = 0x00008c00, + VC_ENABLE_bit = 1 << 0, + EXPORT_SRC_C_bit = 1 << 1, + CS_PRIO_mask = 0x03 << 18, + CS_PRIO_shift = 18, + LS_PRIO_mask = 0x03 << 20, + LS_PRIO_shift = 20, + HS_PRIO_mask = 0x03 << 22, + HS_PRIO_shift = 22, + PS_PRIO_mask = 0x03 << 24, + PS_PRIO_shift = 24, + VS_PRIO_mask = 0x03 << 26, + VS_PRIO_shift = 26, + GS_PRIO_mask = 0x03 << 28, + GS_PRIO_shift = 28, + ES_PRIO_mask = 0x03 << 30, + ES_PRIO_shift = 30, + SQ_GPR_RESOURCE_MGMT_1 = 0x00008c04, + NUM_PS_GPRS_mask = 0xff << 0, + NUM_PS_GPRS_shift = 0, + NUM_VS_GPRS_mask = 0xff << 16, + NUM_VS_GPRS_shift = 16, + NUM_CLAUSE_TEMP_GPRS_mask = 0x0f << 28, + NUM_CLAUSE_TEMP_GPRS_shift = 28, + SQ_GPR_RESOURCE_MGMT_2 = 0x00008c08, + NUM_GS_GPRS_mask = 0xff << 0, + NUM_GS_GPRS_shift = 0, + NUM_ES_GPRS_mask = 0xff << 16, + NUM_ES_GPRS_shift = 16, + SQ_GPR_RESOURCE_MGMT_3 = 0x00008c0c, + NUM_HS_GPRS_mask = 0xff << 0, + NUM_HS_GPRS_shift = 0, + NUM_LS_GPRS_mask = 0xff << 16, + NUM_LS_GPRS_shift = 16, + 
SQ_GLOBAL_GPR_RESOURCE_MGMT_1 = 0x00008c10, + PS_GGPR_BASE_mask = 0xff << 0, + PS_GGPR_BASE_shift = 0, + VS_GGPR_BASE_mask = 0xff << 8, + VS_GGPR_BASE_shift = 8, + GS_GGPR_BASE_mask = 0xff << 16, + GS_GGPR_BASE_shift = 16, + ES_GGPR_BASE_mask = 0xff << 24, + ES_GGPR_BASE_shift = 24, + SQ_GLOBAL_GPR_RESOURCE_MGMT_2 = 0x00008c14, + HS_GGPR_BASE_mask = 0xff << 0, + HS_GGPR_BASE_shift = 0, + LS_GGPR_BASE_mask = 0xff << 8, + LS_GGPR_BASE_shift = 8, + CS_GGPR_BASE_mask = 0xff << 16, + CS_GGPR_BASE_shift = 16, + SQ_THREAD_RESOURCE_MGMT = 0x00008c18, + NUM_PS_THREADS_mask = 0xff << 0, + NUM_PS_THREADS_shift = 0, + NUM_VS_THREADS_mask = 0xff << 8, + NUM_VS_THREADS_shift = 8, + NUM_GS_THREADS_mask = 0xff << 16, + NUM_GS_THREADS_shift = 16, + NUM_ES_THREADS_mask = 0xff << 24, + NUM_ES_THREADS_shift = 24, + SQ_THREAD_RESOURCE_MGMT_2 = 0x00008c1c, + NUM_HS_THREADS_mask = 0xff << 0, + NUM_HS_THREADS_shift = 0, + NUM_LS_THREADS_mask = 0xff << 8, + NUM_LS_THREADS_shift = 8, + SQ_STACK_RESOURCE_MGMT_1 = 0x00008c20, + NUM_PS_STACK_ENTRIES_mask = 0xfff << 0, + NUM_PS_STACK_ENTRIES_shift = 0, + NUM_VS_STACK_ENTRIES_mask = 0xfff << 16, + NUM_VS_STACK_ENTRIES_shift = 16, + SQ_STACK_RESOURCE_MGMT_2 = 0x00008c24, + NUM_GS_STACK_ENTRIES_mask = 0xfff << 0, + NUM_GS_STACK_ENTRIES_shift = 0, + NUM_ES_STACK_ENTRIES_mask = 0xfff << 16, + NUM_ES_STACK_ENTRIES_shift = 16, + SQ_STACK_RESOURCE_MGMT_3 = 0x00008c28, + NUM_HS_STACK_ENTRIES_mask = 0xfff << 0, + NUM_HS_STACK_ENTRIES_shift = 0, + NUM_LS_STACK_ENTRIES_mask = 0xfff << 16, + NUM_LS_STACK_ENTRIES_shift = 16, + SQ_ESGS_RING_BASE = 0x00008c40, + SQ_ESGS_RING_SIZE = 0x00008c44, + SQ_GSVS_RING_BASE = 0x00008c48, + SQ_GSVS_RING_SIZE = 0x00008c4c, + SQ_ESTMP_RING_BASE = 0x00008c50, + SQ_ESTMP_RING_SIZE = 0x00008c54, + SQ_GSTMP_RING_BASE = 0x00008c58, + SQ_GSTMP_RING_SIZE = 0x00008c5c, + SQ_VSTMP_RING_BASE = 0x00008c60, + SQ_VSTMP_RING_SIZE = 0x00008c64, + SQ_PSTMP_RING_BASE = 0x00008c68, + SQ_PSTMP_RING_SIZE = 0x00008c6c, + SQ_CONST_MEM_BASE = 
0x00008df8, + SQ_ALU_WORD1_OP3 = 0x00008dfc, + SRC2_SEL_mask = 0x1ff << 0, + SRC2_SEL_shift = 0, + SQ_ALU_SRC_LDS_OQ_A = 0xdb, + SQ_ALU_SRC_LDS_OQ_B = 0xdc, + SQ_ALU_SRC_LDS_OQ_A_POP = 0xdd, + SQ_ALU_SRC_LDS_OQ_B_POP = 0xde, + SQ_ALU_SRC_LDS_DIRECT_A = 0xdf, + SQ_ALU_SRC_LDS_DIRECT_B = 0xe0, + SQ_ALU_SRC_TIME_HI = 0xe3, + SQ_ALU_SRC_TIME_LO = 0xe4, + SQ_ALU_SRC_MASK_HI = 0xe5, + SQ_ALU_SRC_MASK_LO = 0xe6, + SQ_ALU_SRC_HW_WAVE_ID = 0xe7, + SQ_ALU_SRC_SIMD_ID = 0xe8, + SQ_ALU_SRC_SE_ID = 0xe9, + SQ_ALU_SRC_HW_THREADGRP_ID = 0xea, + SQ_ALU_SRC_WAVE_ID_IN_GRP = 0xeb, + SQ_ALU_SRC_NUM_THREADGRP_WAVES = 0xec, + SQ_ALU_SRC_HW_ALU_ODD = 0xed, + SQ_ALU_SRC_LOOP_IDX = 0xee, + SQ_ALU_SRC_PARAM_BASE_ADDR = 0xf0, + SQ_ALU_SRC_NEW_PRIM_MASK = 0xf1, + SQ_ALU_SRC_PRIM_MASK_HI = 0xf2, + SQ_ALU_SRC_PRIM_MASK_LO = 0xf3, + SQ_ALU_SRC_1_DBL_L = 0xf4, + SQ_ALU_SRC_1_DBL_M = 0xf5, + SQ_ALU_SRC_0_5_DBL_L = 0xf6, + SQ_ALU_SRC_0_5_DBL_M = 0xf7, + SQ_ALU_SRC_0 = 0xf8, + SQ_ALU_SRC_1 = 0xf9, + SQ_ALU_SRC_1_INT = 0xfa, + SQ_ALU_SRC_M_1_INT = 0xfb, + SQ_ALU_SRC_0_5 = 0xfc, + SQ_ALU_SRC_LITERAL = 0xfd, + SQ_ALU_SRC_PV = 0xfe, + SQ_ALU_SRC_PS = 0xff, + SRC2_REL_bit = 1 << 9, + SRC2_CHAN_mask = 0x03 << 10, + SRC2_CHAN_shift = 10, + SQ_CHAN_X = 0x00, + SQ_CHAN_Y = 0x01, + SQ_CHAN_Z = 0x02, + SQ_CHAN_W = 0x03, + SRC2_NEG_bit = 1 << 12, + SQ_ALU_WORD1_OP3__ALU_INST_mask = 0x1f << 13, + SQ_ALU_WORD1_OP3__ALU_INST_shift = 13, + SQ_OP3_INST_BFE_UINT = 0x04, + SQ_OP3_INST_BFE_INT = 0x05, + SQ_OP3_INST_BFI_INT = 0x06, + SQ_OP3_INST_FMA = 0x07, + SQ_OP3_INST_CNDNE_64 = 0x09, + SQ_OP3_INST_FMA_64 = 0x0a, + SQ_OP3_INST_LERP_UINT = 0x0b, + SQ_OP3_INST_BIT_ALIGN_INT = 0x0c, + SQ_OP3_INST_BYTE_ALIGN_INT = 0x0d, + SQ_OP3_INST_SAD_ACCUM_UINT = 0x0e, + SQ_OP3_INST_SAD_ACCUM_HI_UINT = 0x0f, + SQ_OP3_INST_MULADD_UINT24 = 0x10, + SQ_OP3_INST_LDS_IDX_OP = 0x11, + SQ_OP3_INST_MULADD = 0x14, + SQ_OP3_INST_MULADD_M2 = 0x15, + SQ_OP3_INST_MULADD_M4 = 0x16, + SQ_OP3_INST_MULADD_D2 = 0x17, + SQ_OP3_INST_MULADD_IEEE = 0x18, 
+ SQ_OP3_INST_CNDE = 0x19, + SQ_OP3_INST_CNDGT = 0x1a, + SQ_OP3_INST_CNDGE = 0x1b, + SQ_OP3_INST_CNDE_INT = 0x1c, + SQ_OP3_INST_CNDGT_INT = 0x1d, + SQ_OP3_INST_CNDGE_INT = 0x1e, + SQ_OP3_INST_MUL_LIT = 0x1f, + SQ_ALU_WORD1_LDS_DIRECT_LITERAL_LO = 0x00008dfc, + OFFSET_A_mask = 0x1fff << 0, + OFFSET_A_shift = 0, + STRIDE_A_mask = 0x7f << 13, + STRIDE_A_shift = 13, + THREAD_REL_A_bit = 1 << 22, + SQ_TEX_WORD2 = 0x00008dfc, + OFFSET_X_mask = 0x1f << 0, + OFFSET_X_shift = 0, + OFFSET_Y_mask = 0x1f << 5, + OFFSET_Y_shift = 5, + OFFSET_Z_mask = 0x1f << 10, + OFFSET_Z_shift = 10, + SAMPLER_ID_mask = 0x1f << 15, + SAMPLER_ID_shift = 15, + SQ_TEX_WORD2__SRC_SEL_X_mask = 0x07 << 20, + SQ_TEX_WORD2__SRC_SEL_X_shift = 20, + SQ_SEL_X = 0x00, + SQ_SEL_Y = 0x01, + SQ_SEL_Z = 0x02, + SQ_SEL_W = 0x03, + SQ_SEL_0 = 0x04, + SQ_SEL_1 = 0x05, + SRC_SEL_Y_mask = 0x07 << 23, + SRC_SEL_Y_shift = 23, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ + SRC_SEL_Z_mask = 0x07 << 26, + SRC_SEL_Z_shift = 26, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ + SRC_SEL_W_mask = 0x07 << 29, + SRC_SEL_W_shift = 29, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ + SQ_CF_ALLOC_EXPORT_WORD1 = 0x00008dfc, + BURST_COUNT_mask = 0x0f << 16, + BURST_COUNT_shift = 16, + VALID_PIXEL_MODE_bit = 1 << 20, + END_OF_PROGRAM_bit = 1 << 21, + SQ_CF_ALLOC_EXPORT_WORD1__CF_INST_mask = 0xff << 22, + SQ_CF_ALLOC_EXPORT_WORD1__CF_INST_shift = 22, + SQ_CF_INST_MEM_STREAM0_BUF0 = 0x40, + SQ_CF_INST_MEM_STREAM0_BUF1 = 0x41, + SQ_CF_INST_MEM_STREAM0_BUF2 = 0x42, + SQ_CF_INST_MEM_STREAM0_BUF3 = 0x43, + SQ_CF_INST_MEM_STREAM1_BUF0 = 0x44, + SQ_CF_INST_MEM_STREAM1_BUF1 = 0x45, + SQ_CF_INST_MEM_STREAM1_BUF2 = 0x46, + SQ_CF_INST_MEM_STREAM1_BUF3 = 
0x47, + SQ_CF_INST_MEM_STREAM2_BUF0 = 0x48, + SQ_CF_INST_MEM_STREAM2_BUF1 = 0x49, + SQ_CF_INST_MEM_STREAM2_BUF2 = 0x4a, + SQ_CF_INST_MEM_STREAM2_BUF3 = 0x4b, + SQ_CF_INST_MEM_STREAM3_BUF0 = 0x4c, + SQ_CF_INST_MEM_STREAM3_BUF1 = 0x4d, + SQ_CF_INST_MEM_STREAM3_BUF2 = 0x4e, + SQ_CF_INST_MEM_STREAM3_BUF3 = 0x4f, + SQ_CF_INST_MEM_SCRATCH = 0x50, + SQ_CF_INST_MEM_RING = 0x52, + SQ_CF_INST_EXPORT = 0x53, + SQ_CF_INST_EXPORT_DONE = 0x54, + SQ_CF_INST_MEM_EXPORT = 0x55, + SQ_CF_INST_MEM_RAT = 0x56, + SQ_CF_INST_MEM_RAT_CACHELESS = 0x57, + SQ_CF_INST_MEM_RING1 = 0x58, + SQ_CF_INST_MEM_RING2 = 0x59, + SQ_CF_INST_MEM_RING3 = 0x5a, + SQ_CF_INST_MEM_EXPORT_COMBINED = 0x5b, + SQ_CF_INST_MEM_RAT_COMBINED_CACHELESS = 0x5c, + MARK_bit = 1 << 30, + BARRIER_bit = 1 << 31, + SQ_CF_ALU_WORD1 = 0x00008dfc, + KCACHE_MODE1_mask = 0x03 << 0, + KCACHE_MODE1_shift = 0, + SQ_CF_KCACHE_NOP = 0x00, + SQ_CF_KCACHE_LOCK_1 = 0x01, + SQ_CF_KCACHE_LOCK_2 = 0x02, + SQ_CF_KCACHE_LOCK_LOOP_INDEX = 0x03, + KCACHE_ADDR0_mask = 0xff << 2, + KCACHE_ADDR0_shift = 2, + KCACHE_ADDR1_mask = 0xff << 10, + KCACHE_ADDR1_shift = 10, + SQ_CF_ALU_WORD1__COUNT_mask = 0x7f << 18, + SQ_CF_ALU_WORD1__COUNT_shift = 18, + SQ_CF_ALU_WORD1__ALT_CONST_bit = 1 << 25, + SQ_CF_ALU_WORD1__CF_INST_mask = 0x0f << 26, + SQ_CF_ALU_WORD1__CF_INST_shift = 26, + SQ_CF_INST_ALU = 0x08, + SQ_CF_INST_ALU_PUSH_BEFORE = 0x09, + SQ_CF_INST_ALU_POP_AFTER = 0x0a, + SQ_CF_INST_ALU_POP2_AFTER = 0x0b, + SQ_CF_INST_ALU_EXTENDED = 0x0c, + SQ_CF_INST_ALU_CONTINUE = 0x0d, + SQ_CF_INST_ALU_BREAK = 0x0e, + SQ_CF_INST_ALU_ELSE_AFTER = 0x0f, + WHOLE_QUAD_MODE_bit = 1 << 30, +/* BARRIER_bit = 1 << 31, */ + SQ_TEX_WORD1 = 0x00008dfc, + SQ_TEX_WORD1__DST_GPR_mask = 0x7f << 0, + SQ_TEX_WORD1__DST_GPR_shift = 0, + SQ_TEX_WORD1__DST_REL_bit = 1 << 7, + SQ_TEX_WORD1__DST_SEL_X_mask = 0x07 << 9, + SQ_TEX_WORD1__DST_SEL_X_shift = 9, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 
= 0x05, */ + SQ_SEL_MASK = 0x07, + SQ_TEX_WORD1__DST_SEL_Y_mask = 0x07 << 12, + SQ_TEX_WORD1__DST_SEL_Y_shift = 12, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SQ_TEX_WORD1__DST_SEL_Z_mask = 0x07 << 15, + SQ_TEX_WORD1__DST_SEL_Z_shift = 15, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SQ_TEX_WORD1__DST_SEL_W_mask = 0x07 << 18, + SQ_TEX_WORD1__DST_SEL_W_shift = 18, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SQ_TEX_WORD1__LOD_BIAS_mask = 0x7f << 21, + SQ_TEX_WORD1__LOD_BIAS_shift = 21, + COORD_TYPE_X_bit = 1 << 28, + COORD_TYPE_Y_bit = 1 << 29, + COORD_TYPE_Z_bit = 1 << 30, + COORD_TYPE_W_bit = 1 << 31, + SQ_VTX_WORD0 = 0x00008dfc, + VTX_INST_mask = 0x1f << 0, + VTX_INST_shift = 0, + SQ_VTX_INST_FETCH = 0x00, + SQ_VTX_INST_SEMANTIC = 0x01, + SQ_VTX_INST_GET_BUFFER_RESINFO = 0x0e, + FETCH_TYPE_mask = 0x03 << 5, + FETCH_TYPE_shift = 5, + SQ_VTX_FETCH_VERTEX_DATA = 0x00, + SQ_VTX_FETCH_INSTANCE_DATA = 0x01, + SQ_VTX_FETCH_NO_INDEX_OFFSET = 0x02, + FETCH_WHOLE_QUAD_bit = 1 << 7, + BUFFER_ID_mask = 0xff << 8, + BUFFER_ID_shift = 8, + SQ_VTX_WORD0__SRC_GPR_mask = 0x7f << 16, + SQ_VTX_WORD0__SRC_GPR_shift = 16, + SRC_REL_bit = 1 << 23, + SQ_VTX_WORD0__SRC_SEL_X_mask = 0x03 << 24, + SQ_VTX_WORD0__SRC_SEL_X_shift = 24, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ + MEGA_FETCH_COUNT_mask = 0x3f << 26, + MEGA_FETCH_COUNT_shift = 26, + SQ_CF_ALLOC_EXPORT_WORD1_SWIZ = 0x00008dfc, + SEL_X_mask = 0x07 << 0, + SEL_X_shift = 0, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* 
SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SEL_Y_mask = 0x07 << 3, + SEL_Y_shift = 3, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SEL_Z_mask = 0x07 << 6, + SEL_Z_shift = 6, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SEL_W_mask = 0x07 << 9, + SEL_W_shift = 9, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SQ_MEM_RD_WORD0 = 0x00008dfc, + MEM_INST_mask = 0x1f << 0, + MEM_INST_shift = 0, + SQ_MEM_INST_MEM = 0x02, + SQ_MEM_RD_WORD0__ELEM_SIZE_mask = 0x03 << 5, + SQ_MEM_RD_WORD0__ELEM_SIZE_shift = 5, +/* FETCH_WHOLE_QUAD_bit = 1 << 7, */ + MEM_OP_mask = 0x07 << 8, + MEM_OP_shift = 8, + SQ_MEM_OP_RD_SCRATCH = 0x00, + SQ_MEM_OP_RD_SCATTER = 0x02, + SQ_MEM_OP_GDS = 0x04, + SQ_MEM_OP_TF_WRITE = 0x05, + SQ_MEM_RD_WORD0__UNCACHED_bit = 1 << 11, + INDEXED_bit = 1 << 12, + SQ_MEM_RD_WORD0__SRC_GPR_mask = 0x7f << 16, + SQ_MEM_RD_WORD0__SRC_GPR_shift = 16, +/* SRC_REL_bit = 1 << 23, */ + SQ_MEM_RD_WORD0__SRC_SEL_X_mask = 0x03 << 24, + SQ_MEM_RD_WORD0__SRC_SEL_X_shift = 24, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ + BURST_CNT_mask = 0x0f << 26, + BURST_CNT_shift = 26, + SQ_ALU_WORD1 = 0x00008dfc, + SQ_ALU_WORD1__ENCODING_mask = 0x07 << 15, + SQ_ALU_WORD1__ENCODING_shift = 15, + BANK_SWIZZLE_mask = 0x07 << 18, + BANK_SWIZZLE_shift = 18, + SQ_ALU_VEC_012 = 0x00, + SQ_ALU_VEC_021 = 0x01, + SQ_ALU_VEC_120 = 0x02, + SQ_ALU_VEC_102 = 0x03, + SQ_ALU_VEC_201 = 0x04, + SQ_ALU_VEC_210 = 0x05, + SQ_ALU_WORD1__DST_GPR_mask = 0x7f << 21, + SQ_ALU_WORD1__DST_GPR_shift = 21, + SQ_ALU_WORD1__DST_REL_bit = 1 << 28, + DST_CHAN_mask = 0x03 << 29, + DST_CHAN_shift 
= 29, + CHAN_X = 0x00, + CHAN_Y = 0x01, + CHAN_Z = 0x02, + CHAN_W = 0x03, + SQ_ALU_WORD1__CLAMP_bit = 1 << 31, + SQ_CF_ALU_WORD0_EXT = 0x00008dfc, + KCACHE_BANK_INDEX_MODE0_mask = 0x03 << 4, + KCACHE_BANK_INDEX_MODE0_shift = 4, + SQ_CF_INDEX_NONE = 0x00, + SQ_CF_INDEX_0 = 0x01, + SQ_CF_INDEX_1 = 0x02, + SQ_CF_INVALID = 0x03, + KCACHE_BANK_INDEX_MODE1_mask = 0x03 << 6, + KCACHE_BANK_INDEX_MODE1_shift = 6, +/* SQ_CF_INDEX_NONE = 0x00, */ +/* SQ_CF_INDEX_0 = 0x01, */ +/* SQ_CF_INDEX_1 = 0x02, */ +/* SQ_CF_INVALID = 0x03, */ + KCACHE_BANK_INDEX_MODE2_mask = 0x03 << 8, + KCACHE_BANK_INDEX_MODE2_shift = 8, +/* SQ_CF_INDEX_NONE = 0x00, */ +/* SQ_CF_INDEX_0 = 0x01, */ +/* SQ_CF_INDEX_1 = 0x02, */ +/* SQ_CF_INVALID = 0x03, */ + KCACHE_BANK_INDEX_MODE3_mask = 0x03 << 10, + KCACHE_BANK_INDEX_MODE3_shift = 10, +/* SQ_CF_INDEX_NONE = 0x00, */ +/* SQ_CF_INDEX_0 = 0x01, */ +/* SQ_CF_INDEX_1 = 0x02, */ +/* SQ_CF_INVALID = 0x03, */ + KCACHE_BANK2_mask = 0x0f << 22, + KCACHE_BANK2_shift = 22, + KCACHE_BANK3_mask = 0x0f << 26, + KCACHE_BANK3_shift = 26, + KCACHE_MODE2_mask = 0x03 << 30, + KCACHE_MODE2_shift = 30, +/* SQ_CF_KCACHE_NOP = 0x00, */ +/* SQ_CF_KCACHE_LOCK_1 = 0x01, */ +/* SQ_CF_KCACHE_LOCK_2 = 0x02, */ +/* SQ_CF_KCACHE_LOCK_LOOP_INDEX = 0x03, */ + SQ_ALU_WORD0_LDS_IDX_OP = 0x00008dfc, + SRC0_SEL_mask = 0x1ff << 0, + SRC0_SEL_shift = 0, +/* SQ_ALU_SRC_LDS_OQ_A = 0xdb, */ +/* SQ_ALU_SRC_LDS_OQ_B = 0xdc, */ +/* SQ_ALU_SRC_LDS_OQ_A_POP = 0xdd, */ +/* SQ_ALU_SRC_LDS_OQ_B_POP = 0xde, */ +/* SQ_ALU_SRC_LDS_DIRECT_A = 0xdf, */ +/* SQ_ALU_SRC_LDS_DIRECT_B = 0xe0, */ +/* SQ_ALU_SRC_TIME_HI = 0xe3, */ +/* SQ_ALU_SRC_TIME_LO = 0xe4, */ +/* SQ_ALU_SRC_MASK_HI = 0xe5, */ +/* SQ_ALU_SRC_MASK_LO = 0xe6, */ +/* SQ_ALU_SRC_HW_WAVE_ID = 0xe7, */ +/* SQ_ALU_SRC_SIMD_ID = 0xe8, */ +/* SQ_ALU_SRC_SE_ID = 0xe9, */ +/* SQ_ALU_SRC_HW_THREADGRP_ID = 0xea, */ +/* SQ_ALU_SRC_WAVE_ID_IN_GRP = 0xeb, */ +/* SQ_ALU_SRC_NUM_THREADGRP_WAVES = 0xec, */ +/* SQ_ALU_SRC_HW_ALU_ODD = 0xed, */ +/* 
SQ_ALU_SRC_LOOP_IDX = 0xee, */ +/* SQ_ALU_SRC_PARAM_BASE_ADDR = 0xf0, */ +/* SQ_ALU_SRC_NEW_PRIM_MASK = 0xf1, */ +/* SQ_ALU_SRC_PRIM_MASK_HI = 0xf2, */ +/* SQ_ALU_SRC_PRIM_MASK_LO = 0xf3, */ +/* SQ_ALU_SRC_1_DBL_L = 0xf4, */ +/* SQ_ALU_SRC_1_DBL_M = 0xf5, */ +/* SQ_ALU_SRC_0_5_DBL_L = 0xf6, */ +/* SQ_ALU_SRC_0_5_DBL_M = 0xf7, */ +/* SQ_ALU_SRC_0 = 0xf8, */ +/* SQ_ALU_SRC_1 = 0xf9, */ +/* SQ_ALU_SRC_1_INT = 0xfa, */ +/* SQ_ALU_SRC_M_1_INT = 0xfb, */ +/* SQ_ALU_SRC_0_5 = 0xfc, */ +/* SQ_ALU_SRC_LITERAL = 0xfd, */ +/* SQ_ALU_SRC_PV = 0xfe, */ +/* SQ_ALU_SRC_PS = 0xff, */ + SRC0_REL_bit = 1 << 9, + SRC0_CHAN_mask = 0x03 << 10, + SRC0_CHAN_shift = 10, +/* SQ_CHAN_X = 0x00, */ +/* SQ_CHAN_Y = 0x01, */ +/* SQ_CHAN_Z = 0x02, */ +/* SQ_CHAN_W = 0x03, */ + IDX_OFFSET_4_bit = 1 << 12, + SRC1_SEL_mask = 0x1ff << 13, + SRC1_SEL_shift = 13, +/* SQ_ALU_SRC_LDS_OQ_A = 0xdb, */ +/* SQ_ALU_SRC_LDS_OQ_B = 0xdc, */ +/* SQ_ALU_SRC_LDS_OQ_A_POP = 0xdd, */ +/* SQ_ALU_SRC_LDS_OQ_B_POP = 0xde, */ +/* SQ_ALU_SRC_LDS_DIRECT_A = 0xdf, */ +/* SQ_ALU_SRC_LDS_DIRECT_B = 0xe0, */ +/* SQ_ALU_SRC_TIME_HI = 0xe3, */ +/* SQ_ALU_SRC_TIME_LO = 0xe4, */ +/* SQ_ALU_SRC_MASK_HI = 0xe5, */ +/* SQ_ALU_SRC_MASK_LO = 0xe6, */ +/* SQ_ALU_SRC_HW_WAVE_ID = 0xe7, */ +/* SQ_ALU_SRC_SIMD_ID = 0xe8, */ +/* SQ_ALU_SRC_SE_ID = 0xe9, */ +/* SQ_ALU_SRC_HW_THREADGRP_ID = 0xea, */ +/* SQ_ALU_SRC_WAVE_ID_IN_GRP = 0xeb, */ +/* SQ_ALU_SRC_NUM_THREADGRP_WAVES = 0xec, */ +/* SQ_ALU_SRC_HW_ALU_ODD = 0xed, */ +/* SQ_ALU_SRC_LOOP_IDX = 0xee, */ +/* SQ_ALU_SRC_PARAM_BASE_ADDR = 0xf0, */ +/* SQ_ALU_SRC_NEW_PRIM_MASK = 0xf1, */ +/* SQ_ALU_SRC_PRIM_MASK_HI = 0xf2, */ +/* SQ_ALU_SRC_PRIM_MASK_LO = 0xf3, */ +/* SQ_ALU_SRC_1_DBL_L = 0xf4, */ +/* SQ_ALU_SRC_1_DBL_M = 0xf5, */ +/* SQ_ALU_SRC_0_5_DBL_L = 0xf6, */ +/* SQ_ALU_SRC_0_5_DBL_M = 0xf7, */ +/* SQ_ALU_SRC_0 = 0xf8, */ +/* SQ_ALU_SRC_1 = 0xf9, */ +/* SQ_ALU_SRC_1_INT = 0xfa, */ +/* SQ_ALU_SRC_M_1_INT = 0xfb, */ +/* SQ_ALU_SRC_0_5 = 0xfc, */ +/* SQ_ALU_SRC_LITERAL = 0xfd, */ +/* 
SQ_ALU_SRC_PV = 0xfe, */ +/* SQ_ALU_SRC_PS = 0xff, */ + SRC1_REL_bit = 1 << 22, + SRC1_CHAN_mask = 0x03 << 23, + SRC1_CHAN_shift = 23, +/* SQ_CHAN_X = 0x00, */ +/* SQ_CHAN_Y = 0x01, */ +/* SQ_CHAN_Z = 0x02, */ +/* SQ_CHAN_W = 0x03, */ + IDX_OFFSET_5_bit = 1 << 25, + INDEX_MODE_mask = 0x07 << 26, + INDEX_MODE_shift = 26, + SQ_INDEX_AR_X = 0x00, + SQ_INDEX_LOOP = 0x04, + SQ_INDEX_GLOBAL = 0x05, + SQ_INDEX_GLOBAL_AR_X = 0x06, + PRED_SEL_mask = 0x03 << 29, + PRED_SEL_shift = 29, + SQ_PRED_SEL_OFF = 0x00, + SQ_PRED_SEL_ZERO = 0x02, + SQ_PRED_SEL_ONE = 0x03, + LAST_bit = 1 << 31, + SQ_MEM_GDS_WORD2 = 0x00008dfc, + SQ_MEM_GDS_WORD2__DST_SEL_X_mask = 0x07 << 0, + SQ_MEM_GDS_WORD2__DST_SEL_X_shift = 0, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SQ_MEM_GDS_WORD2__DST_SEL_Y_mask = 0x07 << 3, + SQ_MEM_GDS_WORD2__DST_SEL_Y_shift = 3, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SQ_MEM_GDS_WORD2__DST_SEL_Z_mask = 0x07 << 6, + SQ_MEM_GDS_WORD2__DST_SEL_Z_shift = 6, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SQ_MEM_GDS_WORD2__DST_SEL_W_mask = 0x07 << 9, + SQ_MEM_GDS_WORD2__DST_SEL_W_shift = 9, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SQ_CF_ALLOC_EXPORT_WORD0_RAT = 0x00008dfc, + RAT_ID_mask = 0x0f << 0, + RAT_ID_shift = 0, + RAT_INST_mask = 0x3f << 4, + RAT_INST_shift = 4, + SQ_EXPORT_RAT_INST_NOP = 0x00, + SQ_EXPORT_RAT_INST_STORE_TYPED = 0x01, + SQ_EXPORT_RAT_INST_STORE_RAW = 0x02, + SQ_EXPORT_RAT_INST_STORE_RAW_FDENORM = 0x03, + SQ_EXPORT_RAT_INST_CMPXCHG_INT = 0x04, + 
SQ_EXPORT_RAT_INST_CMPXCHG_FLT = 0x05, + SQ_EXPORT_RAT_INST_CMPXCHG_FDENORM = 0x06, + SQ_EXPORT_RAT_INST_ADD = 0x07, + SQ_EXPORT_RAT_INST_SUB = 0x08, + SQ_EXPORT_RAT_INST_RSUB = 0x09, + SQ_EXPORT_RAT_INST_MIN_INT = 0x0a, + SQ_EXPORT_RAT_INST_MIN_UINT = 0x0b, + SQ_EXPORT_RAT_INST_MAX_INT = 0x0c, + SQ_EXPORT_RAT_INST_MAX_UINT = 0x0d, + SQ_EXPORT_RAT_INST_AND = 0x0e, + SQ_EXPORT_RAT_INST_OR = 0x0f, + SQ_EXPORT_RAT_INST_XOR = 0x10, + SQ_EXPORT_RAT_INST_MSKOR = 0x11, + SQ_EXPORT_RAT_INST_INC_UINT = 0x12, + SQ_EXPORT_RAT_INST_DEC_UINT = 0x13, + SQ_EXPORT_RAT_INST_NOP_RTN = 0x20, + SQ_EXPORT_RAT_INST_XCHG_RTN = 0x22, + SQ_EXPORT_RAT_INST_XCHG_FDENORM_RTN = 0x23, + SQ_EXPORT_RAT_INST_CMPXCHG_INT_RTN = 0x24, + SQ_EXPORT_RAT_INST_CMPXCHG_FLT_RTN = 0x25, + SQ_EXPORT_RAT_INST_CMPXCHG_FDENORM_RTN = 0x26, + SQ_EXPORT_RAT_INST_ADD_RTN = 0x27, + SQ_EXPORT_RAT_INST_SUB_RTN = 0x28, + SQ_EXPORT_RAT_INST_RSUB_RTN = 0x29, + SQ_EXPORT_RAT_INST_MIN_INT_RTN = 0x2a, + SQ_EXPORT_RAT_INST_MIN_UINT_RTN = 0x2b, + SQ_EXPORT_RAT_INST_MAX_INT_RTN = 0x2c, + SQ_EXPORT_RAT_INST_MAX_UINT_RTN = 0x2d, + SQ_EXPORT_RAT_INST_AND_RTN = 0x2e, + SQ_EXPORT_RAT_INST_OR_RTN = 0x2f, + SQ_EXPORT_RAT_INST_XOR_RTN = 0x30, + SQ_EXPORT_RAT_INST_MSKOR_RTN = 0x31, + SQ_EXPORT_RAT_INST_INC_UINT_RTN = 0x32, + SQ_EXPORT_RAT_INST_DEC_UINT_RTN = 0x33, + RAT_INDEX_MODE_mask = 0x03 << 11, + RAT_INDEX_MODE_shift = 11, +/* SQ_CF_INDEX_NONE = 0x00, */ +/* SQ_CF_INDEX_0 = 0x01, */ +/* SQ_CF_INDEX_1 = 0x02, */ +/* SQ_CF_INVALID = 0x03, */ + SQ_CF_ALLOC_EXPORT_WORD0_RAT__TYPE_mask = 0x03 << 13, + SQ_CF_ALLOC_EXPORT_WORD0_RAT__TYPE_shift = 13, + SQ_EXPORT_PIXEL = 0x00, + SQ_EXPORT_POS = 0x01, + SQ_EXPORT_PARAM = 0x02, + X_UNUSED_FOR_SX_EXPORTS = 0x03, + RW_GPR_mask = 0x7f << 15, + RW_GPR_shift = 15, + RW_REL_bit = 1 << 22, + INDEX_GPR_mask = 0x7f << 23, + INDEX_GPR_shift = 23, + SQ_CF_ALLOC_EXPORT_WORD0_RAT__ELEM_SIZE_mask = 0x03 << 30, + SQ_CF_ALLOC_EXPORT_WORD0_RAT__ELEM_SIZE_shift = 30, + SQ_CF_ALU_WORD0 = 0x00008dfc, + 
SQ_CF_ALU_WORD0__ADDR_mask = 0x3fffff << 0, + SQ_CF_ALU_WORD0__ADDR_shift = 0, + KCACHE_BANK0_mask = 0x0f << 22, + KCACHE_BANK0_shift = 22, + KCACHE_BANK1_mask = 0x0f << 26, + KCACHE_BANK1_shift = 26, + KCACHE_MODE0_mask = 0x03 << 30, + KCACHE_MODE0_shift = 30, +/* SQ_CF_KCACHE_NOP = 0x00, */ +/* SQ_CF_KCACHE_LOCK_1 = 0x01, */ +/* SQ_CF_KCACHE_LOCK_2 = 0x02, */ +/* SQ_CF_KCACHE_LOCK_LOOP_INDEX = 0x03, */ + SQ_MEM_GDS_WORD1 = 0x00008dfc, + SQ_MEM_GDS_WORD1__DST_GPR_mask = 0x7f << 0, + SQ_MEM_GDS_WORD1__DST_GPR_shift = 0, + DST_REL_MODE_mask = 0x03 << 7, + DST_REL_MODE_shift = 7, + SQ_REL_NONE = 0x00, + SQ_REL_LOOP = 0x01, + SQ_REL_GLOBAL = 0x02, + GDS_OP_mask = 0x3f << 9, + GDS_OP_shift = 9, + SQ_DS_INST_ADD = 0x00, + SQ_DS_INST_SUB = 0x01, + SQ_DS_INST_RSUB = 0x02, + SQ_DS_INST_INC = 0x03, + SQ_DS_INST_DEC = 0x04, + SQ_DS_INST_MIN_INT = 0x05, + SQ_DS_INST_MAX_INT = 0x06, + SQ_DS_INST_MIN_UINT = 0x07, + SQ_DS_INST_MAX_UINT = 0x08, + SQ_DS_INST_AND = 0x09, + SQ_DS_INST_OR = 0x0a, + SQ_DS_INST_XOR = 0x0b, + SQ_DS_INST_MSKOR = 0x0c, + SQ_DS_INST_WRITE = 0x0d, + SQ_DS_INST_WRITE_REL = 0x0e, + SQ_DS_INST_WRITE2 = 0x0f, + SQ_DS_INST_CMP_STORE = 0x10, + SQ_DS_INST_CMP_STORE_SPF = 0x11, + SQ_DS_INST_BYTE_WRITE = 0x12, + SQ_DS_INST_SHORT_WRITE = 0x13, + SQ_DS_INST_ADD_RET = 0x20, + SQ_DS_INST_SUB_RET = 0x21, + SQ_DS_INST_RSUB_RET = 0x22, + SQ_DS_INST_INC_RET = 0x23, + SQ_DS_INST_DEC_RET = 0x24, + SQ_DS_INST_MIN_INT_RET = 0x25, + SQ_DS_INST_MAX_INT_RET = 0x26, + SQ_DS_INST_MIN_UINT_RET = 0x27, + SQ_DS_INST_MAX_UINT_RET = 0x28, + SQ_DS_INST_AND_RET = 0x29, + SQ_DS_INST_OR_RET = 0x2a, + SQ_DS_INST_XOR_RET = 0x2b, + SQ_DS_INST_MSKOR_RET = 0x2c, + SQ_DS_INST_XCHG_RET = 0x2d, + SQ_DS_INST_XCHG_REL_RET = 0x2e, + SQ_DS_INST_XCHG2_RET = 0x2f, + SQ_DS_INST_CMP_XCHG_RET = 0x30, + SQ_DS_INST_CMP_XCHG_SPF_RET = 0x31, + SQ_DS_INST_READ_RET = 0x32, + SQ_DS_INST_READ_REL_RET = 0x33, + SQ_DS_INST_READ2_RET = 0x34, + SQ_DS_INST_READWRITE_RET = 0x35, + SQ_DS_INST_BYTE_READ_RET = 0x36, + 
SQ_DS_INST_UBYTE_READ_RET = 0x37, + SQ_DS_INST_SHORT_READ_RET = 0x38, + SQ_DS_INST_USHORT_READ_RET = 0x39, + SQ_DS_INST_ATOMIC_ORDERED_ALLOC_RET = 0x3f, + DS_OFFSET_mask = 0x7f << 16, + DS_OFFSET_shift = 16, + UAV_INDEX_MODE_mask = 0x03 << 24, + UAV_INDEX_MODE_shift = 24, +/* SQ_CF_INDEX_NONE = 0x00, */ +/* SQ_CF_INDEX_0 = 0x01, */ +/* SQ_CF_INDEX_1 = 0x02, */ +/* SQ_CF_INVALID = 0x03, */ + UAV_ID_mask = 0x0f << 26, + UAV_ID_shift = 26, + ALLOC_CONSUME_bit = 1 << 30, + BCAST_FIRST_REQ_bit = 1 << 31, + SQ_MEM_RD_WORD2 = 0x00008dfc, + ARRAY_BASE_mask = 0x1fff << 0, + ARRAY_BASE_shift = 0, + SQ_MEM_RD_WORD2__ENDIAN_SWAP_mask = 0x03 << 16, + SQ_MEM_RD_WORD2__ENDIAN_SWAP_shift = 16, + SQ_ENDIAN_NONE = 0x00, + SQ_ENDIAN_8IN16 = 0x01, + SQ_ENDIAN_8IN32 = 0x02, + SQ_MEM_RD_WORD2__ARRAY_SIZE_mask = 0xfff << 20, + SQ_MEM_RD_WORD2__ARRAY_SIZE_shift = 20, + SQ_CF_ALU_WORD1_EXT = 0x00008dfc, + KCACHE_MODE3_mask = 0x03 << 0, + KCACHE_MODE3_shift = 0, +/* SQ_CF_KCACHE_NOP = 0x00, */ +/* SQ_CF_KCACHE_LOCK_1 = 0x01, */ +/* SQ_CF_KCACHE_LOCK_2 = 0x02, */ +/* SQ_CF_KCACHE_LOCK_LOOP_INDEX = 0x03, */ + KCACHE_ADDR2_mask = 0xff << 2, + KCACHE_ADDR2_shift = 2, + KCACHE_ADDR3_mask = 0xff << 10, + KCACHE_ADDR3_shift = 10, + SQ_CF_ALU_WORD1_EXT__CF_INST_mask = 0x0f << 26, + SQ_CF_ALU_WORD1_EXT__CF_INST_shift = 26, +/* SQ_CF_INST_ALU = 0x08, */ +/* SQ_CF_INST_ALU_PUSH_BEFORE = 0x09, */ +/* SQ_CF_INST_ALU_POP_AFTER = 0x0a, */ +/* SQ_CF_INST_ALU_POP2_AFTER = 0x0b, */ +/* SQ_CF_INST_ALU_EXTENDED = 0x0c, */ +/* SQ_CF_INST_ALU_CONTINUE = 0x0d, */ +/* SQ_CF_INST_ALU_BREAK = 0x0e, */ +/* SQ_CF_INST_ALU_ELSE_AFTER = 0x0f, */ +/* BARRIER_bit = 1 << 31, */ + SQ_CF_GWS_WORD0 = 0x00008dfc, + VALUE_mask = 0x3ff << 0, + VALUE_shift = 0, + RESOURCE_mask = 0x1f << 16, + RESOURCE_shift = 16, + SIGN_bit = 1 << 25, + VAL_INDEX_MODE_mask = 0x03 << 26, + VAL_INDEX_MODE_shift = 26, + SQ_GWS_INDEX_NONE = 0x00, + SQ_GWS_INDEX_0 = 0x01, + SQ_GWS_INDEX_1 = 0x02, + SQ_GWS_INDEX_MIX = 0x03, + RSRC_INDEX_MODE_mask = 
0x03 << 28, + RSRC_INDEX_MODE_shift = 28, +/* SQ_CF_INDEX_NONE = 0x00, */ +/* SQ_CF_INDEX_0 = 0x01, */ +/* SQ_CF_INDEX_1 = 0x02, */ +/* SQ_CF_INVALID = 0x03, */ + GWS_OPCODE_mask = 0x03 << 30, + GWS_OPCODE_shift = 30, + SQ_GWS_SEMA_V = 0x00, + SQ_GWS_SEMA_P = 0x01, + SQ_GWS_BARRIER = 0x02, + SQ_GWS_INIT = 0x03, + SQ_VTX_WORD2 = 0x00008dfc, + SQ_VTX_WORD2__OFFSET_mask = 0xffff << 0, + SQ_VTX_WORD2__OFFSET_shift = 0, + SQ_VTX_WORD2__ENDIAN_SWAP_mask = 0x03 << 16, + SQ_VTX_WORD2__ENDIAN_SWAP_shift = 16, +/* SQ_ENDIAN_NONE = 0x00, */ +/* SQ_ENDIAN_8IN16 = 0x01, */ +/* SQ_ENDIAN_8IN32 = 0x02, */ + CONST_BUF_NO_STRIDE_bit = 1 << 18, + MEGA_FETCH_bit = 1 << 19, + SQ_VTX_WORD2__ALT_CONST_bit = 1 << 20, + BUFFER_INDEX_MODE_mask = 0x03 << 21, + BUFFER_INDEX_MODE_shift = 21, +/* SQ_CF_INDEX_NONE = 0x00, */ +/* SQ_CF_INDEX_0 = 0x01, */ +/* SQ_CF_INDEX_1 = 0x02, */ +/* SQ_CF_INVALID = 0x03, */ + SQ_CF_ALLOC_EXPORT_WORD1_BUF = 0x00008dfc, + SQ_CF_ALLOC_EXPORT_WORD1_BUF__ARRAY_SIZE_mask = 0xfff << 0, + SQ_CF_ALLOC_EXPORT_WORD1_BUF__ARRAY_SIZE_shift = 0, + COMP_MASK_mask = 0x0f << 12, + COMP_MASK_shift = 12, + SQ_CF_WORD0 = 0x00008dfc, + SQ_CF_WORD0__ADDR_mask = 0xffffff << 0, + SQ_CF_WORD0__ADDR_shift = 0, + JUMPTABLE_SEL_mask = 0x07 << 24, + JUMPTABLE_SEL_shift = 24, + SQ_CF_JUMPTABLE_SEL_CONST_A = 0x00, + SQ_CF_JUMPTABLE_SEL_CONST_B = 0x01, + SQ_CF_JUMPTABLE_SEL_CONST_C = 0x02, + SQ_CF_JUMPTABLE_SEL_CONST_D = 0x03, + SQ_CF_JUMPTABLE_SEL_INDEX_0 = 0x04, + SQ_CF_JUMPTABLE_SEL_INDEX_1 = 0x05, + SQ_CF_ALLOC_EXPORT_WORD0 = 0x00008dfc, +/* ARRAY_BASE_mask = 0x1fff << 0, */ +/* ARRAY_BASE_shift = 0, */ + SQ_CF_ALLOC_EXPORT_WORD0__TYPE_mask = 0x03 << 13, + SQ_CF_ALLOC_EXPORT_WORD0__TYPE_shift = 13, +/* SQ_EXPORT_PIXEL = 0x00, */ +/* SQ_EXPORT_POS = 0x01, */ +/* SQ_EXPORT_PARAM = 0x02, */ +/* X_UNUSED_FOR_SX_EXPORTS = 0x03, */ +/* RW_GPR_mask = 0x7f << 15, */ +/* RW_GPR_shift = 15, */ +/* RW_REL_bit = 1 << 22, */ +/* INDEX_GPR_mask = 0x7f << 23, */ +/* INDEX_GPR_shift = 23, */ + 
SQ_CF_ALLOC_EXPORT_WORD0__ELEM_SIZE_mask = 0x03 << 30, + SQ_CF_ALLOC_EXPORT_WORD0__ELEM_SIZE_shift = 30, + SQ_MEM_GDS_WORD0 = 0x00008dfc, +/* MEM_INST_mask = 0x1f << 0, */ +/* MEM_INST_shift = 0, */ +/* SQ_MEM_INST_MEM = 0x02, */ +/* MEM_OP_mask = 0x07 << 8, */ +/* MEM_OP_shift = 8, */ +/* SQ_MEM_OP_RD_SCRATCH = 0x00, */ +/* SQ_MEM_OP_RD_SCATTER = 0x02, */ +/* SQ_MEM_OP_GDS = 0x04, */ +/* SQ_MEM_OP_TF_WRITE = 0x05, */ + SQ_MEM_GDS_WORD0__SRC_GPR_mask = 0x7f << 11, + SQ_MEM_GDS_WORD0__SRC_GPR_shift = 11, + SRC_REL_MODE_mask = 0x03 << 18, + SRC_REL_MODE_shift = 18, +/* SQ_REL_NONE = 0x00, */ +/* SQ_REL_LOOP = 0x01, */ +/* SQ_REL_GLOBAL = 0x02, */ + SQ_MEM_GDS_WORD0__SRC_SEL_X_mask = 0x07 << 20, + SQ_MEM_GDS_WORD0__SRC_SEL_X_shift = 20, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SRC_SEL_Y_mask = 0x07 << 23, */ +/* SRC_SEL_Y_shift = 23, */ +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SRC_SEL_Z_mask = 0x07 << 26, */ +/* SRC_SEL_Z_shift = 26, */ +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ + SQ_ALU_WORD1_LDS_DIRECT_LITERAL_HI = 0x00008dfc, + OFFSET_B_mask = 0x1fff << 0, + OFFSET_B_shift = 0, + STRIDE_B_mask = 0x7f << 13, + STRIDE_B_shift = 13, + THREAD_REL_B_bit = 1 << 22, + DIRECT_READ_32_bit = 1 << 31, + SQ_VTX_WORD1 = 0x00008dfc, + SQ_VTX_WORD1__DST_SEL_X_mask = 0x07 << 9, + SQ_VTX_WORD1__DST_SEL_X_shift = 9, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SQ_VTX_WORD1__DST_SEL_Y_mask = 0x07 << 12, + SQ_VTX_WORD1__DST_SEL_Y_shift = 12, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 
0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SQ_VTX_WORD1__DST_SEL_Z_mask = 0x07 << 15, + SQ_VTX_WORD1__DST_SEL_Z_shift = 15, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SQ_VTX_WORD1__DST_SEL_W_mask = 0x07 << 18, + SQ_VTX_WORD1__DST_SEL_W_shift = 18, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + USE_CONST_FIELDS_bit = 1 << 21, + SQ_VTX_WORD1__DATA_FORMAT_mask = 0x3f << 22, + SQ_VTX_WORD1__DATA_FORMAT_shift = 22, + SQ_VTX_WORD1__NUM_FORMAT_ALL_mask = 0x03 << 28, + SQ_VTX_WORD1__NUM_FORMAT_ALL_shift = 28, + SQ_NUM_FORMAT_NORM = 0x00, + SQ_NUM_FORMAT_INT = 0x01, + SQ_NUM_FORMAT_SCALED = 0x02, + SQ_VTX_WORD1__FORMAT_COMP_ALL_bit = 1 << 30, + SQ_VTX_WORD1__SRF_MODE_ALL_bit = 1 << 31, + SQ_ALU_WORD1_OP2 = 0x00008dfc, + SRC0_ABS_bit = 1 << 0, + SRC1_ABS_bit = 1 << 1, + UPDATE_EXECUTE_MASK_bit = 1 << 2, + UPDATE_PRED_bit = 1 << 3, + WRITE_MASK_bit = 1 << 4, + OMOD_mask = 0x03 << 5, + OMOD_shift = 5, + SQ_ALU_OMOD_OFF = 0x00, + SQ_ALU_OMOD_M2 = 0x01, + SQ_ALU_OMOD_M4 = 0x02, + SQ_ALU_OMOD_D2 = 0x03, + SQ_ALU_WORD1_OP2__ALU_INST_mask = 0x7ff << 7, + SQ_ALU_WORD1_OP2__ALU_INST_shift = 7, + SQ_OP2_INST_ADD = 0x00, + SQ_OP2_INST_MUL = 0x01, + SQ_OP2_INST_MUL_IEEE = 0x02, + SQ_OP2_INST_MAX = 0x03, + SQ_OP2_INST_MIN = 0x04, + SQ_OP2_INST_MAX_DX10 = 0x05, + SQ_OP2_INST_MIN_DX10 = 0x06, + SQ_OP2_INST_SETE = 0x08, + SQ_OP2_INST_SETGT = 0x09, + SQ_OP2_INST_SETGE = 0x0a, + SQ_OP2_INST_SETNE = 0x0b, + SQ_OP2_INST_SETE_DX10 = 0x0c, + SQ_OP2_INST_SETGT_DX10 = 0x0d, + SQ_OP2_INST_SETGE_DX10 = 0x0e, + SQ_OP2_INST_SETNE_DX10 = 0x0f, + SQ_OP2_INST_FRACT = 0x10, + SQ_OP2_INST_TRUNC = 0x11, + SQ_OP2_INST_CEIL = 0x12, + SQ_OP2_INST_RNDNE = 0x13, + SQ_OP2_INST_FLOOR = 0x14, + SQ_OP2_INST_ASHR_INT = 0x15, + SQ_OP2_INST_LSHR_INT 
= 0x16, + SQ_OP2_INST_LSHL_INT = 0x17, + SQ_OP2_INST_MOV = 0x19, + SQ_OP2_INST_NOP = 0x1a, + SQ_OP2_INST_PRED_SETGT_UINT = 0x1e, + SQ_OP2_INST_PRED_SETGE_UINT = 0x1f, + SQ_OP2_INST_PRED_SETE = 0x20, + SQ_OP2_INST_PRED_SETGT = 0x21, + SQ_OP2_INST_PRED_SETGE = 0x22, + SQ_OP2_INST_PRED_SETNE = 0x23, + SQ_OP2_INST_PRED_SET_INV = 0x24, + SQ_OP2_INST_PRED_SET_POP = 0x25, + SQ_OP2_INST_PRED_SET_CLR = 0x26, + SQ_OP2_INST_PRED_SET_RESTORE = 0x27, + SQ_OP2_INST_PRED_SETE_PUSH = 0x28, + SQ_OP2_INST_PRED_SETGT_PUSH = 0x29, + SQ_OP2_INST_PRED_SETGE_PUSH = 0x2a, + SQ_OP2_INST_PRED_SETNE_PUSH = 0x2b, + SQ_OP2_INST_KILLE = 0x2c, + SQ_OP2_INST_KILLGT = 0x2d, + SQ_OP2_INST_KILLGE = 0x2e, + SQ_OP2_INST_KILLNE = 0x2f, + SQ_OP2_INST_AND_INT = 0x30, + SQ_OP2_INST_OR_INT = 0x31, + SQ_OP2_INST_XOR_INT = 0x32, + SQ_OP2_INST_NOT_INT = 0x33, + SQ_OP2_INST_ADD_INT = 0x34, + SQ_OP2_INST_SUB_INT = 0x35, + SQ_OP2_INST_MAX_INT = 0x36, + SQ_OP2_INST_MIN_INT = 0x37, + SQ_OP2_INST_MAX_UINT = 0x38, + SQ_OP2_INST_MIN_UINT = 0x39, + SQ_OP2_INST_SETE_INT = 0x3a, + SQ_OP2_INST_SETGT_INT = 0x3b, + SQ_OP2_INST_SETGE_INT = 0x3c, + SQ_OP2_INST_SETNE_INT = 0x3d, + SQ_OP2_INST_SETGT_UINT = 0x3e, + SQ_OP2_INST_SETGE_UINT = 0x3f, + SQ_OP2_INST_KILLGT_UINT = 0x40, + SQ_OP2_INST_KILLGE_UINT = 0x41, + SQ_OP2_INST_PRED_SETE_INT = 0x42, + SQ_OP2_INST_PRED_SETGT_INT = 0x43, + SQ_OP2_INST_PRED_SETGE_INT = 0x44, + SQ_OP2_INST_PRED_SETNE_INT = 0x45, + SQ_OP2_INST_KILLE_INT = 0x46, + SQ_OP2_INST_KILLGT_INT = 0x47, + SQ_OP2_INST_KILLGE_INT = 0x48, + SQ_OP2_INST_KILLNE_INT = 0x49, + SQ_OP2_INST_PRED_SETE_PUSH_INT = 0x4a, + SQ_OP2_INST_PRED_SETGT_PUSH_INT = 0x4b, + SQ_OP2_INST_PRED_SETGE_PUSH_INT = 0x4c, + SQ_OP2_INST_PRED_SETNE_PUSH_INT = 0x4d, + SQ_OP2_INST_PRED_SETLT_PUSH_INT = 0x4e, + SQ_OP2_INST_PRED_SETLE_PUSH_INT = 0x4f, + SQ_OP2_INST_FLT_TO_INT = 0x50, + SQ_OP2_INST_BFREV_INT = 0x51, + SQ_OP2_INST_ADDC_UINT = 0x52, + SQ_OP2_INST_SUBB_UINT = 0x53, + SQ_OP2_INST_GROUP_BARRIER = 0x54, + SQ_OP2_INST_GROUP_SEQ_BEGIN = 
0x55, + SQ_OP2_INST_GROUP_SEQ_END = 0x56, + SQ_OP2_INST_SET_MODE = 0x57, + SQ_OP2_INST_SET_CF_IDX0 = 0x58, + SQ_OP2_INST_SET_CF_IDX1 = 0x59, + SQ_OP2_INST_SET_LDS_SIZE = 0x5a, + SQ_OP2_INST_EXP_IEEE = 0x81, + SQ_OP2_INST_LOG_CLAMPED = 0x82, + SQ_OP2_INST_LOG_IEEE = 0x83, + SQ_OP2_INST_RECIP_CLAMPED = 0x84, + SQ_OP2_INST_RECIP_FF = 0x85, + SQ_OP2_INST_RECIP_IEEE = 0x86, + SQ_OP2_INST_RECIPSQRT_CLAMPED = 0x87, + SQ_OP2_INST_RECIPSQRT_FF = 0x88, + SQ_OP2_INST_RECIPSQRT_IEEE = 0x89, + SQ_OP2_INST_SQRT_IEEE = 0x8a, + SQ_OP2_INST_SIN = 0x8d, + SQ_OP2_INST_COS = 0x8e, + SQ_OP2_INST_MULLO_INT = 0x8f, + SQ_OP2_INST_MULHI_INT = 0x90, + SQ_OP2_INST_MULLO_UINT = 0x91, + SQ_OP2_INST_MULHI_UINT = 0x92, + SQ_OP2_INST_RECIP_INT = 0x93, + SQ_OP2_INST_RECIP_UINT = 0x94, + SQ_OP2_INST_RECIP_64 = 0x95, + SQ_OP2_INST_RECIP_CLAMPED_64 = 0x96, + SQ_OP2_INST_RECIPSQRT_64 = 0x97, + SQ_OP2_INST_RECIPSQRT_CLAMPED_64 = 0x98, + SQ_OP2_INST_SQRT_64 = 0x99, + SQ_OP2_INST_FLT_TO_UINT = 0x9a, + SQ_OP2_INST_INT_TO_FLT = 0x9b, + SQ_OP2_INST_UINT_TO_FLT = 0x9c, + SQ_OP2_INST_BFM_INT = 0xa0, + SQ_OP2_INST_FLT32_TO_FLT16 = 0xa2, + SQ_OP2_INST_FLT16_TO_FLT32 = 0xa3, + SQ_OP2_INST_UBYTE0_FLT = 0xa4, + SQ_OP2_INST_UBYTE1_FLT = 0xa5, + SQ_OP2_INST_UBYTE2_FLT = 0xa6, + SQ_OP2_INST_UBYTE3_FLT = 0xa7, + SQ_OP2_INST_BCNT_INT = 0xaa, + SQ_OP2_INST_FFBH_UINT = 0xab, + SQ_OP2_INST_FFBL_INT = 0xac, + SQ_OP2_INST_FFBH_INT = 0xad, + SQ_OP2_INST_FLT_TO_UINT4 = 0xae, + SQ_OP2_INST_DOT_IEEE = 0xaf, + SQ_OP2_INST_FLT_TO_INT_RPI = 0xb0, + SQ_OP2_INST_FLT_TO_INT_FLOOR = 0xb1, + SQ_OP2_INST_MULHI_UINT24 = 0xb2, + SQ_OP2_INST_MBCNT_32HI_INT = 0xb3, + SQ_OP2_INST_OFFSET_TO_FLT = 0xb4, + SQ_OP2_INST_MUL_UINT24 = 0xb5, + SQ_OP2_INST_BCNT_ACCUM_PREV_INT = 0xb6, + SQ_OP2_INST_MBCNT_32LO_ACCUM_PREV_INT = 0xb7, + SQ_OP2_INST_SETE_64 = 0xb8, + SQ_OP2_INST_SETNE_64 = 0xb9, + SQ_OP2_INST_SETGT_64 = 0xba, + SQ_OP2_INST_SETGE_64 = 0xbb, + SQ_OP2_INST_MIN_64 = 0xbc, + SQ_OP2_INST_MAX_64 = 0xbd, + SQ_OP2_INST_DOT4 = 0xbe, + 
SQ_OP2_INST_DOT4_IEEE = 0xbf, + SQ_OP2_INST_CUBE = 0xc0, + SQ_OP2_INST_MAX4 = 0xc1, + SQ_OP2_INST_FREXP_64 = 0xc4, + SQ_OP2_INST_LDEXP_64 = 0xc5, + SQ_OP2_INST_FRACT_64 = 0xc6, + SQ_OP2_INST_PRED_SETGT_64 = 0xc7, + SQ_OP2_INST_PRED_SETE_64 = 0xc8, + SQ_OP2_INST_PRED_SETGE_64 = 0xc9, + SQ_OP2_INST_MUL_64 = 0xca, + SQ_OP2_INST_ADD_64 = 0xcb, + SQ_OP2_INST_MOVA_INT = 0xcc, + SQ_OP2_INST_FLT64_TO_FLT32 = 0xcd, + SQ_OP2_INST_FLT32_TO_FLT64 = 0xce, + SQ_OP2_INST_SAD_ACCUM_PREV_UINT = 0xcf, + SQ_OP2_INST_DOT = 0xd0, + SQ_OP2_INST_MUL_PREV = 0xd1, + SQ_OP2_INST_MUL_IEEE_PREV = 0xd2, + SQ_OP2_INST_ADD_PREV = 0xd3, + SQ_OP2_INST_MULADD_PREV = 0xd4, + SQ_OP2_INST_MULADD_IEEE_PREV = 0xd5, + SQ_OP2_INST_INTERP_XY = 0xd6, + SQ_OP2_INST_INTERP_ZW = 0xd7, + SQ_OP2_INST_INTERP_X = 0xd8, + SQ_OP2_INST_INTERP_Z = 0xd9, + SQ_OP2_INST_STORE_FLAGS = 0xda, + SQ_OP2_INST_LOAD_STORE_FLAGS = 0xdb, + SQ_OP2_INST_INTERP_LOAD_P0 = 0xe0, + SQ_OP2_INST_INTERP_LOAD_P10 = 0xe1, + SQ_OP2_INST_INTERP_LOAD_P20 = 0xe2, + SQ_CF_WORD1 = 0x00008dfc, + POP_COUNT_mask = 0x07 << 0, + POP_COUNT_shift = 0, + CF_CONST_mask = 0x1f << 3, + CF_CONST_shift = 3, + COND_mask = 0x03 << 8, + COND_shift = 8, + SQ_CF_COND_ACTIVE = 0x00, + SQ_CF_COND_FALSE = 0x01, + SQ_CF_COND_BOOL = 0x02, + SQ_CF_COND_NOT_BOOL = 0x03, + SQ_CF_WORD1__COUNT_mask = 0x3f << 10, + SQ_CF_WORD1__COUNT_shift = 10, +/* VALID_PIXEL_MODE_bit = 1 << 20, */ +/* END_OF_PROGRAM_bit = 1 << 21, */ + SQ_CF_WORD1__CF_INST_mask = 0xff << 22, + SQ_CF_WORD1__CF_INST_shift = 22, + SQ_CF_INST_NOP = 0x00, + SQ_CF_INST_TC = 0x01, + SQ_CF_INST_VC = 0x02, + SQ_CF_INST_GDS = 0x03, + SQ_CF_INST_LOOP_START = 0x04, + SQ_CF_INST_LOOP_END = 0x05, + SQ_CF_INST_LOOP_START_DX10 = 0x06, + SQ_CF_INST_LOOP_START_NO_AL = 0x07, + SQ_CF_INST_LOOP_CONTINUE = 0x08, + SQ_CF_INST_LOOP_BREAK = 0x09, + SQ_CF_INST_JUMP = 0x0a, + SQ_CF_INST_PUSH = 0x0b, + SQ_CF_INST_ELSE = 0x0d, + SQ_CF_INST_POP = 0x0e, + SQ_CF_INST_CALL = 0x12, + SQ_CF_INST_CALL_FS = 0x13, + SQ_CF_INST_RETURN = 0x14, + 
SQ_CF_INST_EMIT_VERTEX = 0x15, + SQ_CF_INST_EMIT_CUT_VERTEX = 0x16, + SQ_CF_INST_CUT_VERTEX = 0x17, + SQ_CF_INST_KILL = 0x18, + SQ_CF_INST_WAIT_ACK = 0x1a, + SQ_CF_INST_TC_ACK = 0x1b, + SQ_CF_INST_VC_ACK = 0x1c, + SQ_CF_INST_JUMPTABLE = 0x1d, + SQ_CF_INST_GLOBAL_WAVE_SYNC = 0x1e, + SQ_CF_INST_HALT = 0x1f, +/* WHOLE_QUAD_MODE_bit = 1 << 30, */ +/* BARRIER_bit = 1 << 31, */ + SQ_VTX_WORD1_SEM = 0x00008dfc, + SEMANTIC_ID_mask = 0xff << 0, + SEMANTIC_ID_shift = 0, + SQ_TEX_WORD0 = 0x00008dfc, + TEX_INST_mask = 0x1f << 0, + TEX_INST_shift = 0, + SQ_TEX_INST_LD = 0x03, + SQ_TEX_INST_GET_TEXTURE_RESINFO = 0x04, + SQ_TEX_INST_GET_NUMBER_OF_SAMPLES = 0x05, + SQ_TEX_INST_GET_LOD = 0x06, + SQ_TEX_INST_GET_GRADIENTS_H = 0x07, + SQ_TEX_INST_GET_GRADIENTS_V = 0x08, + SQ_TEX_INST_SET_TEXTURE_OFFSETS = 0x09, + SQ_TEX_INST_KEEP_GRADIENTS = 0x0a, + SQ_TEX_INST_SET_GRADIENTS_H = 0x0b, + SQ_TEX_INST_SET_GRADIENTS_V = 0x0c, + SQ_TEX_INST_PASS = 0x0d, + SQ_TEX_INST_SAMPLE = 0x10, + SQ_TEX_INST_SAMPLE_L = 0x11, + SQ_TEX_INST_SAMPLE_LB = 0x12, + SQ_TEX_INST_SAMPLE_LZ = 0x13, + SQ_TEX_INST_SAMPLE_G = 0x14, + SQ_TEX_INST_GATHER4 = 0x15, + SQ_TEX_INST_SAMPLE_G_LB = 0x16, + SQ_TEX_INST_GATHER4_O = 0x17, + SQ_TEX_INST_SAMPLE_C = 0x18, + SQ_TEX_INST_SAMPLE_C_L = 0x19, + SQ_TEX_INST_SAMPLE_C_LB = 0x1a, + SQ_TEX_INST_SAMPLE_C_LZ = 0x1b, + SQ_TEX_INST_SAMPLE_C_G = 0x1c, + SQ_TEX_INST_GATHER4_C = 0x1d, + SQ_TEX_INST_SAMPLE_C_G_LB = 0x1e, + SQ_TEX_INST_GATHER4_C_O = 0x1f, + INST_MOD_mask = 0x03 << 5, + INST_MOD_shift = 5, +/* FETCH_WHOLE_QUAD_bit = 1 << 7, */ + RESOURCE_ID_mask = 0xff << 8, + RESOURCE_ID_shift = 8, + SQ_TEX_WORD0__SRC_GPR_mask = 0x7f << 16, + SQ_TEX_WORD0__SRC_GPR_shift = 16, +/* SRC_REL_bit = 1 << 23, */ + SQ_TEX_WORD0__ALT_CONST_bit = 1 << 24, + RESOURCE_INDEX_MODE_mask = 0x03 << 25, + RESOURCE_INDEX_MODE_shift = 25, +/* SQ_CF_INDEX_NONE = 0x00, */ +/* SQ_CF_INDEX_0 = 0x01, */ +/* SQ_CF_INDEX_1 = 0x02, */ +/* SQ_CF_INVALID = 0x03, */ + SAMPLER_INDEX_MODE_mask = 0x03 << 27, + 
SAMPLER_INDEX_MODE_shift = 27, +/* SQ_CF_INDEX_NONE = 0x00, */ +/* SQ_CF_INDEX_0 = 0x01, */ +/* SQ_CF_INDEX_1 = 0x02, */ +/* SQ_CF_INVALID = 0x03, */ + SQ_VTX_WORD1_GPR = 0x00008dfc, + SQ_VTX_WORD1_GPR__DST_GPR_mask = 0x7f << 0, + SQ_VTX_WORD1_GPR__DST_GPR_shift = 0, + SQ_VTX_WORD1_GPR__DST_REL_bit = 1 << 7, + SQ_ALU_WORD1_LDS_IDX_OP = 0x00008dfc, +/* SRC2_SEL_mask = 0x1ff << 0, */ +/* SRC2_SEL_shift = 0, */ +/* SQ_ALU_SRC_LDS_OQ_A = 0xdb, */ +/* SQ_ALU_SRC_LDS_OQ_B = 0xdc, */ +/* SQ_ALU_SRC_LDS_OQ_A_POP = 0xdd, */ +/* SQ_ALU_SRC_LDS_OQ_B_POP = 0xde, */ +/* SQ_ALU_SRC_LDS_DIRECT_A = 0xdf, */ +/* SQ_ALU_SRC_LDS_DIRECT_B = 0xe0, */ +/* SQ_ALU_SRC_TIME_HI = 0xe3, */ +/* SQ_ALU_SRC_TIME_LO = 0xe4, */ +/* SQ_ALU_SRC_MASK_HI = 0xe5, */ +/* SQ_ALU_SRC_MASK_LO = 0xe6, */ +/* SQ_ALU_SRC_HW_WAVE_ID = 0xe7, */ +/* SQ_ALU_SRC_SIMD_ID = 0xe8, */ +/* SQ_ALU_SRC_SE_ID = 0xe9, */ +/* SQ_ALU_SRC_HW_THREADGRP_ID = 0xea, */ +/* SQ_ALU_SRC_WAVE_ID_IN_GRP = 0xeb, */ +/* SQ_ALU_SRC_NUM_THREADGRP_WAVES = 0xec, */ +/* SQ_ALU_SRC_HW_ALU_ODD = 0xed, */ +/* SQ_ALU_SRC_LOOP_IDX = 0xee, */ +/* SQ_ALU_SRC_PARAM_BASE_ADDR = 0xf0, */ +/* SQ_ALU_SRC_NEW_PRIM_MASK = 0xf1, */ +/* SQ_ALU_SRC_PRIM_MASK_HI = 0xf2, */ +/* SQ_ALU_SRC_PRIM_MASK_LO = 0xf3, */ +/* SQ_ALU_SRC_1_DBL_L = 0xf4, */ +/* SQ_ALU_SRC_1_DBL_M = 0xf5, */ +/* SQ_ALU_SRC_0_5_DBL_L = 0xf6, */ +/* SQ_ALU_SRC_0_5_DBL_M = 0xf7, */ +/* SQ_ALU_SRC_0 = 0xf8, */ +/* SQ_ALU_SRC_1 = 0xf9, */ +/* SQ_ALU_SRC_1_INT = 0xfa, */ +/* SQ_ALU_SRC_M_1_INT = 0xfb, */ +/* SQ_ALU_SRC_0_5 = 0xfc, */ +/* SQ_ALU_SRC_LITERAL = 0xfd, */ +/* SQ_ALU_SRC_PV = 0xfe, */ +/* SQ_ALU_SRC_PS = 0xff, */ +/* SRC2_REL_bit = 1 << 9, */ +/* SRC2_CHAN_mask = 0x03 << 10, */ +/* SRC2_CHAN_shift = 10, */ +/* SQ_CHAN_X = 0x00, */ +/* SQ_CHAN_Y = 0x01, */ +/* SQ_CHAN_Z = 0x02, */ +/* SQ_CHAN_W = 0x03, */ + IDX_OFFSET_1_bit = 1 << 12, + SQ_ALU_WORD1_LDS_IDX_OP__ALU_INST_mask = 0x1f << 13, + SQ_ALU_WORD1_LDS_IDX_OP__ALU_INST_shift = 13, +/* SQ_OP3_INST_BFE_UINT = 0x04, */ +/* 
SQ_OP3_INST_BFE_INT = 0x05, */ +/* SQ_OP3_INST_BFI_INT = 0x06, */ +/* SQ_OP3_INST_FMA = 0x07, */ +/* SQ_OP3_INST_CNDNE_64 = 0x09, */ +/* SQ_OP3_INST_FMA_64 = 0x0a, */ +/* SQ_OP3_INST_LERP_UINT = 0x0b, */ +/* SQ_OP3_INST_BIT_ALIGN_INT = 0x0c, */ +/* SQ_OP3_INST_BYTE_ALIGN_INT = 0x0d, */ +/* SQ_OP3_INST_SAD_ACCUM_UINT = 0x0e, */ +/* SQ_OP3_INST_SAD_ACCUM_HI_UINT = 0x0f, */ +/* SQ_OP3_INST_MULADD_UINT24 = 0x10, */ +/* SQ_OP3_INST_LDS_IDX_OP = 0x11, */ +/* SQ_OP3_INST_MULADD = 0x14, */ +/* SQ_OP3_INST_MULADD_M2 = 0x15, */ +/* SQ_OP3_INST_MULADD_M4 = 0x16, */ +/* SQ_OP3_INST_MULADD_D2 = 0x17, */ +/* SQ_OP3_INST_MULADD_IEEE = 0x18, */ +/* SQ_OP3_INST_CNDE = 0x19, */ +/* SQ_OP3_INST_CNDGT = 0x1a, */ +/* SQ_OP3_INST_CNDGE = 0x1b, */ +/* SQ_OP3_INST_CNDE_INT = 0x1c, */ +/* SQ_OP3_INST_CNDGT_INT = 0x1d, */ +/* SQ_OP3_INST_CNDGE_INT = 0x1e, */ +/* SQ_OP3_INST_MUL_LIT = 0x1f, */ +/* BANK_SWIZZLE_mask = 0x07 << 18, */ +/* BANK_SWIZZLE_shift = 18, */ +/* SQ_ALU_VEC_012 = 0x00, */ +/* SQ_ALU_VEC_021 = 0x01, */ +/* SQ_ALU_VEC_120 = 0x02, */ +/* SQ_ALU_VEC_102 = 0x03, */ +/* SQ_ALU_VEC_201 = 0x04, */ +/* SQ_ALU_VEC_210 = 0x05, */ + LDS_OP_mask = 0x3f << 21, + LDS_OP_shift = 21, +/* SQ_DS_INST_ADD = 0x00, */ +/* SQ_DS_INST_SUB = 0x01, */ +/* SQ_DS_INST_RSUB = 0x02, */ +/* SQ_DS_INST_INC = 0x03, */ +/* SQ_DS_INST_DEC = 0x04, */ +/* SQ_DS_INST_MIN_INT = 0x05, */ +/* SQ_DS_INST_MAX_INT = 0x06, */ +/* SQ_DS_INST_MIN_UINT = 0x07, */ +/* SQ_DS_INST_MAX_UINT = 0x08, */ +/* SQ_DS_INST_AND = 0x09, */ +/* SQ_DS_INST_OR = 0x0a, */ +/* SQ_DS_INST_XOR = 0x0b, */ +/* SQ_DS_INST_MSKOR = 0x0c, */ +/* SQ_DS_INST_WRITE = 0x0d, */ +/* SQ_DS_INST_WRITE_REL = 0x0e, */ +/* SQ_DS_INST_WRITE2 = 0x0f, */ +/* SQ_DS_INST_CMP_STORE = 0x10, */ +/* SQ_DS_INST_CMP_STORE_SPF = 0x11, */ +/* SQ_DS_INST_BYTE_WRITE = 0x12, */ +/* SQ_DS_INST_SHORT_WRITE = 0x13, */ +/* SQ_DS_INST_ADD_RET = 0x20, */ +/* SQ_DS_INST_SUB_RET = 0x21, */ +/* SQ_DS_INST_RSUB_RET = 0x22, */ +/* SQ_DS_INST_INC_RET = 0x23, */ +/* 
SQ_DS_INST_DEC_RET = 0x24, */ +/* SQ_DS_INST_MIN_INT_RET = 0x25, */ +/* SQ_DS_INST_MAX_INT_RET = 0x26, */ +/* SQ_DS_INST_MIN_UINT_RET = 0x27, */ +/* SQ_DS_INST_MAX_UINT_RET = 0x28, */ +/* SQ_DS_INST_AND_RET = 0x29, */ +/* SQ_DS_INST_OR_RET = 0x2a, */ +/* SQ_DS_INST_XOR_RET = 0x2b, */ +/* SQ_DS_INST_MSKOR_RET = 0x2c, */ +/* SQ_DS_INST_XCHG_RET = 0x2d, */ +/* SQ_DS_INST_XCHG_REL_RET = 0x2e, */ +/* SQ_DS_INST_XCHG2_RET = 0x2f, */ +/* SQ_DS_INST_CMP_XCHG_RET = 0x30, */ +/* SQ_DS_INST_CMP_XCHG_SPF_RET = 0x31, */ +/* SQ_DS_INST_READ_RET = 0x32, */ +/* SQ_DS_INST_READ_REL_RET = 0x33, */ +/* SQ_DS_INST_READ2_RET = 0x34, */ +/* SQ_DS_INST_READWRITE_RET = 0x35, */ +/* SQ_DS_INST_BYTE_READ_RET = 0x36, */ +/* SQ_DS_INST_UBYTE_READ_RET = 0x37, */ +/* SQ_DS_INST_SHORT_READ_RET = 0x38, */ +/* SQ_DS_INST_USHORT_READ_RET = 0x39, */ +/* SQ_DS_INST_ATOMIC_ORDERED_ALLOC_RET = 0x3f, */ + IDX_OFFSET_0_bit = 1 << 27, + IDX_OFFSET_2_bit = 1 << 28, +/* DST_CHAN_mask = 0x03 << 29, */ +/* DST_CHAN_shift = 29, */ +/* CHAN_X = 0x00, */ +/* CHAN_Y = 0x01, */ +/* CHAN_Z = 0x02, */ +/* CHAN_W = 0x03, */ + IDX_OFFSET_3_bit = 1 << 31, + SQ_CF_ENCODING_WORD1 = 0x00008dfc, + SQ_CF_ENCODING_WORD1__ENCODING_mask = 0x03 << 28, + SQ_CF_ENCODING_WORD1__ENCODING_shift = 28, + SQ_CF_ENCODING_INST_CF = 0x00, + SQ_CF_ENCODING_INST_ALLOC_EXPORT = 0x01, + SQ_CF_ENCODING_INST_ALU0 = 0x02, + SQ_CF_ENCODING_INST_ALU1 = 0x03, + SQ_ALU_WORD0 = 0x00008dfc, +/* SRC0_SEL_mask = 0x1ff << 0, */ +/* SRC0_SEL_shift = 0, */ +/* SQ_ALU_SRC_LDS_OQ_A = 0xdb, */ +/* SQ_ALU_SRC_LDS_OQ_B = 0xdc, */ +/* SQ_ALU_SRC_LDS_OQ_A_POP = 0xdd, */ +/* SQ_ALU_SRC_LDS_OQ_B_POP = 0xde, */ +/* SQ_ALU_SRC_LDS_DIRECT_A = 0xdf, */ +/* SQ_ALU_SRC_LDS_DIRECT_B = 0xe0, */ +/* SQ_ALU_SRC_TIME_HI = 0xe3, */ +/* SQ_ALU_SRC_TIME_LO = 0xe4, */ +/* SQ_ALU_SRC_MASK_HI = 0xe5, */ +/* SQ_ALU_SRC_MASK_LO = 0xe6, */ +/* SQ_ALU_SRC_HW_WAVE_ID = 0xe7, */ +/* SQ_ALU_SRC_SIMD_ID = 0xe8, */ +/* SQ_ALU_SRC_SE_ID = 0xe9, */ +/* SQ_ALU_SRC_HW_THREADGRP_ID = 0xea, */ 
+/* SQ_ALU_SRC_WAVE_ID_IN_GRP = 0xeb, */ +/* SQ_ALU_SRC_NUM_THREADGRP_WAVES = 0xec, */ +/* SQ_ALU_SRC_HW_ALU_ODD = 0xed, */ +/* SQ_ALU_SRC_LOOP_IDX = 0xee, */ +/* SQ_ALU_SRC_PARAM_BASE_ADDR = 0xf0, */ +/* SQ_ALU_SRC_NEW_PRIM_MASK = 0xf1, */ +/* SQ_ALU_SRC_PRIM_MASK_HI = 0xf2, */ +/* SQ_ALU_SRC_PRIM_MASK_LO = 0xf3, */ +/* SQ_ALU_SRC_1_DBL_L = 0xf4, */ +/* SQ_ALU_SRC_1_DBL_M = 0xf5, */ +/* SQ_ALU_SRC_0_5_DBL_L = 0xf6, */ +/* SQ_ALU_SRC_0_5_DBL_M = 0xf7, */ +/* SQ_ALU_SRC_0 = 0xf8, */ +/* SQ_ALU_SRC_1 = 0xf9, */ +/* SQ_ALU_SRC_1_INT = 0xfa, */ +/* SQ_ALU_SRC_M_1_INT = 0xfb, */ +/* SQ_ALU_SRC_0_5 = 0xfc, */ +/* SQ_ALU_SRC_LITERAL = 0xfd, */ +/* SQ_ALU_SRC_PV = 0xfe, */ +/* SQ_ALU_SRC_PS = 0xff, */ +/* SRC0_REL_bit = 1 << 9, */ +/* SRC0_CHAN_mask = 0x03 << 10, */ +/* SRC0_CHAN_shift = 10, */ +/* SQ_CHAN_X = 0x00, */ +/* SQ_CHAN_Y = 0x01, */ +/* SQ_CHAN_Z = 0x02, */ +/* SQ_CHAN_W = 0x03, */ + SRC0_NEG_bit = 1 << 12, +/* SRC1_SEL_mask = 0x1ff << 13, */ +/* SRC1_SEL_shift = 13, */ +/* SQ_ALU_SRC_LDS_OQ_A = 0xdb, */ +/* SQ_ALU_SRC_LDS_OQ_B = 0xdc, */ +/* SQ_ALU_SRC_LDS_OQ_A_POP = 0xdd, */ +/* SQ_ALU_SRC_LDS_OQ_B_POP = 0xde, */ +/* SQ_ALU_SRC_LDS_DIRECT_A = 0xdf, */ +/* SQ_ALU_SRC_LDS_DIRECT_B = 0xe0, */ +/* SQ_ALU_SRC_TIME_HI = 0xe3, */ +/* SQ_ALU_SRC_TIME_LO = 0xe4, */ +/* SQ_ALU_SRC_MASK_HI = 0xe5, */ +/* SQ_ALU_SRC_MASK_LO = 0xe6, */ +/* SQ_ALU_SRC_HW_WAVE_ID = 0xe7, */ +/* SQ_ALU_SRC_SIMD_ID = 0xe8, */ +/* SQ_ALU_SRC_SE_ID = 0xe9, */ +/* SQ_ALU_SRC_HW_THREADGRP_ID = 0xea, */ +/* SQ_ALU_SRC_WAVE_ID_IN_GRP = 0xeb, */ +/* SQ_ALU_SRC_NUM_THREADGRP_WAVES = 0xec, */ +/* SQ_ALU_SRC_HW_ALU_ODD = 0xed, */ +/* SQ_ALU_SRC_LOOP_IDX = 0xee, */ +/* SQ_ALU_SRC_PARAM_BASE_ADDR = 0xf0, */ +/* SQ_ALU_SRC_NEW_PRIM_MASK = 0xf1, */ +/* SQ_ALU_SRC_PRIM_MASK_HI = 0xf2, */ +/* SQ_ALU_SRC_PRIM_MASK_LO = 0xf3, */ +/* SQ_ALU_SRC_1_DBL_L = 0xf4, */ +/* SQ_ALU_SRC_1_DBL_M = 0xf5, */ +/* SQ_ALU_SRC_0_5_DBL_L = 0xf6, */ +/* SQ_ALU_SRC_0_5_DBL_M = 0xf7, */ +/* SQ_ALU_SRC_0 = 0xf8, */ +/* SQ_ALU_SRC_1 
= 0xf9, */ +/* SQ_ALU_SRC_1_INT = 0xfa, */ +/* SQ_ALU_SRC_M_1_INT = 0xfb, */ +/* SQ_ALU_SRC_0_5 = 0xfc, */ +/* SQ_ALU_SRC_LITERAL = 0xfd, */ +/* SQ_ALU_SRC_PV = 0xfe, */ +/* SQ_ALU_SRC_PS = 0xff, */ +/* SRC1_REL_bit = 1 << 22, */ +/* SRC1_CHAN_mask = 0x03 << 23, */ +/* SRC1_CHAN_shift = 23, */ +/* SQ_CHAN_X = 0x00, */ +/* SQ_CHAN_Y = 0x01, */ +/* SQ_CHAN_Z = 0x02, */ +/* SQ_CHAN_W = 0x03, */ + SRC1_NEG_bit = 1 << 25, +/* INDEX_MODE_mask = 0x07 << 26, */ +/* INDEX_MODE_shift = 26, */ +/* SQ_INDEX_AR_X = 0x00, */ +/* SQ_INDEX_LOOP = 0x04, */ +/* SQ_INDEX_GLOBAL = 0x05, */ +/* SQ_INDEX_GLOBAL_AR_X = 0x06, */ +/* PRED_SEL_mask = 0x03 << 29, */ +/* PRED_SEL_shift = 29, */ +/* SQ_PRED_SEL_OFF = 0x00, */ +/* SQ_PRED_SEL_ZERO = 0x02, */ +/* SQ_PRED_SEL_ONE = 0x03, */ +/* LAST_bit = 1 << 31, */ + SQ_MEM_RD_WORD1 = 0x00008dfc, + SQ_MEM_RD_WORD1__DST_GPR_mask = 0x7f << 0, + SQ_MEM_RD_WORD1__DST_GPR_shift = 0, + SQ_MEM_RD_WORD1__DST_REL_bit = 1 << 7, + SQ_MEM_RD_WORD1__DST_SEL_X_mask = 0x07 << 9, + SQ_MEM_RD_WORD1__DST_SEL_X_shift = 9, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SQ_MEM_RD_WORD1__DST_SEL_Y_mask = 0x07 << 12, + SQ_MEM_RD_WORD1__DST_SEL_Y_shift = 12, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SQ_MEM_RD_WORD1__DST_SEL_Z_mask = 0x07 << 15, + SQ_MEM_RD_WORD1__DST_SEL_Z_shift = 15, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SQ_MEM_RD_WORD1__DST_SEL_W_mask = 0x07 << 18, + SQ_MEM_RD_WORD1__DST_SEL_W_shift = 18, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + 
SQ_MEM_RD_WORD1__DATA_FORMAT_mask = 0x3f << 22, + SQ_MEM_RD_WORD1__DATA_FORMAT_shift = 22, + SQ_MEM_RD_WORD1__NUM_FORMAT_ALL_mask = 0x03 << 28, + SQ_MEM_RD_WORD1__NUM_FORMAT_ALL_shift = 28, +/* SQ_NUM_FORMAT_NORM = 0x00, */ +/* SQ_NUM_FORMAT_INT = 0x01, */ +/* SQ_NUM_FORMAT_SCALED = 0x02, */ + SQ_MEM_RD_WORD1__FORMAT_COMP_ALL_bit = 1 << 30, + SQ_MEM_RD_WORD1__SRF_MODE_ALL_bit = 1 << 31, + SQ_LSTMP_RING_BASE = 0x00008e10, + SQ_LSTMP_RING_SIZE = 0x00008e14, + SQ_HSTMP_RING_BASE = 0x00008e18, + SQ_HSTMP_RING_SIZE = 0x00008e1c, + SX_EXPORT_BUFFER_SIZES = 0x0000900c, + COLOR_BUFFER_SIZE_mask = 0xff << 0, + COLOR_BUFFER_SIZE_shift = 0, + POSITION_BUFFER_SIZE_mask = 0xff << 8, + POSITION_BUFFER_SIZE_shift = 8, + SMX_BUFFER_SIZE_mask = 0xff << 16, + SMX_BUFFER_SIZE_shift = 16, + SX_MEMORY_EXPORT_BASE = 0x00009010, + SX_MEMORY_EXPORT_SIZE = 0x00009014, + SPI_CONFIG_CNTL = 0x00009100, + GPR_WRITE_PRIORITY_mask = 0x3ffff << 0, + GPR_WRITE_PRIORITY_shift = 0, + SPI_CONFIG_CNTL_1 = 0x0000913c, + VTX_DONE_DELAY_mask = 0x0f << 0, + VTX_DONE_DELAY_shift = 0, + X_DELAY_14_CLKS = 0x00, + X_DELAY_16_CLKS = 0x01, + X_DELAY_18_CLKS = 0x02, + X_DELAY_20_CLKS = 0x03, + X_DELAY_22_CLKS = 0x04, + X_DELAY_24_CLKS = 0x05, + X_DELAY_26_CLKS = 0x06, + X_DELAY_28_CLKS = 0x07, + X_DELAY_30_CLKS = 0x08, + X_DELAY_32_CLKS = 0x09, + X_DELAY_34_CLKS = 0x0a, + X_DELAY_4_CLKS = 0x0b, + X_DELAY_6_CLKS = 0x0c, + X_DELAY_8_CLKS = 0x0d, + X_DELAY_10_CLKS = 0x0e, + X_DELAY_12_CLKS = 0x0f, + INTERP_ONE_PRIM_PER_ROW_bit = 1 << 4, + BC_OPTIMIZE_DISABLE_bit = 1 << 5, + PC_LIMIT_ENABLE_bit = 1 << 6, + PC_LIMIT_STRICT_bit = 1 << 7, + PC_LIMIT_SIZE_mask = 0xffff << 16, + PC_LIMIT_SIZE_shift = 16, + TD_CNTL = 0x00009494, + SYNC_PHASE_SH_mask = 0x03 << 0, + SYNC_PHASE_SH_shift = 0, + PAD_STALL_EN_bit = 1 << 8, + GATHER4_FLOAT_MODE_bit = 1 << 16, + TD_STATUS = 0x00009498, + BUSY_bit = 1 << 31, + TA_CNTL_AUX = 0x00009508, + TA_CNTL_AUX__DISABLE_CUBE_WRAP_bit = 1 << 0, + DISABLE_CUBE_ANISO_bit = 1 << 1, + 
GETLOD_SELECT_mask = 0x03 << 2, + GETLOD_SELECT_shift = 2, + X_SAMPLER_AND_RESOURCE_CLAMPED_LOD_IN_RESOURCE= 0x00, + DISABLE_IDLE_STALL_bit = 1 << 4, + TEX_COORD_PRECISION_bit = 1 << 28, + LOD_LOG2_TRUNC_bit = 1 << 29, + DB_ZPASS_COUNT_LOW = 0x00009870, + DB_ZPASS_COUNT_HI = 0x00009874, + COUNT_HI_mask = 0x7fffffff << 0, + COUNT_HI_shift = 0, + TD_PS_BORDER_COLOR_INDEX = 0x0000a400, + INDEX_mask = 0x1f << 0, + INDEX_shift = 0, + TD_PS_BORDER_COLOR_RED = 0x0000a404, + TD_PS_BORDER_COLOR_GREEN = 0x0000a408, + TD_PS_BORDER_COLOR_BLUE = 0x0000a40c, + TD_PS_BORDER_COLOR_ALPHA = 0x0000a410, + TD_VS_BORDER_COLOR_INDEX = 0x0000a414, +/* INDEX_mask = 0x1f << 0, */ +/* INDEX_shift = 0, */ + TD_VS_BORDER_COLOR_RED = 0x0000a418, + TD_VS_BORDER_COLOR_GREEN = 0x0000a41c, + TD_VS_BORDER_COLOR_BLUE = 0x0000a420, + TD_VS_BORDER_COLOR_ALPHA = 0x0000a424, + TD_GS_BORDER_COLOR_INDEX = 0x0000a428, +/* INDEX_mask = 0x1f << 0, */ +/* INDEX_shift = 0, */ + TD_GS_BORDER_COLOR_RED = 0x0000a42c, + TD_GS_BORDER_COLOR_GREEN = 0x0000a430, + TD_GS_BORDER_COLOR_BLUE = 0x0000a434, + TD_GS_BORDER_COLOR_ALPHA = 0x0000a438, + TD_HS_BORDER_COLOR_INDEX = 0x0000a43c, +/* INDEX_mask = 0x1f << 0, */ +/* INDEX_shift = 0, */ + TD_HS_BORDER_COLOR_RED = 0x0000a440, + TD_HS_BORDER_COLOR_GREEN = 0x0000a444, + TD_HS_BORDER_COLOR_BLUE = 0x0000a448, + TD_HS_BORDER_COLOR_ALPHA = 0x0000a44c, + TD_LS_BORDER_COLOR_INDEX = 0x0000a450, +/* INDEX_mask = 0x1f << 0, */ +/* INDEX_shift = 0, */ + TD_LS_BORDER_COLOR_RED = 0x0000a454, + TD_LS_BORDER_COLOR_GREEN = 0x0000a458, + TD_LS_BORDER_COLOR_BLUE = 0x0000a45c, + TD_LS_BORDER_COLOR_ALPHA = 0x0000a460, + TD_CS_BORDER_COLOR_INDEX = 0x0000a464, +/* INDEX_mask = 0x1f << 0, */ +/* INDEX_shift = 0, */ + TD_CS_BORDER_COLOR_RED = 0x0000a468, + TD_CS_BORDER_COLOR_GREEN = 0x0000a46c, + TD_CS_BORDER_COLOR_BLUE = 0x0000a470, + TD_CS_BORDER_COLOR_ALPHA = 0x0000a474, + DB_RENDER_CONTROL = 0x00028000, + DEPTH_CLEAR_ENABLE_bit = 1 << 0, + STENCIL_CLEAR_ENABLE_bit = 1 << 1, + DEPTH_COPY_bit 
= 1 << 2, + STENCIL_COPY_bit = 1 << 3, + RESUMMARIZE_ENABLE_bit = 1 << 4, + STENCIL_COMPRESS_DISABLE_bit = 1 << 5, + DEPTH_COMPRESS_DISABLE_bit = 1 << 6, + COPY_CENTROID_bit = 1 << 7, + COPY_SAMPLE_mask = 0x07 << 8, + COPY_SAMPLE_shift = 8, + COLOR_DISABLE_bit = 1 << 12, + DB_COUNT_CONTROL = 0x00028004, + ZPASS_INCREMENT_DISABLE_bit = 1 << 0, + PERFECT_ZPASS_COUNTS_bit = 1 << 1, + DB_DEPTH_VIEW = 0x00028008, + SLICE_START_mask = 0x7ff << 0, + SLICE_START_shift = 0, + SLICE_MAX_mask = 0x7ff << 13, + SLICE_MAX_shift = 13, + Z_READ_ONLY_bit = 1 << 24, + STENCIL_READ_ONLY_bit = 1 << 25, + DB_RENDER_OVERRIDE = 0x0002800c, + FORCE_HIZ_ENABLE_mask = 0x03 << 0, + FORCE_HIZ_ENABLE_shift = 0, + FORCE_OFF = 0x00, + FORCE_ENABLE = 0x01, + FORCE_DISABLE = 0x02, + FORCE_RESERVED = 0x03, + FORCE_HIS_ENABLE0_mask = 0x03 << 2, + FORCE_HIS_ENABLE0_shift = 2, +/* FORCE_OFF = 0x00, */ +/* FORCE_ENABLE = 0x01, */ +/* FORCE_DISABLE = 0x02, */ +/* FORCE_RESERVED = 0x03, */ + FORCE_HIS_ENABLE1_mask = 0x03 << 4, + FORCE_HIS_ENABLE1_shift = 4, +/* FORCE_OFF = 0x00, */ +/* FORCE_ENABLE = 0x01, */ +/* FORCE_DISABLE = 0x02, */ +/* FORCE_RESERVED = 0x03, */ + FORCE_SHADER_Z_ORDER_bit = 1 << 6, + FAST_Z_DISABLE_bit = 1 << 7, + FAST_STENCIL_DISABLE_bit = 1 << 8, + NOOP_CULL_DISABLE_bit = 1 << 9, + FORCE_COLOR_KILL_bit = 1 << 10, + FORCE_Z_READ_bit = 1 << 11, + FORCE_STENCIL_READ_bit = 1 << 12, + FORCE_FULL_Z_RANGE_mask = 0x03 << 13, + FORCE_FULL_Z_RANGE_shift = 13, +/* FORCE_OFF = 0x00, */ +/* FORCE_ENABLE = 0x01, */ +/* FORCE_DISABLE = 0x02, */ +/* FORCE_RESERVED = 0x03, */ + FORCE_QC_SMASK_CONFLICT_bit = 1 << 15, + DISABLE_VIEWPORT_CLAMP_bit = 1 << 16, + IGNORE_SC_ZRANGE_bit = 1 << 17, + DISABLE_FULLY_COVERED_bit = 1 << 18, + FORCE_Z_LIMIT_SUMM_mask = 0x03 << 19, + FORCE_Z_LIMIT_SUMM_shift = 19, + FORCE_SUMM_OFF = 0x00, + FORCE_SUMM_MINZ = 0x01, + FORCE_SUMM_MAXZ = 0x02, + FORCE_SUMM_BOTH = 0x03, + MAX_TILES_IN_DTT_mask = 0x1f << 21, + MAX_TILES_IN_DTT_shift = 21, + DISABLE_PIXEL_RATE_TILES_bit 
= 1 << 26, + FORCE_Z_DIRTY_bit = 1 << 27, + FORCE_STENCIL_DIRTY_bit = 1 << 28, + FORCE_Z_VALID_bit = 1 << 29, + FORCE_STENCIL_VALID_bit = 1 << 30, + PRESERVE_COMPRESSION_bit = 1 << 31, + DB_RENDER_OVERRIDE2 = 0x00028010, + PARTIAL_SQUAD_LAUNCH_CONTROL_mask = 0x03 << 0, + PARTIAL_SQUAD_LAUNCH_CONTROL_shift = 0, + PSLC_AUTO = 0x00, + PSLC_ON_HANG_ONLY = 0x01, + PSLC_ASAP = 0x02, + PSLC_COUNTDOWN = 0x03, + PARTIAL_SQUAD_LAUNCH_COUNTDOWN_mask = 0x07 << 2, + PARTIAL_SQUAD_LAUNCH_COUNTDOWN_shift = 2, + DISABLE_ZMASK_EXPCLEAR_OPTIMIZATIO_bit = 1 << 5, + DB_HTILE_DATA_BASE = 0x00028014, + DB_STENCIL_CLEAR = 0x00028028, + DB_STENCIL_CLEAR__CLEAR_mask = 0xff << 0, + DB_STENCIL_CLEAR__CLEAR_shift = 0, + MIN_mask = 0xff << 16, + MIN_shift = 16, + DB_DEPTH_CLEAR = 0x0002802c, + PA_SC_SCREEN_SCISSOR_TL = 0x00028030, + PA_SC_SCREEN_SCISSOR_TL__TL_X_mask = 0xffff << 0, + PA_SC_SCREEN_SCISSOR_TL__TL_X_shift = 0, + PA_SC_SCREEN_SCISSOR_TL__TL_Y_mask = 0xffff << 16, + PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift = 16, + PA_SC_SCREEN_SCISSOR_BR = 0x00028034, + PA_SC_SCREEN_SCISSOR_BR__BR_X_mask = 0xffff << 0, + PA_SC_SCREEN_SCISSOR_BR__BR_X_shift = 0, + PA_SC_SCREEN_SCISSOR_BR__BR_Y_mask = 0xffff << 16, + PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift = 16, + DB_Z_INFO = 0x00028040, + DB_Z_INFO__FORMAT_mask = 0x03 << 0, + DB_Z_INFO__FORMAT_shift = 0, + Z_INVALID = 0x00, + Z_16 = 0x01, + Z_24 = 0x02, + Z_32_FLOAT = 0x03, + DB_Z_INFO__ARRAY_MODE_mask = 0x0f << 4, + DB_Z_INFO__ARRAY_MODE_shift = 4, + ARRAY_LINEAR_GENERAL = 0x00, + ARRAY_LINEAR_ALIGNED = 0x01, + ARRAY_1D_TILED_THIN1 = 0x02, + ARRAY_2D_TILED_THIN1 = 0x04, + DB_Z_INFO__TILE_SPLIT_mask = 0x07 << 8, + DB_Z_INFO__TILE_SPLIT_shift = 8, + ADDR_SURF_TILE_SPLIT_64B = 0x00, + ADDR_SURF_TILE_SPLIT_128B = 0x01, + ADDR_SURF_TILE_SPLIT_256B = 0x02, + ADDR_SURF_TILE_SPLIT_512B = 0x03, + ADDR_SURF_TILE_SPLIT_1KB = 0x04, + ADDR_SURF_TILE_SPLIT_2KB = 0x05, + ADDR_SURF_TILE_SPLIT_4KB = 0x06, + DB_Z_INFO__NUM_BANKS_mask = 0x03 << 12, + 
DB_Z_INFO__NUM_BANKS_shift = 12, + ADDR_SURF_2_BANK = 0x00, + ADDR_SURF_4_BANK = 0x01, + ADDR_SURF_8_BANK = 0x02, + ADDR_SURF_16_BANK = 0x03, + DB_Z_INFO__BANK_WIDTH_mask = 0x03 << 16, + DB_Z_INFO__BANK_WIDTH_shift = 16, + ADDR_SURF_BANK_WIDTH_1 = 0x00, + ADDR_SURF_BANK_WIDTH_2 = 0x01, + ADDR_SURF_BANK_WIDTH_4 = 0x02, + ADDR_SURF_BANK_WIDTH_8 = 0x03, + DB_Z_INFO__BANK_HEIGHT_mask = 0x03 << 20, + DB_Z_INFO__BANK_HEIGHT_shift = 20, + ADDR_SURF_BANK_HEIGHT_1 = 0x00, + ADDR_SURF_BANK_HEIGHT_2 = 0x01, + ADDR_SURF_BANK_HEIGHT_4 = 0x02, + ADDR_SURF_BANK_HEIGHT_8 = 0x03, + DB_Z_INFO__MACRO_TILE_ASPECT_mask = 0x03 << 24, + DB_Z_INFO__MACRO_TILE_ASPECT_shift = 24, + ADDR_SURF_MACRO_ASPECT_1 = 0x00, + ADDR_SURF_MACRO_ASPECT_2 = 0x01, + ADDR_SURF_MACRO_ASPECT_4 = 0x02, + ADDR_SURF_MACRO_ASPECT_8 = 0x03, + ALLOW_EXPCLEAR_bit = 1 << 27, + READ_SIZE_bit = 1 << 28, + TILE_SURFACE_ENABLE_bit = 1 << 29, + DB_Z_INFO__TILE_COMPACT_bit = 1 << 30, + ZRANGE_PRECISION_bit = 1 << 31, + DB_STENCIL_INFO = 0x00028044, + DB_STENCIL_INFO__FORMAT_bit = 1 << 0, + DB_STENCIL_INFO__TILE_SPLIT_mask = 0x07 << 8, + DB_STENCIL_INFO__TILE_SPLIT_shift = 8, +/* ADDR_SURF_TILE_SPLIT_64B = 0x00, */ +/* ADDR_SURF_TILE_SPLIT_128B = 0x01, */ +/* ADDR_SURF_TILE_SPLIT_256B = 0x02, */ +/* ADDR_SURF_TILE_SPLIT_512B = 0x03, */ +/* ADDR_SURF_TILE_SPLIT_1KB = 0x04, */ +/* ADDR_SURF_TILE_SPLIT_2KB = 0x05, */ +/* ADDR_SURF_TILE_SPLIT_4KB = 0x06, */ + DB_Z_READ_BASE = 0x00028048, + DB_STENCIL_READ_BASE = 0x0002804c, + DB_Z_WRITE_BASE = 0x00028050, + DB_STENCIL_WRITE_BASE = 0x00028054, + DB_DEPTH_SIZE = 0x00028058, + PITCH_TILE_MAX_mask = 0x7ff << 0, + PITCH_TILE_MAX_shift = 0, + HEIGHT_TILE_MAX_mask = 0x7ff << 11, + HEIGHT_TILE_MAX_shift = 11, + DB_DEPTH_SLICE = 0x0002805c, + SLICE_TILE_MAX_mask = 0x3fffff << 0, + SLICE_TILE_MAX_shift = 0, + SQ_ALU_CONST_BUFFER_SIZE_PS_0 = 0x00028140, + SQ_ALU_CONST_BUFFER_SIZE_PS_0_num = 16, + SQ_ALU_CONST_BUFFER_SIZE_PS_0__DATA_mask = 0x1ff << 0, + 
SQ_ALU_CONST_BUFFER_SIZE_PS_0__DATA_shift = 0, + SQ_ALU_CONST_BUFFER_SIZE_VS_0 = 0x00028180, + SQ_ALU_CONST_BUFFER_SIZE_VS_0_num = 16, + SQ_ALU_CONST_BUFFER_SIZE_VS_0__DATA_mask = 0x1ff << 0, + SQ_ALU_CONST_BUFFER_SIZE_VS_0__DATA_shift = 0, + SQ_ALU_CONST_BUFFER_SIZE_GS_0 = 0x000281c0, + SQ_ALU_CONST_BUFFER_SIZE_GS_0_num = 16, + SQ_ALU_CONST_BUFFER_SIZE_GS_0__DATA_mask = 0x1ff << 0, + SQ_ALU_CONST_BUFFER_SIZE_GS_0__DATA_shift = 0, + PA_SC_WINDOW_OFFSET = 0x00028200, + WINDOW_X_OFFSET_mask = 0xffff << 0, + WINDOW_X_OFFSET_shift = 0, + WINDOW_Y_OFFSET_mask = 0xffff << 16, + WINDOW_Y_OFFSET_shift = 16, + PA_SC_WINDOW_SCISSOR_TL = 0x00028204, + PA_SC_WINDOW_SCISSOR_TL__TL_X_mask = 0x7fff << 0, + PA_SC_WINDOW_SCISSOR_TL__TL_X_shift = 0, + PA_SC_WINDOW_SCISSOR_TL__TL_Y_mask = 0x7fff << 16, + PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift = 16, + WINDOW_OFFSET_DISABLE_bit = 1 << 31, + PA_SC_WINDOW_SCISSOR_BR = 0x00028208, + PA_SC_WINDOW_SCISSOR_BR__BR_X_mask = 0x7fff << 0, + PA_SC_WINDOW_SCISSOR_BR__BR_X_shift = 0, + PA_SC_WINDOW_SCISSOR_BR__BR_Y_mask = 0x7fff << 16, + PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift = 16, + PA_SC_CLIPRECT_RULE = 0x0002820c, + CLIP_RULE_mask = 0xffff << 0, + CLIP_RULE_shift = 0, + PA_SC_CLIPRECT_0_TL = 0x00028210, + PA_SC_CLIPRECT_0_TL_num = 4, + PA_SC_CLIPRECT_0_TL_offset = 8, + PA_SC_CLIPRECT_0_TL__TL_X_mask = 0x7fff << 0, + PA_SC_CLIPRECT_0_TL__TL_X_shift = 0, + PA_SC_CLIPRECT_0_TL__TL_Y_mask = 0x7fff << 16, + PA_SC_CLIPRECT_0_TL__TL_Y_shift = 16, + PA_SC_CLIPRECT_0_BR = 0x00028214, + PA_SC_CLIPRECT_0_BR_num = 4, + PA_SC_CLIPRECT_0_BR_offset = 8, + PA_SC_CLIPRECT_0_BR__BR_X_mask = 0x7fff << 0, + PA_SC_CLIPRECT_0_BR__BR_X_shift = 0, + PA_SC_CLIPRECT_0_BR__BR_Y_mask = 0x7fff << 16, + PA_SC_CLIPRECT_0_BR__BR_Y_shift = 16, + PA_SC_EDGERULE = 0x00028230, + ER_TRI_mask = 0x0f << 0, + ER_TRI_shift = 0, + ER_POINT_mask = 0x0f << 4, + ER_POINT_shift = 4, + ER_RECT_mask = 0x0f << 8, + ER_RECT_shift = 8, + ER_LINE_LR_mask = 0x3f << 12, + ER_LINE_LR_shift = 12, + 
ER_LINE_RL_mask = 0x3f << 18, + ER_LINE_RL_shift = 18, + ER_LINE_TB_mask = 0x0f << 24, + ER_LINE_TB_shift = 24, + ER_LINE_BT_mask = 0x0f << 28, + ER_LINE_BT_shift = 28, + PA_SU_HARDWARE_SCREEN_OFFSET = 0x00028234, + HW_SCREEN_OFFSET_X_mask = 0x1f << 0, + HW_SCREEN_OFFSET_X_shift = 0, + HW_SCREEN_OFFSET_Y_mask = 0x1f << 8, + HW_SCREEN_OFFSET_Y_shift = 8, + CB_TARGET_MASK = 0x00028238, + TARGET0_ENABLE_mask = 0x0f << 0, + TARGET0_ENABLE_shift = 0, + TARGET1_ENABLE_mask = 0x0f << 4, + TARGET1_ENABLE_shift = 4, + TARGET2_ENABLE_mask = 0x0f << 8, + TARGET2_ENABLE_shift = 8, + TARGET3_ENABLE_mask = 0x0f << 12, + TARGET3_ENABLE_shift = 12, + TARGET4_ENABLE_mask = 0x0f << 16, + TARGET4_ENABLE_shift = 16, + TARGET5_ENABLE_mask = 0x0f << 20, + TARGET5_ENABLE_shift = 20, + TARGET6_ENABLE_mask = 0x0f << 24, + TARGET6_ENABLE_shift = 24, + TARGET7_ENABLE_mask = 0x0f << 28, + TARGET7_ENABLE_shift = 28, + CB_SHADER_MASK = 0x0002823c, + OUTPUT0_ENABLE_mask = 0x0f << 0, + OUTPUT0_ENABLE_shift = 0, + OUTPUT1_ENABLE_mask = 0x0f << 4, + OUTPUT1_ENABLE_shift = 4, + OUTPUT2_ENABLE_mask = 0x0f << 8, + OUTPUT2_ENABLE_shift = 8, + OUTPUT3_ENABLE_mask = 0x0f << 12, + OUTPUT3_ENABLE_shift = 12, + OUTPUT4_ENABLE_mask = 0x0f << 16, + OUTPUT4_ENABLE_shift = 16, + OUTPUT5_ENABLE_mask = 0x0f << 20, + OUTPUT5_ENABLE_shift = 20, + OUTPUT6_ENABLE_mask = 0x0f << 24, + OUTPUT6_ENABLE_shift = 24, + OUTPUT7_ENABLE_mask = 0x0f << 28, + OUTPUT7_ENABLE_shift = 28, + PA_SC_GENERIC_SCISSOR_TL = 0x00028240, + PA_SC_GENERIC_SCISSOR_TL__TL_X_mask = 0x7fff << 0, + PA_SC_GENERIC_SCISSOR_TL__TL_X_shift = 0, + PA_SC_GENERIC_SCISSOR_TL__TL_Y_mask = 0x7fff << 16, + PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift = 16, +/* WINDOW_OFFSET_DISABLE_bit = 1 << 31, */ + PA_SC_GENERIC_SCISSOR_BR = 0x00028244, + PA_SC_GENERIC_SCISSOR_BR__BR_X_mask = 0x7fff << 0, + PA_SC_GENERIC_SCISSOR_BR__BR_X_shift = 0, + PA_SC_GENERIC_SCISSOR_BR__BR_Y_mask = 0x7fff << 16, + PA_SC_GENERIC_SCISSOR_BR__BR_Y_shift = 16, + PA_SC_VPORT_SCISSOR_0_TL = 
0x00028250, + PA_SC_VPORT_SCISSOR_0_TL_num = 16, + PA_SC_VPORT_SCISSOR_0_TL_offset = 8, + PA_SC_VPORT_SCISSOR_0_TL__TL_X_mask = 0x7fff << 0, + PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift = 0, + PA_SC_VPORT_SCISSOR_0_TL__TL_Y_mask = 0x7fff << 16, + PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift = 16, +/* WINDOW_OFFSET_DISABLE_bit = 1 << 31, */ + PA_SC_VPORT_SCISSOR_0_BR = 0x00028254, + PA_SC_VPORT_SCISSOR_0_BR_num = 16, + PA_SC_VPORT_SCISSOR_0_BR_offset = 8, + PA_SC_VPORT_SCISSOR_0_BR__BR_X_mask = 0x7fff << 0, + PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift = 0, + PA_SC_VPORT_SCISSOR_0_BR__BR_Y_mask = 0x7fff << 16, + PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift = 16, + PA_SC_VPORT_ZMIN_0 = 0x000282d0, + PA_SC_VPORT_ZMIN_0_num = 16, + PA_SC_VPORT_ZMIN_0_offset = 8, + PA_SC_VPORT_ZMAX_0 = 0x000282d4, + PA_SC_VPORT_ZMAX_0_num = 16, + PA_SC_VPORT_ZMAX_0_offset = 8, + SX_MISC = 0x00028350, + MULTIPASS_bit = 1 << 0, + SQ_VTX_SEMANTIC_0 = 0x00028380, + SQ_VTX_SEMANTIC_0_num = 32, +/* SEMANTIC_ID_mask = 0xff << 0, */ +/* SEMANTIC_ID_shift = 0, */ + VGT_MAX_VTX_INDX = 0x00028400, + VGT_MIN_VTX_INDX = 0x00028404, + VGT_INDX_OFFSET = 0x00028408, + VGT_MULTI_PRIM_IB_RESET_INDX = 0x0002840c, + SX_ALPHA_TEST_CONTROL = 0x00028410, + ALPHA_FUNC_mask = 0x07 << 0, + ALPHA_FUNC_shift = 0, + REF_NEVER = 0x00, + REF_LESS = 0x01, + REF_EQUAL = 0x02, + REF_LEQUAL = 0x03, + REF_GREATER = 0x04, + REF_NOTEQUAL = 0x05, + REF_GEQUAL = 0x06, + REF_ALWAYS = 0x07, + ALPHA_TEST_ENABLE_bit = 1 << 3, + ALPHA_TEST_BYPASS_bit = 1 << 8, + CB_BLEND_RED = 0x00028414, + CB_BLEND_GREEN = 0x00028418, + CB_BLEND_BLUE = 0x0002841c, + CB_BLEND_ALPHA = 0x00028420, + DB_STENCILREFMASK = 0x00028430, + STENCILREF_mask = 0xff << 0, + STENCILREF_shift = 0, + STENCILMASK_mask = 0xff << 8, + STENCILMASK_shift = 8, + STENCILWRITEMASK_mask = 0xff << 16, + STENCILWRITEMASK_shift = 16, + DB_STENCILREFMASK_BF = 0x00028434, + STENCILREF_BF_mask = 0xff << 0, + STENCILREF_BF_shift = 0, + STENCILMASK_BF_mask = 0xff << 8, + STENCILMASK_BF_shift = 8, + 
STENCILWRITEMASK_BF_mask = 0xff << 16, + STENCILWRITEMASK_BF_shift = 16, + SX_ALPHA_REF = 0x00028438, + PA_CL_VPORT_XSCALE_0 = 0x0002843c, + PA_CL_VPORT_XSCALE_0_num = 16, + PA_CL_VPORT_XSCALE_0_offset = 24, + PA_CL_VPORT_XOFFSET_0 = 0x00028440, + PA_CL_VPORT_XOFFSET_0_num = 16, + PA_CL_VPORT_XOFFSET_0_offset = 24, + PA_CL_VPORT_YSCALE_0 = 0x00028444, + PA_CL_VPORT_YSCALE_0_num = 16, + PA_CL_VPORT_YSCALE_0_offset = 24, + PA_CL_VPORT_YOFFSET_0 = 0x00028448, + PA_CL_VPORT_YOFFSET_0_num = 16, + PA_CL_VPORT_YOFFSET_0_offset = 24, + PA_CL_VPORT_ZSCALE_0 = 0x0002844c, + PA_CL_VPORT_ZSCALE_0_num = 16, + PA_CL_VPORT_ZSCALE_0_offset = 24, + PA_CL_VPORT_ZOFFSET_0 = 0x00028450, + PA_CL_VPORT_ZOFFSET_0_num = 16, + PA_CL_VPORT_ZOFFSET_0_offset = 24, + PA_CL_UCP_0_X = 0x000285bc, + PA_CL_UCP_0_X_num = 6, + PA_CL_UCP_0_X_offset = 16, + PA_CL_UCP_0_Y = 0x000285c0, + PA_CL_UCP_0_Y_num = 6, + PA_CL_UCP_0_Y_offset = 16, + PA_CL_UCP_0_Z = 0x000285c4, + PA_CL_UCP_0_Z_num = 6, + PA_CL_UCP_0_Z_offset = 16, + PA_CL_UCP_0_W = 0x000285c8, + PA_CL_UCP_0_W_num = 6, + PA_CL_UCP_0_W_offset = 16, + SPI_VS_OUT_ID_0 = 0x0002861c, + SPI_VS_OUT_ID_0_num = 10, + SEMANTIC_0_mask = 0xff << 0, + SEMANTIC_0_shift = 0, + SEMANTIC_1_mask = 0xff << 8, + SEMANTIC_1_shift = 8, + SEMANTIC_2_mask = 0xff << 16, + SEMANTIC_2_shift = 16, + SEMANTIC_3_mask = 0xff << 24, + SEMANTIC_3_shift = 24, + SPI_PS_INPUT_CNTL_0 = 0x00028644, + SPI_PS_INPUT_CNTL_0_num = 32, + SEMANTIC_mask = 0xff << 0, + SEMANTIC_shift = 0, + DEFAULT_VAL_mask = 0x03 << 8, + DEFAULT_VAL_shift = 8, + X_0_0F = 0x00, + FLAT_SHADE_bit = 1 << 10, + CYL_WRAP_mask = 0x0f << 13, + CYL_WRAP_shift = 13, + PT_SPRITE_TEX_bit = 1 << 17, + SPI_VS_OUT_CONFIG = 0x000286c4, + VS_PER_COMPONENT_bit = 1 << 0, + VS_EXPORT_COUNT_mask = 0x1f << 1, + VS_EXPORT_COUNT_shift = 1, + VS_EXPORTS_FOG_bit = 1 << 8, + VS_OUT_FOG_VEC_ADDR_mask = 0x1f << 9, + VS_OUT_FOG_VEC_ADDR_shift = 9, + SPI_PS_IN_CONTROL_0 = 0x000286cc, + NUM_INTERP_mask = 0x3f << 0, + NUM_INTERP_shift = 0, 
+ POSITION_ENA_bit = 1 << 8, + POSITION_CENTROID_bit = 1 << 9, + POSITION_ADDR_mask = 0x1f << 10, + POSITION_ADDR_shift = 10, + PARAM_GEN_mask = 0x0f << 15, + PARAM_GEN_shift = 15, + PERSP_GRADIENT_ENA_bit = 1 << 28, + LINEAR_GRADIENT_ENA_bit = 1 << 29, + POSITION_SAMPLE_bit = 1 << 30, + SPI_PS_IN_CONTROL_1 = 0x000286d0, + FRONT_FACE_ENA_bit = 1 << 8, + FRONT_FACE_ALL_BITS_bit = 1 << 11, + FRONT_FACE_ADDR_mask = 0x1f << 12, + FRONT_FACE_ADDR_shift = 12, + FOG_ADDR_mask = 0x7f << 17, + FOG_ADDR_shift = 17, + FIXED_PT_POSITION_ENA_bit = 1 << 24, + FIXED_PT_POSITION_ADDR_mask = 0x1f << 25, + FIXED_PT_POSITION_ADDR_shift = 25, + POSITION_ULC_bit = 1 << 30, + SPI_INTERP_CONTROL_0 = 0x000286d4, + FLAT_SHADE_ENA_bit = 1 << 0, + PNT_SPRITE_ENA_bit = 1 << 1, + PNT_SPRITE_OVRD_X_mask = 0x07 << 2, + PNT_SPRITE_OVRD_X_shift = 2, + SPI_PNT_SPRITE_SEL_0 = 0x00, + SPI_PNT_SPRITE_SEL_1 = 0x01, + SPI_PNT_SPRITE_SEL_S = 0x02, + SPI_PNT_SPRITE_SEL_T = 0x03, + SPI_PNT_SPRITE_SEL_NONE = 0x04, + PNT_SPRITE_OVRD_Y_mask = 0x07 << 5, + PNT_SPRITE_OVRD_Y_shift = 5, +/* SPI_PNT_SPRITE_SEL_0 = 0x00, */ +/* SPI_PNT_SPRITE_SEL_1 = 0x01, */ +/* SPI_PNT_SPRITE_SEL_S = 0x02, */ +/* SPI_PNT_SPRITE_SEL_T = 0x03, */ +/* SPI_PNT_SPRITE_SEL_NONE = 0x04, */ + PNT_SPRITE_OVRD_Z_mask = 0x07 << 8, + PNT_SPRITE_OVRD_Z_shift = 8, +/* SPI_PNT_SPRITE_SEL_0 = 0x00, */ +/* SPI_PNT_SPRITE_SEL_1 = 0x01, */ +/* SPI_PNT_SPRITE_SEL_S = 0x02, */ +/* SPI_PNT_SPRITE_SEL_T = 0x03, */ +/* SPI_PNT_SPRITE_SEL_NONE = 0x04, */ + PNT_SPRITE_OVRD_W_mask = 0x07 << 11, + PNT_SPRITE_OVRD_W_shift = 11, +/* SPI_PNT_SPRITE_SEL_0 = 0x00, */ +/* SPI_PNT_SPRITE_SEL_1 = 0x01, */ +/* SPI_PNT_SPRITE_SEL_S = 0x02, */ +/* SPI_PNT_SPRITE_SEL_T = 0x03, */ +/* SPI_PNT_SPRITE_SEL_NONE = 0x04, */ + PNT_SPRITE_TOP_1_bit = 1 << 14, + SPI_INPUT_Z = 0x000286d8, + PROVIDE_Z_TO_SPI_bit = 1 << 0, + SPI_FOG_CNTL = 0x000286dc, + PASS_FOG_THROUGH_PS_bit = 1 << 0, + SPI_BARYC_CNTL = 0x000286e0, + PERSP_CENTER_ENA_mask = 0x03 << 0, + PERSP_CENTER_ENA_shift = 
0, + X_OFF = 0x00, + PERSP_CENTER_ENA__X_ON_AT_CENTER = 0x01, + PERSP_CENTER_ENA__X_ON_AT_CENTROID = 0x02, + PERSP_CENTROID_ENA_mask = 0x03 << 4, + PERSP_CENTROID_ENA_shift = 4, +/* X_OFF = 0x00, */ + PERSP_CENTROID_ENA__X_ON_AT_CENTROID = 0x01, + PERSP_CENTROID_ENA__X_ON_AT_CENTER = 0x02, + PERSP_SAMPLE_ENA_mask = 0x03 << 8, + PERSP_SAMPLE_ENA_shift = 8, +/* X_OFF = 0x00, */ + PERSP_PULL_MODEL_ENA_mask = 0x03 << 12, + PERSP_PULL_MODEL_ENA_shift = 12, +/* X_OFF = 0x00, */ + LINEAR_CENTER_ENA_mask = 0x03 << 16, + LINEAR_CENTER_ENA_shift = 16, +/* X_OFF = 0x00, */ + LINEAR_CENTER_ENA__X_ON_AT_CENTER = 0x01, + LINEAR_CENTER_ENA__X_ON_AT_CENTROID = 0x02, + LINEAR_CENTROID_ENA_mask = 0x03 << 20, + LINEAR_CENTROID_ENA_shift = 20, +/* X_OFF = 0x00, */ + LINEAR_CENTROID_ENA__X_ON_AT_CENTROID = 0x01, + LINEAR_CENTROID_ENA__X_ON_AT_CENTER = 0x02, + LINEAR_SAMPLE_ENA_mask = 0x03 << 24, + LINEAR_SAMPLE_ENA_shift = 24, +/* X_OFF = 0x00, */ + SPI_PS_IN_CONTROL_2 = 0x000286e4, + LINE_STIPPLE_TEX_ADDR_mask = 0xff << 0, + LINE_STIPPLE_TEX_ADDR_shift = 0, + LINE_STIPPLE_TEX_ENA_bit = 1 << 8, + CB_BLEND0_CONTROL = 0x00028780, + CB_BLEND0_CONTROL_num = 8, + COLOR_SRCBLEND_mask = 0x1f << 0, + COLOR_SRCBLEND_shift = 0, + BLEND_ZERO = 0x00, + BLEND_ONE = 0x01, + BLEND_SRC_COLOR = 0x02, + BLEND_ONE_MINUS_SRC_COLOR = 0x03, + BLEND_SRC_ALPHA = 0x04, + BLEND_ONE_MINUS_SRC_ALPHA = 0x05, + BLEND_DST_ALPHA = 0x06, + BLEND_ONE_MINUS_DST_ALPHA = 0x07, + BLEND_DST_COLOR = 0x08, + BLEND_ONE_MINUS_DST_COLOR = 0x09, + BLEND_SRC_ALPHA_SATURATE = 0x0a, + BLEND_BOTH_SRC_ALPHA = 0x0b, + BLEND_BOTH_INV_SRC_ALPHA = 0x0c, + BLEND_CONSTANT_COLOR = 0x0d, + BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0e, + BLEND_SRC1_COLOR = 0x0f, + BLEND_INV_SRC1_COLOR = 0x10, + BLEND_SRC1_ALPHA = 0x11, + BLEND_INV_SRC1_ALPHA = 0x12, + BLEND_CONSTANT_ALPHA = 0x13, + BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x14, + COLOR_COMB_FCN_mask = 0x07 << 5, + COLOR_COMB_FCN_shift = 5, + COMB_DST_PLUS_SRC = 0x00, + COMB_SRC_MINUS_DST = 0x01, + 
COMB_MIN_DST_SRC = 0x02, + COMB_MAX_DST_SRC = 0x03, + COMB_DST_MINUS_SRC = 0x04, + COLOR_DESTBLEND_mask = 0x1f << 8, + COLOR_DESTBLEND_shift = 8, +/* BLEND_ZERO = 0x00, */ +/* BLEND_ONE = 0x01, */ +/* BLEND_SRC_COLOR = 0x02, */ +/* BLEND_ONE_MINUS_SRC_COLOR = 0x03, */ +/* BLEND_SRC_ALPHA = 0x04, */ +/* BLEND_ONE_MINUS_SRC_ALPHA = 0x05, */ +/* BLEND_DST_ALPHA = 0x06, */ +/* BLEND_ONE_MINUS_DST_ALPHA = 0x07, */ +/* BLEND_DST_COLOR = 0x08, */ +/* BLEND_ONE_MINUS_DST_COLOR = 0x09, */ +/* BLEND_SRC_ALPHA_SATURATE = 0x0a, */ +/* BLEND_BOTH_SRC_ALPHA = 0x0b, */ +/* BLEND_BOTH_INV_SRC_ALPHA = 0x0c, */ +/* BLEND_CONSTANT_COLOR = 0x0d, */ +/* BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0e, */ +/* BLEND_SRC1_COLOR = 0x0f, */ +/* BLEND_INV_SRC1_COLOR = 0x10, */ +/* BLEND_SRC1_ALPHA = 0x11, */ +/* BLEND_INV_SRC1_ALPHA = 0x12, */ +/* BLEND_CONSTANT_ALPHA = 0x13, */ +/* BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x14, */ + ALPHA_SRCBLEND_mask = 0x1f << 16, + ALPHA_SRCBLEND_shift = 16, +/* BLEND_ZERO = 0x00, */ +/* BLEND_ONE = 0x01, */ +/* BLEND_SRC_COLOR = 0x02, */ +/* BLEND_ONE_MINUS_SRC_COLOR = 0x03, */ +/* BLEND_SRC_ALPHA = 0x04, */ +/* BLEND_ONE_MINUS_SRC_ALPHA = 0x05, */ +/* BLEND_DST_ALPHA = 0x06, */ +/* BLEND_ONE_MINUS_DST_ALPHA = 0x07, */ +/* BLEND_DST_COLOR = 0x08, */ +/* BLEND_ONE_MINUS_DST_COLOR = 0x09, */ +/* BLEND_SRC_ALPHA_SATURATE = 0x0a, */ +/* BLEND_BOTH_SRC_ALPHA = 0x0b, */ +/* BLEND_BOTH_INV_SRC_ALPHA = 0x0c, */ +/* BLEND_CONSTANT_COLOR = 0x0d, */ +/* BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0e, */ +/* BLEND_SRC1_COLOR = 0x0f, */ +/* BLEND_INV_SRC1_COLOR = 0x10, */ +/* BLEND_SRC1_ALPHA = 0x11, */ +/* BLEND_INV_SRC1_ALPHA = 0x12, */ +/* BLEND_CONSTANT_ALPHA = 0x13, */ +/* BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x14, */ + ALPHA_COMB_FCN_mask = 0x07 << 21, + ALPHA_COMB_FCN_shift = 21, +/* COMB_DST_PLUS_SRC = 0x00, */ +/* COMB_SRC_MINUS_DST = 0x01, */ +/* COMB_MIN_DST_SRC = 0x02, */ +/* COMB_MAX_DST_SRC = 0x03, */ +/* COMB_DST_MINUS_SRC = 0x04, */ + ALPHA_DESTBLEND_mask = 0x1f << 24, + 
ALPHA_DESTBLEND_shift = 24, +/* BLEND_ZERO = 0x00, */ +/* BLEND_ONE = 0x01, */ +/* BLEND_SRC_COLOR = 0x02, */ +/* BLEND_ONE_MINUS_SRC_COLOR = 0x03, */ +/* BLEND_SRC_ALPHA = 0x04, */ +/* BLEND_ONE_MINUS_SRC_ALPHA = 0x05, */ +/* BLEND_DST_ALPHA = 0x06, */ +/* BLEND_ONE_MINUS_DST_ALPHA = 0x07, */ +/* BLEND_DST_COLOR = 0x08, */ +/* BLEND_ONE_MINUS_DST_COLOR = 0x09, */ +/* BLEND_SRC_ALPHA_SATURATE = 0x0a, */ +/* BLEND_BOTH_SRC_ALPHA = 0x0b, */ +/* BLEND_BOTH_INV_SRC_ALPHA = 0x0c, */ +/* BLEND_CONSTANT_COLOR = 0x0d, */ +/* BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0e, */ +/* BLEND_SRC1_COLOR = 0x0f, */ +/* BLEND_INV_SRC1_COLOR = 0x10, */ +/* BLEND_SRC1_ALPHA = 0x11, */ +/* BLEND_INV_SRC1_ALPHA = 0x12, */ +/* BLEND_CONSTANT_ALPHA = 0x13, */ +/* BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x14, */ + SEPARATE_ALPHA_BLEND_bit = 1 << 29, + CB_BLEND0_CONTROL__ENABLE_bit = 1 << 30, + PA_CL_POINT_X_RAD = 0x000287d4, + PA_CL_POINT_Y_RAD = 0x000287d8, + PA_CL_POINT_SIZE = 0x000287dc, + PA_CL_POINT_CULL_RAD = 0x000287e0, + VGT_DMA_BASE_HI = 0x000287e4, + VGT_DMA_BASE_HI__BASE_ADDR_mask = 0xff << 0, + VGT_DMA_BASE_HI__BASE_ADDR_shift = 0, + VGT_DMA_BASE = 0x000287e8, + VGT_DRAW_INITIATOR = 0x000287f0, + SOURCE_SELECT_mask = 0x03 << 0, + SOURCE_SELECT_shift = 0, + DI_SRC_SEL_DMA = 0x00, + DI_SRC_SEL_IMMEDIATE = 0x01, + DI_SRC_SEL_AUTO_INDEX = 0x02, + DI_SRC_SEL_RESERVED = 0x03, + MAJOR_MODE_mask = 0x03 << 2, + MAJOR_MODE_shift = 2, + DI_MAJOR_MODE_0 = 0x00, + DI_MAJOR_MODE_1 = 0x01, + NOT_EOP_bit = 1 << 5, + USE_OPAQUE_bit = 1 << 6, + VGT_IMMED_DATA = 0x000287f4, + VGT_EVENT_ADDRESS_REG = 0x000287f8, + ADDRESS_LOW_mask = 0xfffffff << 0, + ADDRESS_LOW_shift = 0, + DB_DEPTH_CONTROL = 0x00028800, + STENCIL_ENABLE_bit = 1 << 0, + Z_ENABLE_bit = 1 << 1, + Z_WRITE_ENABLE_bit = 1 << 2, + ZFUNC_mask = 0x07 << 4, + ZFUNC_shift = 4, + FRAG_NEVER = 0x00, + FRAG_LESS = 0x01, + FRAG_EQUAL = 0x02, + FRAG_LEQUAL = 0x03, + FRAG_GREATER = 0x04, + FRAG_NOTEQUAL = 0x05, + FRAG_GEQUAL = 0x06, + FRAG_ALWAYS = 0x07, + 
BACKFACE_ENABLE_bit = 1 << 7, + STENCILFUNC_mask = 0x07 << 8, + STENCILFUNC_shift = 8, +/* REF_NEVER = 0x00, */ +/* REF_LESS = 0x01, */ +/* REF_EQUAL = 0x02, */ +/* REF_LEQUAL = 0x03, */ +/* REF_GREATER = 0x04, */ +/* REF_NOTEQUAL = 0x05, */ +/* REF_GEQUAL = 0x06, */ +/* REF_ALWAYS = 0x07, */ + STENCILFAIL_mask = 0x07 << 11, + STENCILFAIL_shift = 11, + STENCIL_KEEP = 0x00, + STENCIL_ZERO = 0x01, + STENCIL_REPLACE = 0x02, + STENCIL_INCR_CLAMP = 0x03, + STENCIL_DECR_CLAMP = 0x04, + STENCIL_INVERT = 0x05, + STENCIL_INCR_WRAP = 0x06, + STENCIL_DECR_WRAP = 0x07, + STENCILZPASS_mask = 0x07 << 14, + STENCILZPASS_shift = 14, +/* STENCIL_KEEP = 0x00, */ +/* STENCIL_ZERO = 0x01, */ +/* STENCIL_REPLACE = 0x02, */ +/* STENCIL_INCR_CLAMP = 0x03, */ +/* STENCIL_DECR_CLAMP = 0x04, */ +/* STENCIL_INVERT = 0x05, */ +/* STENCIL_INCR_WRAP = 0x06, */ +/* STENCIL_DECR_WRAP = 0x07, */ + STENCILZFAIL_mask = 0x07 << 17, + STENCILZFAIL_shift = 17, +/* STENCIL_KEEP = 0x00, */ +/* STENCIL_ZERO = 0x01, */ +/* STENCIL_REPLACE = 0x02, */ +/* STENCIL_INCR_CLAMP = 0x03, */ +/* STENCIL_DECR_CLAMP = 0x04, */ +/* STENCIL_INVERT = 0x05, */ +/* STENCIL_INCR_WRAP = 0x06, */ +/* STENCIL_DECR_WRAP = 0x07, */ + STENCILFUNC_BF_mask = 0x07 << 20, + STENCILFUNC_BF_shift = 20, +/* REF_NEVER = 0x00, */ +/* REF_LESS = 0x01, */ +/* REF_EQUAL = 0x02, */ +/* REF_LEQUAL = 0x03, */ +/* REF_GREATER = 0x04, */ +/* REF_NOTEQUAL = 0x05, */ +/* REF_GEQUAL = 0x06, */ +/* REF_ALWAYS = 0x07, */ + STENCILFAIL_BF_mask = 0x07 << 23, + STENCILFAIL_BF_shift = 23, +/* STENCIL_KEEP = 0x00, */ +/* STENCIL_ZERO = 0x01, */ +/* STENCIL_REPLACE = 0x02, */ +/* STENCIL_INCR_CLAMP = 0x03, */ +/* STENCIL_DECR_CLAMP = 0x04, */ +/* STENCIL_INVERT = 0x05, */ +/* STENCIL_INCR_WRAP = 0x06, */ +/* STENCIL_DECR_WRAP = 0x07, */ + STENCILZPASS_BF_mask = 0x07 << 26, + STENCILZPASS_BF_shift = 26, +/* STENCIL_KEEP = 0x00, */ +/* STENCIL_ZERO = 0x01, */ +/* STENCIL_REPLACE = 0x02, */ +/* STENCIL_INCR_CLAMP = 0x03, */ +/* STENCIL_DECR_CLAMP = 0x04, */ 
+/* STENCIL_INVERT = 0x05, */ +/* STENCIL_INCR_WRAP = 0x06, */ +/* STENCIL_DECR_WRAP = 0x07, */ + STENCILZFAIL_BF_mask = 0x07 << 29, + STENCILZFAIL_BF_shift = 29, +/* STENCIL_KEEP = 0x00, */ +/* STENCIL_ZERO = 0x01, */ +/* STENCIL_REPLACE = 0x02, */ +/* STENCIL_INCR_CLAMP = 0x03, */ +/* STENCIL_DECR_CLAMP = 0x04, */ +/* STENCIL_INVERT = 0x05, */ +/* STENCIL_INCR_WRAP = 0x06, */ +/* STENCIL_DECR_WRAP = 0x07, */ + CB_COLOR_CONTROL = 0x00028808, + DEGAMMA_ENABLE_bit = 1 << 3, + CB_COLOR_CONTROL__MODE_mask = 0x07 << 4, + CB_COLOR_CONTROL__MODE_shift = 4, + CB_DISABLE = 0x00, + CB_NORMAL = 0x01, + CB_ELIMINATE_FAST_CLEAR = 0x02, + CB_RESOLVE = 0x03, + CB_DECOMPRESS = 0x04, + CB_FMASK_DECOMPRESS = 0x05, + ROP3_mask = 0xff << 16, + ROP3_shift = 16, + DB_SHADER_CONTROL = 0x0002880c, + Z_EXPORT_ENABLE_bit = 1 << 0, + STENCIL_REF_EXPORT_ENABLE_bit = 1 << 1, + Z_ORDER_mask = 0x03 << 4, + Z_ORDER_shift = 4, + LATE_Z = 0x00, + EARLY_Z_THEN_LATE_Z = 0x01, + RE_Z = 0x02, + EARLY_Z_THEN_RE_Z = 0x03, + KILL_ENABLE_bit = 1 << 6, + COVERAGE_TO_MASK_ENABLE_bit = 1 << 7, + MASK_EXPORT_ENABLE_bit = 1 << 8, + DUAL_EXPORT_ENABLE_bit = 1 << 9, + EXEC_ON_HIER_FAIL_bit = 1 << 10, + EXEC_ON_NOOP_bit = 1 << 11, + ALPHA_TO_MASK_DISABLE_bit = 1 << 12, + DB_SOURCE_FORMAT_mask = 0x03 << 13, + DB_SOURCE_FORMAT_shift = 13, + EXPORT_DB_FULL = 0x00, + EXPORT_DB_FOUR16 = 0x01, + EXPORT_DB_TWO = 0x02, + DEPTH_BEFORE_SHADER_bit = 1 << 15, + CONSERVATIVE_Z_EXPORT_mask = 0x03 << 16, + CONSERVATIVE_Z_EXPORT_shift = 16, + EXPORT_ANY_Z = 0x00, + EXPORT_LESS_THAN_Z = 0x01, + EXPORT_GREATER_THAN_Z = 0x02, + EXPORT_RESERVED = 0x03, + PA_CL_CLIP_CNTL = 0x00028810, + UCP_ENA_0_bit = 1 << 0, + UCP_ENA_1_bit = 1 << 1, + UCP_ENA_2_bit = 1 << 2, + UCP_ENA_3_bit = 1 << 3, + UCP_ENA_4_bit = 1 << 4, + UCP_ENA_5_bit = 1 << 5, + PS_UCP_Y_SCALE_NEG_bit = 1 << 13, + PS_UCP_MODE_mask = 0x03 << 14, + PS_UCP_MODE_shift = 14, + CLIP_DISABLE_bit = 1 << 16, + UCP_CULL_ONLY_ENA_bit = 1 << 17, + BOUNDARY_EDGE_FLAG_ENA_bit = 1 << 18, 
+ DX_CLIP_SPACE_DEF_bit = 1 << 19, + DIS_CLIP_ERR_DETECT_bit = 1 << 20, + VTX_KILL_OR_bit = 1 << 21, + DX_RASTERIZATION_KILL_bit = 1 << 22, + DX_LINEAR_ATTR_CLIP_ENA_bit = 1 << 24, + VTE_VPORT_PROVOKE_DISABLE_bit = 1 << 25, + ZCLIP_NEAR_DISABLE_bit = 1 << 26, + ZCLIP_FAR_DISABLE_bit = 1 << 27, + PA_SU_SC_MODE_CNTL = 0x00028814, + CULL_FRONT_bit = 1 << 0, + CULL_BACK_bit = 1 << 1, + FACE_bit = 1 << 2, + POLY_MODE_mask = 0x03 << 3, + POLY_MODE_shift = 3, + X_DISABLE_POLY_MODE = 0x00, + X_DUAL_MODE = 0x01, + POLYMODE_FRONT_PTYPE_mask = 0x07 << 5, + POLYMODE_FRONT_PTYPE_shift = 5, + X_DRAW_POINTS = 0x00, + X_DRAW_LINES = 0x01, + X_DRAW_TRIANGLES = 0x02, + POLYMODE_BACK_PTYPE_mask = 0x07 << 8, + POLYMODE_BACK_PTYPE_shift = 8, +/* X_DRAW_POINTS = 0x00, */ +/* X_DRAW_LINES = 0x01, */ +/* X_DRAW_TRIANGLES = 0x02, */ + POLY_OFFSET_FRONT_ENABLE_bit = 1 << 11, + POLY_OFFSET_BACK_ENABLE_bit = 1 << 12, + POLY_OFFSET_PARA_ENABLE_bit = 1 << 13, + VTX_WINDOW_OFFSET_ENABLE_bit = 1 << 16, + PROVOKING_VTX_LAST_bit = 1 << 19, + PERSP_CORR_DIS_bit = 1 << 20, + MULTI_PRIM_IB_ENA_bit = 1 << 21, + PA_CL_VTE_CNTL = 0x00028818, + VPORT_X_SCALE_ENA_bit = 1 << 0, + VPORT_X_OFFSET_ENA_bit = 1 << 1, + VPORT_Y_SCALE_ENA_bit = 1 << 2, + VPORT_Y_OFFSET_ENA_bit = 1 << 3, + VPORT_Z_SCALE_ENA_bit = 1 << 4, + VPORT_Z_OFFSET_ENA_bit = 1 << 5, + VTX_XY_FMT_bit = 1 << 8, + VTX_Z_FMT_bit = 1 << 9, + VTX_W0_FMT_bit = 1 << 10, + PA_CL_VS_OUT_CNTL = 0x0002881c, + CLIP_DIST_ENA_0_bit = 1 << 0, + CLIP_DIST_ENA_1_bit = 1 << 1, + CLIP_DIST_ENA_2_bit = 1 << 2, + CLIP_DIST_ENA_3_bit = 1 << 3, + CLIP_DIST_ENA_4_bit = 1 << 4, + CLIP_DIST_ENA_5_bit = 1 << 5, + CLIP_DIST_ENA_6_bit = 1 << 6, + CLIP_DIST_ENA_7_bit = 1 << 7, + CULL_DIST_ENA_0_bit = 1 << 8, + CULL_DIST_ENA_1_bit = 1 << 9, + CULL_DIST_ENA_2_bit = 1 << 10, + CULL_DIST_ENA_3_bit = 1 << 11, + CULL_DIST_ENA_4_bit = 1 << 12, + CULL_DIST_ENA_5_bit = 1 << 13, + CULL_DIST_ENA_6_bit = 1 << 14, + CULL_DIST_ENA_7_bit = 1 << 15, + USE_VTX_POINT_SIZE_bit = 1 << 16, + 
USE_VTX_EDGE_FLAG_bit = 1 << 17, + USE_VTX_RENDER_TARGET_INDX_bit = 1 << 18, + USE_VTX_VIEWPORT_INDX_bit = 1 << 19, + USE_VTX_KILL_FLAG_bit = 1 << 20, + VS_OUT_MISC_VEC_ENA_bit = 1 << 21, + VS_OUT_CCDIST0_VEC_ENA_bit = 1 << 22, + VS_OUT_CCDIST1_VEC_ENA_bit = 1 << 23, + PA_CL_NANINF_CNTL = 0x00028820, + VTE_XY_INF_DISCARD_bit = 1 << 0, + VTE_Z_INF_DISCARD_bit = 1 << 1, + VTE_W_INF_DISCARD_bit = 1 << 2, + VTE_0XNANINF_IS_0_bit = 1 << 3, + VTE_XY_NAN_RETAIN_bit = 1 << 4, + VTE_Z_NAN_RETAIN_bit = 1 << 5, + VTE_W_NAN_RETAIN_bit = 1 << 6, + VTE_W_RECIP_NAN_IS_0_bit = 1 << 7, + VS_XY_NAN_TO_INF_bit = 1 << 8, + VS_XY_INF_RETAIN_bit = 1 << 9, + VS_Z_NAN_TO_INF_bit = 1 << 10, + VS_Z_INF_RETAIN_bit = 1 << 11, + VS_W_NAN_TO_INF_bit = 1 << 12, + VS_W_INF_RETAIN_bit = 1 << 13, + VS_CLIP_DIST_INF_DISCARD_bit = 1 << 14, + VTE_NO_OUTPUT_NEG_0_bit = 1 << 20, + PA_SU_LINE_STIPPLE_CNTL = 0x00028824, + LINE_STIPPLE_RESET_mask = 0x03 << 0, + LINE_STIPPLE_RESET_shift = 0, + EXPAND_FULL_LENGTH_bit = 1 << 2, + FRACTIONAL_ACCUM_bit = 1 << 3, + DIAMOND_ADJUST_bit = 1 << 4, + PA_SU_LINE_STIPPLE_SCALE = 0x00028828, + PA_SU_PRIM_FILTER_CNTL = 0x0002882c, + TRIANGLE_FILTER_DISABLE_bit = 1 << 0, + LINE_FILTER_DISABLE_bit = 1 << 1, + POINT_FILTER_DISABLE_bit = 1 << 2, + RECTANGLE_FILTER_DISABLE_bit = 1 << 3, + TRIANGLE_EXPAND_ENA_bit = 1 << 4, + LINE_EXPAND_ENA_bit = 1 << 5, + POINT_EXPAND_ENA_bit = 1 << 6, + RECTANGLE_EXPAND_ENA_bit = 1 << 7, + PRIM_EXPAND_CONSTANT_mask = 0xff << 8, + PRIM_EXPAND_CONSTANT_shift = 8, + SQ_LSTMP_RING_ITEMSIZE = 0x00028830, + ITEMSIZE_mask = 0x7fff << 0, + ITEMSIZE_shift = 0, + SQ_HSTMP_RING_ITEMSIZE = 0x00028834, +/* ITEMSIZE_mask = 0x7fff << 0, */ +/* ITEMSIZE_shift = 0, */ + SQ_PGM_START_PS = 0x00028840, + SQ_PGM_RESOURCES_PS = 0x00028844, + NUM_GPRS_mask = 0xff << 0, + NUM_GPRS_shift = 0, + STACK_SIZE_mask = 0xff << 8, + STACK_SIZE_shift = 8, + DX10_CLAMP_bit = 1 << 21, + UNCACHED_FIRST_INST_bit = 1 << 28, + CLAMP_CONSTS_bit = 1 << 31, + SQ_PGM_RESOURCES_2_PS = 
0x00028848, + SINGLE_ROUND_mask = 0x03 << 0, + SINGLE_ROUND_shift = 0, + SQ_ROUND_NEAREST_EVEN = 0x00, + SQ_ROUND_PLUS_INFINITY = 0x01, + SQ_ROUND_MINUS_INFINITY = 0x02, + SQ_ROUND_TO_ZERO = 0x03, + DOUBLE_ROUND_mask = 0x03 << 2, + DOUBLE_ROUND_shift = 2, +/* SQ_ROUND_NEAREST_EVEN = 0x00, */ +/* SQ_ROUND_PLUS_INFINITY = 0x01, */ +/* SQ_ROUND_MINUS_INFINITY = 0x02, */ +/* SQ_ROUND_TO_ZERO = 0x03, */ + ALLOW_SINGLE_DENORM_IN_bit = 1 << 4, + ALLOW_SINGLE_DENORM_OUT_bit = 1 << 5, + ALLOW_DOUBLE_DENORM_IN_bit = 1 << 6, + ALLOW_DOUBLE_DENORM_OUT_bit = 1 << 7, + SQ_PGM_EXPORTS_PS = 0x0002884c, + EXPORT_MODE_mask = 0x1f << 0, + EXPORT_MODE_shift = 0, + SQ_PGM_START_VS = 0x0002885c, + SQ_PGM_RESOURCES_VS = 0x00028860, +/* NUM_GPRS_mask = 0xff << 0, */ +/* NUM_GPRS_shift = 0, */ +/* STACK_SIZE_mask = 0xff << 8, */ +/* STACK_SIZE_shift = 8, */ +/* DX10_CLAMP_bit = 1 << 21, */ +/* UNCACHED_FIRST_INST_bit = 1 << 28, */ + SQ_PGM_RESOURCES_2_VS = 0x00028864, +/* SINGLE_ROUND_mask = 0x03 << 0, */ +/* SINGLE_ROUND_shift = 0, */ +/* SQ_ROUND_NEAREST_EVEN = 0x00, */ +/* SQ_ROUND_PLUS_INFINITY = 0x01, */ +/* SQ_ROUND_MINUS_INFINITY = 0x02, */ +/* SQ_ROUND_TO_ZERO = 0x03, */ +/* DOUBLE_ROUND_mask = 0x03 << 2, */ +/* DOUBLE_ROUND_shift = 2, */ +/* SQ_ROUND_NEAREST_EVEN = 0x00, */ +/* SQ_ROUND_PLUS_INFINITY = 0x01, */ +/* SQ_ROUND_MINUS_INFINITY = 0x02, */ +/* SQ_ROUND_TO_ZERO = 0x03, */ +/* ALLOW_SINGLE_DENORM_IN_bit = 1 << 4, */ +/* ALLOW_SINGLE_DENORM_OUT_bit = 1 << 5, */ +/* ALLOW_DOUBLE_DENORM_IN_bit = 1 << 6, */ +/* ALLOW_DOUBLE_DENORM_OUT_bit = 1 << 7, */ + SQ_PGM_START_GS = 0x00028874, + SQ_PGM_RESOURCES_GS = 0x00028878, +/* NUM_GPRS_mask = 0xff << 0, */ +/* NUM_GPRS_shift = 0, */ +/* STACK_SIZE_mask = 0xff << 8, */ +/* STACK_SIZE_shift = 8, */ +/* DX10_CLAMP_bit = 1 << 21, */ +/* UNCACHED_FIRST_INST_bit = 1 << 28, */ + SQ_PGM_RESOURCES_2_GS = 0x0002887c, +/* SINGLE_ROUND_mask = 0x03 << 0, */ +/* SINGLE_ROUND_shift = 0, */ +/* SQ_ROUND_NEAREST_EVEN = 0x00, */ +/* 
SQ_ROUND_PLUS_INFINITY = 0x01, */ +/* SQ_ROUND_MINUS_INFINITY = 0x02, */ +/* SQ_ROUND_TO_ZERO = 0x03, */ +/* DOUBLE_ROUND_mask = 0x03 << 2, */ +/* DOUBLE_ROUND_shift = 2, */ +/* SQ_ROUND_NEAREST_EVEN = 0x00, */ +/* SQ_ROUND_PLUS_INFINITY = 0x01, */ +/* SQ_ROUND_MINUS_INFINITY = 0x02, */ +/* SQ_ROUND_TO_ZERO = 0x03, */ +/* ALLOW_SINGLE_DENORM_IN_bit = 1 << 4, */ +/* ALLOW_SINGLE_DENORM_OUT_bit = 1 << 5, */ +/* ALLOW_DOUBLE_DENORM_IN_bit = 1 << 6, */ +/* ALLOW_DOUBLE_DENORM_OUT_bit = 1 << 7, */ + SQ_PGM_START_ES = 0x0002888c, + SQ_PGM_RESOURCES_ES = 0x00028890, +/* NUM_GPRS_mask = 0xff << 0, */ +/* NUM_GPRS_shift = 0, */ +/* STACK_SIZE_mask = 0xff << 8, */ +/* STACK_SIZE_shift = 8, */ +/* DX10_CLAMP_bit = 1 << 21, */ +/* UNCACHED_FIRST_INST_bit = 1 << 28, */ + SQ_PGM_RESOURCES_2_ES = 0x00028894, +/* SINGLE_ROUND_mask = 0x03 << 0, */ +/* SINGLE_ROUND_shift = 0, */ +/* SQ_ROUND_NEAREST_EVEN = 0x00, */ +/* SQ_ROUND_PLUS_INFINITY = 0x01, */ +/* SQ_ROUND_MINUS_INFINITY = 0x02, */ +/* SQ_ROUND_TO_ZERO = 0x03, */ +/* DOUBLE_ROUND_mask = 0x03 << 2, */ +/* DOUBLE_ROUND_shift = 2, */ +/* SQ_ROUND_NEAREST_EVEN = 0x00, */ +/* SQ_ROUND_PLUS_INFINITY = 0x01, */ +/* SQ_ROUND_MINUS_INFINITY = 0x02, */ +/* SQ_ROUND_TO_ZERO = 0x03, */ +/* ALLOW_SINGLE_DENORM_IN_bit = 1 << 4, */ +/* ALLOW_SINGLE_DENORM_OUT_bit = 1 << 5, */ +/* ALLOW_DOUBLE_DENORM_IN_bit = 1 << 6, */ +/* ALLOW_DOUBLE_DENORM_OUT_bit = 1 << 7, */ + SQ_PGM_START_FS = 0x000288a4, + SQ_PGM_RESOURCES_FS = 0x000288a8, +/* NUM_GPRS_mask = 0xff << 0, */ +/* NUM_GPRS_shift = 0, */ +/* STACK_SIZE_mask = 0xff << 8, */ +/* STACK_SIZE_shift = 8, */ +/* DX10_CLAMP_bit = 1 << 21, */ + SQ_PGM_START_HS = 0x000288b8, + SQ_PGM_RESOURCES_HS = 0x000288bc, +/* NUM_GPRS_mask = 0xff << 0, */ +/* NUM_GPRS_shift = 0, */ +/* STACK_SIZE_mask = 0xff << 8, */ +/* STACK_SIZE_shift = 8, */ +/* DX10_CLAMP_bit = 1 << 21, */ +/* UNCACHED_FIRST_INST_bit = 1 << 28, */ + SQ_PGM_RESOURCES_2_HS = 0x000288c0, +/* SINGLE_ROUND_mask = 0x03 << 0, */ +/* 
SINGLE_ROUND_shift = 0, */ +/* SQ_ROUND_NEAREST_EVEN = 0x00, */ +/* SQ_ROUND_PLUS_INFINITY = 0x01, */ +/* SQ_ROUND_MINUS_INFINITY = 0x02, */ +/* SQ_ROUND_TO_ZERO = 0x03, */ +/* DOUBLE_ROUND_mask = 0x03 << 2, */ +/* DOUBLE_ROUND_shift = 2, */ +/* SQ_ROUND_NEAREST_EVEN = 0x00, */ +/* SQ_ROUND_PLUS_INFINITY = 0x01, */ +/* SQ_ROUND_MINUS_INFINITY = 0x02, */ +/* SQ_ROUND_TO_ZERO = 0x03, */ +/* ALLOW_SINGLE_DENORM_IN_bit = 1 << 4, */ +/* ALLOW_SINGLE_DENORM_OUT_bit = 1 << 5, */ +/* ALLOW_DOUBLE_DENORM_IN_bit = 1 << 6, */ +/* ALLOW_DOUBLE_DENORM_OUT_bit = 1 << 7, */ + SQ_PGM_START_LS = 0x000288d0, + SQ_PGM_RESOURCES_LS = 0x000288d4, +/* NUM_GPRS_mask = 0xff << 0, */ +/* NUM_GPRS_shift = 0, */ +/* STACK_SIZE_mask = 0xff << 8, */ +/* STACK_SIZE_shift = 8, */ +/* DX10_CLAMP_bit = 1 << 21, */ +/* UNCACHED_FIRST_INST_bit = 1 << 28, */ + SQ_PGM_RESOURCES_2_LS = 0x000288d8, +/* SINGLE_ROUND_mask = 0x03 << 0, */ +/* SINGLE_ROUND_shift = 0, */ +/* SQ_ROUND_NEAREST_EVEN = 0x00, */ +/* SQ_ROUND_PLUS_INFINITY = 0x01, */ +/* SQ_ROUND_MINUS_INFINITY = 0x02, */ +/* SQ_ROUND_TO_ZERO = 0x03, */ +/* DOUBLE_ROUND_mask = 0x03 << 2, */ +/* DOUBLE_ROUND_shift = 2, */ +/* SQ_ROUND_NEAREST_EVEN = 0x00, */ +/* SQ_ROUND_PLUS_INFINITY = 0x01, */ +/* SQ_ROUND_MINUS_INFINITY = 0x02, */ +/* SQ_ROUND_TO_ZERO = 0x03, */ +/* ALLOW_SINGLE_DENORM_IN_bit = 1 << 4, */ +/* ALLOW_SINGLE_DENORM_OUT_bit = 1 << 5, */ +/* ALLOW_DOUBLE_DENORM_IN_bit = 1 << 6, */ +/* ALLOW_DOUBLE_DENORM_OUT_bit = 1 << 7, */ + SQ_VTX_SEMANTIC_CLEAR = 0x000288f0, + SQ_ESGS_RING_ITEMSIZE = 0x00028900, +/* ITEMSIZE_mask = 0x7fff << 0, */ +/* ITEMSIZE_shift = 0, */ + SQ_GSVS_RING_ITEMSIZE = 0x00028904, +/* ITEMSIZE_mask = 0x7fff << 0, */ +/* ITEMSIZE_shift = 0, */ + SQ_ESTMP_RING_ITEMSIZE = 0x00028908, +/* ITEMSIZE_mask = 0x7fff << 0, */ +/* ITEMSIZE_shift = 0, */ + SQ_GSTMP_RING_ITEMSIZE = 0x0002890c, +/* ITEMSIZE_mask = 0x7fff << 0, */ +/* ITEMSIZE_shift = 0, */ + SQ_VSTMP_RING_ITEMSIZE = 0x00028910, +/* ITEMSIZE_mask = 0x7fff << 0, */ 
+/* ITEMSIZE_shift = 0, */ + SQ_PSTMP_RING_ITEMSIZE = 0x00028914, +/* ITEMSIZE_mask = 0x7fff << 0, */ +/* ITEMSIZE_shift = 0, */ + SQ_GS_VERT_ITEMSIZE = 0x0002891c, +/* ITEMSIZE_mask = 0x7fff << 0, */ +/* ITEMSIZE_shift = 0, */ + SQ_GS_VERT_ITEMSIZE_1 = 0x00028920, +/* ITEMSIZE_mask = 0x7fff << 0, */ +/* ITEMSIZE_shift = 0, */ + SQ_GS_VERT_ITEMSIZE_2 = 0x00028924, +/* ITEMSIZE_mask = 0x7fff << 0, */ +/* ITEMSIZE_shift = 0, */ + SQ_GS_VERT_ITEMSIZE_3 = 0x00028928, +/* ITEMSIZE_mask = 0x7fff << 0, */ +/* ITEMSIZE_shift = 0, */ + SQ_GSVS_RING_OFFSET_1 = 0x0002892c, + SQ_GSVS_RING_OFFSET_1__OFFSET_mask = 0x7fff << 0, + SQ_GSVS_RING_OFFSET_1__OFFSET_shift = 0, + SQ_GSVS_RING_OFFSET_2 = 0x00028930, + SQ_GSVS_RING_OFFSET_2__OFFSET_mask = 0x7fff << 0, + SQ_GSVS_RING_OFFSET_2__OFFSET_shift = 0, + SQ_GSVS_RING_OFFSET_3 = 0x00028934, + SQ_GSVS_RING_OFFSET_3__OFFSET_mask = 0x7fff << 0, + SQ_GSVS_RING_OFFSET_3__OFFSET_shift = 0, + SQ_ALU_CONST_CACHE_PS_0 = 0x00028940, + SQ_ALU_CONST_CACHE_PS_0_num = 16, + SQ_ALU_CONST_CACHE_VS_0 = 0x00028980, + SQ_ALU_CONST_CACHE_VS_0_num = 16, + SQ_ALU_CONST_CACHE_GS_0 = 0x000289c0, + SQ_ALU_CONST_CACHE_GS_0_num = 16, + PA_SU_POINT_SIZE = 0x00028a00, + HEIGHT_mask = 0xffff << 0, + HEIGHT_shift = 0, + PA_SU_POINT_SIZE__WIDTH_mask = 0xffff << 16, + PA_SU_POINT_SIZE__WIDTH_shift = 16, + PA_SU_POINT_MINMAX = 0x00028a04, + MIN_SIZE_mask = 0xffff << 0, + MIN_SIZE_shift = 0, + PA_SU_POINT_MINMAX__MAX_SIZE_mask = 0xffff << 16, + PA_SU_POINT_MINMAX__MAX_SIZE_shift = 16, + PA_SU_LINE_CNTL = 0x00028a08, + PA_SU_LINE_CNTL__WIDTH_mask = 0xffff << 0, + PA_SU_LINE_CNTL__WIDTH_shift = 0, + PA_SC_LINE_STIPPLE = 0x00028a0c, + LINE_PATTERN_mask = 0xffff << 0, + LINE_PATTERN_shift = 0, + REPEAT_COUNT_mask = 0xff << 16, + REPEAT_COUNT_shift = 16, + PATTERN_BIT_ORDER_bit = 1 << 28, + AUTO_RESET_CNTL_mask = 0x03 << 29, + AUTO_RESET_CNTL_shift = 29, + VGT_OUTPUT_PATH_CNTL = 0x00028a10, + PATH_SELECT_mask = 0x07 << 0, + PATH_SELECT_shift = 0, + VGT_OUTPATH_VTX_REUSE = 
0x00, + VGT_OUTPATH_TESS_EN = 0x01, + VGT_OUTPATH_PASSTHRU = 0x02, + VGT_OUTPATH_GS_BLOCK = 0x03, + VGT_OUTPATH_HS_BLOCK = 0x04, + VGT_HOS_CNTL = 0x00028a14, + TESS_MODE_mask = 0x03 << 0, + TESS_MODE_shift = 0, + VGT_HOS_MAX_TESS_LEVEL = 0x00028a18, + VGT_HOS_MIN_TESS_LEVEL = 0x00028a1c, + VGT_HOS_REUSE_DEPTH = 0x00028a20, + REUSE_DEPTH_mask = 0xff << 0, + REUSE_DEPTH_shift = 0, + VGT_GROUP_PRIM_TYPE = 0x00028a24, + VGT_GROUP_PRIM_TYPE__PRIM_TYPE_mask = 0x1f << 0, + VGT_GROUP_PRIM_TYPE__PRIM_TYPE_shift = 0, + VGT_GRP_3D_POINT = 0x00, + VGT_GRP_3D_LINE = 0x01, + VGT_GRP_3D_TRI = 0x02, + VGT_GRP_3D_RECT = 0x03, + VGT_GRP_3D_QUAD = 0x04, + VGT_GRP_2D_COPY_RECT_V0 = 0x05, + VGT_GRP_2D_COPY_RECT_V1 = 0x06, + VGT_GRP_2D_COPY_RECT_V2 = 0x07, + VGT_GRP_2D_COPY_RECT_V3 = 0x08, + VGT_GRP_2D_FILL_RECT = 0x09, + VGT_GRP_2D_LINE = 0x0a, + VGT_GRP_2D_TRI = 0x0b, + VGT_GRP_PRIM_INDEX_LINE = 0x0c, + VGT_GRP_PRIM_INDEX_TRI = 0x0d, + VGT_GRP_PRIM_INDEX_QUAD = 0x0e, + VGT_GRP_3D_LINE_ADJ = 0x0f, + VGT_GRP_3D_TRI_ADJ = 0x10, + VGT_GRP_3D_PATCH = 0x11, + RETAIN_ORDER_bit = 1 << 14, + RETAIN_QUADS_bit = 1 << 15, + PRIM_ORDER_mask = 0x07 << 16, + PRIM_ORDER_shift = 16, + VGT_GRP_LIST = 0x00, + VGT_GRP_STRIP = 0x01, + VGT_GRP_FAN = 0x02, + VGT_GRP_LOOP = 0x03, + VGT_GRP_POLYGON = 0x04, + VGT_GROUP_FIRST_DECR = 0x00028a28, + FIRST_DECR_mask = 0x0f << 0, + FIRST_DECR_shift = 0, + VGT_GROUP_DECR = 0x00028a2c, + DECR_mask = 0x0f << 0, + DECR_shift = 0, + VGT_GROUP_VECT_0_CNTL = 0x00028a30, + COMP_X_EN_bit = 1 << 0, + COMP_Y_EN_bit = 1 << 1, + COMP_Z_EN_bit = 1 << 2, + COMP_W_EN_bit = 1 << 3, + VGT_GROUP_VECT_0_CNTL__STRIDE_mask = 0xff << 8, + VGT_GROUP_VECT_0_CNTL__STRIDE_shift = 8, + SHIFT_mask = 0xff << 16, + SHIFT_shift = 16, + VGT_GROUP_VECT_1_CNTL = 0x00028a34, +/* COMP_X_EN_bit = 1 << 0, */ +/* COMP_Y_EN_bit = 1 << 1, */ +/* COMP_Z_EN_bit = 1 << 2, */ +/* COMP_W_EN_bit = 1 << 3, */ + VGT_GROUP_VECT_1_CNTL__STRIDE_mask = 0xff << 8, + VGT_GROUP_VECT_1_CNTL__STRIDE_shift = 8, +/* 
SHIFT_mask = 0xff << 16, */ +/* SHIFT_shift = 16, */ + VGT_GROUP_VECT_0_FMT_CNTL = 0x00028a38, + X_CONV_mask = 0x0f << 0, + X_CONV_shift = 0, + VGT_GRP_INDEX_16 = 0x00, + VGT_GRP_INDEX_32 = 0x01, + VGT_GRP_UINT_16 = 0x02, + VGT_GRP_UINT_32 = 0x03, + VGT_GRP_SINT_16 = 0x04, + VGT_GRP_SINT_32 = 0x05, + VGT_GRP_FLOAT_32 = 0x06, + VGT_GRP_AUTO_PRIM = 0x07, + VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, + X_OFFSET_mask = 0x0f << 4, + X_OFFSET_shift = 4, + Y_CONV_mask = 0x0f << 8, + Y_CONV_shift = 8, +/* VGT_GRP_INDEX_16 = 0x00, */ +/* VGT_GRP_INDEX_32 = 0x01, */ +/* VGT_GRP_UINT_16 = 0x02, */ +/* VGT_GRP_UINT_32 = 0x03, */ +/* VGT_GRP_SINT_16 = 0x04, */ +/* VGT_GRP_SINT_32 = 0x05, */ +/* VGT_GRP_FLOAT_32 = 0x06, */ +/* VGT_GRP_AUTO_PRIM = 0x07, */ +/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */ + Y_OFFSET_mask = 0x0f << 12, + Y_OFFSET_shift = 12, + Z_CONV_mask = 0x0f << 16, + Z_CONV_shift = 16, +/* VGT_GRP_INDEX_16 = 0x00, */ +/* VGT_GRP_INDEX_32 = 0x01, */ +/* VGT_GRP_UINT_16 = 0x02, */ +/* VGT_GRP_UINT_32 = 0x03, */ +/* VGT_GRP_SINT_16 = 0x04, */ +/* VGT_GRP_SINT_32 = 0x05, */ +/* VGT_GRP_FLOAT_32 = 0x06, */ +/* VGT_GRP_AUTO_PRIM = 0x07, */ +/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */ + Z_OFFSET_mask = 0x0f << 20, + Z_OFFSET_shift = 20, + W_CONV_mask = 0x0f << 24, + W_CONV_shift = 24, +/* VGT_GRP_INDEX_16 = 0x00, */ +/* VGT_GRP_INDEX_32 = 0x01, */ +/* VGT_GRP_UINT_16 = 0x02, */ +/* VGT_GRP_UINT_32 = 0x03, */ +/* VGT_GRP_SINT_16 = 0x04, */ +/* VGT_GRP_SINT_32 = 0x05, */ +/* VGT_GRP_FLOAT_32 = 0x06, */ +/* VGT_GRP_AUTO_PRIM = 0x07, */ +/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */ + W_OFFSET_mask = 0x0f << 28, + W_OFFSET_shift = 28, + VGT_GROUP_VECT_1_FMT_CNTL = 0x00028a3c, +/* X_CONV_mask = 0x0f << 0, */ +/* X_CONV_shift = 0, */ +/* VGT_GRP_INDEX_16 = 0x00, */ +/* VGT_GRP_INDEX_32 = 0x01, */ +/* VGT_GRP_UINT_16 = 0x02, */ +/* VGT_GRP_UINT_32 = 0x03, */ +/* VGT_GRP_SINT_16 = 0x04, */ +/* VGT_GRP_SINT_32 = 0x05, */ +/* VGT_GRP_FLOAT_32 = 0x06, */ +/* VGT_GRP_AUTO_PRIM = 0x07, */ +/* 
VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */ +/* X_OFFSET_mask = 0x0f << 4, */ +/* X_OFFSET_shift = 4, */ +/* Y_CONV_mask = 0x0f << 8, */ +/* Y_CONV_shift = 8, */ +/* VGT_GRP_INDEX_16 = 0x00, */ +/* VGT_GRP_INDEX_32 = 0x01, */ +/* VGT_GRP_UINT_16 = 0x02, */ +/* VGT_GRP_UINT_32 = 0x03, */ +/* VGT_GRP_SINT_16 = 0x04, */ +/* VGT_GRP_SINT_32 = 0x05, */ +/* VGT_GRP_FLOAT_32 = 0x06, */ +/* VGT_GRP_AUTO_PRIM = 0x07, */ +/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */ +/* Y_OFFSET_mask = 0x0f << 12, */ +/* Y_OFFSET_shift = 12, */ +/* Z_CONV_mask = 0x0f << 16, */ +/* Z_CONV_shift = 16, */ +/* VGT_GRP_INDEX_16 = 0x00, */ +/* VGT_GRP_INDEX_32 = 0x01, */ +/* VGT_GRP_UINT_16 = 0x02, */ +/* VGT_GRP_UINT_32 = 0x03, */ +/* VGT_GRP_SINT_16 = 0x04, */ +/* VGT_GRP_SINT_32 = 0x05, */ +/* VGT_GRP_FLOAT_32 = 0x06, */ +/* VGT_GRP_AUTO_PRIM = 0x07, */ +/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */ +/* Z_OFFSET_mask = 0x0f << 20, */ +/* Z_OFFSET_shift = 20, */ +/* W_CONV_mask = 0x0f << 24, */ +/* W_CONV_shift = 24, */ +/* VGT_GRP_INDEX_16 = 0x00, */ +/* VGT_GRP_INDEX_32 = 0x01, */ +/* VGT_GRP_UINT_16 = 0x02, */ +/* VGT_GRP_UINT_32 = 0x03, */ +/* VGT_GRP_SINT_16 = 0x04, */ +/* VGT_GRP_SINT_32 = 0x05, */ +/* VGT_GRP_FLOAT_32 = 0x06, */ +/* VGT_GRP_AUTO_PRIM = 0x07, */ +/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */ +/* W_OFFSET_mask = 0x0f << 28, */ +/* W_OFFSET_shift = 28, */ + VGT_GS_MODE = 0x00028a40, + VGT_GS_MODE__MODE_mask = 0x03 << 0, + VGT_GS_MODE__MODE_shift = 0, + GS_OFF = 0x00, + GS_SCENARIO_A = 0x01, + GS_SCENARIO_B = 0x02, + GS_SCENARIO_G = 0x03, + GS_SCENARIO_C = 0x04, + SPRITE_EN = 0x05, + ES_PASSTHRU_bit = 1 << 2, + CUT_MODE_mask = 0x03 << 3, + CUT_MODE_shift = 3, + GS_CUT_1024 = 0x00, + GS_CUT_512 = 0x01, + GS_CUT_256 = 0x02, + GS_CUT_128 = 0x03, + MODE_HI_bit = 1 << 8, + PA_SC_MODE_CNTL_0 = 0x00028a48, + MSAA_ENABLE_bit = 1 << 0, + VPORT_SCISSOR_ENABLE_bit = 1 << 1, + LINE_STIPPLE_ENABLE_bit = 1 << 2, + VGT_ENHANCE = 0x00028a50, + VGT_GS_PER_ES = 0x00028a54, + GS_PER_ES_mask = 0x7ff << 0, + 
GS_PER_ES_shift = 0, + VGT_ES_PER_GS = 0x00028a58, + ES_PER_GS_mask = 0x7ff << 0, + ES_PER_GS_shift = 0, + VGT_GS_PER_VS = 0x00028a5c, + GS_PER_VS_mask = 0x0f << 0, + GS_PER_VS_shift = 0, + VGT_GS_OUT_PRIM_TYPE = 0x00028a6c, + OUTPRIM_TYPE_mask = 0x3f << 0, + OUTPRIM_TYPE_shift = 0, + POINTLIST = 0x00, + LINESTRIP = 0x01, + TRISTRIP = 0x02, + VGT_DMA_SIZE = 0x00028a74, + VGT_DMA_MAX_SIZE = 0x00028a78, + VGT_DMA_INDEX_TYPE = 0x00028a7c, +/* INDEX_TYPE_mask = 0x03 << 0, */ +/* INDEX_TYPE_shift = 0, */ + VGT_INDEX_16 = 0x00, + VGT_INDEX_32 = 0x01, + SWAP_MODE_mask = 0x03 << 2, + SWAP_MODE_shift = 2, + VGT_DMA_SWAP_NONE = 0x00, + VGT_DMA_SWAP_16_BIT = 0x01, + VGT_DMA_SWAP_32_BIT = 0x02, + VGT_DMA_SWAP_WORD = 0x03, + VGT_PRIMITIVEID_EN = 0x00028a84, + PRIMITIVEID_EN_bit = 1 << 0, + VGT_DMA_NUM_INSTANCES = 0x00028a88, + VGT_EVENT_INITIATOR = 0x00028a90, + EVENT_TYPE_mask = 0x3f << 0, + EVENT_TYPE_shift = 0, + SAMPLE_STREAMOUTSTATS1 = 0x01, + SAMPLE_STREAMOUTSTATS2 = 0x02, + SAMPLE_STREAMOUTSTATS3 = 0x03, + CACHE_FLUSH_TS = 0x04, + CONTEXT_DONE = 0x05, + CACHE_FLUSH = 0x06, + CS_PARTIAL_FLUSH = 0x07, + RST_PIX_CNT = 0x0d, + VS_PARTIAL_FLUSH = 0x0f, + PS_PARTIAL_FLUSH = 0x10, + FLUSH_HS_OUTPUT = 0x11, + FLUSH_LS_OUTPUT = 0x12, + CACHE_FLUSH_AND_INV_TS_EVENT = 0x14, + ZPASS_DONE = 0x15, + CACHE_FLUSH_AND_INV_EVENT = 0x16, + PERFCOUNTER_START = 0x17, + PERFCOUNTER_STOP = 0x18, + PIPELINESTAT_START = 0x19, + PIPELINESTAT_STOP = 0x1a, + PERFCOUNTER_SAMPLE = 0x1b, + FLUSH_ES_OUTPUT = 0x1c, + FLUSH_GS_OUTPUT = 0x1d, + SAMPLE_PIPELINESTAT = 0x1e, + SO_VGTSTREAMOUT_FLUSH = 0x1f, + SAMPLE_STREAMOUTSTATS = 0x20, + RESET_VTX_CNT = 0x21, + BLOCK_CONTEXT_DONE = 0x22, + CS_CONTEXT_DONE = 0x23, + VGT_FLUSH = 0x24, + SQ_NON_EVENT = 0x26, + SC_SEND_DB_VPZ = 0x27, + BOTTOM_OF_PIPE_TS = 0x28, + FLUSH_SX_TS = 0x29, + DB_CACHE_FLUSH_AND_INV = 0x2a, + FLUSH_AND_INV_DB_DATA_TS = 0x2b, + FLUSH_AND_INV_DB_META = 0x2c, + FLUSH_AND_INV_CB_DATA_TS = 0x2d, + FLUSH_AND_INV_CB_META = 0x2e, + CS_DONE = 
0x2f, + PS_DONE = 0x30, + FLUSH_AND_INV_CB_PIXEL_DATA = 0x31, + ADDRESS_HI_mask = 0xff << 19, + ADDRESS_HI_shift = 19, + EXTENDED_EVENT_bit = 1 << 27, + VGT_MULTI_PRIM_IB_RESET_EN = 0x00028a94, + RESET_EN_bit = 1 << 0, + VGT_INSTANCE_STEP_RATE_0 = 0x00028aa0, + VGT_INSTANCE_STEP_RATE_1 = 0x00028aa4, + VGT_REUSE_OFF = 0x00028ab4, + REUSE_OFF_bit = 1 << 0, + VGT_VTX_CNT_EN = 0x00028ab8, + VTX_CNT_EN_bit = 1 << 0, + DB_HTILE_SURFACE = 0x00028abc, + HTILE_WIDTH_bit = 1 << 0, + HTILE_HEIGHT_bit = 1 << 1, + LINEAR_bit = 1 << 2, + FULL_CACHE_bit = 1 << 3, + HTILE_USES_PRELOAD_WIN_bit = 1 << 4, + PRELOAD_bit = 1 << 5, + PREFETCH_WIDTH_mask = 0x3f << 6, + PREFETCH_WIDTH_shift = 6, + PREFETCH_HEIGHT_mask = 0x3f << 12, + PREFETCH_HEIGHT_shift = 12, + DB_SRESULTS_COMPARE_STATE0 = 0x00028ac0, + COMPAREFUNC0_mask = 0x07 << 0, + COMPAREFUNC0_shift = 0, +/* REF_NEVER = 0x00, */ +/* REF_LESS = 0x01, */ +/* REF_EQUAL = 0x02, */ +/* REF_LEQUAL = 0x03, */ +/* REF_GREATER = 0x04, */ +/* REF_NOTEQUAL = 0x05, */ +/* REF_GEQUAL = 0x06, */ +/* REF_ALWAYS = 0x07, */ + COMPAREVALUE0_mask = 0xff << 4, + COMPAREVALUE0_shift = 4, + COMPAREMASK0_mask = 0xff << 12, + COMPAREMASK0_shift = 12, + ENABLE0_bit = 1 << 24, + DB_SRESULTS_COMPARE_STATE1 = 0x00028ac4, + COMPAREFUNC1_mask = 0x07 << 0, + COMPAREFUNC1_shift = 0, +/* REF_NEVER = 0x00, */ +/* REF_LESS = 0x01, */ +/* REF_EQUAL = 0x02, */ +/* REF_LEQUAL = 0x03, */ +/* REF_GREATER = 0x04, */ +/* REF_NOTEQUAL = 0x05, */ +/* REF_GEQUAL = 0x06, */ +/* REF_ALWAYS = 0x07, */ + COMPAREVALUE1_mask = 0xff << 4, + COMPAREVALUE1_shift = 4, + COMPAREMASK1_mask = 0xff << 12, + COMPAREMASK1_shift = 12, + ENABLE1_bit = 1 << 24, + DB_PRELOAD_CONTROL = 0x00028ac8, + START_X_mask = 0xff << 0, + START_X_shift = 0, + START_Y_mask = 0xff << 8, + START_Y_shift = 8, + MAX_X_mask = 0xff << 16, + MAX_X_shift = 16, + MAX_Y_mask = 0xff << 24, + MAX_Y_shift = 24, + VGT_STRMOUT_BUFFER_SIZE_0 = 0x00028ad0, + VGT_STRMOUT_VTX_STRIDE_0 = 0x00028ad4, + 
VGT_STRMOUT_VTX_STRIDE_0__STRIDE_mask = 0x3ff << 0, + VGT_STRMOUT_VTX_STRIDE_0__STRIDE_shift = 0, + VGT_STRMOUT_BUFFER_BASE_0 = 0x00028ad8, + VGT_STRMOUT_BUFFER_OFFSET_0 = 0x00028adc, + VGT_STRMOUT_BUFFER_SIZE_1 = 0x00028ae0, + VGT_STRMOUT_VTX_STRIDE_1 = 0x00028ae4, + VGT_STRMOUT_VTX_STRIDE_1__STRIDE_mask = 0x3ff << 0, + VGT_STRMOUT_VTX_STRIDE_1__STRIDE_shift = 0, + VGT_STRMOUT_BUFFER_BASE_1 = 0x00028ae8, + VGT_STRMOUT_BUFFER_OFFSET_1 = 0x00028aec, + VGT_STRMOUT_BUFFER_SIZE_2 = 0x00028af0, + VGT_STRMOUT_VTX_STRIDE_2 = 0x00028af4, + VGT_STRMOUT_VTX_STRIDE_2__STRIDE_mask = 0x3ff << 0, + VGT_STRMOUT_VTX_STRIDE_2__STRIDE_shift = 0, + VGT_STRMOUT_BUFFER_BASE_2 = 0x00028af8, + VGT_STRMOUT_BUFFER_OFFSET_2 = 0x00028afc, + VGT_STRMOUT_BUFFER_SIZE_3 = 0x00028b00, + VGT_STRMOUT_VTX_STRIDE_3 = 0x00028b04, + VGT_STRMOUT_VTX_STRIDE_3__STRIDE_mask = 0x3ff << 0, + VGT_STRMOUT_VTX_STRIDE_3__STRIDE_shift = 0, + VGT_STRMOUT_BUFFER_BASE_3 = 0x00028b08, + VGT_STRMOUT_BUFFER_OFFSET_3 = 0x00028b0c, + VGT_STRMOUT_BASE_OFFSET_0 = 0x00028b10, + VGT_STRMOUT_BASE_OFFSET_1 = 0x00028b14, + VGT_STRMOUT_BASE_OFFSET_2 = 0x00028b18, + VGT_STRMOUT_BASE_OFFSET_3 = 0x00028b1c, + VGT_STRMOUT_DRAW_OPAQUE_OFFSET = 0x00028b28, + VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE = 0x00028b2c, + VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE = 0x00028b30, + VERTEX_STRIDE_mask = 0x1ff << 0, + VERTEX_STRIDE_shift = 0, + VGT_GS_MAX_VERT_OUT = 0x00028b38, + MAX_VERT_OUT_mask = 0x7ff << 0, + MAX_VERT_OUT_shift = 0, + VGT_STRMOUT_BASE_OFFSET_HI_0 = 0x00028b44, + VGT_STRMOUT_BASE_OFFSET_HI_0__BASE_OFFSET_mask = 0x3f << 0, + VGT_STRMOUT_BASE_OFFSET_HI_0__BASE_OFFSET_shift = 0, + VGT_STRMOUT_BASE_OFFSET_HI_1 = 0x00028b48, + VGT_STRMOUT_BASE_OFFSET_HI_1__BASE_OFFSET_mask = 0x3f << 0, + VGT_STRMOUT_BASE_OFFSET_HI_1__BASE_OFFSET_shift = 0, + VGT_STRMOUT_BASE_OFFSET_HI_2 = 0x00028b4c, + VGT_STRMOUT_BASE_OFFSET_HI_2__BASE_OFFSET_mask = 0x3f << 0, + VGT_STRMOUT_BASE_OFFSET_HI_2__BASE_OFFSET_shift = 0, + VGT_STRMOUT_BASE_OFFSET_HI_3 = 
0x00028b50, + VGT_STRMOUT_BASE_OFFSET_HI_3__BASE_OFFSET_mask = 0x3f << 0, + VGT_STRMOUT_BASE_OFFSET_HI_3__BASE_OFFSET_shift = 0, + VGT_SHADER_STAGES_EN = 0x00028b54, + LS_EN_mask = 0x03 << 0, + LS_EN_shift = 0, + LS_STAGE_OFF = 0x00, + LS_STAGE_ON = 0x01, + CS_STAGE_ON = 0x02, + HS_EN_bit = 1 << 2, + ES_EN_mask = 0x03 << 3, + ES_EN_shift = 3, + ES_STAGE_OFF = 0x00, + ES_STAGE_DS = 0x01, + ES_STAGE_REAL = 0x02, + GS_EN_bit = 1 << 5, + VS_EN_mask = 0x03 << 6, + VS_EN_shift = 6, + VS_STAGE_REAL = 0x00, + VS_STAGE_DS = 0x01, + VS_STAGE_COPY_SHADER = 0x02, + VGT_LS_HS_CONFIG = 0x00028b58, + NUM_PATCHES_mask = 0xff << 0, + NUM_PATCHES_shift = 0, + HS_NUM_INPUT_CP_mask = 0x3f << 8, + HS_NUM_INPUT_CP_shift = 8, + HS_NUM_OUTPUT_CP_mask = 0x3f << 14, + HS_NUM_OUTPUT_CP_shift = 14, + VGT_LS_SIZE = 0x00028b5c, + VGT_LS_SIZE__SIZE_mask = 0xff << 0, + VGT_LS_SIZE__SIZE_shift = 0, + PATCH_CP_SIZE_mask = 0x1fff << 8, + PATCH_CP_SIZE_shift = 8, + VGT_HS_SIZE = 0x00028b60, + VGT_HS_SIZE__SIZE_mask = 0xff << 0, + VGT_HS_SIZE__SIZE_shift = 0, +/* PATCH_CP_SIZE_mask = 0x1fff << 8, */ +/* PATCH_CP_SIZE_shift = 8, */ + VGT_LS_HS_ALLOC = 0x00028b64, + HS_TOTAL_OUTPUT_mask = 0x1fff << 0, + HS_TOTAL_OUTPUT_shift = 0, + LS_HS_TOTAL_OUTPUT_mask = 0x1fff << 13, + LS_HS_TOTAL_OUTPUT_shift = 13, + VGT_HS_PATCH_CONST = 0x00028b68, + VGT_HS_PATCH_CONST__SIZE_mask = 0x1fff << 0, + VGT_HS_PATCH_CONST__SIZE_shift = 0, + VGT_HS_PATCH_CONST__STRIDE_mask = 0x1fff << 13, + VGT_HS_PATCH_CONST__STRIDE_shift = 13, + DB_ALPHA_TO_MASK = 0x00028b70, + ALPHA_TO_MASK_ENABLE_bit = 1 << 0, + ALPHA_TO_MASK_OFFSET0_mask = 0x03 << 8, + ALPHA_TO_MASK_OFFSET0_shift = 8, + ALPHA_TO_MASK_OFFSET1_mask = 0x03 << 10, + ALPHA_TO_MASK_OFFSET1_shift = 10, + ALPHA_TO_MASK_OFFSET2_mask = 0x03 << 12, + ALPHA_TO_MASK_OFFSET2_shift = 12, + ALPHA_TO_MASK_OFFSET3_mask = 0x03 << 14, + ALPHA_TO_MASK_OFFSET3_shift = 14, + OFFSET_ROUND_bit = 1 << 16, + PA_SU_POLY_OFFSET_DB_FMT_CNTL = 0x00028b78, + POLY_OFFSET_NEG_NUM_DB_BITS_mask = 0xff 
<< 0, + POLY_OFFSET_NEG_NUM_DB_BITS_shift = 0, + POLY_OFFSET_DB_IS_FLOAT_FMT_bit = 1 << 8, + PA_SU_POLY_OFFSET_CLAMP = 0x00028b7c, + PA_SU_POLY_OFFSET_FRONT_SCALE = 0x00028b80, + PA_SU_POLY_OFFSET_FRONT_OFFSET = 0x00028b84, + PA_SU_POLY_OFFSET_BACK_SCALE = 0x00028b88, + PA_SU_POLY_OFFSET_BACK_OFFSET = 0x00028b8c, + VGT_GS_INSTANCE_CNT = 0x00028b90, + VGT_GS_INSTANCE_CNT__ENABLE_bit = 1 << 0, + CNT_mask = 0x7f << 2, + CNT_shift = 2, + VGT_STRMOUT_CONFIG = 0x00028b94, + STREAMOUT_0_EN_bit = 1 << 0, + STREAMOUT_1_EN_bit = 1 << 1, + STREAMOUT_2_EN_bit = 1 << 2, + STREAMOUT_3_EN_bit = 1 << 3, + RAST_STREAM_mask = 0x07 << 4, + RAST_STREAM_shift = 4, + VGT_STRMOUT_BUFFER_CONFIG = 0x00028b98, + STREAM_0_BUFFER_EN_mask = 0x0f << 0, + STREAM_0_BUFFER_EN_shift = 0, + STREAM_1_BUFFER_EN_mask = 0x0f << 4, + STREAM_1_BUFFER_EN_shift = 4, + STREAM_2_BUFFER_EN_mask = 0x0f << 8, + STREAM_2_BUFFER_EN_shift = 8, + STREAM_3_BUFFER_EN_mask = 0x0f << 12, + STREAM_3_BUFFER_EN_shift = 12, + CB_IMMED0_BASE = 0x00028b9c, + CB_IMMED0_BASE_num = 12, + PA_SC_LINE_CNTL = 0x00028c00, + EXPAND_LINE_WIDTH_bit = 1 << 9, + LAST_PIXEL_bit = 1 << 10, + PERPENDICULAR_ENDCAP_ENA_bit = 1 << 11, + DX10_DIAMOND_TEST_ENA_bit = 1 << 12, + PA_SC_AA_CONFIG = 0x00028c04, + MSAA_NUM_SAMPLES_mask = 0x03 << 0, + MSAA_NUM_SAMPLES_shift = 0, + AA_MASK_CENTROID_DTMN_bit = 1 << 4, + MAX_SAMPLE_DIST_mask = 0x0f << 13, + MAX_SAMPLE_DIST_shift = 13, + PA_SU_VTX_CNTL = 0x00028c08, + PIX_CENTER_bit = 1 << 0, + PA_SU_VTX_CNTL__ROUND_MODE_mask = 0x03 << 1, + PA_SU_VTX_CNTL__ROUND_MODE_shift = 1, + X_TRUNCATE = 0x00, + X_ROUND = 0x01, + X_ROUND_TO_EVEN = 0x02, + X_ROUND_TO_ODD = 0x03, + QUANT_MODE_mask = 0x07 << 3, + QUANT_MODE_shift = 3, + X_1_16TH = 0x00, + X_1_8TH = 0x01, + X_1_4TH = 0x02, + X_1_2 = 0x03, + X_1 = 0x04, + X_1_256TH = 0x05, + X_1_1024TH = 0x06, + X_1_4096TH = 0x07, + PA_CL_GB_VERT_CLIP_ADJ = 0x00028c0c, + PA_CL_GB_VERT_DISC_ADJ = 0x00028c10, + PA_CL_GB_HORZ_CLIP_ADJ = 0x00028c14, + PA_CL_GB_HORZ_DISC_ADJ = 
0x00028c18, + PA_SC_AA_SAMPLE_LOCS_0 = 0x00028c1c, + S0_X_mask = 0x0f << 0, + S0_X_shift = 0, + S0_Y_mask = 0x0f << 4, + S0_Y_shift = 4, + S1_X_mask = 0x0f << 8, + S1_X_shift = 8, + S1_Y_mask = 0x0f << 12, + S1_Y_shift = 12, + S2_X_mask = 0x0f << 16, + S2_X_shift = 16, + S2_Y_mask = 0x0f << 20, + S2_Y_shift = 20, + S3_X_mask = 0x0f << 24, + S3_X_shift = 24, + S3_Y_mask = 0x0f << 28, + S3_Y_shift = 28, + PA_SC_AA_SAMPLE_LOCS_1 = 0x00028c20, +/* S0_X_mask = 0x0f << 0, */ +/* S0_X_shift = 0, */ +/* S0_Y_mask = 0x0f << 4, */ +/* S0_Y_shift = 4, */ +/* S1_X_mask = 0x0f << 8, */ +/* S1_X_shift = 8, */ +/* S1_Y_mask = 0x0f << 12, */ +/* S1_Y_shift = 12, */ +/* S2_X_mask = 0x0f << 16, */ +/* S2_X_shift = 16, */ +/* S2_Y_mask = 0x0f << 20, */ +/* S2_Y_shift = 20, */ +/* S3_X_mask = 0x0f << 24, */ +/* S3_X_shift = 24, */ +/* S3_Y_mask = 0x0f << 28, */ +/* S3_Y_shift = 28, */ + PA_SC_AA_SAMPLE_LOCS_2 = 0x00028c24, +/* S0_X_mask = 0x0f << 0, */ +/* S0_X_shift = 0, */ +/* S0_Y_mask = 0x0f << 4, */ +/* S0_Y_shift = 4, */ +/* S1_X_mask = 0x0f << 8, */ +/* S1_X_shift = 8, */ +/* S1_Y_mask = 0x0f << 12, */ +/* S1_Y_shift = 12, */ +/* S2_X_mask = 0x0f << 16, */ +/* S2_X_shift = 16, */ +/* S2_Y_mask = 0x0f << 20, */ +/* S2_Y_shift = 20, */ +/* S3_X_mask = 0x0f << 24, */ +/* S3_X_shift = 24, */ +/* S3_Y_mask = 0x0f << 28, */ +/* S3_Y_shift = 28, */ + PA_SC_AA_SAMPLE_LOCS_3 = 0x00028c28, +/* S0_X_mask = 0x0f << 0, */ +/* S0_X_shift = 0, */ +/* S0_Y_mask = 0x0f << 4, */ +/* S0_Y_shift = 4, */ +/* S1_X_mask = 0x0f << 8, */ +/* S1_X_shift = 8, */ +/* S1_Y_mask = 0x0f << 12, */ +/* S1_Y_shift = 12, */ +/* S2_X_mask = 0x0f << 16, */ +/* S2_X_shift = 16, */ +/* S2_Y_mask = 0x0f << 20, */ +/* S2_Y_shift = 20, */ +/* S3_X_mask = 0x0f << 24, */ +/* S3_X_shift = 24, */ +/* S3_Y_mask = 0x0f << 28, */ +/* S3_Y_shift = 28, */ + PA_SC_AA_SAMPLE_LOCS_4 = 0x00028c2c, +/* S0_X_mask = 0x0f << 0, */ +/* S0_X_shift = 0, */ +/* S0_Y_mask = 0x0f << 4, */ +/* S0_Y_shift = 4, */ +/* S1_X_mask = 0x0f << 8, */ 
+/* S1_X_shift = 8, */ +/* S1_Y_mask = 0x0f << 12, */ +/* S1_Y_shift = 12, */ +/* S2_X_mask = 0x0f << 16, */ +/* S2_X_shift = 16, */ +/* S2_Y_mask = 0x0f << 20, */ +/* S2_Y_shift = 20, */ +/* S3_X_mask = 0x0f << 24, */ +/* S3_X_shift = 24, */ +/* S3_Y_mask = 0x0f << 28, */ +/* S3_Y_shift = 28, */ + PA_SC_AA_SAMPLE_LOCS_5 = 0x00028c30, +/* S0_X_mask = 0x0f << 0, */ +/* S0_X_shift = 0, */ +/* S0_Y_mask = 0x0f << 4, */ +/* S0_Y_shift = 4, */ +/* S1_X_mask = 0x0f << 8, */ +/* S1_X_shift = 8, */ +/* S1_Y_mask = 0x0f << 12, */ +/* S1_Y_shift = 12, */ +/* S2_X_mask = 0x0f << 16, */ +/* S2_X_shift = 16, */ +/* S2_Y_mask = 0x0f << 20, */ +/* S2_Y_shift = 20, */ +/* S3_X_mask = 0x0f << 24, */ +/* S3_X_shift = 24, */ +/* S3_Y_mask = 0x0f << 28, */ +/* S3_Y_shift = 28, */ + PA_SC_AA_SAMPLE_LOCS_6 = 0x00028c34, +/* S0_X_mask = 0x0f << 0, */ +/* S0_X_shift = 0, */ +/* S0_Y_mask = 0x0f << 4, */ +/* S0_Y_shift = 4, */ +/* S1_X_mask = 0x0f << 8, */ +/* S1_X_shift = 8, */ +/* S1_Y_mask = 0x0f << 12, */ +/* S1_Y_shift = 12, */ +/* S2_X_mask = 0x0f << 16, */ +/* S2_X_shift = 16, */ +/* S2_Y_mask = 0x0f << 20, */ +/* S2_Y_shift = 20, */ +/* S3_X_mask = 0x0f << 24, */ +/* S3_X_shift = 24, */ +/* S3_Y_mask = 0x0f << 28, */ +/* S3_Y_shift = 28, */ + PA_SC_AA_SAMPLE_LOCS_7 = 0x00028c38, +/* S0_X_mask = 0x0f << 0, */ +/* S0_X_shift = 0, */ +/* S0_Y_mask = 0x0f << 4, */ +/* S0_Y_shift = 4, */ +/* S1_X_mask = 0x0f << 8, */ +/* S1_X_shift = 8, */ +/* S1_Y_mask = 0x0f << 12, */ +/* S1_Y_shift = 12, */ +/* S2_X_mask = 0x0f << 16, */ +/* S2_X_shift = 16, */ +/* S2_Y_mask = 0x0f << 20, */ +/* S2_Y_shift = 20, */ +/* S3_X_mask = 0x0f << 24, */ +/* S3_X_shift = 24, */ +/* S3_Y_mask = 0x0f << 28, */ +/* S3_Y_shift = 28, */ + PA_SC_AA_MASK = 0x00028c3c, + VGT_VERTEX_REUSE_BLOCK_CNTL = 0x00028c58, + VTX_REUSE_DEPTH_mask = 0xff << 0, + VTX_REUSE_DEPTH_shift = 0, + VGT_OUT_DEALLOC_CNTL = 0x00028c5c, + DEALLOC_DIST_mask = 0x7f << 0, + DEALLOC_DIST_shift = 0, + CB_COLOR0_BASE = 0x00028c60, + 
CB_COLOR0_BASE_num = 12, + CB_COLOR0_BASE_offset = 51, + CB_COLOR0_PITCH = 0x00028c64, + CB_COLOR0_PITCH_num = 12, + CB_COLOR0_PITCH_offset = 51, + CB_COLOR0_PITCH__TILE_MAX_mask = 0x7ff << 0, + CB_COLOR0_PITCH__TILE_MAX_shift = 0, + CB_COLOR0_SLICE = 0x00028c68, + CB_COLOR0_SLICE_num = 12, + CB_COLOR0_SLICE_offset = 51, + CB_COLOR0_SLICE__TILE_MAX_mask = 0x3fffff << 0, + CB_COLOR0_SLICE__TILE_MAX_shift = 0, + CB_COLOR0_VIEW = 0x00028c6c, + CB_COLOR0_VIEW_num = 12, + CB_COLOR0_VIEW_offset = 51, +/* SLICE_START_mask = 0x7ff << 0, */ +/* SLICE_START_shift = 0, */ +/* SLICE_MAX_mask = 0x7ff << 13, */ +/* SLICE_MAX_shift = 13, */ + CB_COLOR0_INFO = 0x00028c70, + CB_COLOR0_INFO_num = 12, + CB_COLOR0_INFO_offset = 51, + ENDIAN_mask = 0x03 << 0, + ENDIAN_shift = 0, + ENDIAN_NONE = 0x00, + ENDIAN_8IN16 = 0x01, + ENDIAN_8IN32 = 0x02, + ENDIAN_8IN64 = 0x03, + CB_COLOR0_INFO__FORMAT_mask = 0x3f << 2, + CB_COLOR0_INFO__FORMAT_shift = 2, + COLOR_INVALID = 0x00, + COLOR_8 = 0x01, + COLOR_16 = 0x05, + COLOR_16_FLOAT = 0x06, + COLOR_8_8 = 0x07, + COLOR_5_6_5 = 0x08, + COLOR_1_5_5_5 = 0x0a, + COLOR_4_4_4_4 = 0x0b, + COLOR_5_5_5_1 = 0x0c, + COLOR_32 = 0x0d, + COLOR_32_FLOAT = 0x0e, + COLOR_16_16 = 0x0f, + COLOR_16_16_FLOAT = 0x10, + COLOR_8_24 = 0x11, + COLOR_24_8 = 0x13, + COLOR_10_11_11 = 0x15, + COLOR_10_11_11_FLOAT = 0x16, + COLOR_2_10_10_10 = 0x19, + COLOR_8_8_8_8 = 0x1a, + COLOR_10_10_10_2 = 0x1b, + COLOR_X24_8_32_FLOAT = 0x1c, + COLOR_32_32 = 0x1d, + COLOR_32_32_FLOAT = 0x1e, + COLOR_16_16_16_16 = 0x1f, + COLOR_16_16_16_16_FLOAT = 0x20, + COLOR_32_32_32_32 = 0x22, + COLOR_32_32_32_32_FLOAT = 0x23, + CB_COLOR0_INFO__ARRAY_MODE_mask = 0x0f << 8, + CB_COLOR0_INFO__ARRAY_MODE_shift = 8, +/* ARRAY_LINEAR_GENERAL = 0x00, */ +/* ARRAY_LINEAR_ALIGNED = 0x01, */ +/* ARRAY_1D_TILED_THIN1 = 0x02, */ +/* ARRAY_2D_TILED_THIN1 = 0x04, */ + NUMBER_TYPE_mask = 0x07 << 12, + NUMBER_TYPE_shift = 12, + NUMBER_UNORM = 0x00, + NUMBER_SNORM = 0x01, + NUMBER_UINT = 0x04, + NUMBER_SINT = 0x05, + 
NUMBER_SRGB = 0x06, + NUMBER_FLOAT = 0x07, + COMP_SWAP_mask = 0x03 << 15, + COMP_SWAP_shift = 15, + SWAP_STD = 0x00, + SWAP_ALT = 0x01, + SWAP_STD_REV = 0x02, + SWAP_ALT_REV = 0x03, + FAST_CLEAR_bit = 1 << 17, + COMPRESSION_bit = 1 << 18, + BLEND_CLAMP_bit = 1 << 19, + BLEND_BYPASS_bit = 1 << 20, + SIMPLE_FLOAT_bit = 1 << 21, + CB_COLOR0_INFO__ROUND_MODE_bit = 1 << 22, + CB_COLOR0_INFO__TILE_COMPACT_bit = 1 << 23, + SOURCE_FORMAT_mask = 0x03 << 24, + SOURCE_FORMAT_shift = 24, + EXPORT_4C_32BPC = 0x00, + EXPORT_4C_16BPC = 0x01, + RAT_bit = 1 << 26, + RESOURCE_TYPE_mask = 0x07 << 27, + RESOURCE_TYPE_shift = 27, + BUFFER = 0x00, + TEXTURE1D = 0x01, + TEXTURE1DARRAY = 0x02, + TEXTURE2D = 0x03, + TEXTURE2DARRAY = 0x04, + TEXTURE3D = 0x05, + CB_COLOR0_ATTRIB = 0x00028c74, + CB_COLOR0_ATTRIB_num = 12, + CB_COLOR0_ATTRIB_offset = 51, + IGNORE_SHADER_ENGINE_TILING_bit = 1 << 3, + CB_COLOR0_ATTRIB__NON_DISP_TILING_ORDER_bit = 1 << 4, + CB_COLOR0_ATTRIB__TILE_SPLIT_mask = 0x0f << 5, + CB_COLOR0_ATTRIB__TILE_SPLIT_shift = 5, +/* ADDR_SURF_TILE_SPLIT_64B = 0x00, */ +/* ADDR_SURF_TILE_SPLIT_128B = 0x01, */ +/* ADDR_SURF_TILE_SPLIT_256B = 0x02, */ +/* ADDR_SURF_TILE_SPLIT_512B = 0x03, */ +/* ADDR_SURF_TILE_SPLIT_1KB = 0x04, */ +/* ADDR_SURF_TILE_SPLIT_2KB = 0x05, */ +/* ADDR_SURF_TILE_SPLIT_4KB = 0x06, */ + CB_COLOR0_ATTRIB__NUM_BANKS_mask = 0x03 << 10, + CB_COLOR0_ATTRIB__NUM_BANKS_shift = 10, +/* ADDR_SURF_2_BANK = 0x00, */ +/* ADDR_SURF_4_BANK = 0x01, */ +/* ADDR_SURF_8_BANK = 0x02, */ +/* ADDR_SURF_16_BANK = 0x03, */ + CB_COLOR0_ATTRIB__BANK_WIDTH_mask = 0x03 << 13, + CB_COLOR0_ATTRIB__BANK_WIDTH_shift = 13, +/* ADDR_SURF_BANK_WIDTH_1 = 0x00, */ +/* ADDR_SURF_BANK_WIDTH_2 = 0x01, */ +/* ADDR_SURF_BANK_WIDTH_4 = 0x02, */ +/* ADDR_SURF_BANK_WIDTH_8 = 0x03, */ + CB_COLOR0_ATTRIB__BANK_HEIGHT_mask = 0x03 << 16, + CB_COLOR0_ATTRIB__BANK_HEIGHT_shift = 16, +/* ADDR_SURF_BANK_HEIGHT_1 = 0x00, */ +/* ADDR_SURF_BANK_HEIGHT_2 = 0x01, */ +/* ADDR_SURF_BANK_HEIGHT_4 = 0x02, */ +/* 
ADDR_SURF_BANK_HEIGHT_8 = 0x03, */ + CB_COLOR0_ATTRIB__MACRO_TILE_ASPECT_mask = 0x03 << 19, + CB_COLOR0_ATTRIB__MACRO_TILE_ASPECT_shift = 19, +/* ADDR_SURF_MACRO_ASPECT_1 = 0x00, */ +/* ADDR_SURF_MACRO_ASPECT_2 = 0x01, */ +/* ADDR_SURF_MACRO_ASPECT_4 = 0x02, */ +/* ADDR_SURF_MACRO_ASPECT_8 = 0x03, */ + FMASK_BANK_HEIGHT_mask = 0x03 << 22, + FMASK_BANK_HEIGHT_shift = 22, +/* ADDR_SURF_BANK_HEIGHT_1 = 0x00, */ +/* ADDR_SURF_BANK_HEIGHT_2 = 0x01, */ +/* ADDR_SURF_BANK_HEIGHT_4 = 0x02, */ +/* ADDR_SURF_BANK_HEIGHT_8 = 0x03, */ + CB_COLOR0_DIM = 0x00028c78, + CB_COLOR0_DIM_num = 12, + CB_COLOR0_DIM_offset = 51, + WIDTH_MAX_mask = 0xffff << 0, + WIDTH_MAX_shift = 0, + HEIGHT_MAX_mask = 0xffff << 16, + HEIGHT_MAX_shift = 16, + CB_COLOR0_CMASK = 0x00028c7c, + CB_COLOR0_CMASK_num = 8, + CB_COLOR0_CMASK_offset = 60, + CB_COLOR0_CMASK_SLICE = 0x00028c80, + CB_COLOR0_CMASK_SLICE_num = 8, + CB_COLOR0_CMASK_SLICE_offset = 60, + CB_COLOR0_CMASK_SLICE__TILE_MAX_mask = 0x3fff << 0, + CB_COLOR0_CMASK_SLICE__TILE_MAX_shift = 0, + CB_COLOR0_FMASK = 0x00028c84, + CB_COLOR0_FMASK_num = 8, + CB_COLOR0_FMASK_offset = 60, + CB_COLOR0_FMASK_SLICE = 0x00028c88, + CB_COLOR0_FMASK_SLICE_num = 8, + CB_COLOR0_FMASK_SLICE_offset = 60, + CB_COLOR0_FMASK_SLICE__TILE_MAX_mask = 0x3fffff << 0, + CB_COLOR0_FMASK_SLICE__TILE_MAX_shift = 0, + CB_COLOR0_CLEAR_WORD0 = 0x00028c8c, + CB_COLOR0_CLEAR_WORD0_num = 8, + CB_COLOR0_CLEAR_WORD0_offset = 60, + CB_COLOR0_CLEAR_WORD1 = 0x00028c90, + CB_COLOR0_CLEAR_WORD1_num = 8, + CB_COLOR0_CLEAR_WORD1_offset = 60, + CB_COLOR0_CLEAR_WORD2 = 0x00028c94, + CB_COLOR0_CLEAR_WORD2_num = 8, + CB_COLOR0_CLEAR_WORD2_offset = 60, + CB_COLOR0_CLEAR_WORD3 = 0x00028c98, + CB_COLOR0_CLEAR_WORD3_num = 8, + CB_COLOR0_CLEAR_WORD3_offset = 60, + SQ_ALU_CONST_CACHE_HS_0 = 0x00028f00, + SQ_ALU_CONST_CACHE_HS_0_num = 16, + SQ_ALU_CONST_CACHE_LS_0 = 0x00028f40, + SQ_ALU_CONST_CACHE_LS_0_num = 16, + SQ_ALU_CONST_BUFFER_SIZE_HS_0 = 0x00028f80, + SQ_ALU_CONST_BUFFER_SIZE_HS_0_num = 16, + 
SQ_ALU_CONST_BUFFER_SIZE_HS_0__DATA_mask = 0x1ff << 0, + SQ_ALU_CONST_BUFFER_SIZE_HS_0__DATA_shift = 0, + SQ_ALU_CONST_BUFFER_SIZE_LS_0 = 0x00028fc0, + SQ_ALU_CONST_BUFFER_SIZE_LS_0_num = 16, + SQ_ALU_CONST_BUFFER_SIZE_LS_0__DATA_mask = 0x1ff << 0, + SQ_ALU_CONST_BUFFER_SIZE_LS_0__DATA_shift = 0, + SQ_VTX_CONSTANT_WORD0_0 = 0x00030000, + SQ_TEX_RESOURCE_WORD0_0 = 0x00030000, + DIM_mask = 0x07 << 0, + DIM_shift = 0, + SQ_TEX_DIM_1D = 0x00, + SQ_TEX_DIM_2D = 0x01, + SQ_TEX_DIM_3D = 0x02, + SQ_TEX_DIM_CUBEMAP = 0x03, + SQ_TEX_DIM_1D_ARRAY = 0x04, + SQ_TEX_DIM_2D_ARRAY = 0x05, + SQ_TEX_DIM_2D_MSAA = 0x06, + SQ_TEX_DIM_2D_ARRAY_MSAA = 0x07, +/* IGNORE_SHADER_ENGINE_TILING_bit = 1 << 3, */ + SQ_TEX_RESOURCE_WORD0_0__NON_DISP_TILING_ORDER_bit= 1 << 5, + PITCH_mask = 0xfff << 6, + PITCH_shift = 6, + TEX_WIDTH_mask = 0x3fff << 18, + TEX_WIDTH_shift = 18, + SQ_VTX_CONSTANT_WORD1_0 = 0x00030004, + SQ_TEX_RESOURCE_WORD1_0 = 0x00030004, + TEX_HEIGHT_mask = 0x3fff << 0, + TEX_HEIGHT_shift = 0, + TEX_DEPTH_mask = 0x1fff << 14, + TEX_DEPTH_shift = 14, + SQ_TEX_RESOURCE_WORD1_0__ARRAY_MODE_mask = 0x0f << 28, + SQ_TEX_RESOURCE_WORD1_0__ARRAY_MODE_shift = 28, + SQ_VTX_CONSTANT_WORD2_0 = 0x00030008, + BASE_ADDRESS_HI_mask = 0xff << 0, + BASE_ADDRESS_HI_shift = 0, + SQ_VTX_CONSTANT_WORD2_0__STRIDE_mask = 0x7ff << 8, + SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift = 8, + SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit = 1 << 19, + SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_mask = 0x3f << 20, + SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift = 20, + SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask = 0x03 << 26, + SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift = 26, +/* SQ_NUM_FORMAT_NORM = 0x00, */ +/* SQ_NUM_FORMAT_INT = 0x01, */ +/* SQ_NUM_FORMAT_SCALED = 0x02, */ + SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit = 1 << 28, + SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit = 1 << 29, + SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_mask = 0x03 << 30, + SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift = 30, +/* SQ_ENDIAN_NONE = 0x00, */ +/* 
SQ_ENDIAN_8IN16 = 0x01, */ +/* SQ_ENDIAN_8IN32 = 0x02, */ + SQ_TEX_RESOURCE_WORD2_0 = 0x00030008, + SQ_VTX_CONSTANT_WORD3_0 = 0x0003000c, + SQ_VTX_CONSTANT_WORD3_0__UNCACHED_bit = 1 << 2, + SQ_VTX_CONSTANT_WORD3_0__DST_SEL_X_mask = 0x07 << 3, + SQ_VTX_CONSTANT_WORD3_0__DST_SEL_X_shift = 3, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ + SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Y_mask = 0x07 << 6, + SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Y_shift = 6, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ + SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Z_mask = 0x07 << 9, + SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Z_shift = 9, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ + SQ_VTX_CONSTANT_WORD3_0__DST_SEL_W_mask = 0x07 << 12, + SQ_VTX_CONSTANT_WORD3_0__DST_SEL_W_shift = 12, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ + SQ_TEX_RESOURCE_WORD3_0 = 0x0003000c, + SQ_TEX_RESOURCE_WORD4_0 = 0x00030010, + FORMAT_COMP_X_mask = 0x03 << 0, + FORMAT_COMP_X_shift = 0, + SQ_FORMAT_COMP_UNSIGNED = 0x00, + SQ_FORMAT_COMP_SIGNED = 0x01, + SQ_FORMAT_COMP_UNSIGNED_BIASED = 0x02, + FORMAT_COMP_Y_mask = 0x03 << 2, + FORMAT_COMP_Y_shift = 2, +/* SQ_FORMAT_COMP_UNSIGNED = 0x00, */ +/* SQ_FORMAT_COMP_SIGNED = 0x01, */ +/* SQ_FORMAT_COMP_UNSIGNED_BIASED = 0x02, */ + FORMAT_COMP_Z_mask = 0x03 << 4, + FORMAT_COMP_Z_shift = 4, +/* SQ_FORMAT_COMP_UNSIGNED = 0x00, */ +/* SQ_FORMAT_COMP_SIGNED = 0x01, */ +/* SQ_FORMAT_COMP_UNSIGNED_BIASED = 0x02, */ + FORMAT_COMP_W_mask = 0x03 << 6, + FORMAT_COMP_W_shift = 6, +/* SQ_FORMAT_COMP_UNSIGNED = 0x00, */ +/* SQ_FORMAT_COMP_SIGNED = 0x01, */ +/* SQ_FORMAT_COMP_UNSIGNED_BIASED = 0x02, */ + 
SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_mask = 0x03 << 8, + SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift = 8, +/* SQ_NUM_FORMAT_NORM = 0x00, */ +/* SQ_NUM_FORMAT_INT = 0x01, */ +/* SQ_NUM_FORMAT_SCALED = 0x02, */ + SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit = 1 << 10, + SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit = 1 << 11, + SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_mask = 0x03 << 12, + SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift = 12, +/* SQ_ENDIAN_NONE = 0x00, */ +/* SQ_ENDIAN_8IN16 = 0x01, */ +/* SQ_ENDIAN_8IN32 = 0x02, */ + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask = 0x07 << 16, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift = 16, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask = 0x07 << 19, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift = 19, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask = 0x07 << 22, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift = 22, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask = 0x07 << 25, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift = 25, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ + BASE_LEVEL_mask = 0x0f << 28, + BASE_LEVEL_shift = 28, + SQ_VTX_CONSTANT_WORD4_0 = 0x00030010, + SQ_TEX_RESOURCE_WORD5_0 = 0x00030014, + LAST_LEVEL_mask = 0x0f << 0, + LAST_LEVEL_shift = 0, + BASE_ARRAY_mask = 0x1fff << 4, + BASE_ARRAY_shift = 4, + LAST_ARRAY_mask = 0x1fff << 17, + LAST_ARRAY_shift = 17, + SQ_TEX_RESOURCE_WORD6_0 = 0x00030018, + PERF_MODULATION_mask = 0x07 << 3, + PERF_MODULATION_shift = 3, + INTERLACED_bit = 1 << 6, + 
SQ_TEX_RESOURCE_WORD6_0__MIN_LOD_mask = 0xfff << 8, + SQ_TEX_RESOURCE_WORD6_0__MIN_LOD_shift = 8, + SQ_TEX_RESOURCE_WORD6_0__TILE_SPLIT_mask = 0x07 << 29, + SQ_TEX_RESOURCE_WORD6_0__TILE_SPLIT_shift = 29, + SQ_ADDR_SURF_TILE_SPLIT_64B = 0x00, + SQ_ADDR_SURF_TILE_SPLIT_128B = 0x01, + SQ_ADDR_SURF_TILE_SPLIT_256B = 0x02, + SQ_ADDR_SURF_TILE_SPLIT_512B = 0x03, + SQ_ADDR_SURF_TILE_SPLIT_1KB = 0x04, + SQ_ADDR_SURF_TILE_SPLIT_2KB = 0x05, + SQ_ADDR_SURF_TILE_SPLIT_4KB = 0x06, + SQ_VTX_CONSTANT_WORD7_0 = 0x0003001c, + SQ_VTX_CONSTANT_WORD7_0__TYPE_mask = 0x03 << 30, + SQ_VTX_CONSTANT_WORD7_0__TYPE_shift = 30, + SQ_TEX_VTX_INVALID_TEXTURE = 0x00, + SQ_TEX_VTX_INVALID_BUFFER = 0x01, + SQ_TEX_VTX_VALID_TEXTURE = 0x02, + SQ_TEX_VTX_VALID_BUFFER = 0x03, + SQ_TEX_RESOURCE_WORD7_0 = 0x0003001c, + SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask = 0x3f << 0, + SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift = 0, + SQ_TEX_RESOURCE_WORD7_0__MACRO_TILE_ASPECT_mask = 0x03 << 6, + SQ_TEX_RESOURCE_WORD7_0__MACRO_TILE_ASPECT_shift = 6, + SQ_ADDR_SURF_MACRO_ASPECT_1 = 0x00, + SQ_ADDR_SURF_MACRO_ASPECT_2 = 0x01, + SQ_ADDR_SURF_MACRO_ASPECT_4 = 0x02, + SQ_ADDR_SURF_MACRO_ASPECT_8 = 0x03, + SQ_TEX_RESOURCE_WORD7_0__BANK_WIDTH_mask = 0x03 << 8, + SQ_TEX_RESOURCE_WORD7_0__BANK_WIDTH_shift = 8, + SQ_ADDR_SURF_BANK_WH_1 = 0x00, + SQ_ADDR_SURF_BANK_WH_2 = 0x01, + SQ_ADDR_SURF_BANK_WH_4 = 0x02, + SQ_ADDR_SURF_BANK_WH_8 = 0x03, + SQ_TEX_RESOURCE_WORD7_0__BANK_HEIGHT_mask = 0x03 << 10, + SQ_TEX_RESOURCE_WORD7_0__BANK_HEIGHT_shift = 10, +/* SQ_ADDR_SURF_BANK_WH_1 = 0x00, */ +/* SQ_ADDR_SURF_BANK_WH_2 = 0x01, */ +/* SQ_ADDR_SURF_BANK_WH_4 = 0x02, */ +/* SQ_ADDR_SURF_BANK_WH_8 = 0x03, */ + DEPTH_SAMPLE_ORDER_bit = 1 << 15, + SQ_TEX_RESOURCE_WORD7_0__NUM_BANKS_mask = 0x03 << 16, + SQ_TEX_RESOURCE_WORD7_0__NUM_BANKS_shift = 16, + SQ_ADDR_SURF_2_BANK = 0x00, + SQ_ADDR_SURF_4_BANK = 0x01, + SQ_ADDR_SURF_8_BANK = 0x02, + SQ_ADDR_SURF_16_BANK = 0x03, + SQ_TEX_RESOURCE_WORD7_0__TYPE_mask = 0x03 << 30, + 
SQ_TEX_RESOURCE_WORD7_0__TYPE_shift = 30, +/* SQ_TEX_VTX_INVALID_TEXTURE = 0x00, */ +/* SQ_TEX_VTX_INVALID_BUFFER = 0x01, */ +/* SQ_TEX_VTX_VALID_TEXTURE = 0x02, */ +/* SQ_TEX_VTX_VALID_BUFFER = 0x03, */ + SQ_LOOP_CONST_DX10_0 = 0x0003a200, + SQ_LOOP_CONST_0 = 0x0003a200, + SQ_LOOP_CONST_0__COUNT_mask = 0xfff << 0, + SQ_LOOP_CONST_0__COUNT_shift = 0, + INIT_mask = 0xfff << 12, + INIT_shift = 12, + INC_mask = 0xff << 24, + INC_shift = 24, + SQ_JUMPTABLE_CONST_0 = 0x0003a200, + CONST_A_mask = 0xff << 0, + CONST_A_shift = 0, + CONST_B_mask = 0xff << 8, + CONST_B_shift = 8, + CONST_C_mask = 0xff << 16, + CONST_C_shift = 16, + CONST_D_mask = 0xff << 24, + CONST_D_shift = 24, + SQ_BOOL_CONST_0 = 0x0003a500, + SQ_BOOL_CONST_0_num = 6, + SQ_TEX_SAMPLER_WORD0_0 = 0x0003c000, + SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_mask = 0x07 << 0, + SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift = 0, + SQ_TEX_WRAP = 0x00, + SQ_TEX_MIRROR = 0x01, + SQ_TEX_CLAMP_LAST_TEXEL = 0x02, + SQ_TEX_MIRROR_ONCE_LAST_TEXEL = 0x03, + SQ_TEX_CLAMP_HALF_BORDER = 0x04, + SQ_TEX_MIRROR_ONCE_HALF_BORDER = 0x05, + SQ_TEX_CLAMP_BORDER = 0x06, + SQ_TEX_MIRROR_ONCE_BORDER = 0x07, + CLAMP_Y_mask = 0x07 << 3, + CLAMP_Y_shift = 3, +/* SQ_TEX_WRAP = 0x00, */ +/* SQ_TEX_MIRROR = 0x01, */ +/* SQ_TEX_CLAMP_LAST_TEXEL = 0x02, */ +/* SQ_TEX_MIRROR_ONCE_LAST_TEXEL = 0x03, */ +/* SQ_TEX_CLAMP_HALF_BORDER = 0x04, */ +/* SQ_TEX_MIRROR_ONCE_HALF_BORDER = 0x05, */ +/* SQ_TEX_CLAMP_BORDER = 0x06, */ +/* SQ_TEX_MIRROR_ONCE_BORDER = 0x07, */ + CLAMP_Z_mask = 0x07 << 6, + CLAMP_Z_shift = 6, +/* SQ_TEX_WRAP = 0x00, */ +/* SQ_TEX_MIRROR = 0x01, */ +/* SQ_TEX_CLAMP_LAST_TEXEL = 0x02, */ +/* SQ_TEX_MIRROR_ONCE_LAST_TEXEL = 0x03, */ +/* SQ_TEX_CLAMP_HALF_BORDER = 0x04, */ +/* SQ_TEX_MIRROR_ONCE_HALF_BORDER = 0x05, */ +/* SQ_TEX_CLAMP_BORDER = 0x06, */ +/* SQ_TEX_MIRROR_ONCE_BORDER = 0x07, */ + XY_MAG_FILTER_mask = 0x03 << 9, + XY_MAG_FILTER_shift = 9, + SQ_TEX_XY_FILTER_POINT = 0x00, + SQ_TEX_XY_FILTER_BILINEAR = 0x01, + XY_MIN_FILTER_mask = 0x03 << 
11, + XY_MIN_FILTER_shift = 11, +/* SQ_TEX_XY_FILTER_POINT = 0x00, */ +/* SQ_TEX_XY_FILTER_BILINEAR = 0x01, */ + Z_FILTER_mask = 0x03 << 13, + Z_FILTER_shift = 13, + SQ_TEX_Z_FILTER_NONE = 0x00, + SQ_TEX_Z_FILTER_POINT = 0x01, + SQ_TEX_Z_FILTER_LINEAR = 0x02, + MIP_FILTER_mask = 0x03 << 15, + MIP_FILTER_shift = 15, +/* SQ_TEX_Z_FILTER_NONE = 0x00, */ +/* SQ_TEX_Z_FILTER_POINT = 0x01, */ +/* SQ_TEX_Z_FILTER_LINEAR = 0x02, */ + BORDER_COLOR_TYPE_mask = 0x03 << 20, + BORDER_COLOR_TYPE_shift = 20, + SQ_TEX_BORDER_COLOR_TRANS_BLACK = 0x00, + SQ_TEX_BORDER_COLOR_OPAQUE_BLACK = 0x01, + SQ_TEX_BORDER_COLOR_OPAQUE_WHITE = 0x02, + SQ_TEX_BORDER_COLOR_REGISTER = 0x03, + DEPTH_COMPARE_FUNCTION_mask = 0x07 << 22, + DEPTH_COMPARE_FUNCTION_shift = 22, + SQ_TEX_DEPTH_COMPARE_NEVER = 0x00, + SQ_TEX_DEPTH_COMPARE_LESS = 0x01, + SQ_TEX_DEPTH_COMPARE_EQUAL = 0x02, + SQ_TEX_DEPTH_COMPARE_LESSEQUAL = 0x03, + SQ_TEX_DEPTH_COMPARE_GREATER = 0x04, + SQ_TEX_DEPTH_COMPARE_NOTEQUAL = 0x05, + SQ_TEX_DEPTH_COMPARE_GREATEREQUAL = 0x06, + SQ_TEX_DEPTH_COMPARE_ALWAYS = 0x07, + CHROMA_KEY_mask = 0x03 << 25, + CHROMA_KEY_shift = 25, + SQ_TEX_CHROMA_KEY_DISABLED = 0x00, + SQ_TEX_CHROMA_KEY_KILL = 0x01, + SQ_TEX_CHROMA_KEY_BLEND = 0x02, + SQ_TEX_SAMPLER_WORD1_0 = 0x0003c004, + SQ_TEX_SAMPLER_WORD1_0__MIN_LOD_mask = 0xfff << 0, + SQ_TEX_SAMPLER_WORD1_0__MIN_LOD_shift = 0, + MAX_LOD_mask = 0xfff << 12, + MAX_LOD_shift = 12, + PERF_MIP_mask = 0x0f << 24, + PERF_MIP_shift = 24, + PERF_Z_mask = 0x0f << 28, + PERF_Z_shift = 28, + SQ_TEX_SAMPLER_WORD2_0 = 0x0003c008, + SQ_TEX_SAMPLER_WORD2_0__LOD_BIAS_mask = 0x3fff << 0, + SQ_TEX_SAMPLER_WORD2_0__LOD_BIAS_shift = 0, + LOD_BIAS_SEC_mask = 0x3f << 14, + LOD_BIAS_SEC_shift = 14, + MC_COORD_TRUNCATE_bit = 1 << 20, + SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit = 1 << 21, + TRUNCATE_COORD_bit = 1 << 28, + SQ_TEX_SAMPLER_WORD2_0__DISABLE_CUBE_WRAP_bit = 1 << 29, + SQ_TEX_SAMPLER_WORD2_0__TYPE_bit = 1 << 31, + SQ_VTX_BASE_VTX_LOC = 0x0003cff0, + SQ_VTX_START_INST_LOC 
= 0x0003cff4, + SQ_TEX_SAMPLER_CLEAR = 0x0003ff00, + SQ_TEX_RESOURCE_CLEAR = 0x0003ff04, + SQ_LOOP_BOOL_CLEAR = 0x0003ff08, + +} ; + +#endif /* _EVERGREEN_REG_AUTO_H */ + diff --git a/evergreen_shader.c b/evergreen_shader.c new file mode 100644 index 0000000..8703424 --- /dev/null +++ b/evergreen_shader.c @@ -0,0 +1,3155 @@ +/* + * Copyright 2010 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Author: Alex Deucher <alexander.deucher@amd.com> + * + */ + +#include "evergreen_shader.h" +#include "evergreen_reg.h" + +/* solid vs --------------------------------------- */ +int evergreen_solid_vs(RADEONChipFamily ChipSet, uint32_t* shader) +{ + int i = 0; + + /* 0 */ + shader[i++] = CF_DWORD0(ADDR(4), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(1), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_VC), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + /* 1 */ + shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), + TYPE(SQ_EXPORT_POS), + RW_GPR(1), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(0)); + shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + BURST_COUNT(1), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_EXPORT_DONE), + MARK(0), + BARRIER(1)); + /* 2 - always export a param whether it's used or not */ + shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), + TYPE(SQ_EXPORT_PARAM), + RW_GPR(0), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(0)); + shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + BURST_COUNT(0), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(1), + CF_INST(SQ_CF_INST_EXPORT_DONE), + MARK(0), + BARRIER(0)); + /* 3 - padding */ + shader[i++] = 0x00000000; + shader[i++] = 0x00000000; + /* 4/5 */ + shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), + FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), + FETCH_WHOLE_QUAD(0), + BUFFER_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + SRC_SEL_X(SQ_SEL_X), + MEGA_FETCH_COUNT(8)); + shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), + DST_REL(0), + DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_Y), + DST_SEL_Z(SQ_SEL_0), + DST_SEL_W(SQ_SEL_1), + USE_CONST_FIELDS(0), + DATA_FORMAT(FMT_32_32_FLOAT), + NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), + 
FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), + SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); + shader[i++] = VTX_DWORD2(OFFSET(0), +#if X_BYTE_ORDER == X_BIG_ENDIAN + ENDIAN_SWAP(SQ_ENDIAN_8IN32), +#else + ENDIAN_SWAP(SQ_ENDIAN_NONE), +#endif + CONST_BUF_NO_STRIDE(0), + MEGA_FETCH(1), + ALT_CONST(0), + BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); + shader[i++] = VTX_DWORD_PAD; + + return i; +} + +/* solid ps --------------------------------------- */ +int evergreen_solid_ps(RADEONChipFamily ChipSet, uint32_t* shader) +{ + int i = 0; + + /* 0 */ + shader[i++] = CF_ALU_DWORD0(ADDR(2), + KCACHE_BANK0(0), + KCACHE_BANK1(0), + KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); + shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), + KCACHE_ADDR0(0), + KCACHE_ADDR1(0), + I_COUNT(4), + ALT_CONST(0), + CF_INST(SQ_CF_INST_ALU), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + /* 1 */ + shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), + TYPE(SQ_EXPORT_PIXEL), + RW_GPR(0), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(1)); + shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + BURST_COUNT(1), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(1), + CF_INST(SQ_CF_INST_EXPORT_DONE), + MARK(0), + BARRIER(1)); + + /* 2 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MOV), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(1)); + /* 3 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + 
SRC1_SEL(ALU_SRC_GPR_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Y), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MOV), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(1)); + /* 4 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Z), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Z), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MOV), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(1)); + /* 5 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_W), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_W), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MOV), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_W), + CLAMP(1)); + + return i; +} + +/* copy vs --------------------------------------- */ +int evergreen_copy_vs(RADEONChipFamily ChipSet, uint32_t* shader) +{ + int i = 0; + + /* 0 */ + shader[i++] = CF_DWORD0(ADDR(4), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(2), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_VC), + 
WHOLE_QUAD_MODE(0), + BARRIER(1)); + /* 1 */ + shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), + TYPE(SQ_EXPORT_POS), + RW_GPR(1), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(0)); + shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + BURST_COUNT(0), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_EXPORT_DONE), + MARK(0), + BARRIER(1)); + /* 2 */ + shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), + TYPE(SQ_EXPORT_PARAM), + RW_GPR(0), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(0)); + shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + BURST_COUNT(0), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(1), + CF_INST(SQ_CF_INST_EXPORT_DONE), + MARK(0), + BARRIER(0)); + /* 3 */ + shader[i++] = 0x00000000; + shader[i++] = 0x00000000; + /* 4/5 */ + shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), + FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), + FETCH_WHOLE_QUAD(0), + BUFFER_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + SRC_SEL_X(SQ_SEL_X), + MEGA_FETCH_COUNT(16)); + shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), + DST_REL(0), + DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_Y), + DST_SEL_Z(SQ_SEL_0), + DST_SEL_W(SQ_SEL_1), + USE_CONST_FIELDS(0), + DATA_FORMAT(FMT_32_32_FLOAT), + NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), + FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), + SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); + shader[i++] = VTX_DWORD2(OFFSET(0), +#if X_BYTE_ORDER == X_BIG_ENDIAN + ENDIAN_SWAP(SQ_ENDIAN_8IN32), +#else + ENDIAN_SWAP(SQ_ENDIAN_NONE), +#endif + CONST_BUF_NO_STRIDE(0), + MEGA_FETCH(1), + ALT_CONST(0), + BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); + shader[i++] = VTX_DWORD_PAD; + /* 6/7 */ + shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), + FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), + FETCH_WHOLE_QUAD(0), + BUFFER_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + SRC_SEL_X(SQ_SEL_X), + 
MEGA_FETCH_COUNT(8)); + shader[i++] = VTX_DWORD1_GPR(DST_GPR(0), + DST_REL(0), + DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_Y), + DST_SEL_Z(SQ_SEL_0), + DST_SEL_W(SQ_SEL_1), + USE_CONST_FIELDS(0), + DATA_FORMAT(FMT_32_32_FLOAT), + NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), + FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), + SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); + shader[i++] = VTX_DWORD2(OFFSET(8), +#if X_BYTE_ORDER == X_BIG_ENDIAN + ENDIAN_SWAP(SQ_ENDIAN_8IN32), +#else + ENDIAN_SWAP(SQ_ENDIAN_NONE), +#endif + CONST_BUF_NO_STRIDE(0), + MEGA_FETCH(0), + ALT_CONST(0), + BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); + shader[i++] = VTX_DWORD_PAD; + + return i; +} + +/* copy ps --------------------------------------- */ +int evergreen_copy_ps(RADEONChipFamily ChipSet, uint32_t* shader) +{ + int i = 0; + + /* CF INST 0 */ + shader[i++] = CF_ALU_DWORD0(ADDR(3), + KCACHE_BANK0(0), + KCACHE_BANK1(0), + KCACHE_MODE0(SQ_CF_KCACHE_NOP)); + shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), + KCACHE_ADDR0(0), + KCACHE_ADDR1(0), + I_COUNT(4), + ALT_CONST(0), + CF_INST(SQ_CF_INST_ALU), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + /* CF INST 1 */ + shader[i++] = CF_DWORD0(ADDR(8), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(1), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_TC), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + /* CF INST 2 */ + shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), + TYPE(SQ_EXPORT_PIXEL), + RW_GPR(0), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(1)); + shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + BURST_COUNT(1), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(1), + CF_INST(SQ_CF_INST_EXPORT_DONE), + MARK(0), + BARRIER(1)); + + /* 3 interpolate tex coords */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + 
SRC0_NEG(0), + SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_INTERP_XY), + BANK_SWIZZLE(SQ_ALU_VEC_210), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(0)); + /* 4 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_INTERP_XY), + BANK_SWIZZLE(SQ_ALU_VEC_210), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(0)); + /* 5 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_INTERP_XY), + BANK_SWIZZLE(SQ_ALU_VEC_210), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(0)); + /* 6 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + 
WRITE_MASK(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_INTERP_XY), + BANK_SWIZZLE(SQ_ALU_VEC_210), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_W), + CLAMP(0)); + + /* 7 */ + shader[i++] = 0x00000000; + shader[i++] = 0x00000000; + + /* 8/9 TEX INST 0 */ + shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), + INST_MOD(0), + FETCH_WHOLE_QUAD(0), + RESOURCE_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + ALT_CONST(0), + RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), + SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); + shader[i++] = TEX_DWORD1(DST_GPR(0), + DST_REL(ABSOLUTE), + DST_SEL_X(SQ_SEL_X), /* R */ + DST_SEL_Y(SQ_SEL_Y), /* G */ + DST_SEL_Z(SQ_SEL_Z), /* B */ + DST_SEL_W(SQ_SEL_W), /* A */ + LOD_BIAS(0), + COORD_TYPE_X(TEX_UNNORMALIZED), + COORD_TYPE_Y(TEX_UNNORMALIZED), + COORD_TYPE_Z(TEX_UNNORMALIZED), + COORD_TYPE_W(TEX_UNNORMALIZED)); + shader[i++] = TEX_DWORD2(OFFSET_X(0), + OFFSET_Y(0), + OFFSET_Z(0), + SAMPLER_ID(0), + SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_0), + SRC_SEL_W(SQ_SEL_1)); + shader[i++] = TEX_DWORD_PAD; + + return i; +} + +int evergreen_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader) +{ + int i = 0; + + /* 0 */ + shader[i++] = CF_DWORD0(ADDR(6), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(2), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_VC), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + + /* 1 - ALU */ + shader[i++] = CF_ALU_DWORD0(ADDR(4), + KCACHE_BANK0(0), + KCACHE_BANK1(0), + KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); + shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), + KCACHE_ADDR0(0), + KCACHE_ADDR1(0), + I_COUNT(2), + ALT_CONST(0), + CF_INST(SQ_CF_INST_ALU), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + + /* 2 */ + shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), + TYPE(SQ_EXPORT_POS), + RW_GPR(1), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(3)); + shader[i++] = 
CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + BURST_COUNT(1), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_EXPORT_DONE), + MARK(0), + BARRIER(1)); + /* 3 */ + shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), + TYPE(SQ_EXPORT_PARAM), + RW_GPR(0), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(3)); + shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + BURST_COUNT(1), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(1), + CF_INST(SQ_CF_INST_EXPORT_DONE), + MARK(0), + BARRIER(0)); + + + /* 4 texX / w */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MUL), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(0)); + + /* 5 texY / h */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Y), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MUL), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(0)); + + /* 6/7 */ + shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), + FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), + FETCH_WHOLE_QUAD(0), + BUFFER_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + 
SRC_SEL_X(SQ_SEL_X), + MEGA_FETCH_COUNT(16)); + shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), + DST_REL(ABSOLUTE), + DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_Y), + DST_SEL_Z(SQ_SEL_0), + DST_SEL_W(SQ_SEL_1), + USE_CONST_FIELDS(0), + DATA_FORMAT(FMT_32_32_FLOAT), + NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), + FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), + SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); + shader[i++] = VTX_DWORD2(OFFSET(0), +#if X_BYTE_ORDER == X_BIG_ENDIAN + ENDIAN_SWAP(SQ_ENDIAN_8IN32), +#else + ENDIAN_SWAP(SQ_ENDIAN_NONE), +#endif + CONST_BUF_NO_STRIDE(0), + MEGA_FETCH(1), + ALT_CONST(0), + BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); + shader[i++] = VTX_DWORD_PAD; + /* 8/9 */ + shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), + FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), + FETCH_WHOLE_QUAD(0), + BUFFER_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + SRC_SEL_X(SQ_SEL_X), + MEGA_FETCH_COUNT(8)); + shader[i++] = VTX_DWORD1_GPR(DST_GPR(0), + DST_REL(ABSOLUTE), + DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_Y), + DST_SEL_Z(SQ_SEL_0), + DST_SEL_W(SQ_SEL_1), + USE_CONST_FIELDS(0), + DATA_FORMAT(FMT_32_32_FLOAT), + NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), + FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), + SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); + shader[i++] = VTX_DWORD2(OFFSET(8), +#if X_BYTE_ORDER == X_BIG_ENDIAN + ENDIAN_SWAP(SQ_ENDIAN_8IN32), +#else + ENDIAN_SWAP(SQ_ENDIAN_NONE), +#endif + CONST_BUF_NO_STRIDE(0), + MEGA_FETCH(0), + ALT_CONST(0), + BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); + shader[i++] = VTX_DWORD_PAD; + + return i; +} + +int evergreen_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) +{ + int i = 0; + + /* 0 */ + shader[i++] = CF_ALU_DWORD0(ADDR(5), + KCACHE_BANK0(0), + KCACHE_BANK1(0), + KCACHE_MODE0(SQ_CF_KCACHE_NOP)); + shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), + KCACHE_ADDR0(0), + KCACHE_ADDR1(0), + I_COUNT(4), + ALT_CONST(0), + CF_INST(SQ_CF_INST_ALU), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + /* 1 */ + shader[i++] = CF_DWORD0(ADDR(21), + 
JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_BOOL), + I_COUNT(0), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_CALL), + WHOLE_QUAD_MODE(0), + BARRIER(0)); + /* 2 */ + shader[i++] = CF_DWORD0(ADDR(30), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_NOT_BOOL), + I_COUNT(0), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_CALL), + WHOLE_QUAD_MODE(0), + BARRIER(0)); + /* 3 */ + shader[i++] = CF_ALU_DWORD0(ADDR(9), + KCACHE_BANK0(0), + KCACHE_BANK1(0), + KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); + shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), + KCACHE_ADDR0(0), + KCACHE_ADDR1(0), + I_COUNT(12), + ALT_CONST(0), + CF_INST(SQ_CF_INST_ALU), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + /* 4 */ + shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), + TYPE(SQ_EXPORT_PIXEL), + RW_GPR(2), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(3)); + shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + BURST_COUNT(1), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(1), + CF_INST(SQ_CF_INST_EXPORT_DONE), + MARK(0), + BARRIER(1)); + /* 5 interpolate tex coords */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_INTERP_XY), + BANK_SWIZZLE(SQ_ALU_VEC_210), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(0)); + /* 6 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), 
+ SRC0_NEG(0), + SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_INTERP_XY), + BANK_SWIZZLE(SQ_ALU_VEC_210), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(0)); + /* 7 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_INTERP_XY), + BANK_SWIZZLE(SQ_ALU_VEC_210), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(0)); + /* 8 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_INTERP_XY), + BANK_SWIZZLE(SQ_ALU_VEC_210), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_W), + CLAMP(0)); + + /* 9,10,11,12 */ + /* r2.x = MAD(c0.w, r1.x, c0.x) */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_W), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = 
ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_X), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(0)); + /* r2.y = MAD(c0.w, r1.x, c0.y) */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_W), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_Y), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(0)); + /* r2.z = MAD(c0.w, r1.x, c0.z) */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_W), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_Z), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(0)); + /* r2.w = MAD(0, 0, 1) */ + shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(SQ_ALU_SRC_0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_X), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_W), + CLAMP(0)); + + /* 13,14,15,16 */ + /* r2.x = MAD(c1.x, r1.y, pv.x) */ + 
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Y), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_X), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(0)); + /* r2.y = MAD(c1.y, r1.y, pv.y) */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Y), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_Y), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(0)); + /* r2.z = MAD(c1.z, r1.y, pv.z) */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Z), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Y), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_Z), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(0)); + /* r2.w = MAD(0, 0, 1) */ + shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(SQ_ALU_SRC_0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = 
ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_W), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_W), + CLAMP(0)); + /* 17,18,19,20 */ + /* r2.x = MAD(c2.x, r1.z, pv.x) */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Z), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_X), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(1)); + /* r2.y = MAD(c2.y, r1.z, pv.y) */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Z), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_Y), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(1)); + /* r2.z = MAD(c2.z, r1.z, pv.z) */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Z), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Z), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_Z), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(1)); + /* r2.w = MAD(0, 0, 1) */ + shader[i++] = 
ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(SQ_ALU_SRC_0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_X), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_W), + CLAMP(1)); + + /* 21 */ + shader[i++] = CF_DWORD0(ADDR(24), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(3), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_TC), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + /* 22 */ + shader[i++] = CF_DWORD0(ADDR(0), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(0), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_RETURN), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + /* 23 */ + shader[i++] = 0x00000000; + shader[i++] = 0x00000000; + /* 24/25 */ + shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), + INST_MOD(0), + FETCH_WHOLE_QUAD(0), + RESOURCE_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + ALT_CONST(0), + RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), + SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); + shader[i++] = TEX_DWORD1(DST_GPR(1), + DST_REL(ABSOLUTE), + DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_MASK), + DST_SEL_Z(SQ_SEL_MASK), + DST_SEL_W(SQ_SEL_1), + LOD_BIAS(0), + COORD_TYPE_X(TEX_NORMALIZED), + COORD_TYPE_Y(TEX_NORMALIZED), + COORD_TYPE_Z(TEX_NORMALIZED), + COORD_TYPE_W(TEX_NORMALIZED)); + shader[i++] = TEX_DWORD2(OFFSET_X(0), + OFFSET_Y(0), + OFFSET_Z(0), + SAMPLER_ID(0), + SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_0), + SRC_SEL_W(SQ_SEL_1)); + shader[i++] = TEX_DWORD_PAD; + /* 26/27 */ + shader[i++] = 
TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), + INST_MOD(0), + FETCH_WHOLE_QUAD(0), + RESOURCE_ID(1), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + ALT_CONST(0), + RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), + SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); + shader[i++] = TEX_DWORD1(DST_GPR(1), + DST_REL(ABSOLUTE), + DST_SEL_X(SQ_SEL_MASK), + DST_SEL_Y(SQ_SEL_MASK), + DST_SEL_Z(SQ_SEL_X), + DST_SEL_W(SQ_SEL_MASK), + LOD_BIAS(0), + COORD_TYPE_X(TEX_NORMALIZED), + COORD_TYPE_Y(TEX_NORMALIZED), + COORD_TYPE_Z(TEX_NORMALIZED), + COORD_TYPE_W(TEX_NORMALIZED)); + shader[i++] = TEX_DWORD2(OFFSET_X(0), + OFFSET_Y(0), + OFFSET_Z(0), + SAMPLER_ID(1), + SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_0), + SRC_SEL_W(SQ_SEL_1)); + shader[i++] = TEX_DWORD_PAD; + /* 28/29 */ + shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), + INST_MOD(0), + FETCH_WHOLE_QUAD(0), + RESOURCE_ID(2), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + ALT_CONST(0), + RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), + SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); + shader[i++] = TEX_DWORD1(DST_GPR(1), + DST_REL(ABSOLUTE), + DST_SEL_X(SQ_SEL_MASK), + DST_SEL_Y(SQ_SEL_X), + DST_SEL_Z(SQ_SEL_MASK), + DST_SEL_W(SQ_SEL_MASK), + LOD_BIAS(0), + COORD_TYPE_X(TEX_NORMALIZED), + COORD_TYPE_Y(TEX_NORMALIZED), + COORD_TYPE_Z(TEX_NORMALIZED), + COORD_TYPE_W(TEX_NORMALIZED)); + shader[i++] = TEX_DWORD2(OFFSET_X(0), + OFFSET_Y(0), + OFFSET_Z(0), + SAMPLER_ID(2), + SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_0), + SRC_SEL_W(SQ_SEL_1)); + shader[i++] = TEX_DWORD_PAD; + /* 30 */ + shader[i++] = CF_DWORD0(ADDR(32), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(2), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_TC), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + /* 31 */ + shader[i++] = CF_DWORD0(ADDR(0), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + 
I_COUNT(0), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_RETURN), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + /* 32/33 */ + shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), + INST_MOD(0), + FETCH_WHOLE_QUAD(0), + RESOURCE_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + ALT_CONST(0), + RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), + SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); + shader[i++] = TEX_DWORD1(DST_GPR(1), + DST_REL(ABSOLUTE), + DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_MASK), + DST_SEL_Z(SQ_SEL_MASK), + DST_SEL_W(SQ_SEL_1), + LOD_BIAS(0), + COORD_TYPE_X(TEX_NORMALIZED), + COORD_TYPE_Y(TEX_NORMALIZED), + COORD_TYPE_Z(TEX_NORMALIZED), + COORD_TYPE_W(TEX_NORMALIZED)); + shader[i++] = TEX_DWORD2(OFFSET_X(0), + OFFSET_Y(0), + OFFSET_Z(0), + SAMPLER_ID(0), + SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_0), + SRC_SEL_W(SQ_SEL_1)); + shader[i++] = TEX_DWORD_PAD; + /* 34/35 */ + shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), + INST_MOD(0), + FETCH_WHOLE_QUAD(0), + RESOURCE_ID(1), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + ALT_CONST(0), + RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), + SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); + shader[i++] = TEX_DWORD1(DST_GPR(1), + DST_REL(ABSOLUTE), + DST_SEL_X(SQ_SEL_MASK), + DST_SEL_Y(SQ_SEL_X), + DST_SEL_Z(SQ_SEL_Y), + DST_SEL_W(SQ_SEL_MASK), + LOD_BIAS(0), + COORD_TYPE_X(TEX_NORMALIZED), + COORD_TYPE_Y(TEX_NORMALIZED), + COORD_TYPE_Z(TEX_NORMALIZED), + COORD_TYPE_W(TEX_NORMALIZED)); + shader[i++] = TEX_DWORD2(OFFSET_X(0), + OFFSET_Y(0), + OFFSET_Z(0), + SAMPLER_ID(1), + SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_0), + SRC_SEL_W(SQ_SEL_1)); + shader[i++] = TEX_DWORD_PAD; + + return i; +} + +/* comp vs --------------------------------------- */ +int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) +{ + int i = 0; + + /* 0 */ + shader[i++] = CF_DWORD0(ADDR(3), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_BOOL), 
+ I_COUNT(0), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_CALL), + WHOLE_QUAD_MODE(0), + BARRIER(0)); + /* 1 */ + shader[i++] = CF_DWORD0(ADDR(9), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_NOT_BOOL), + I_COUNT(0), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_CALL), + WHOLE_QUAD_MODE(0), + BARRIER(0)); + /* 2 */ + shader[i++] = CF_DWORD0(ADDR(0), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(0), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(1), + CF_INST(SQ_CF_INST_NOP), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + /* 3 - mask sub */ + shader[i++] = CF_DWORD0(ADDR(44), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(3), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_VC), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + + /* 4 - ALU */ + shader[i++] = CF_ALU_DWORD0(ADDR(14), + KCACHE_BANK0(0), + KCACHE_BANK1(0), + KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); + shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), + KCACHE_ADDR0(0), + KCACHE_ADDR1(0), + I_COUNT(20), + ALT_CONST(0), + CF_INST(SQ_CF_INST_ALU), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + + /* 5 - dst */ + shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), + TYPE(SQ_EXPORT_POS), + RW_GPR(2), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(0)); + shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_0), + SRC_SEL_W(SQ_SEL_1), + BURST_COUNT(1), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_EXPORT_DONE), + MARK(0), + BARRIER(1)); + /* 6 - src */ + shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), + TYPE(SQ_EXPORT_PARAM), + RW_GPR(1), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(0)); + shader[i++] = 
CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_0), + SRC_SEL_W(SQ_SEL_1), + BURST_COUNT(1), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_EXPORT), + MARK(0), + BARRIER(0)); + /* 7 - mask */ + shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1), + TYPE(SQ_EXPORT_PARAM), + RW_GPR(0), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(0)); + shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_0), + SRC_SEL_W(SQ_SEL_1), + BURST_COUNT(1), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_EXPORT_DONE), + WHOLE_QUAD_MODE(0), + BARRIER(0)); + /* 8 */ + shader[i++] = CF_DWORD0(ADDR(0), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(0), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_RETURN), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + /* 9 - non-mask sub */ + shader[i++] = CF_DWORD0(ADDR(50), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(2), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_VC), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + + /* 10 - ALU */ + shader[i++] = CF_ALU_DWORD0(ADDR(34), + KCACHE_BANK0(0), + KCACHE_BANK1(0), + KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); + shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), + KCACHE_ADDR0(0), + KCACHE_ADDR1(0), + I_COUNT(10), + ALT_CONST(0), + CF_INST(SQ_CF_INST_ALU), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + + /* 11 - dst */ + shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), + TYPE(SQ_EXPORT_POS), + RW_GPR(1), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(0)); + shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_0), + SRC_SEL_W(SQ_SEL_1), + BURST_COUNT(0), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + 
CF_INST(SQ_CF_INST_EXPORT_DONE), + MARK(0), + BARRIER(1)); + /* 12 - src */ + shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), + TYPE(SQ_EXPORT_PARAM), + RW_GPR(0), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(0)); + shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_0), + SRC_SEL_W(SQ_SEL_1), + BURST_COUNT(0), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_EXPORT_DONE), + MARK(0), + BARRIER(0)); + /* 13 */ + shader[i++] = CF_DWORD0(ADDR(0), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(0), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_RETURN), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + + /* 14 srcX.x DOT4 - mask */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(3), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(0)); + + /* 15 srcX.y DOT4 - mask */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Y), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(3), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(0)); + + /* 16 srcX.z DOT4 - mask */ + 
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Z), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Z), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(3), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(0)); + + /* 17 srcX.w DOT4 - mask */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_W), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_W), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(3), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_W), + CLAMP(0)); + + /* 18 srcY.x DOT4 - mask */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(3), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(0)); + + /* 19 srcY.y DOT4 - mask */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Y), + SRC1_NEG(0), + 
INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(3), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(0)); + + /* 20 srcY.z DOT4 - mask */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Z), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Z), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(3), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(0)); + + /* 21 srcY.w DOT4 - mask */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_W), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_W), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(3), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_W), + CLAMP(0)); + + /* 22 maskX.x DOT4 - mask */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + 
ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(4), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(0)); + + /* 23 maskX.y DOT4 - mask */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Y), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(4), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(0)); + + /* 24 maskX.z DOT4 - mask */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Z), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Z), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(4), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(0)); + + /* 25 maskX.w DOT4 - mask */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_W), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_W), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(4), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_W), + CLAMP(0)); + + /* 26 maskY.x DOT4 - mask */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + 
SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(4), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(0)); + + /* 27 maskY.y DOT4 - mask */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Y), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(4), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(0)); + + /* 28 maskY.z DOT4 - mask */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Z), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Z), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(4), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(0)); + + /* 29 maskY.w DOT4 - mask */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_W), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_W), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + 
shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(4), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_W), + CLAMP(0)); + + /* 30 srcX / w */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_W), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MUL), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(0)); + + /* 31 srcY / h */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_W), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MUL), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(0)); + + /* 32 maskX / w */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_W), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MUL), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + 
CLAMP(0)); + + /* 33 maskY / h */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_W), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MUL), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(0)); + + /* 34 srcX.x DOT4 - non-mask */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(0)); + + /* 35 srcX.y DOT4 - non-mask */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Y), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(0)); + + /* 36 srcX.z DOT4 - non-mask */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Z), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), + SRC1_REL(ABSOLUTE), + 
SRC1_ELEM(ELEM_Z), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(0)); + + /* 37 srcX.w DOT4 - non-mask */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_W), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_W), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_W), + CLAMP(0)); + + /* 38 srcY.x DOT4 - non-mask */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(0)); + + /* 39 srcY.y DOT4 - non-mask */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Y), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + 
WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(0)); + + /* 40 srcY.z DOT4 - non-mask */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Z), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Z), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(0)); + + /* 41 srcY.w DOT4 - non-mask */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_W), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_W), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_W), + CLAMP(0)); + + /* 42 srcX / w */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_W), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MUL), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(0)); + + /* 43 srcY / h */ + shader[i++] = 
ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_W), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MUL), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(0)); + + /* mask vfetch - 44/45 - dst */ + shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), + FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), + FETCH_WHOLE_QUAD(0), + BUFFER_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + SRC_SEL_X(SQ_SEL_X), + MEGA_FETCH_COUNT(24)); + shader[i++] = VTX_DWORD1_GPR(DST_GPR(2), + DST_REL(0), + DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_Y), + DST_SEL_Z(SQ_SEL_0), + DST_SEL_W(SQ_SEL_1), + USE_CONST_FIELDS(0), + DATA_FORMAT(FMT_32_32_FLOAT), + NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), + FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), + SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); + shader[i++] = VTX_DWORD2(OFFSET(0), +#if X_BYTE_ORDER == X_BIG_ENDIAN + ENDIAN_SWAP(SQ_ENDIAN_8IN32), +#else + ENDIAN_SWAP(SQ_ENDIAN_NONE), +#endif + CONST_BUF_NO_STRIDE(0), + MEGA_FETCH(1), + ALT_CONST(0), + BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); + shader[i++] = VTX_DWORD_PAD; + /* 46/47 - src */ + shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), + FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), + FETCH_WHOLE_QUAD(0), + BUFFER_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + SRC_SEL_X(SQ_SEL_X), + MEGA_FETCH_COUNT(8)); + shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), + DST_REL(0), + DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_Y), + DST_SEL_Z(SQ_SEL_1), + DST_SEL_W(SQ_SEL_0), + USE_CONST_FIELDS(0), + DATA_FORMAT(FMT_32_32_FLOAT), + NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), + FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), + SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); + shader[i++] = 
VTX_DWORD2(OFFSET(8), +#if X_BYTE_ORDER == X_BIG_ENDIAN + ENDIAN_SWAP(SQ_ENDIAN_8IN32), +#else + ENDIAN_SWAP(SQ_ENDIAN_NONE), +#endif + CONST_BUF_NO_STRIDE(0), + MEGA_FETCH(0), + ALT_CONST(0), + BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); + shader[i++] = VTX_DWORD_PAD; + /* 48/49 - mask */ + shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), + FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), + FETCH_WHOLE_QUAD(0), + BUFFER_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + SRC_SEL_X(SQ_SEL_X), + MEGA_FETCH_COUNT(8)); + shader[i++] = VTX_DWORD1_GPR(DST_GPR(0), + DST_REL(0), + DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_Y), + DST_SEL_Z(SQ_SEL_1), + DST_SEL_W(SQ_SEL_0), + USE_CONST_FIELDS(0), + DATA_FORMAT(FMT_32_32_FLOAT), + NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), + FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), + SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); + shader[i++] = VTX_DWORD2(OFFSET(16), +#if X_BYTE_ORDER == X_BIG_ENDIAN + ENDIAN_SWAP(SQ_ENDIAN_8IN32), +#else + ENDIAN_SWAP(SQ_ENDIAN_NONE), +#endif + CONST_BUF_NO_STRIDE(0), + MEGA_FETCH(0), + ALT_CONST(0), + BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); + shader[i++] = VTX_DWORD_PAD; + + /* no mask vfetch - 50/51 - dst */ + shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), + FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), + FETCH_WHOLE_QUAD(0), + BUFFER_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + SRC_SEL_X(SQ_SEL_X), + MEGA_FETCH_COUNT(16)); + shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), + DST_REL(0), + DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_Y), + DST_SEL_Z(SQ_SEL_0), + DST_SEL_W(SQ_SEL_1), + USE_CONST_FIELDS(0), + DATA_FORMAT(FMT_32_32_FLOAT), + NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), + FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), + SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); + shader[i++] = VTX_DWORD2(OFFSET(0), +#if X_BYTE_ORDER == X_BIG_ENDIAN + ENDIAN_SWAP(SQ_ENDIAN_8IN32), +#else + ENDIAN_SWAP(SQ_ENDIAN_NONE), +#endif + CONST_BUF_NO_STRIDE(0), + MEGA_FETCH(1), + ALT_CONST(0), + BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); + shader[i++] = VTX_DWORD_PAD; + /* 52/53 
- src */ + shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), + FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), + FETCH_WHOLE_QUAD(0), + BUFFER_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + SRC_SEL_X(SQ_SEL_X), + MEGA_FETCH_COUNT(8)); + shader[i++] = VTX_DWORD1_GPR(DST_GPR(0), + DST_REL(0), + DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_Y), + DST_SEL_Z(SQ_SEL_1), + DST_SEL_W(SQ_SEL_0), + USE_CONST_FIELDS(0), + DATA_FORMAT(FMT_32_32_FLOAT), + NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), + FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), + SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); + shader[i++] = VTX_DWORD2(OFFSET(8), +#if X_BYTE_ORDER == X_BIG_ENDIAN + ENDIAN_SWAP(SQ_ENDIAN_8IN32), +#else + ENDIAN_SWAP(SQ_ENDIAN_NONE), +#endif + CONST_BUF_NO_STRIDE(0), + MEGA_FETCH(0), + ALT_CONST(0), + BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); + shader[i++] = VTX_DWORD_PAD; + + return i; +} + +/* comp ps --------------------------------------- Writes the composite pixel shader into shader[] with one shader[i++] store per dword and returns i, the number of dwords written. Slot layout per the numbered comments below: CF 0-11, ALU 12-27, TEX 28-33 (each TEX instruction is four dwords and is labelled with two slot numbers). ChipSet is not referenced in this body. */ +int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) +{ + int i = 0; + + /* 0 - CALL the mask sub at CF 3 when COND is SQ_CF_COND_BOOL */ + shader[i++] = CF_DWORD0(ADDR(3), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_BOOL), + I_COUNT(0), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_CALL), + WHOLE_QUAD_MODE(0), + BARRIER(0)); + /* 1 - CALL the non-mask sub at CF 8 when COND is SQ_CF_COND_NOT_BOOL */ + shader[i++] = CF_DWORD0(ADDR(8), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_NOT_BOOL), + I_COUNT(0), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_CALL), + WHOLE_QUAD_MODE(0), + BARRIER(0)); + /* 2 - NOP carrying END_OF_PROGRAM(1) */ + shader[i++] = CF_DWORD0(ADDR(0), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(0), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(1), + CF_INST(SQ_CF_INST_NOP), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + + /* 3 - mask sub: ALU clause at 12, 8 instructions (12-19) */ + shader[i++] = CF_ALU_DWORD0(ADDR(12), + KCACHE_BANK0(0), + KCACHE_BANK1(0), + 
KCACHE_MODE0(SQ_CF_KCACHE_NOP)); + shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), + KCACHE_ADDR0(0), + KCACHE_ADDR1(0), + I_COUNT(8), + ALT_CONST(0), + CF_INST(SQ_CF_INST_ALU), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + + /* 4 - TC clause at 28, 2 instructions (the src and mask samples at 28-31) */ + shader[i++] = CF_DWORD0(ADDR(28), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(2), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_TC), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + + /* 5 - ALU clause at 20, 4 instructions (the MULs at 20-23) */ + shader[i++] = CF_ALU_DWORD0(ADDR(20), + KCACHE_BANK0(0), + KCACHE_BANK1(0), + KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); + shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), + KCACHE_ADDR0(0), + KCACHE_ADDR1(0), + I_COUNT(4), + ALT_CONST(0), + CF_INST(SQ_CF_INST_ALU), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + + /* 6 - EXPORT_DONE of GPR 2 to CF_PIXEL_MRT0 */ + shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), + TYPE(SQ_EXPORT_PIXEL), + RW_GPR(2), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(1)); + + shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + BURST_COUNT(1), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_EXPORT_DONE), + MARK(0), + BARRIER(1)); + /* 7 - RETURN, last CF instruction of the mask sub */ + shader[i++] = CF_DWORD0(ADDR(0), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(0), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_RETURN), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + + /* 8 - non-mask sub: ALU clause at 24, 4 instructions (24-27) */ + shader[i++] = CF_ALU_DWORD0(ADDR(24), + KCACHE_BANK0(0), + KCACHE_BANK1(0), + KCACHE_MODE0(SQ_CF_KCACHE_NOP)); + shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), + KCACHE_ADDR0(0), + KCACHE_ADDR1(0), + I_COUNT(4), + ALT_CONST(0), + CF_INST(SQ_CF_INST_ALU), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + /* 9 - TC clause at 32, 1 instruction (the non-mask src sample) */ + shader[i++] = CF_DWORD0(ADDR(32), + 
JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(1), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_TC), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + + /* 10 - EXPORT_DONE of GPR 0 to CF_PIXEL_MRT0 */ + shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), + TYPE(SQ_EXPORT_PIXEL), + RW_GPR(0), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(1)); + shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + BURST_COUNT(1), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_EXPORT_DONE), + MARK(0), + BARRIER(1)); + + /* 11 - RETURN, last CF instruction of the non-mask sub */ + shader[i++] = CF_DWORD0(ADDR(0), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(0), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_RETURN), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + + /* 12 interpolate src tex coords - mask */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_INTERP_XY), + BANK_SWIZZLE(SQ_ALU_VEC_210), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(0)); + /* 13 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), 
+ OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_INTERP_XY), + BANK_SWIZZLE(SQ_ALU_VEC_210), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(0)); + /* 14 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_INTERP_XY), + BANK_SWIZZLE(SQ_ALU_VEC_210), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(0)); + /* 15 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_INTERP_XY), + BANK_SWIZZLE(SQ_ALU_VEC_210), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_W), + CLAMP(0)); + + /* 16 interpolate mask tex coords */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_PARAM_BASE + 1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_INTERP_XY), + BANK_SWIZZLE(SQ_ALU_VEC_210), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(0)); + /* 17 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + 
SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_PARAM_BASE + 1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_INTERP_XY), + BANK_SWIZZLE(SQ_ALU_VEC_210), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(0)); + /* 18 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_PARAM_BASE + 1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_INTERP_XY), + BANK_SWIZZLE(SQ_ALU_VEC_210), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(0)); + /* 19 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_PARAM_BASE + 1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_INTERP_XY), + BANK_SWIZZLE(SQ_ALU_VEC_210), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_W), + CLAMP(0)); + + /* 20 - alu 0 */ + /* MUL gpr[2].x gpr[0].x gpr[1].x */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = 
ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MUL), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(1)); + /* 21 - alu 1 */ + /* MUL gpr[2].y gpr[0].y gpr[1].y */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Y), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MUL), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(1)); + /* 22 - alu 2 */ + /* MUL gpr[2].z gpr[0].z gpr[1].z */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Z), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Z), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MUL), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(1)); + /* 23 - alu 3 */ + /* MUL gpr[2].w gpr[0].w gpr[1].w */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_W), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_W), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MUL), + 
BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_W), + CLAMP(1)); + + /* 24 - interpolate tex coords - non-mask */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_INTERP_XY), + BANK_SWIZZLE(SQ_ALU_VEC_210), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(0)); + /* 25 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_INTERP_XY), + BANK_SWIZZLE(SQ_ALU_VEC_210), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(0)); + /* 26 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_INTERP_XY), + BANK_SWIZZLE(SQ_ALU_VEC_210), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(0)); + /* 27 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + 
SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_INTERP_XY), + BANK_SWIZZLE(SQ_ALU_VEC_210), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_W), + CLAMP(0)); + + /* 28/29 - src - mask */ + shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), + INST_MOD(0), + FETCH_WHOLE_QUAD(0), + RESOURCE_ID(0), + SRC_GPR(1), + SRC_REL(ABSOLUTE), + ALT_CONST(0), + RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), + SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); + shader[i++] = TEX_DWORD1(DST_GPR(1), + DST_REL(ABSOLUTE), + DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_Y), + DST_SEL_Z(SQ_SEL_Z), + DST_SEL_W(SQ_SEL_W), + LOD_BIAS(0), + COORD_TYPE_X(TEX_NORMALIZED), + COORD_TYPE_Y(TEX_NORMALIZED), + COORD_TYPE_Z(TEX_NORMALIZED), + COORD_TYPE_W(TEX_NORMALIZED)); + shader[i++] = TEX_DWORD2(OFFSET_X(0), + OFFSET_Y(0), + OFFSET_Z(0), + SAMPLER_ID(0), + SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_0), + SRC_SEL_W(SQ_SEL_1)); + shader[i++] = TEX_DWORD_PAD; + /* 30/31 - mask */ + shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), + INST_MOD(0), + FETCH_WHOLE_QUAD(0), + RESOURCE_ID(1), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + ALT_CONST(0), + RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), + SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); + shader[i++] = TEX_DWORD1(DST_GPR(0), + DST_REL(ABSOLUTE), + DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_Y), + DST_SEL_Z(SQ_SEL_Z), + DST_SEL_W(SQ_SEL_W), + LOD_BIAS(0), + COORD_TYPE_X(TEX_NORMALIZED), + COORD_TYPE_Y(TEX_NORMALIZED), + COORD_TYPE_Z(TEX_NORMALIZED), + COORD_TYPE_W(TEX_NORMALIZED)); + shader[i++] = TEX_DWORD2(OFFSET_X(0), + OFFSET_Y(0), + OFFSET_Z(0), + SAMPLER_ID(1), + SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_0), + SRC_SEL_W(SQ_SEL_1)); + shader[i++] = TEX_DWORD_PAD; + 
+ /* 32/33 - src - non-mask */ + shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), + INST_MOD(0), + FETCH_WHOLE_QUAD(0), + RESOURCE_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + ALT_CONST(0), + RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), + SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); + shader[i++] = TEX_DWORD1(DST_GPR(0), + DST_REL(ABSOLUTE), + DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_Y), + DST_SEL_Z(SQ_SEL_Z), + DST_SEL_W(SQ_SEL_W), + LOD_BIAS(0), + COORD_TYPE_X(TEX_NORMALIZED), + COORD_TYPE_Y(TEX_NORMALIZED), + COORD_TYPE_Z(TEX_NORMALIZED), + COORD_TYPE_W(TEX_NORMALIZED)); + shader[i++] = TEX_DWORD2(OFFSET_X(0), + OFFSET_Y(0), + OFFSET_Z(0), + SAMPLER_ID(0), + SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_0), + SRC_SEL_W(SQ_SEL_1)); + shader[i++] = TEX_DWORD_PAD; + + return i; +} + diff --git a/evergreen_shader.h b/evergreen_shader.h new file mode 100644 index 0000000..c0a0601 --- /dev/null +++ b/evergreen_shader.h @@ -0,0 +1,292 @@ +/* + * Evergreen shaders + * + * Copyright (C) 2010 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Shader macros + */ + +#ifndef __SHADER_H__ +#define __SHADER_H__ + +#include "radeondemo.h" + +/* Order of instructions: All CF, All ALU, All Tex/Vtx fetches */ + + +// CF insts +// addr +#define ADDR(x) (x) +// jumptable +#define JUMPTABLE_SEL(x) (x) +// pc +#define POP_COUNT(x) (x) +// const +#define CF_CONST(x) (x) +// cond +#define COND(x) (x) // SQ_COND_* +// count +#define I_COUNT(x) ((x) ? ((x) - 1) : 0) +// vpm +#define VALID_PIXEL_MODE(x) (x) +// eop +#define END_OF_PROGRAM(x) (x) +// cf inst +#define CF_INST(x) (x) // SQ_CF_INST_* +// wqm +#define WHOLE_QUAD_MODE(x) (x) +// barrier +#define BARRIER(x) (x) +//kb0 +#define KCACHE_BANK0(x) (x) +//kb1 +#define KCACHE_BANK1(x) (x) +// km0/1 +#define KCACHE_MODE0(x) (x) +#define KCACHE_MODE1(x) (x) // SQ_CF_KCACHE_* +// +#define KCACHE_ADDR0(x) (x) +#define KCACHE_ADDR1(x) (x) + +#define ALT_CONST(x) (x) + +#define ARRAY_BASE(x) (x) +// export pixel +#define CF_PIXEL_MRT0 0 +#define CF_PIXEL_MRT1 1 +#define CF_PIXEL_MRT2 2 +#define CF_PIXEL_MRT3 3 +#define CF_PIXEL_MRT4 4 +#define CF_PIXEL_MRT5 5 +#define CF_PIXEL_MRT6 6 +#define CF_PIXEL_MRT7 7 +// computed Z +#define CF_COMPUTED_Z 61 +// export pos +#define CF_POS0 60 +#define CF_POS1 61 +#define CF_POS2 62 +#define CF_POS3 63 +// export param +// 0...31 +#define TYPE(x) (x) // SQ_EXPORT_* +#define RW_GPR(x) (x) +#define RW_REL(x) (x) +#define ABSOLUTE 0 +#define RELATIVE 1 +#define INDEX_GPR(x) (x) +#define ELEM_SIZE(x) (x ? (x - 1) : 0) +#define BURST_COUNT(x) (x ? 
(x - 1) : 0) +#define MARK(x) (x) + +// swiz +#define SRC_SEL_X(x) (x) // SQ_SEL_* each +#define SRC_SEL_Y(x) (x) +#define SRC_SEL_Z(x) (x) +#define SRC_SEL_W(x) (x) + +#define CF_DWORD0(addr, jmptbl) cpu_to_le32(((addr) | ((jmptbl) << 24))) +#define CF_DWORD1(pc, cf_const, cond, count, vpm, eop, cf_inst, wqm, b) \ + cpu_to_le32((((pc) << 0) | ((cf_const) << 3) | ((cond) << 8) | ((count) << 10) | \ + ((vpm) << 20) | ((eop) << 21) | ((cf_inst) << 22) | ((wqm) << 30) | ((b) << 31))) + +#define CF_ALU_DWORD0(addr, kb0, kb1, km0) cpu_to_le32((((addr) << 0) | ((kb0) << 22) | ((kb1) << 26) | ((km0) << 30))) +#define CF_ALU_DWORD1(km1, kcache_addr0, kcache_addr1, count, alt_const, cf_inst, wqm, b) \ + cpu_to_le32((((km1) << 0) | ((kcache_addr0) << 2) | ((kcache_addr1) << 10) | \ + ((count) << 18) | ((alt_const) << 25) | ((cf_inst) << 26) | ((wqm) << 30) | ((b) << 31))) + +#define CF_ALLOC_IMP_EXP_DWORD0(array_base, type, rw_gpr, rr, index_gpr, es) \ + cpu_to_le32((((array_base) << 0) | ((type) << 13) | ((rw_gpr) << 15) | ((rr) << 22) | \ + ((index_gpr) << 23) | ((es) << 30))) +#define CF_ALLOC_IMP_EXP_DWORD1_SWIZ(sel_x, sel_y, sel_z, sel_w, bc, vpm, eop, cf_inst, m, b) \ + cpu_to_le32((((sel_x) << 0) | ((sel_y) << 3) | ((sel_z) << 6) | ((sel_w) << 9) | \ + ((bc) << 16) | ((vpm) << 20) | ((eop) << 21) | ((cf_inst) << 22) | \ + ((m) << 30) | ((b) << 31))) + +// ALU clause insts +#define SRC0_SEL(x) (x) +#define SRC1_SEL(x) (x) +#define SRC2_SEL(x) (x) +// src[0-2]_sel +// 0-127 GPR +// 128-159 kcache constants bank 0 +// 160-191 kcache constants bank 1 +// 192-255 inline const values +// 256-287 kcache constants bank 2 +// 288-319 kcache constants bank 3 +// 219-255 special SQ_ALU_SRC_* (0, 1, etc.) 
+// 488-520 src param space +#define ALU_SRC_GPR_BASE 0 +#define ALU_SRC_KCACHE0_BASE 128 +#define ALU_SRC_KCACHE1_BASE 160 +#define ALU_SRC_INLINE_K_BASE 192 +#define ALU_SRC_KCACHE2_BASE 256 +#define ALU_SRC_KCACHE3_BASE 288 +#define ALU_SRC_PARAM_BASE 448 + +#define SRC0_REL(x) (x) +#define SRC1_REL(x) (x) +#define SRC2_REL(x) (x) +// elem +#define SRC0_ELEM(x) (x) +#define SRC1_ELEM(x) (x) +#define SRC2_ELEM(x) (x) +#define ELEM_X 0 +#define ELEM_Y 1 +#define ELEM_Z 2 +#define ELEM_W 3 +// neg +#define SRC0_NEG(x) (x) +#define SRC1_NEG(x) (x) +#define SRC2_NEG(x) (x) +// im +#define INDEX_MODE(x) (x) // SQ_INDEX_* +// ps +#define PRED_SEL(x) (x) // SQ_PRED_SEL_* +// last +#define LAST(x) (x) +// abs +#define SRC0_ABS(x) (x) +#define SRC1_ABS(x) (x) +// uem +#define UPDATE_EXECUTE_MASK(x) (x) +// up +#define UPDATE_PRED(x) (x) +// wm +#define WRITE_MASK(x) (x) +// omod +#define OMOD(x) (x) // SQ_ALU_OMOD_* +// alu inst +#define ALU_INST(x) (x) // SQ_ALU_INST_* +//bs +#define BANK_SWIZZLE(x) (x) // SQ_ALU_VEC_* +#define DST_GPR(x) (x) +#define DST_REL(x) (x) +#define DST_ELEM(x) (x) +#define CLAMP(x) (x) + +#define ALU_DWORD0(src0_sel, s0r, s0e, s0n, src1_sel, s1r, s1e, s1n, im, ps, last) \ + cpu_to_le32((((src0_sel) << 0) | ((s0r) << 9) | ((s0e) << 10) | ((s0n) << 12) | \ + ((src1_sel) << 13) | ((s1r) << 22) | ((s1e) << 23) | ((s1n) << 25) | \ + ((im) << 26) | ((ps) << 29) | ((last) << 31))) + +#define ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) \ + cpu_to_le32((((s0a) << 0) | ((s1a) << 1) | ((uem) << 2) | ((up) << 3) | ((wm) << 4) | \ + ((omod) << 5) | ((alu_inst) << 7) | ((bs) << 18) | ((dst_gpr) << 21) | \ + ((dr) << 28) | ((de) << 29) | ((clamp) << 31))) + +#define ALU_DWORD1_OP3(src2_sel, s2r, s2e, s2n, alu_inst, bs, dst_gpr, dr, de, clamp) \ + cpu_to_le32((((src2_sel) << 0) | ((s2r) << 9) | ((s2e) << 10) | ((s2n) << 12) | \ + ((alu_inst) << 13) | ((bs) << 18) | ((dst_gpr) << 21) | ((dr) << 28) | \ + ((de) << 29) | 
((clamp) << 31))) + +// VTX clause insts +// vxt insts +#define VTX_INST(x) (x) // SQ_VTX_INST_* + +// fetch type +#define FETCH_TYPE(x) (x) // SQ_VTX_FETCH_* + +#define FETCH_WHOLE_QUAD(x) (x) +#define BUFFER_ID(x) (x) +#define SRC_GPR(x) (x) +#define SRC_REL(x) (x) +#define MEGA_FETCH_COUNT(x) ((x) ? ((x) - 1) : 0) + +#define DST_SEL_X(x) (x) +#define DST_SEL_Y(x) (x) +#define DST_SEL_Z(x) (x) +#define DST_SEL_W(x) (x) +#define USE_CONST_FIELDS(x) (x) +#define DATA_FORMAT(x) (x) +// num format +#define NUM_FORMAT_ALL(x) (x) // SQ_NUM_FORMAT_* +// format comp +#define FORMAT_COMP_ALL(x) (x) // SQ_FORMAT_COMP_* +// sma +#define SRF_MODE_ALL(x) (x) +#define SRF_MODE_ZERO_CLAMP_MINUS_ONE 0 +#define SRF_MODE_NO_ZERO 1 +#define OFFSET(x) (x) +// endian swap +#define ENDIAN_SWAP(x) (x) // SQ_ENDIAN_* +#define CONST_BUF_NO_STRIDE(x) (x) +// mf +#define MEGA_FETCH(x) (x) +#define BUFFER_INDEX_MODE(x) (x) + +#define VTX_DWORD0(vtx_inst, ft, fwq, buffer_id, src_gpr, sr, ssx, mfc) \ + cpu_to_le32((((vtx_inst) << 0) | ((ft) << 5) | ((fwq) << 7) | ((buffer_id) << 8) | \ + ((src_gpr) << 16) | ((sr) << 23) | ((ssx) << 24) | ((mfc) << 26))) +#define VTX_DWORD1_GPR(dst_gpr, dr, dsx, dsy, dsz, dsw, ucf, data_format, nfa, fca, sma) \ + cpu_to_le32((((dst_gpr) << 0) | ((dr) << 7) | ((dsx) << 9) | ((dsy) << 12) | ((dsz) << 15) | ((dsw) << 18) | \ + ((ucf) << 21) | ((data_format) << 22) | ((nfa) << 28) | ((fca) << 30) | ((sma) << 31))) +#define VTX_DWORD2(offset, es, cbns, mf, alt_const, bim) \ + cpu_to_le32((((offset) << 0) | ((es) << 16) | ((cbns) << 18) | ((mf) << 19) | ((alt_const) << 20) | ((bim) << 21))) +#define VTX_DWORD_PAD cpu_to_le32(0x00000000) + +// TEX clause insts +// tex insts +#define TEX_INST(x) (x) // SQ_TEX_INST_* +#define INST_MOD(x) (x) +#define FETCH_WHOLE_QUAD(x) (x) +#define RESOURCE_ID(x) (x) +#define RESOURCE_INDEX_MODE(x) (x) +#define SAMPLER_INDEX_MODE(x) (x) + +#define LOD_BIAS(x) (x) +//ct +#define COORD_TYPE_X(x) (x) +#define COORD_TYPE_Y(x) (x) +#define 
COORD_TYPE_Z(x) (x) +#define COORD_TYPE_W(x) (x) +#define TEX_UNNORMALIZED 0 +#define TEX_NORMALIZED 1 +#define OFFSET_X(x) (((int)(x) * 2) & 0x1f) /* 4:1-bits 2's-complement fixed-point: [-8.0..7.5] */ +#define OFFSET_Y(x) (((int)(x) * 2) & 0x1f) +#define OFFSET_Z(x) (((int)(x) * 2) & 0x1f) +#define SAMPLER_ID(x) (x) + +#define TEX_DWORD0(tex_inst, im, fwq, resource_id, src_gpr, sr, ac, rim, sim) \ + cpu_to_le32((((tex_inst) << 0) | ((im) << 5) | ((fwq) << 7) | ((resource_id) << 8) | \ + ((src_gpr) << 16) | ((sr) << 23) | ((ac) << 24) | ((rim) << 25) | ((sim) << 27))) +#define TEX_DWORD1(dst_gpr, dr, dsx, dsy, dsz, dsw, lod_bias, ctx, cty, ctz, ctw) \ + cpu_to_le32((((dst_gpr) << 0) | ((dr) << 7) | ((dsx) << 9) | ((dsy) << 12) | ((dsz) << 15) | ((dsw) << 18) | \ + ((lod_bias) << 21) | ((ctx) << 28) | ((cty) << 29) | ((ctz) << 30) | ((ctw) << 31))) +#define TEX_DWORD2(offset_x, offset_y, offset_z, sampler_id, ssx, ssy, ssz, ssw) \ + cpu_to_le32((((offset_x) << 0) | ((offset_y) << 5) | ((offset_z) << 10) | ((sampler_id) << 15) | \ + ((ssx) << 20) | ((ssy) << 23) | ((ssz) << 26) | ((ssw) << 29))) +#define TEX_DWORD_PAD cpu_to_le32(0x00000000) + +extern int evergreen_solid_vs(RADEONChipFamily ChipSet, uint32_t* vs); +extern int evergreen_solid_ps(RADEONChipFamily ChipSet, uint32_t* ps); + +extern int evergreen_copy_vs(RADEONChipFamily ChipSet, uint32_t* vs); +extern int evergreen_copy_ps(RADEONChipFamily ChipSet, uint32_t* ps); + +extern int evergreen_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader); +extern int evergreen_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader); + +extern int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* vs); +extern int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* ps); + +#endif diff --git a/evergreen_state.h b/evergreen_state.h new file mode 100644 index 0000000..1acb484 --- /dev/null +++ b/evergreen_state.h @@ -0,0 +1,329 @@ +/* + * Copyright 2010 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
/*
 * Authors: Alex Deucher <alexander.deucher@amd.com>
 */

#ifndef __EVERGREEN_STATE_H__
#define __EVERGREEN_STATE_H__

/* The declarations below use fixed-width integers and memset(). */
#include <stdint.h>
#include <string.h>

typedef int bool_t;

/* Zero an object in place; the argument is parenthesized so any lvalue expression works. */
#define CLEAR(x) memset(&(x), 0, sizeof(x))

/* Sequencer / thread handling */
typedef struct {
    int ps_prio;
    int vs_prio;
    int gs_prio;
    int es_prio;
    int hs_prio;
    int ls_prio;
    int cs_prio;
    int num_ps_gprs;
    int num_vs_gprs;
    int num_gs_gprs;
    int num_es_gprs;
    int num_hs_gprs;
    int num_ls_gprs;
    int num_cs_gprs;
    int num_temp_gprs;
    int num_ps_threads;
    int num_vs_threads;
    int num_gs_threads;
    int num_es_threads;
    int num_hs_threads;
    int num_ls_threads;
    int num_ps_stack_entries;
    int num_vs_stack_entries;
    int num_gs_stack_entries;
    int num_es_stack_entries;
    int num_hs_stack_entries;
    int num_ls_stack_entries;
} sq_config_t;

/* Color buffer / render target */
typedef struct {
    int id;
    int w;
    int h;
    uint64_t base;
    int format;
    int endian;
    int array_mode; // tiling
    int non_disp_tiling;
    int number_type;
    int read_size;
    int comp_swap;
    int tile_mode;
    int blend_clamp;
    int clear_color;
    int blend_bypass;
    int simple_float;
    int round_mode;
    int tile_compact;
    int source_format;
    int resource_type;
    int fast_clear;
    int compression;
    int rat;
    /* 2D related CB state */
    uint32_t pmask;
    int rop;
    int blend_enable;
    uint32_t blendcntl;
    struct radeon_bo *bo;
} cb_config_t;

/* Shader */
typedef struct {
    uint64_t shader_addr;
    uint32_t shader_size;
    int num_gprs;
    int stack_size;
    int dx10_clamp;
    int clamp_consts;
    int export_mode;
    int uncached_first_inst;
    int single_round;
    int double_round;
    int allow_sdi;
    int allow_sd0;
    int allow_ddi;
    int allow_ddo;
    struct radeon_bo *bo;
} shader_config_t;

/* Shader consts */
typedef struct {
    int type;
    int size_bytes;
    uint64_t const_addr;
    struct radeon_bo *bo;
} const_config_t;

/* Vertex buffer / vtx resource */
typedef struct {
    int id;
    uint64_t vb_addr;
    uint32_t vtx_num_entries;
    uint32_t vtx_size_dw;
    int clamp_x;
    int format;
    int num_format_all;
    int format_comp_all;
    int srf_mode_all;
    int endian;
    int mem_req_size;
    int dst_sel_x;
    int dst_sel_y;
    int dst_sel_z;
    int dst_sel_w;
    int uncached;
    struct radeon_bo *bo;
} vtx_resource_t;

/* Texture resource */
typedef struct {
    int id;
    int w;
    int h;
    int pitch;
    int depth;
    int dim;
    int array_mode;
    int tile_type;
    int format;
    uint64_t base;
    uint64_t mip_base;
    uint32_t size;
    int format_comp_x;
    int format_comp_y;
    int format_comp_z;
    int format_comp_w;
    int num_format_all;
    int srf_mode_all;
    int force_degamma;
    int endian;
    int dst_sel_x;
    int dst_sel_y;
    int dst_sel_z;
    int dst_sel_w;
    int base_level;
    int last_level;
    int base_array;
    int last_array;
    int perf_modulation;
    int interlaced;
    int min_lod;
    struct radeon_bo *bo;
    struct radeon_bo *mip_bo;
} tex_resource_t;

/* Texture sampler */
typedef struct {
    int id;
    /* Clamping */
    int clamp_x, clamp_y, clamp_z;
    int border_color;
    /* Filtering */
    int xy_mag_filter, xy_min_filter;
    int z_filter;
    int mip_filter;
    bool_t high_precision_filter; /* ? */
    int perf_mip; /* ? 0-7 */
    int perf_z; /* ? 3 */
    /* LoD selection */
    int min_lod, max_lod; /* 0-0x3ff */
    int lod_bias; /* 0-0xfff (signed?) */
    int lod_bias2; /* ? 0-0xfff (signed?) */
    bool_t lod_uses_minor_axis; /* ? */
    /* Other stuff */
    bool_t point_sampling_clamp; /* ? */
    bool_t tex_array_override; /* ? */
    bool_t mc_coord_truncate; /* ? */
    bool_t force_degamma; /* ? */
    bool_t fetch_4; /* ? */
    bool_t sample_is_pcf; /* ? */
    bool_t type; /* ? */
    int depth_compare; /* only depth textures? */
    int chroma_key;
    int truncate_coord;
    bool_t disable_cube_wrap;
} tex_sampler_t;

/* Draw command */
typedef struct {
    uint32_t prim_type;
    uint32_t vgt_draw_initiator;
    uint32_t index_type;
    uint32_t num_instances;
    uint32_t num_indices;
} draw_config_t;

/*
 * Command-stream emission helpers.  Every macro below expands to code that
 * uses a variable named `radeon` (struct radeon *) from the calling scope.
 */
#define BEGIN_BATCH(n) \
do { \
    radeon_ddx_cs_start(radeon, (n), __FILE__, __func__, __LINE__); \
} while(0)
#define END_BATCH() \
do { \
    radeon_cs_end(radeon->cs, __FILE__, __func__, __LINE__); \
} while(0)
#define RELOC_BATCH(bo, rd, wd) \
do { \
    int _ret; \
    _ret = radeon_cs_write_reloc(radeon->cs, (bo), (rd), (wd), 0); \
    if (_ret) ErrorF("reloc emit failure %d (%s %d)\n", _ret, __func__, __LINE__); \
} while(0)
#define E32(dword) \
do { \
    radeon_cs_write_dword(radeon->cs, (dword)); \
} while (0)

/* Emit the bit pattern of a float; the union avoids an aliasing pointer cast. */
#define EFLOAT(val) \
do { \
    union { float f; uint32_t d; } a; \
    a.f = (val); \
    E32(a.d); \
} while (0)

#define PACK3(cmd, num) \
do { \
    E32(RADEON_CP_PACKET3 | ((cmd) << 8) | ((((num) - 1) & 0x3fff) << 16)); \
} while (0)

/* write num registers, start at reg */
/* If register falls in a special area, special commands are issued */
/* `reg` and `num` are evaluated more than once: pass side-effect-free expressions. */
#define PACK0(reg, num) \
do { \
    if ((reg) >= SET_CONFIG_REG_offset && (reg) < SET_CONFIG_REG_end) { \
        PACK3(IT_SET_CONFIG_REG, (num) + 1); \
        E32(((reg) - SET_CONFIG_REG_offset) >> 2); \
    } else if ((reg) >= SET_CONTEXT_REG_offset && (reg) < SET_CONTEXT_REG_end) { \
        PACK3(IT_SET_CONTEXT_REG, (num) + 1); \
        E32(((reg) - SET_CONTEXT_REG_offset) >> 2); \
    } else if ((reg) >= SET_RESOURCE_offset && (reg) < SET_RESOURCE_end) { \
        PACK3(IT_SET_RESOURCE, (num) + 1); \
        E32(((reg) - SET_RESOURCE_offset) >> 2); \
    } else if ((reg) >= SET_SAMPLER_offset && (reg) < SET_SAMPLER_end) { \
        PACK3(IT_SET_SAMPLER, (num) + 1); \
        E32(((reg) - SET_SAMPLER_offset) >> 2); \
    } else if ((reg) >= SET_CTL_CONST_offset && (reg) < SET_CTL_CONST_end) { \
        PACK3(IT_SET_CTL_CONST, (num) + 1); \
        E32(((reg) - SET_CTL_CONST_offset) >> 2); \
    } else if ((reg) >= SET_LOOP_CONST_offset && (reg) < SET_LOOP_CONST_end) { \
        PACK3(IT_SET_LOOP_CONST, (num) + 1); \
        E32(((reg) - SET_LOOP_CONST_offset) >> 2); \
    } else if ((reg) >= SET_BOOL_CONST_offset && (reg) < SET_BOOL_CONST_end) { \
        PACK3(IT_SET_BOOL_CONST, (num) + 1); \
        E32(((reg) - SET_BOOL_CONST_offset) >> 2); \
    } else { \
        E32(CP_PACKET0 ((reg), (num) - 1)); \
    } \
} while (0)

/* write a single register */
#define EREG(reg, val) \
do { \
    PACK0((reg), 1); \
    E32((val)); \
} while (0)

void
evergreen_start_3d(struct radeon *radeon);
void
evergreen_set_render_target(struct radeon *radeon, cb_config_t *cb_conf, uint32_t domain);
void
evergreen_set_spi(struct radeon *radeon, int vs_export_count, int num_interp);
void
evergreen_fs_setup(struct radeon *radeon, shader_config_t *fs_conf, uint32_t domain);
void
evergreen_vs_setup(struct radeon *radeon, shader_config_t *vs_conf, uint32_t domain);
void
evergreen_ps_setup(struct radeon *radeon, shader_config_t *ps_conf, uint32_t domain);
void
evergreen_set_alu_consts(struct radeon *radeon, const_config_t *const_conf, uint32_t domain);
void
evergreen_set_bool_consts(struct radeon *radeon, int offset, uint32_t val);
void
evergreen_set_tex_resource(struct radeon *radeon, tex_resource_t *tex_res, uint32_t domain);
void
evergreen_set_tex_sampler(struct radeon *radeon, tex_sampler_t *s);
void
evergreen_set_screen_scissor(struct radeon *radeon, int x1, int y1, int x2, int y2);
void
evergreen_set_vport_scissor(struct radeon *radeon, int id, int x1, int y1, int x2, int y2);
void
evergreen_set_generic_scissor(struct radeon *radeon, int x1, int y1, int x2, int y2);
void
evergreen_set_window_scissor(struct radeon *radeon, int x1, int y1, int x2, int y2);
void
evergreen_set_clip_rect(struct radeon *radeon, int id, int x1, int y1, int x2, int y2);
void
evergreen_set_default_state(struct radeon *radeon);
void
evergreen_draw_auto(struct radeon *radeon, draw_config_t *draw_conf);

void evergreen_finish_op(struct radeon *radeon, int vtx_size);

#endif

/* ---- patch boundary: diff --git a/radeon_vbo.c b/radeon_vbo.c (new file mode 100644, index 0000000..f8e14c4, @@ -0,0 +1,204 @@) ---- */
/*
 * Copyright © 2009 Red Hat, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
+ * + * Authors: + * Dave Airlie <airlied@redhat.com> + * + */ + +#include <errno.h> +#include <stdlib.h> +#include "radeondemo.h" +#include "radeon_bo.h" +#include "radeon_cs.h" +#define VBO_SIZE (16*1024) + +/* KMS vertex buffer support - for R600 only but could be used on previous gpus */ + +static struct radeon_bo *radeon_vbo_get_bo(struct radeon *radeon); + +void radeon_vbo_put(struct radeon *radeon, struct radeon_vbo_object *vbo) +{ + + if (vbo->vb_bo) { + radeon_bo_unmap(vbo->vb_bo); + radeon_bo_unref(vbo->vb_bo); + vbo->vb_bo = NULL; + vbo->vb_total = 0; + } + + vbo->vb_offset = 0; +} + +void radeon_vbo_get(struct radeon *radeon, struct radeon_vbo_object *vbo) +{ + int ret; + + vbo->vb_bo = radeon_vbo_get_bo(radeon); + if (vbo->vb_bo) { + radeon_bo_ref(vbo->vb_bo); + ret = radeon_bo_map(vbo->vb_bo, 1); + if (ret) { + ErrorF("Failed to map vb %d\n", ret); + exit(-1); + } + } + + vbo->vb_total = VBO_SIZE; + vbo->vb_offset = 0; + vbo->vb_start_op = vbo->vb_offset; +} + +/* these functions could migrate to libdrm and + be shared with the radeon 3D driver */ +static int radeon_bo_is_idle(struct radeon_bo *bo) +{ + uint32_t domain; + int ret = radeon_bo_is_busy(bo, &domain); + return ret != -EBUSY; +} + +void radeon_vbo_init_lists(struct radeon *radeon) +{ + struct radeon_accel_state *accel_state = &radeon->accel_state; + + make_empty_list(&accel_state->bo_free); + make_empty_list(&accel_state->bo_wait); + make_empty_list(&accel_state->bo_reserved); +} + +void radeon_vbo_free_lists(struct radeon *radeon) +{ + struct radeon_accel_state *accel_state = &radeon->accel_state; + struct radeon_dma_bo *dma_bo, *temp; + + foreach_s(dma_bo, temp, &accel_state->bo_free) { + remove_from_list(dma_bo); + radeon_bo_unref(dma_bo->bo); + free(dma_bo); + } + + foreach_s(dma_bo, temp, &accel_state->bo_wait) { + remove_from_list(dma_bo); + radeon_bo_unref(dma_bo->bo); + free(dma_bo); + } + + foreach_s(dma_bo, temp, &accel_state->bo_reserved) { + remove_from_list(dma_bo); + 
radeon_bo_unref(dma_bo->bo); + free(dma_bo); + } +} + +void radeon_vbo_flush_bos(struct radeon *radeon) +{ + struct radeon_accel_state *accel_state = &radeon->accel_state; + struct radeon_dma_bo *dma_bo, *temp; + const int expire_at = ++accel_state->bo_free.expire_counter + DMA_BO_FREE_TIME; + const int time = accel_state->bo_free.expire_counter; + + foreach_s(dma_bo, temp, &accel_state->bo_wait) { + if (dma_bo->expire_counter == time) { + ErrorF("leaking dma buffer\n"); + while ((dma_bo->bo = radeon_bo_unref(dma_bo->bo))) {} + remove_from_list(dma_bo); + free(dma_bo); + continue; + } + + if (!radeon_bo_is_idle(dma_bo->bo)) + continue; + + if (dma_bo->bo->ptr) { + ErrorF("bo with pointer on wait list!\n"); + continue; + } + + remove_from_list(dma_bo); + dma_bo->expire_counter = expire_at; + insert_at_tail(&accel_state->bo_free, dma_bo); + } + + /* move reserved to wait list */ + foreach_s(dma_bo, temp, &accel_state->bo_reserved) { + remove_from_list(dma_bo); + dma_bo->expire_counter = expire_at; + insert_at_tail(&accel_state->bo_wait, dma_bo); + } + + /* free bos that have been unused */ + foreach_s(dma_bo, temp, &accel_state->bo_free) { + if (dma_bo->expire_counter != time) + break; + /* always keep one hanging around at end */ + if (at_end(&accel_state->bo_free, dma_bo)) { + dma_bo->expire_counter = time + DMA_BO_FREE_TIME; + break; + } + + remove_from_list(dma_bo); + radeon_bo_unref(dma_bo->bo); + free(dma_bo); + } +} + +static struct radeon_bo *radeon_vbo_get_bo(struct radeon *radeon) +{ + struct radeon_accel_state *accel_state = &radeon->accel_state; + struct radeon_dma_bo *dma_bo = NULL; + struct radeon_bo *bo; + + if (is_empty_list(&accel_state->bo_free)) { + dma_bo = calloc(1, sizeof(struct radeon_dma_bo)); + if (!dma_bo) + return NULL; + +again_alloc: + dma_bo->bo = radeon_bo_open(radeon->bufmgr, 0, VBO_SIZE, + 0, RADEON_GEM_DOMAIN_GTT, 0); + + if (!dma_bo->bo) { + ErrorF("failure to allocate DMA BO\n"); + return NULL; + } + 
insert_at_head(&accel_state->bo_reserved, dma_bo); + } else { + dma_bo = last_elem(&accel_state->bo_free); + remove_from_list(dma_bo); + insert_at_head(&accel_state->bo_reserved, dma_bo); + } + + if (is_empty_list(&accel_state->bo_reserved)) + goto again_alloc; + + bo = first_elem(&accel_state->bo_reserved)->bo; + + /* need a space check */ + if (radeon_cs_space_check_with_bo(radeon->cs, + bo, + RADEON_GEM_DOMAIN_GTT, 0)) + ErrorF("failed to revalidate\n"); + + return bo; +} + diff --git a/radeon_vbo.h b/radeon_vbo.h new file mode 100644 index 0000000..701825c --- /dev/null +++ b/radeon_vbo.h @@ -0,0 +1,45 @@ + +#ifndef RADEON_VBO_H +#define RADEON_VBO_H + +extern void radeon_vb_no_space(struct radeon *radeon, struct radeon_vbo_object *vbo, int vert_size); +extern void radeon_vbo_init_lists(struct radeon *radeon); +extern void radeon_vbo_free_lists(struct radeon *radeon); +extern void radeon_vbo_flush_bos(struct radeon *radeon); +extern void radeon_vbo_get(struct radeon *radeon, struct radeon_vbo_object *vbo); +extern void radeon_vbo_put(struct radeon *radeon, struct radeon_vbo_object *vbo); + +static inline void radeon_vbo_check(struct radeon *radeon, + struct radeon_vbo_object *vbo, + int vert_size) +{ + + if ((vbo->vb_offset + (vbo->verts_per_op * vert_size)) > vbo->vb_total) { + radeon_vb_no_space(radeon, vbo, vert_size); + } +} + +static inline void * +radeon_vbo_space(struct radeon *radeon, + struct radeon_vbo_object *vbo, + int vert_size) +{ + void *vb; + + /* we've ran out of space in the vertex buffer - need to get a + new one */ + radeon_vbo_check(radeon, vbo, vert_size); + + vbo->vb_op_vert_size = vert_size; + vb = (void*)((char *)vbo->vb_bo->ptr + vbo->vb_offset); + return vb; +} + +static inline void radeon_vbo_commit(struct radeon *radeon, + struct radeon_vbo_object *vbo) +{ + + vbo->vb_offset += vbo->verts_per_op * vbo->vb_op_vert_size; +} + +#endif diff --git a/radeondemo.c b/radeondemo.c new file mode 100644 index 0000000..7c41490 --- /dev/null +++ 
b/radeondemo.c @@ -0,0 +1,251 @@ +#include <sys/types.h> +#include "xf86drm.h" +#include "radeon_drm.h" + +#include <stdio.h> +#include <stdlib.h> +#include <sys/mman.h> +#include <errno.h> +#include <string.h> + +#include "radeondemo.h" + +#include "radeon_bo_gem.h" +#include "radeon_cs_gem.h" + +struct radeon _radeon_ctx; +struct radeon *radeon = &_radeon_ctx; + +int run_test(struct radeon *radeon) +{ + struct r600_accel_object test1; + int size; + struct radeon_bo *vram_bo, *gtt_bo; + + test1.pitch = 256; + test1.width = 256; + test1.height = 256; + test1.bpp = 32; + test1.domain = RADEON_GEM_DOMAIN_VRAM; + test1.tiling_flags = 0; + + size = test1.pitch*test1.height*test1.bpp; + test1.bo = radeon_bo_open(radeon->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_VRAM, 0); + if (!test1.bo) + return -1; + + do_solid_fill_prepare(radeon, &test1, 0xaa55aa55); + evergreen_solid(radeon, 0, 0, test1.width, test1.height); + evergreen_finish_op(radeon, 8); + +} + +int radeon_init(struct radeon *radeon, int fd) +{ + radeon->fd = fd; + + { + struct drm_radeon_gem_info mminfo; + + if (!drmCommandWriteRead(fd, DRM_RADEON_GEM_INFO, &mminfo, sizeof(mminfo))) + { + radeon->vram_size = mminfo.vram_visible; + radeon->gart_size = mminfo.gart_size; + printf( + "mem size init: gart size :%llx vram size: s:%llx visible:%llx\n", + (unsigned long long)mminfo.gart_size, + (unsigned long long)mminfo.vram_size, + (unsigned long long)mminfo.vram_visible); + } + } + radeon->ChipFamily = CHIP_FAMILY_PALM; + radeon->bufmgr = radeon_bo_manager_gem_ctor(fd); + if (!radeon->bufmgr) + return -1; + + radeon->csm = radeon_cs_manager_gem_ctor(fd); + if (!radeon->csm) + return -1; + + radeon->cs = radeon_cs_create(radeon->csm, 16384); + if (!radeon->cs) + return -1; + + radeon_cs_set_limit(radeon->cs, RADEON_GEM_DOMAIN_VRAM, radeon->vram_size); + radeon_cs_set_limit(radeon->cs, RADEON_GEM_DOMAIN_GTT, radeon->gart_size); + radeon_cs_space_set_flush(radeon->cs, (void(*)(void *))radeon_cs_flush_indirect, radeon); + 
+ radeon_vbo_init_lists(radeon); + + if (radeon->ChipFamily == CHIP_FAMILY_PALM) { + EVERGREENAllocShaders(radeon); + EVERGREENLoadShaders(radeon); + } + + radeon->accel_state.XInited3D = false; + radeon->accel_state.src_obj[0].bo = NULL; + radeon->accel_state.src_obj[1].bo = NULL; + radeon->accel_state.dst_obj.bo = NULL; + radeon->accel_state.vbo.vb_start_op = -1; + radeon->accel_state.cbuf.vb_start_op = -1; + radeon->accel_state.finish_op = evergreen_finish_op; + radeon->accel_state.vbo.verts_per_op = 3; + radeon->accel_state.cbuf.verts_per_op = 1; + return 0; +} + +void radeon_fini(struct radeon *radeon) +{ + radeon_cs_destroy(radeon->cs); + radeon_bo_manager_gem_dtor(radeon->bufmgr); + radeon_cs_manager_gem_dtor(radeon->csm); +} + + +int main(int argc, char **argv) +{ + int drmFD; + int ret; + char *pciids = "pci:0000:00:01.0"; + drmFD = drmOpen(NULL, pciids); + if (drmFD < 0) { + drmError(drmFD, __func__); + fprintf(stderr, "Check that BusId is correct. You can find the correct BusId in /var/log/Xorg.0.log\n"); + fprintf(stderr, "You can also try setting the environment variable LIBGL_DEBUG to \"verbose\" to see what libdrm is trying to do.\n"); + exit(-1); + } + + ret = radeon_init(radeon, drmFD); + if (ret < 0) { + fprintf(stderr,"uanbel to init radeon\n"); + exit(-1); + } + + run_test(radeon); + + radeon_fini(radeon); + drmClose(drmFD); +} + +void ErrorF(const char *f, ...) 
+{ + +} + +void radeon_cs_flush_indirect(struct radeon *radeon) +{ + struct radeon_accel_state *accel_state = &radeon->accel_state; + int ret; + + if (!radeon->cs->cdw) + return; + + /* release the current VBO so we don't block on mapping it later */ + if (accel_state->vbo.vb_offset && accel_state->vbo.vb_bo) { + radeon_vbo_put(radeon, &accel_state->vbo); + accel_state->vbo.vb_start_op = -1; + } + + /* release the current VBO so we don't block on mapping it later */ + if (accel_state->cbuf.vb_bo) { + radeon_vbo_put(radeon, &accel_state->cbuf); + accel_state->cbuf.vb_start_op = -1; + } + radeon_cs_emit(radeon->cs); + radeon_cs_erase(radeon->cs); + + radeon_vbo_flush_bos(radeon); + + ret = radeon_cs_space_check_with_bo(radeon->cs, + accel_state->vbo.vb_bo, + RADEON_GEM_DOMAIN_GTT, 0); + if (ret) + ErrorF("space check failed in flush\n"); + + accel_state->XInited3D = false; + +} + +void radeon_ddx_cs_start(struct radeon *radeon, + int n, const char *file, + const char *func, int line) +{ + if (radeon->cs->cdw + n > radeon->cs->ndw) { +// radeon_cs_flush_indirect(radeon); + + } + radeon_cs_begin(radeon->cs, n, file, func, line); +} + +void radeon_vb_no_space(struct radeon *radeon, + struct radeon_vbo_object *vbo, + int vert_size) +{ + struct radeon_accel_state *accel_state = &radeon->accel_state; + + if (vbo->vb_bo) { + if (vbo->vb_start_op != vbo->vb_offset) { + accel_state->finish_op(radeon, vert_size); + accel_state->ib_reset_op = radeon->cs->cdw; + } + + /* release the current VBO */ + radeon_vbo_put(radeon, vbo); + } + /* get a new one */ + radeon_vbo_get(radeon, vbo); + return; +} + +void radeon_ib_discard(struct radeon *radeon) +{ + struct radeon_accel_state *accel_state = &radeon->accel_state; + int ret; + + if (accel_state->ib_reset_op) { + /* if we have data just reset the CS and ignore the operation */ + radeon->cs->cdw = accel_state->ib_reset_op; + accel_state->ib_reset_op = 0; + goto out; + } + + accel_state->vbo.vb_offset = 0; + 
accel_state->vbo.vb_start_op = -1; + accel_state->cbuf.vb_offset = 0; + accel_state->cbuf.vb_start_op = -1; + + if (CS_FULL(radeon->cs)) { + radeon_cs_flush_indirect(radeon); + return; + } + radeon_cs_erase(radeon->cs); + ret = radeon_cs_space_check_with_bo(radeon->cs, + accel_state->vbo.vb_bo, + RADEON_GEM_DOMAIN_GTT, 0); + if (ret) + ErrorF("space check failed in flush\n"); + + if (accel_state->cbuf.vb_bo) { + ret = radeon_cs_space_check_with_bo(radeon->cs, + accel_state->cbuf.vb_bo, + RADEON_GEM_DOMAIN_GTT, 0); + if (ret) + ErrorF("space check failed in flush\n"); + } + +out: + accel_state->XInited3D = false; +} + +int radeon_cp_start(struct radeon *radeon) +{ + struct radeon_accel_state *accel_state = &radeon->accel_state; + + if (CS_FULL(radeon->cs)) { + radeon_cs_flush_indirect(radeon); + } + accel_state->ib_reset_op = radeon->cs->cdw; + accel_state->vbo.vb_start_op = accel_state->vbo.vb_offset; + accel_state->cbuf.vb_start_op = accel_state->cbuf.vb_offset; + return 0; +} diff --git a/radeondemo.h b/radeondemo.h new file mode 100644 index 0000000..b23c58e --- /dev/null +++ b/radeondemo.h @@ -0,0 +1,202 @@ +#ifndef RADEONDEMO_H +#define RADEONDEMO_H + +#include <stdint.h> +#include <sys/types.h> +#define le32_to_cpu(x) (x) +#define le16_to_cpu(x) (x) +#define cpu_to_le32(x) (x) +#define cpu_to_le16(x) (x) + +#include <stdbool.h> + +#include "simple_list.h" + +#include "radeon_bo.h" +#include "radeon_cs.h" +struct radeon_vbo_object { + struct radeon_bo *vb_bo; + unsigned verts_per_op; + int vb_offset; + uint32_t vb_start_op; + uint32_t vb_op_vert_size; + uint32_t vb_size; + int vb_total; + void *vb_ptr; +}; + +#define DMA_BO_FREE_TIME 1000 + +struct radeon_dma_bo { + struct radeon_dma_bo *next, *prev; + struct radeon_bo *bo; + int expire_counter; +}; + + +struct r600_accel_object { + uint32_t pitch; + uint32_t width; + uint32_t height; + uint32_t offset; + int bpp; + uint32_t domain; + struct radeon_bo *bo; + uint32_t tiling_flags; +}; + +struct radeon; + 
+struct radeon_accel_state { /* acceleration state, embedded by value in struct radeon as accel_state */
+    bool XInited3D; /* X itself has the 3D context */
+    struct radeon_vbo_object vbo;
+    struct radeon_vbo_object cbuf; /* second vertex-buffer object; its vb_bo may be NULL */
+    uint32_t ib_reset_op; /* set from cs->cdw by radeon_cp_start() */
+    uint32_t src_size[2];
+    uint32_t dst_size;
+    struct r600_accel_object src_obj[2];
+    struct r600_accel_object dst_obj;
+
+    struct radeon_dma_bo bo_free; /* NOTE(review): bo_free/bo_wait/bo_reserved are presumably list sentinels - confirm in radeon_vbo.c */
+    struct radeon_dma_bo bo_wait;
+    struct radeon_dma_bo bo_reserved;
+
+    void (*finish_op)(struct radeon *, int); /* callback; evergreen_finish_op() declared below has a matching signature */
+
+    struct radeon_bo *shaders_bo;
+    uint32_t solid_vs_offset; /* NOTE(review): the four *_offset fields are presumably offsets into shaders_bo - confirm */
+    uint32_t solid_ps_offset;
+    uint32_t copy_vs_offset;
+    uint32_t copy_ps_offset;
+};
+typedef enum { /* chip families, implicitly numbered from 0; the trailing comments mark where each generation starts */
+    CHIP_FAMILY_UNKNOW, /* spelling is part of the identifier; do not "fix" it without updating every user */
+    CHIP_FAMILY_LEGACY,
+    CHIP_FAMILY_RADEON,
+    CHIP_FAMILY_RV100,
+    CHIP_FAMILY_RS100,   /* U1 (IGP320M) or A3 (IGP320)*/
+    CHIP_FAMILY_RV200,
+    CHIP_FAMILY_RS200,   /* U2 (IGP330M/340M/350M) or A4 (IGP330/340/345/350), RS250 (IGP 7000) */
+    CHIP_FAMILY_R200,
+    CHIP_FAMILY_RV250,
+    CHIP_FAMILY_RS300,   /* RS300/RS350 */
+    CHIP_FAMILY_RV280,
+    CHIP_FAMILY_R300,
+    CHIP_FAMILY_R350,
+    CHIP_FAMILY_RV350,
+    CHIP_FAMILY_RV380,   /* RV370/RV380/M22/M24 */
+    CHIP_FAMILY_R420,    /* R420/R423/M18 */
+    CHIP_FAMILY_RV410,   /* RV410, M26 */
+    CHIP_FAMILY_RS400,   /* xpress 200, 200m (RS400) Intel */
+    CHIP_FAMILY_RS480,   /* xpress 200, 200m (RS410/480/482/485) AMD */
+    CHIP_FAMILY_RV515,   /* rv515 */
+    CHIP_FAMILY_R520,    /* r520 */
+    CHIP_FAMILY_RV530,   /* rv530 */
+    CHIP_FAMILY_R580,    /* r580 */
+    CHIP_FAMILY_RV560,   /* rv560 */
+    CHIP_FAMILY_RV570,   /* rv570 */
+    CHIP_FAMILY_RS600,
+    CHIP_FAMILY_RS690,
+    CHIP_FAMILY_RS740,
+    CHIP_FAMILY_R600,    /* r600 */
+    CHIP_FAMILY_RV610,
+    CHIP_FAMILY_RV630,
+    CHIP_FAMILY_RV670,
+    CHIP_FAMILY_RV620,
+    CHIP_FAMILY_RV635,
+    CHIP_FAMILY_RS780,
+    CHIP_FAMILY_RS880,
+    CHIP_FAMILY_RV770,   /* r700 */
+    CHIP_FAMILY_RV730,
+    CHIP_FAMILY_RV710,
+    CHIP_FAMILY_RV740,
+    CHIP_FAMILY_CEDAR,   /* evergreen */
+    CHIP_FAMILY_REDWOOD,
+    CHIP_FAMILY_JUNIPER,
+    CHIP_FAMILY_CYPRESS,
+    CHIP_FAMILY_HEMLOCK,
+    CHIP_FAMILY_PALM,
+    CHIP_FAMILY_BARTS,
+    CHIP_FAMILY_TURKS,
+    CHIP_FAMILY_CAICOS,
+    CHIP_FAMILY_CAYMAN,
+    CHIP_FAMILY_LAST /* final enumerator; keep it last when adding families */
+} RADEONChipFamily;
+
+struct radeon { /* top-level device context passed to every function declared in this header */
+    int fd;
+    struct radeon_bo *vb;
+    struct radeon_accel_state accel_state;
+    RADEONChipFamily ChipFamily;
+
+    struct radeon_cs_manager *csm;
+    struct radeon_bo_manager *bufmgr;
+    struct radeon_cs *cs; /* command stream whose cdw is tested by CS_FULL() */
+
+    uint64_t vram_size;
+    uint64_t gart_size;
+};
+
+
+
+#       define RADEON_ROP3_ZERO             0x00000000 /* ROP3 codes: every value below occupies bits 16-23 only */
+#       define RADEON_ROP3_DSa              0x00880000
+#       define RADEON_ROP3_SDna             0x00440000
+#       define RADEON_ROP3_S                0x00cc0000
+#       define RADEON_ROP3_DSna             0x00220000
+#       define RADEON_ROP3_D                0x00aa0000
+#       define RADEON_ROP3_DSx              0x00660000
+#       define RADEON_ROP3_DSo              0x00ee0000
+#       define RADEON_ROP3_DSon             0x00110000
+#       define RADEON_ROP3_DSxn             0x00990000
+#       define RADEON_ROP3_Dn               0x00550000
+#       define RADEON_ROP3_SDno             0x00dd0000
+#       define RADEON_ROP3_Sn               0x00330000
+#       define RADEON_ROP3_DSno             0x00bb0000
+#       define RADEON_ROP3_DSan             0x00770000
+#       define RADEON_ROP3_ONE              0x00ff0000
+#       define RADEON_ROP3_DPa              0x00a00000
+#       define RADEON_ROP3_PDna             0x00500000
+#       define RADEON_ROP3_P                0x00f00000
+#       define RADEON_ROP3_DPna             0x000a0000
+#       define RADEON_ROP3_D                0x00aa0000 /* second definition of RADEON_ROP3_D; identical value, so the redefinition is benign */
+#       define RADEON_ROP3_DPx              0x005a0000
+#       define RADEON_ROP3_DPo              0x00fa0000
+#       define RADEON_ROP3_DPon             0x00050000
+#       define RADEON_ROP3_PDxn             0x00a50000
+#       define RADEON_ROP3_PDno             0x00f50000
+#       define RADEON_ROP3_Pn               0x000f0000
+#       define RADEON_ROP3_DPno             0x00af0000
+#       define RADEON_ROP3_DPan             0x005f0000
+
+
+#define CP_PACKET0(reg, n)						\
+	(RADEON_CP_PACKET0 | ((n) << 16) | ((reg) >> 2)) /* packet header word: n in bits 16 and up, reg >> 2 in the low bits */
+#define CP_PACKET1(reg0, reg1)						\
+	(RADEON_CP_PACKET1 | (((reg1) >> 2) << 11) | ((reg0) >> 2)) /* NOTE(review): RADEON_CP_PACKET1 is not defined in this header; using this macro will not compile unless another header supplies it */
+#define CP_PACKET2()							\
+	(RADEON_CP_PACKET2) /* NOTE(review): RADEON_CP_PACKET2 is likewise not defined in this header */
+#define CP_PACKET3(pkt, n)						\
+	(RADEON_CP_PACKET3 | (pkt) | ((n) << 16))
+
+#define RADEON_CP_PACKET0                           0x00000000 /* defined after the CP_PACKETn macros, which is fine: macros expand at the point of use */
+#define RADEON_CP_PACKET3                           0xC0000000
+
+extern void ErrorF(const char *f, ...); /* printf-style error reporter, defined outside this header */
+
+void radeon_ddx_cs_start(struct radeon *radeon,
+			 int n, const char *file,
+			 const char *func, int line);
+#define RADEON_ALIGN(x,bytes) (((x) + ((bytes) - 1)) & ~((bytes) - 1)) /* rounds x up to a multiple of bytes; only correct when bytes is a power of two; evaluates bytes twice */
+#define CS_FULL(cs) ((cs)->cdw > 15 * 1024) /* true once cs->cdw exceeds 15360 */
+
+#include "radeon_vbo.h"
+
+
+void radeon_cs_flush_indirect(struct radeon *radeon);
+int radeon_cp_start(struct radeon *radeon); /* always returns 0 */
+bool EVERGREENAllocShaders(struct radeon *radeon);
+bool EVERGREENLoadShaders(struct radeon *radeon);
+
+void evergreen_finish_op(struct radeon *radeon, int vtx_size); /* signature matches radeon_accel_state.finish_op */
+#endif
diff --git a/simple_list.h b/simple_list.h
new file mode 100644
index 0000000..ff7f888
--- /dev/null
+++ b/simple_list.h
@@ -0,0 +1,202 @@
+/**
+ * \file simple_list.h
+ * Simple macros for type-safe, intrusive lists.
+ *
+ * Intended to work with a list sentinel which is created as an empty
+ * list.  Insert & delete are O(1).
+ *
+ * \author
+ * (C) 1997, Keith Whitwell
+ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version: 3.5
+ *
+ * Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef _SIMPLE_LIST_H
+#define _SIMPLE_LIST_H
+
+struct simple_node {
+   struct simple_node *next;
+   struct simple_node *prev;
+};
+
+/**
+ * Remove an element from its list; the element's own links are left stale.
+ *
+ * \param elem element to remove (evaluated several times: no side effects).
+ */
+#define remove_from_list(elem)			\
+do {						\
+   (elem)->next->prev = (elem)->prev;		\
+   (elem)->prev->next = (elem)->next;		\
+} while (0)
+
+/**
+ * Insert an element at the list head, i.e. right after the sentinel.
+ *
+ * \param list list (sentinel element); any pointer expression is accepted.
+ * \param elem element to insert; evaluated several times, so no side effects.
+ */
+#define insert_at_head(list, elem)		\
+do {						\
+   (elem)->prev = (list);			\
+   (elem)->next = (list)->next;			\
+   (list)->next->prev = (elem);			\
+   (list)->next = (elem);			\
+} while(0)
+
+/**
+ * Insert an element at the list tail, i.e. right before the sentinel.
+ *
+ * \param list list (sentinel element); any pointer expression is accepted.
+ * \param elem element to insert; evaluated several times, so no side effects.
+ */
+#define insert_at_tail(list, elem)		\
+do {						\
+   (elem)->next = (list);			\
+   (elem)->prev = (list)->prev;			\
+   (list)->prev->next = (elem);			\
+   (list)->prev = (elem);			\
+} while(0)
+
+/**
+ * Move an element to the list head (remove_from_list + insert_at_head).
+ *
+ * \param list list.
+ * \param elem element to move; must currently be linked into a list.
+ */
+#define move_to_head(list, elem)		\
+do {						\
+   remove_from_list(elem);			\
+   insert_at_head(list, elem);			\
+} while (0)
+
+/**
+ * Move an element to the list tail (remove_from_list + insert_at_tail).
+ *
+ * \param list list.
+ * \param elem element to move; must currently be linked into a list.
+ */
+#define move_to_tail(list, elem)		\
+do {						\
+   remove_from_list(elem);			\
+   insert_at_tail(list, elem);			\
+} while (0)
+
+/**
+ * Make a list empty by pointing the sentinel's links at itself.
+ *
+ * \param sentinel list (sentinel element).
+ */
+#define make_empty_list(sentinel)		\
+do {						\
+   (sentinel)->next = (sentinel);		\
+   (sentinel)->prev = (sentinel);		\
+} while (0)
+
+/**
+ * Get list first element.
+ *
+ * \param list list.
+ *
+ * \return pointer to first element.
+ */
+#define first_elem(list)       ((list)->next)
+
+/**
+ * Get list last element.
+ *
+ * \param list list.
+ *
+ * \return pointer to last element.
+ */
+#define last_elem(list)        ((list)->prev)
+
+/**
+ * Get next element.
+ *
+ * \param elem element.
+ *
+ * \return pointer to next element.
+ */
+#define next_elem(elem)        ((elem)->next)
+
+/**
+ * Get previous element.
+ *
+ * \param elem element.
+ *
+ * \return pointer to previous element.
+ */
+#define prev_elem(elem)        ((elem)->prev)
+
+/**
+ * Test whether element is at end of the list, i.e. is the sentinel itself.
+ *
+ * \param list list.
+ * \param elem element.
+ *
+ * \return non-zero if element is at end of list, or zero otherwise.
+ */
+#define at_end(list, elem)     ((elem) == (list))
+
+/**
+ * Test if a list is empty (the sentinel's next link points back at it).
+ *
+ * \param list list.
+ *
+ * \return non-zero if list empty, or zero otherwise.
+ */
+#define is_empty_list(list)    ((list)->next == (list))
+
+/**
+ * Walk through the elements of a list, stopping at the sentinel.
+ *
+ * \param ptr pointer to the current element (an lvalue, assigned each step).
+ * \param list list; evaluated on every iteration, so no side effects.
+ *
+ * \note It should be followed by a { } block or a single statement, as in a \c
+ * for loop.
+ */
+#define foreach(ptr, list)     \
+        for( (ptr)=(list)->next ; (ptr)!=(list) ; (ptr)=(ptr)->next )
+
+/**
+ * Walk through the elements of a list.
+ *
+ * Same as #foreach but lets you unlink the current value during a list
+ * traversal.  Useful for freeing a list, element by element.
+ *
+ * \param ptr pointer to the current element (an lvalue, assigned each step).
+ * \param t temporary pointer; holds the successor read before the body runs.
+ * \param list list; evaluated on every iteration, so no side effects.
+ *
+ * \note It should be followed by a { } block or a single statement, as in a \c
+ * for loop.
+ */
+#define foreach_s(ptr, t, list)   \
+        for((ptr)=(list)->next,(t)=(ptr)->next; (list) != (ptr); (ptr)=(t), (t)=(t)->next)
+
+#endif |