summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDave Airlie <airlied@kvothe.(none)>2011-04-29 13:40:05 +1000
committerDave Airlie <airlied@kvothe.(none)>2011-04-29 13:40:05 +1000
commitcb0d9ded8613dd1abab5eb5a9aa15c4f7480b15a (patch)
tree067ddc1474904ea2cc562f4991a92a96a1de5777
initial
-rw-r--r--Makefile.am29
-rwxr-xr-xautogen.sh14
-rw-r--r--configure.ac36
-rw-r--r--evergreen_accel.c1243
-rw-r--r--evergreen_ops.c153
-rw-r--r--evergreen_reg.h250
-rw-r--r--evergreen_reg_auto.h4039
-rw-r--r--evergreen_shader.c3155
-rw-r--r--evergreen_shader.h292
-rw-r--r--evergreen_state.h329
-rw-r--r--radeon_vbo.c204
-rw-r--r--radeon_vbo.h45
-rw-r--r--radeondemo.c251
-rw-r--r--radeondemo.h202
-rw-r--r--simple_list.h202
15 files changed, 10444 insertions, 0 deletions
diff --git a/Makefile.am b/Makefile.am
new file mode 100644
index 0000000..438298f
--- /dev/null
+++ b/Makefile.am
@@ -0,0 +1,29 @@
+# Copyright 2009 Dave Airlie
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# on the rights to use, copy, modify, merge, publish, distribute, sub
+# license, and/or sell copies of the Software, and to permit persons to whom
+# the Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+AUTOMAKE_OPTIONS = foreign
+
+bin_PROGRAMS = radeondemo
+
+AM_CFLAGS = $(LIBDRM_CFLAGS) $(LIBDRM_RADEON_CFLAGS)
+# Headers are listed in _SOURCES so that "make dist" ships them in the tarball.
+radeondemo_SOURCES = radeondemo.c evergreen_accel.c evergreen_shader.c radeon_vbo.c evergreen_ops.c radeondemo.h evergreen_reg.h evergreen_reg_auto.h evergreen_shader.h evergreen_state.h radeon_vbo.h simple_list.h
+radeondemo_LDADD = $(LIBDRM_LIBS) $(LIBDRM_RADEON_LIBS)
+
diff --git a/autogen.sh b/autogen.sh
new file mode 100755
index 0000000..f028c2c
--- /dev/null
+++ b/autogen.sh
@@ -0,0 +1,14 @@
+#! /bin/sh
+# Regenerate the build system inside the source directory, then run configure from the caller's directory.
+srcdir=`dirname "$0"`
+test -z "$srcdir" && srcdir=.
+
+ORIGDIR=`pwd`
+cd "$srcdir" || exit $?
+
+autoreconf -v --install || exit 1
+cd "$ORIGDIR" || exit $?
+
+"$srcdir/configure" --enable-maintainer-mode "$@"
+
+
diff --git a/configure.ac b/configure.ac
new file mode 100644
index 0000000..1c40a96
--- /dev/null
+++ b/configure.ac
@@ -0,0 +1,36 @@
+# Copyright 2009 Dave Airlie
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# on the rights to use, copy, modify, merge, publish, distribute, sub
+# license, and/or sell copies of the Software, and to permit persons to whom
+# the Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+AC_PREREQ([2.60])
+AC_INIT([radeondemo], 1.6.2, [xorg-driver-ati@lists.x.org], radeondemo)
+AM_INIT_AUTOMAKE([dist-bzip2])
+
+#AM_CONFIG_HEADER([config.h])
+
+# AC_DISABLE_STATIC only changes libtool's defaults, so it has to precede AC_PROG_LIBTOOL.
+AC_DISABLE_STATIC
+AC_PROG_LIBTOOL
+AC_PROG_CC
+AC_PROG_INSTALL
+
+PKG_CHECK_MODULES(LIBDRM, libdrm)
+PKG_CHECK_MODULES(LIBDRM_RADEON, libdrm_radeon)
+
+AC_OUTPUT([Makefile])
diff --git a/evergreen_accel.c b/evergreen_accel.c
new file mode 100644
index 0000000..1d66172
--- /dev/null
+++ b/evergreen_accel.c
@@ -0,0 +1,1243 @@
+/*
+ * Copyright 2010 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Alex Deucher <alexander.deucher@amd.com>
+ *
+ */
+#include <errno.h>
+#include <stdlib.h>
+#include "radeondemo.h"
+#include "evergreen_shader.h"
+#include "evergreen_reg.h"
+#include "evergreen_state.h"
+#include "radeon_drm.h"
+
+static const uint32_t EVERGREEN_ROP[16] = { /* ROP3 value OR'ed into CB_COLOR_CONTROL; indexed by cb_conf->rop */
+ RADEON_ROP3_ZERO, /* GXclear */
+ RADEON_ROP3_DSa, /* GXand */
+ RADEON_ROP3_SDna, /* GXandReverse */
+ RADEON_ROP3_S, /* GXcopy */
+ RADEON_ROP3_DSna, /* GXandInverted */
+ RADEON_ROP3_D, /* GXnoop */
+ RADEON_ROP3_DSx, /* GXxor */
+ RADEON_ROP3_DSo, /* GXor */
+ RADEON_ROP3_DSon, /* GXnor */
+ RADEON_ROP3_DSxn, /* GXequiv */
+ RADEON_ROP3_Dn, /* GXinvert */
+ RADEON_ROP3_SDno, /* GXorReverse */
+ RADEON_ROP3_Sn, /* GXcopyInverted */
+ RADEON_ROP3_DSno, /* GXorInverted */
+ RADEON_ROP3_DSan, /* GXnand */
+ RADEON_ROP3_ONE, /* GXset */
+};
+
+void
+evergreen_start_3d(struct radeon *radeon)
+{
+ /* Emit one IT_CONTEXT_CONTROL packet whose two payload dwords are both 0x80000000. */
+ /* NOTE(review): bit 31 is presumably the enable bit of each control dword - confirm against the PM4 docs. */
+ BEGIN_BATCH(3);
+ PACK3(IT_CONTEXT_CONTROL, 2);
+ E32(0x80000000);
+ E32(0x80000000);
+ END_BATCH();
+
+}
+
+/*
+ * Setup of functional groups
+ */
+
+/* Program SQ_CONFIG and the GPR/thread/stack limits from sq_conf; TODO: query the asic stack/thread/gpr limits from the drm. */
+static void
+evergreen_sq_setup(struct radeon *radeon, sq_config_t *sq_conf)
+{
+ uint32_t sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2, sq_gpr_resource_mgmt_3;
+ uint32_t sq_thread_resource_mgmt, sq_thread_resource_mgmt_2;
+ uint32_t sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2, sq_stack_resource_mgmt_3;
+
+ /* Pack the per-stage members of sq_conf into one value per register, then emit them all in one batch. */
+ sq_config = 0;
+
+ sq_config |= (EXPORT_SRC_C_bit |
+ (sq_conf->cs_prio << CS_PRIO_shift) |
+ (sq_conf->ls_prio << LS_PRIO_shift) |
+ (sq_conf->hs_prio << HS_PRIO_shift) |
+ (sq_conf->ps_prio << PS_PRIO_shift) |
+ (sq_conf->vs_prio << VS_PRIO_shift) |
+ (sq_conf->gs_prio << GS_PRIO_shift) |
+ (sq_conf->es_prio << ES_PRIO_shift));
+
+ sq_gpr_resource_mgmt_1 = ((sq_conf->num_ps_gprs << NUM_PS_GPRS_shift) |
+ (sq_conf->num_vs_gprs << NUM_VS_GPRS_shift) |
+ (sq_conf->num_temp_gprs << NUM_CLAUSE_TEMP_GPRS_shift));
+ sq_gpr_resource_mgmt_2 = ((sq_conf->num_gs_gprs << NUM_GS_GPRS_shift) |
+ (sq_conf->num_es_gprs << NUM_ES_GPRS_shift));
+ sq_gpr_resource_mgmt_3 = ((sq_conf->num_hs_gprs << NUM_HS_GPRS_shift) |
+ (sq_conf->num_ls_gprs << NUM_LS_GPRS_shift));
+
+ sq_thread_resource_mgmt = ((sq_conf->num_ps_threads << NUM_PS_THREADS_shift) |
+ (sq_conf->num_vs_threads << NUM_VS_THREADS_shift) |
+ (sq_conf->num_gs_threads << NUM_GS_THREADS_shift) |
+ (sq_conf->num_es_threads << NUM_ES_THREADS_shift));
+ sq_thread_resource_mgmt_2 = ((sq_conf->num_hs_threads << NUM_HS_THREADS_shift) |
+ (sq_conf->num_ls_threads << NUM_LS_THREADS_shift));
+
+ sq_stack_resource_mgmt_1 = ((sq_conf->num_ps_stack_entries << NUM_PS_STACK_ENTRIES_shift) |
+ (sq_conf->num_vs_stack_entries << NUM_VS_STACK_ENTRIES_shift));
+
+ sq_stack_resource_mgmt_2 = ((sq_conf->num_gs_stack_entries << NUM_GS_STACK_ENTRIES_shift) |
+ (sq_conf->num_es_stack_entries << NUM_ES_STACK_ENTRIES_shift));
+
+ sq_stack_resource_mgmt_3 = ((sq_conf->num_hs_stack_entries << NUM_HS_STACK_ENTRIES_shift) |
+ (sq_conf->num_ls_stack_entries << NUM_LS_STACK_ENTRIES_shift));
+
+ BEGIN_BATCH(16); /* presumably 3 (EREG) + 2+4 (PACK0) + 2+5 (PACK0) dwords - confirm against the macro definitions */
+ /* disable dyn gprs */
+ EREG(SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0);
+ PACK0(SQ_CONFIG, 4);
+ E32(sq_config);
+ E32(sq_gpr_resource_mgmt_1);
+ E32(sq_gpr_resource_mgmt_2);
+ E32(sq_gpr_resource_mgmt_3);
+ PACK0(SQ_THREAD_RESOURCE_MGMT, 5);
+ E32(sq_thread_resource_mgmt);
+ E32(sq_thread_resource_mgmt_2);
+ E32(sq_stack_resource_mgmt_1);
+ E32(sq_stack_resource_mgmt_2);
+ E32(sq_stack_resource_mgmt_3);
+ END_BATCH();
+}
+
+void
+evergreen_set_render_target(struct radeon *radeon, cb_config_t *cb_conf, uint32_t domain)
+{
+ uint32_t cb_color_info, cb_color_attrib = 0, cb_color_dim;
+ int pitch, slice, h;
+ /* Program colour buffer cb_conf->id: base, CMASK/FMASK, attrib, info, pitch/slice/view/dim, clear words, target mask, ROP and blend. */
+ /* Per-buffer registers are addressed as CB_COLOR0_* + 0x3c * cb_conf->id; cb_conf->rop indexes EVERGREEN_ROP[16] without a range check. */
+ cb_color_info = ((cb_conf->endian << ENDIAN_shift) |
+ (cb_conf->format << CB_COLOR0_INFO__FORMAT_shift) |
+ (cb_conf->array_mode << CB_COLOR0_INFO__ARRAY_MODE_shift) |
+ (cb_conf->number_type << NUMBER_TYPE_shift) |
+ (cb_conf->comp_swap << COMP_SWAP_shift) |
+ (cb_conf->source_format << SOURCE_FORMAT_shift) |
+ (cb_conf->resource_type << RESOURCE_TYPE_shift));
+ if (cb_conf->blend_clamp)
+ cb_color_info |= BLEND_CLAMP_bit;
+ if (cb_conf->fast_clear)
+ cb_color_info |= FAST_CLEAR_bit;
+ if (cb_conf->compression)
+ cb_color_info |= COMPRESSION_bit;
+ if (cb_conf->blend_bypass)
+ cb_color_info |= BLEND_BYPASS_bit;
+ if (cb_conf->simple_float)
+ cb_color_info |= SIMPLE_FLOAT_bit;
+ if (cb_conf->round_mode)
+ cb_color_info |= CB_COLOR0_INFO__ROUND_MODE_bit;
+ if (cb_conf->tile_compact)
+ cb_color_info |= CB_COLOR0_INFO__TILE_COMPACT_bit;
+ if (cb_conf->rat)
+ cb_color_info |= RAT_bit;
+
+ /* bit 4 needs to be set for linear and depth/stencil surfaces */
+ if (cb_conf->non_disp_tiling)
+ cb_color_attrib |= CB_COLOR0_ATTRIB__NON_DISP_TILING_ORDER_bit;
+
+ pitch = (cb_conf->w / 8) - 1; /* width in units of 8, minus 1 */
+ h = RADEON_ALIGN(cb_conf->h, 8);
+ slice = ((cb_conf->w * h) / 64) - 1; /* w * aligned h in units of 64, minus 1 */
+
+ switch (cb_conf->resource_type) {
+ case BUFFER:
+ /* number of elements in the surface */
+ cb_color_dim = pitch * slice;
+ break;
+ default:
+ /* w/h of the surface */
+ cb_color_dim = (((cb_conf->w - 1) << WIDTH_MAX_shift) |
+ ((cb_conf->h - 1) << HEIGHT_MAX_shift));
+ break;
+ }
+
+ BEGIN_BATCH(3 + 2);
+ EREG(CB_COLOR0_BASE + (0x3c * cb_conf->id), (cb_conf->base >> 8));
+ RELOC_BATCH(cb_conf->bo, 0, domain);
+ END_BATCH();
+
+ /* Set CMASK & FMASK buffer to the offset of color buffer as
+ * we don't use those this shouldn't cause any issue and we
+ * then have a valid cmd stream
+ */
+ BEGIN_BATCH(3 + 2);
+ EREG(CB_COLOR0_CMASK + (0x3c * cb_conf->id), (0 >> 8));
+ RELOC_BATCH(cb_conf->bo, 0, domain);
+ END_BATCH();
+ BEGIN_BATCH(3 + 2);
+ EREG(CB_COLOR0_FMASK + (0x3c * cb_conf->id), (0 >> 8));
+ RELOC_BATCH(cb_conf->bo, 0, domain);
+ END_BATCH();
+
+ /* tiling config */
+ BEGIN_BATCH(3 + 2);
+ EREG(CB_COLOR0_ATTRIB + (0x3c * cb_conf->id), cb_color_attrib);
+ RELOC_BATCH(cb_conf->bo, 0, domain);
+ END_BATCH();
+ BEGIN_BATCH(3 + 2);
+ EREG(CB_COLOR0_INFO + (0x3c * cb_conf->id), cb_color_info);
+ RELOC_BATCH(cb_conf->bo, 0, domain);
+ END_BATCH();
+
+ BEGIN_BATCH(33); /* presumably 9 EREGs * 3 + one PACK0 of 2+4 dwords - confirm against the macro definitions */
+ EREG(CB_COLOR0_PITCH + (0x3c * cb_conf->id), pitch);
+ EREG(CB_COLOR0_SLICE + (0x3c * cb_conf->id), slice);
+ EREG(CB_COLOR0_VIEW + (0x3c * cb_conf->id), 0);
+ EREG(CB_COLOR0_DIM + (0x3c * cb_conf->id), cb_color_dim);
+ EREG(CB_COLOR0_CMASK_SLICE + (0x3c * cb_conf->id), 0);
+ EREG(CB_COLOR0_FMASK_SLICE + (0x3c * cb_conf->id), 0);
+ PACK0(CB_COLOR0_CLEAR_WORD0 + (0x3c * cb_conf->id), 4);
+ E32(0);
+ E32(0);
+ E32(0);
+ E32(0);
+ EREG(CB_TARGET_MASK, (cb_conf->pmask << TARGET0_ENABLE_shift));
+ EREG(CB_COLOR_CONTROL, (EVERGREEN_ROP[cb_conf->rop] |
+ (CB_NORMAL << CB_COLOR_CONTROL__MODE_shift)));
+ EREG(CB_BLEND0_CONTROL, cb_conf->blendcntl);
+ END_BATCH();
+
+}
+
+static void
+evergreen_cp_set_surface_sync(struct radeon *radeon, uint32_t sync_type,
+ uint32_t size, uint64_t mc_addr,
+ struct radeon_bo *bo, uint32_t rdomains, uint32_t wdomain)
+{
+ /* Emit IT_SURFACE_SYNC for bo; size is rounded up to 256-byte units unless it is 0xffffffff, which is passed through unchanged. */
+ uint32_t cp_coher_size;
+ if (size == 0xffffffff)
+ cp_coher_size = 0xffffffff;
+ else
+ cp_coher_size = ((size + 255) >> 8);
+
+ BEGIN_BATCH(5 + 2);
+ PACK3(IT_SURFACE_SYNC, 4);
+ E32(sync_type);
+ E32(cp_coher_size);
+ E32((mc_addr >> 8)); /* address is emitted shifted right by 8, like the size */
+ E32(10); /* poll interval */
+ RELOC_BATCH(bo, rdomains, wdomain);
+ END_BATCH();
+}
+
+void
+evergreen_set_spi(struct radeon *radeon, int vs_export_count, int num_interp)
+{
+ /* Write SPI_VS_OUT_CONFIG with vs_export_count, then three registers starting at SPI_PS_IN_CONTROL_0. */
+ /* Only num_interp and LINEAR_GRADIENT_ENA_bit are set; the two following registers are written as 0. */
+ BEGIN_BATCH(8);
+ /* Interpolator setup */
+ EREG(SPI_VS_OUT_CONFIG, (vs_export_count << VS_EXPORT_COUNT_shift));
+ PACK0(SPI_PS_IN_CONTROL_0, 3);
+ E32(((num_interp << NUM_INTERP_shift) |
+ LINEAR_GRADIENT_ENA_bit)); // SPI_PS_IN_CONTROL_0
+ E32(0); // SPI_PS_IN_CONTROL_1
+ E32(0); // SPI_INTERP_CONTROL_0
+ END_BATCH();
+}
+
+void
+evergreen_fs_setup(struct radeon *radeon, shader_config_t *fs_conf, uint32_t domain)
+{
+ /* Point SQ_PGM_START_FS at fs_conf->shader_addr (in 256-byte units, relocated against fs_conf->bo) and set SQ_PGM_RESOURCES_FS. */
+ uint32_t sq_pgm_resources;
+
+ sq_pgm_resources = ((fs_conf->num_gprs << NUM_GPRS_shift) |
+ (fs_conf->stack_size << STACK_SIZE_shift));
+
+ if (fs_conf->dx10_clamp)
+ sq_pgm_resources |= DX10_CLAMP_bit;
+
+ BEGIN_BATCH(3 + 2);
+ EREG(SQ_PGM_START_FS, fs_conf->shader_addr >> 8);
+ RELOC_BATCH(fs_conf->bo, domain, 0);
+ END_BATCH();
+
+ BEGIN_BATCH(3);
+ EREG(SQ_PGM_RESOURCES_FS, sq_pgm_resources);
+ END_BATCH();
+}
+
+void
+evergreen_vs_setup(struct radeon *radeon, shader_config_t *vs_conf, uint32_t domain)
+{
+ /* Surface-sync the shader range, then program SQ_PGM_START_VS and the two resource registers starting at SQ_PGM_RESOURCES_VS. */
+ uint32_t sq_pgm_resources, sq_pgm_resources_2;
+
+ sq_pgm_resources = ((vs_conf->num_gprs << NUM_GPRS_shift) |
+ (vs_conf->stack_size << STACK_SIZE_shift));
+
+ if (vs_conf->dx10_clamp)
+ sq_pgm_resources |= DX10_CLAMP_bit;
+ if (vs_conf->uncached_first_inst)
+ sq_pgm_resources |= UNCACHED_FIRST_INST_bit;
+
+ sq_pgm_resources_2 = ((vs_conf->single_round << SINGLE_ROUND_shift) |
+ (vs_conf->double_round << DOUBLE_ROUND_shift));
+
+ if (vs_conf->allow_sdi)
+ sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_IN_bit;
+ if (vs_conf->allow_sd0) /* NOTE(review): spelled "sd0" (zero) while the sibling is "ddo" - confirm against shader_config_t */
+ sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_OUT_bit;
+ if (vs_conf->allow_ddi)
+ sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_IN_bit;
+ if (vs_conf->allow_ddo)
+ sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_OUT_bit;
+
+ /* flush SQ cache */
+ evergreen_cp_set_surface_sync(radeon, SH_ACTION_ENA_bit,
+ vs_conf->shader_size, vs_conf->shader_addr,
+ vs_conf->bo, domain, 0);
+
+ BEGIN_BATCH(3 + 2);
+ EREG(SQ_PGM_START_VS, vs_conf->shader_addr >> 8);
+ RELOC_BATCH(vs_conf->bo, domain, 0);
+ END_BATCH();
+
+ BEGIN_BATCH(4);
+ PACK0(SQ_PGM_RESOURCES_VS, 2);
+ E32(sq_pgm_resources);
+ E32(sq_pgm_resources_2);
+ END_BATCH();
+}
+
+void
+evergreen_ps_setup(struct radeon *radeon, shader_config_t *ps_conf, uint32_t domain)
+{
+ /* Surface-sync the shader range, then program SQ_PGM_START_PS and three registers starting at SQ_PGM_RESOURCES_PS (the third is export_mode). */
+ uint32_t sq_pgm_resources, sq_pgm_resources_2;
+
+ sq_pgm_resources = ((ps_conf->num_gprs << NUM_GPRS_shift) |
+ (ps_conf->stack_size << STACK_SIZE_shift));
+
+ if (ps_conf->dx10_clamp)
+ sq_pgm_resources |= DX10_CLAMP_bit;
+ if (ps_conf->uncached_first_inst)
+ sq_pgm_resources |= UNCACHED_FIRST_INST_bit;
+ if (ps_conf->clamp_consts)
+ sq_pgm_resources |= CLAMP_CONSTS_bit;
+
+ sq_pgm_resources_2 = ((ps_conf->single_round << SINGLE_ROUND_shift) |
+ (ps_conf->double_round << DOUBLE_ROUND_shift));
+
+ if (ps_conf->allow_sdi)
+ sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_IN_bit;
+ if (ps_conf->allow_sd0) /* NOTE(review): spelled "sd0" (zero) while the sibling is "ddo" - confirm against shader_config_t */
+ sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_OUT_bit;
+ if (ps_conf->allow_ddi)
+ sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_IN_bit;
+ if (ps_conf->allow_ddo)
+ sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_OUT_bit;
+
+ /* flush SQ cache */
+ evergreen_cp_set_surface_sync(radeon, SH_ACTION_ENA_bit,
+ ps_conf->shader_size, ps_conf->shader_addr,
+ ps_conf->bo, domain, 0);
+
+ BEGIN_BATCH(3 + 2);
+ EREG(SQ_PGM_START_PS, ps_conf->shader_addr >> 8);
+ RELOC_BATCH(ps_conf->bo, domain, 0);
+ END_BATCH();
+
+ BEGIN_BATCH(5);
+ PACK0(SQ_PGM_RESOURCES_PS, 3);
+ E32(sq_pgm_resources);
+ E32(sq_pgm_resources_2);
+ E32(ps_conf->export_mode);
+ END_BATCH();
+}
+
+void
+evergreen_set_alu_consts(struct radeon *radeon, const_config_t *const_conf, uint32_t domain)
+{
+ /* Bind the constant buffer in const_conf to slot 0 of the VS or PS stage; any other const_conf->type is only logged via ErrorF. */
+ /* size reg is units of 16 consts (4 dwords each) */
+ uint32_t size = const_conf->size_bytes >> 8;
+
+ if (size == 0) /* buffers smaller than 256 bytes still get a size of 1 */
+ size = 1;
+
+ /* flush SQ cache */
+ evergreen_cp_set_surface_sync(radeon, SH_ACTION_ENA_bit,
+ const_conf->size_bytes, const_conf->const_addr,
+ const_conf->bo, domain, 0);
+
+ switch (const_conf->type) {
+ case SHADER_TYPE_VS:
+ BEGIN_BATCH(3);
+ EREG(SQ_ALU_CONST_BUFFER_SIZE_VS_0, size);
+ END_BATCH();
+ BEGIN_BATCH(3 + 2);
+ EREG(SQ_ALU_CONST_CACHE_VS_0, const_conf->const_addr >> 8);
+ RELOC_BATCH(const_conf->bo, domain, 0);
+ END_BATCH();
+ break;
+ case SHADER_TYPE_PS:
+ BEGIN_BATCH(3);
+ EREG(SQ_ALU_CONST_BUFFER_SIZE_PS_0, size);
+ END_BATCH();
+ BEGIN_BATCH(3 + 2);
+ EREG(SQ_ALU_CONST_CACHE_PS_0, const_conf->const_addr >> 8);
+ RELOC_BATCH(const_conf->bo, domain, 0);
+ END_BATCH();
+ break;
+ default:
+ ErrorF("Unsupported const type %d\n", const_conf->type);
+ break;
+ }
+
+}
+
+void
+evergreen_set_bool_consts(struct radeon *radeon, int offset, uint32_t val)
+{
+ /* Write val to the bool-constant register selected by offset (SQ_BOOL_CONST + offset * SQ_BOOL_CONST_offset). */
+ /* bool register order is: ps, vs/es, gs, hs, ls, cs; one register each
+ * 1 bit per bool; 32 bools each for ps, vs/es, gs, hs, ls, cs.
+ */
+ BEGIN_BATCH(3);
+ EREG(SQ_BOOL_CONST + offset * SQ_BOOL_CONST_offset, val);
+ END_BATCH();
+}
+
+static void
+evergreen_set_vtx_resource(struct radeon *radeon, vtx_resource_t *res, uint32_t domain)
+{
+ /* Surface-sync res->bo, then write the 8-dword vertex-buffer descriptor for fetch-resource slot res->id. */
+ struct radeon_accel_state *accel_state = &radeon->accel_state;
+ uint32_t sq_vtx_constant_word2, sq_vtx_constant_word3, sq_vtx_constant_word4;
+
+ sq_vtx_constant_word2 = ((((res->vb_addr) >> 32) & BASE_ADDRESS_HI_mask) |
+ ((res->vtx_size_dw << 2) << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift) |
+ (res->format << SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift) |
+ (res->num_format_all << SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift) |
+ (res->endian << SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift));
+ if (res->clamp_x)
+ sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit;
+
+ if (res->format_comp_all)
+ sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit;
+
+ if (res->srf_mode_all)
+ sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit;
+
+ sq_vtx_constant_word3 = ((res->dst_sel_x << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_X_shift) |
+ (res->dst_sel_y << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Y_shift) |
+ (res->dst_sel_z << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Z_shift) |
+ (res->dst_sel_w << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_W_shift));
+
+ if (res->uncached)
+ sq_vtx_constant_word3 |= SQ_VTX_CONSTANT_WORD3_0__UNCACHED_bit;
+
+ /* XXX ??? - word 4 ("num elements" below) is always written as 0 */
+ sq_vtx_constant_word4 = 0;
+ /* NOTE(review): the sync size comes from accel_state->vbo.vb_offset and the address is 0, not from res - confirm this is intended. */
+ evergreen_cp_set_surface_sync(radeon, TC_ACTION_ENA_bit,
+ accel_state->vbo.vb_offset, 0,
+ res->bo,
+ domain, 0);
+
+ BEGIN_BATCH(10 + 2);
+ PACK0(SQ_FETCH_RESOURCE + res->id * SQ_FETCH_RESOURCE_offset, 8);
+ E32(res->vb_addr & 0xffffffff); // 0: BASE_ADDRESS
+ E32((res->vtx_num_entries << 2) - 1); // 1: SIZE
+ E32(sq_vtx_constant_word2); // 2: BASE_HI, STRIDE, CLAMP, FORMAT, ENDIAN
+ E32(sq_vtx_constant_word3); // 3: swizzles
+ E32(sq_vtx_constant_word4); // 4: num elements
+ E32(0); // 5: n/a
+ E32(0); // 6: n/a
+ E32(SQ_TEX_VTX_VALID_BUFFER << SQ_VTX_CONSTANT_WORD7_0__TYPE_shift); // 7: TYPE
+ RELOC_BATCH(res->bo, domain, 0);
+ END_BATCH();
+}
+
+void
+evergreen_set_tex_resource(struct radeon *radeon, tex_resource_t *tex_res, uint32_t domain)
+{
+ /* Surface-sync the texture, then write its 8-dword descriptor to fetch-resource slot tex_res->id with relocs for bo and mip_bo. */
+ uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;
+ uint32_t sq_tex_resource_word5, sq_tex_resource_word6, sq_tex_resource_word7;
+
+ sq_tex_resource_word0 = (tex_res->dim << DIM_shift);
+
+ if (tex_res->w) /* pitch is rounded up to units of 8, minus 1; width is stored minus 1 */
+ sq_tex_resource_word0 |= (((((tex_res->pitch + 7) >> 3) - 1) << PITCH_shift) |
+ ((tex_res->w - 1) << TEX_WIDTH_shift));
+
+ if (tex_res->tile_type)
+ sq_tex_resource_word0 |= SQ_TEX_RESOURCE_WORD0_0__NON_DISP_TILING_ORDER_bit;
+
+ sq_tex_resource_word1 = (tex_res->array_mode << SQ_TEX_RESOURCE_WORD1_0__ARRAY_MODE_shift);
+
+ if (tex_res->h) /* height and depth are stored minus 1, and only when non-zero */
+ sq_tex_resource_word1 |= ((tex_res->h - 1) << TEX_HEIGHT_shift);
+ if (tex_res->depth)
+ sq_tex_resource_word1 |= ((tex_res->depth - 1) << TEX_DEPTH_shift);
+
+ sq_tex_resource_word4 = ((tex_res->format_comp_x << FORMAT_COMP_X_shift) |
+ (tex_res->format_comp_y << FORMAT_COMP_Y_shift) |
+ (tex_res->format_comp_z << FORMAT_COMP_Z_shift) |
+ (tex_res->format_comp_w << FORMAT_COMP_W_shift) |
+ (tex_res->num_format_all << SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift) |
+ (tex_res->endian << SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift) |
+ (tex_res->dst_sel_x << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) |
+ (tex_res->dst_sel_y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) |
+ (tex_res->dst_sel_z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) |
+ (tex_res->dst_sel_w << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift) |
+ (tex_res->base_level << BASE_LEVEL_shift));
+
+ if (tex_res->srf_mode_all)
+ sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit;
+ if (tex_res->force_degamma)
+ sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit;
+
+ sq_tex_resource_word5 = ((tex_res->last_level << LAST_LEVEL_shift) |
+ (tex_res->base_array << BASE_ARRAY_shift) |
+ (tex_res->last_array << LAST_ARRAY_shift));
+
+ sq_tex_resource_word6 = ((tex_res->min_lod << SQ_TEX_RESOURCE_WORD6_0__MIN_LOD_shift) |
+ (tex_res->perf_modulation << PERF_MODULATION_shift));
+
+ if (tex_res->interlaced)
+ sq_tex_resource_word6 |= INTERLACED_bit;
+
+ sq_tex_resource_word7 = ((tex_res->format << SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift) |
+ (SQ_TEX_VTX_VALID_TEXTURE << SQ_TEX_RESOURCE_WORD7_0__TYPE_shift));
+
+ /* flush texture cache */
+ evergreen_cp_set_surface_sync(radeon, TC_ACTION_ENA_bit,
+ tex_res->size, tex_res->base,
+ tex_res->bo, domain, 0);
+
+ BEGIN_BATCH(10 + 4);
+ PACK0(SQ_FETCH_RESOURCE + tex_res->id * SQ_FETCH_RESOURCE_offset, 8);
+ E32(sq_tex_resource_word0);
+ E32(sq_tex_resource_word1);
+ E32(((tex_res->base) >> 8)); /* words 2 and 3: base and mip base, both shifted right by 8 */
+ E32(((tex_res->mip_base) >> 8));
+ E32(sq_tex_resource_word4);
+ E32(sq_tex_resource_word5);
+ E32(sq_tex_resource_word6);
+ E32(sq_tex_resource_word7);
+ RELOC_BATCH(tex_res->bo, domain, 0);
+ RELOC_BATCH(tex_res->mip_bo, domain, 0);
+ END_BATCH();
+}
+
+void
+evergreen_set_tex_sampler (struct radeon *radeon, tex_sampler_t *s)
+{
+ /* Pack the members of s into three sampler words and write them to sampler slot s->id. */
+ uint32_t sq_tex_sampler_word0, sq_tex_sampler_word1, sq_tex_sampler_word2;
+
+ sq_tex_sampler_word0 = ((s->clamp_x << SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift) |
+ (s->clamp_y << CLAMP_Y_shift) |
+ (s->clamp_z << CLAMP_Z_shift) |
+ (s->xy_mag_filter << XY_MAG_FILTER_shift) |
+ (s->xy_min_filter << XY_MIN_FILTER_shift) |
+ (s->z_filter << Z_FILTER_shift) |
+ (s->mip_filter << MIP_FILTER_shift) |
+ (s->border_color << BORDER_COLOR_TYPE_shift) |
+ (s->depth_compare << DEPTH_COMPARE_FUNCTION_shift) |
+ (s->chroma_key << CHROMA_KEY_shift));
+
+ sq_tex_sampler_word1 = ((s->min_lod << SQ_TEX_SAMPLER_WORD1_0__MIN_LOD_shift) |
+ (s->max_lod << MAX_LOD_shift) |
+ (s->perf_mip << PERF_MIP_shift) |
+ (s->perf_z << PERF_Z_shift));
+
+ /* Word 2 holds both LOD biases plus the boolean options tested below. */
+ sq_tex_sampler_word2 = ((s->lod_bias << SQ_TEX_SAMPLER_WORD2_0__LOD_BIAS_shift) |
+ (s->lod_bias2 << LOD_BIAS_SEC_shift));
+
+ if (s->mc_coord_truncate)
+ sq_tex_sampler_word2 |= MC_COORD_TRUNCATE_bit;
+ if (s->force_degamma)
+ sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit;
+ if (s->truncate_coord)
+ sq_tex_sampler_word2 |= TRUNCATE_COORD_bit;
+ if (s->disable_cube_wrap)
+ sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__DISABLE_CUBE_WRAP_bit;
+ if (s->type)
+ sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__TYPE_bit;
+
+ BEGIN_BATCH(5);
+ PACK0(SQ_TEX_SAMPLER_WORD + s->id * SQ_TEX_SAMPLER_WORD_offset, 3);
+ E32(sq_tex_sampler_word0);
+ E32(sq_tex_sampler_word1);
+ E32(sq_tex_sampler_word2);
+ END_BATCH();
+}
+
+//XXX deal with clip offsets in clip setup
+void
+evergreen_set_screen_scissor(struct radeon *radeon, int x1, int y1, int x2, int y2)
+{
+ /* Write the screen scissor: (x1, y1) into the TL dword and (x2, y2) into the BR dword that follows it. */
+ /* The coordinates are shifted into place without masking or range checks. */
+ BEGIN_BATCH(4);
+ PACK0(PA_SC_SCREEN_SCISSOR_TL, 2);
+ E32(((x1 << PA_SC_SCREEN_SCISSOR_TL__TL_X_shift) |
+ (y1 << PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift)));
+ E32(((x2 << PA_SC_SCREEN_SCISSOR_BR__BR_X_shift) |
+ (y2 << PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift)));
+ END_BATCH();
+}
+
+void
+evergreen_set_vport_scissor(struct radeon *radeon, int id, int x1, int y1, int x2, int y2)
+{
+ /* Write the scissor of viewport id: the register pair sits at PA_SC_VPORT_SCISSOR_0_TL + id * PA_SC_VPORT_SCISSOR_0_TL_offset. */
+ /* The TL dword also carries WINDOW_OFFSET_DISABLE_bit; coordinates are not masked or range-checked. */
+ BEGIN_BATCH(4);
+ PACK0(PA_SC_VPORT_SCISSOR_0_TL + id * PA_SC_VPORT_SCISSOR_0_TL_offset, 2);
+ E32(((x1 << PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift) |
+ (y1 << PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift) |
+ WINDOW_OFFSET_DISABLE_bit));
+ E32(((x2 << PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift) |
+ (y2 << PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift)));
+ END_BATCH();
+}
+
+void
+evergreen_set_generic_scissor(struct radeon *radeon, int x1, int y1, int x2, int y2)
+{
+ /* Write the generic scissor: (x1, y1) plus WINDOW_OFFSET_DISABLE_bit into the TL dword, (x2, y2) into the BR dword. */
+ /* Each dword uses its own register's field shifts, matching the other scissor setters in this file. */
+ BEGIN_BATCH(4);
+ PACK0(PA_SC_GENERIC_SCISSOR_TL, 2);
+ E32(((x1 << PA_SC_GENERIC_SCISSOR_TL__TL_X_shift) |
+ (y1 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift) |
+ WINDOW_OFFSET_DISABLE_bit));
+ E32(((x2 << PA_SC_GENERIC_SCISSOR_BR__BR_X_shift) |
+ (y2 << PA_SC_GENERIC_SCISSOR_BR__BR_Y_shift)));
+ END_BATCH();
+}
+
+void
+evergreen_set_window_scissor(struct radeon *radeon, int x1, int y1, int x2, int y2)
+{
+ /* Write the window scissor: (x1, y1) plus WINDOW_OFFSET_DISABLE_bit into the TL dword, (x2, y2) into the BR dword. */
+ /* The coordinates are shifted into place without masking or range checks. */
+ BEGIN_BATCH(4);
+ PACK0(PA_SC_WINDOW_SCISSOR_TL, 2);
+ E32(((x1 << PA_SC_WINDOW_SCISSOR_TL__TL_X_shift) |
+ (y1 << PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift) |
+ WINDOW_OFFSET_DISABLE_bit));
+ E32(((x2 << PA_SC_WINDOW_SCISSOR_BR__BR_X_shift) |
+ (y2 << PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift)));
+ END_BATCH();
+}
+
+void
+evergreen_set_clip_rect(struct radeon *radeon, int id, int x1, int y1, int x2, int y2)
+{
+ /* Write clip rectangle id: the register pair sits at PA_SC_CLIPRECT_0_TL + id * PA_SC_CLIPRECT_0_TL_offset. */
+ /* (x1, y1) goes into the TL dword and (x2, y2) into the BR dword, without masking or range checks. */
+ BEGIN_BATCH(4);
+ PACK0(PA_SC_CLIPRECT_0_TL + id * PA_SC_CLIPRECT_0_TL_offset, 2);
+ E32(((x1 << PA_SC_CLIPRECT_0_TL__TL_X_shift) |
+ (y1 << PA_SC_CLIPRECT_0_TL__TL_Y_shift)));
+ E32(((x2 << PA_SC_CLIPRECT_0_BR__BR_X_shift) |
+ (y2 << PA_SC_CLIPRECT_0_BR__BR_Y_shift)));
+ END_BATCH();
+}
+
+/*
+ * Setup of default state
+ */
+
+void
+evergreen_set_default_state(struct radeon *radeon)
+{
+ tex_resource_t tex_res;
+ shader_config_t fs_conf;
+ sq_config_t sq_conf;
+ int i;
+
+ struct radeon_accel_state *accel_state = &radeon->accel_state;
+
+ if (accel_state->XInited3D)
+ return;
+
+ memset(&tex_res, 0, sizeof(tex_resource_t));
+ memset(&fs_conf, 0, sizeof(shader_config_t));
+
+ accel_state->XInited3D = true;
+
+ evergreen_start_3d(radeon);
+
+ /* SQ */
+ sq_conf.ps_prio = 0;
+ sq_conf.vs_prio = 1;
+ sq_conf.gs_prio = 2;
+ sq_conf.es_prio = 3;
+ sq_conf.hs_prio = 0;
+ sq_conf.ls_prio = 0;
+ sq_conf.cs_prio = 0;
+
+ switch (radeon->ChipFamily) {
+ case CHIP_FAMILY_CEDAR:
+ default:
+ sq_conf.num_ps_gprs = 93;
+ sq_conf.num_vs_gprs = 46;
+ sq_conf.num_temp_gprs = 4;
+ sq_conf.num_gs_gprs = 31;
+ sq_conf.num_es_gprs = 31;
+ sq_conf.num_hs_gprs = 23;
+ sq_conf.num_ls_gprs = 23;
+ sq_conf.num_ps_threads = 96;
+ sq_conf.num_vs_threads = 16;
+ sq_conf.num_gs_threads = 16;
+ sq_conf.num_es_threads = 16;
+ sq_conf.num_hs_threads = 16;
+ sq_conf.num_ls_threads = 16;
+ sq_conf.num_ps_stack_entries = 42;
+ sq_conf.num_vs_stack_entries = 42;
+ sq_conf.num_gs_stack_entries = 42;
+ sq_conf.num_es_stack_entries = 42;
+ sq_conf.num_hs_stack_entries = 42;
+ sq_conf.num_ls_stack_entries = 42;
+ break;
+ case CHIP_FAMILY_REDWOOD:
+ sq_conf.num_ps_gprs = 93;
+ sq_conf.num_vs_gprs = 46;
+ sq_conf.num_temp_gprs = 4;
+ sq_conf.num_gs_gprs = 31;
+ sq_conf.num_es_gprs = 31;
+ sq_conf.num_hs_gprs = 23;
+ sq_conf.num_ls_gprs = 23;
+ sq_conf.num_ps_threads = 128;
+ sq_conf.num_vs_threads = 20;
+ sq_conf.num_gs_threads = 20;
+ sq_conf.num_es_threads = 20;
+ sq_conf.num_hs_threads = 20;
+ sq_conf.num_ls_threads = 20;
+ sq_conf.num_ps_stack_entries = 42;
+ sq_conf.num_vs_stack_entries = 42;
+ sq_conf.num_gs_stack_entries = 42;
+ sq_conf.num_es_stack_entries = 42;
+ sq_conf.num_hs_stack_entries = 42;
+ sq_conf.num_ls_stack_entries = 42;
+ break;
+ case CHIP_FAMILY_JUNIPER:
+ sq_conf.num_ps_gprs = 93;
+ sq_conf.num_vs_gprs = 46;
+ sq_conf.num_temp_gprs = 4;
+ sq_conf.num_gs_gprs = 31;
+ sq_conf.num_es_gprs = 31;
+ sq_conf.num_hs_gprs = 23;
+ sq_conf.num_ls_gprs = 23;
+ sq_conf.num_ps_threads = 128;
+ sq_conf.num_vs_threads = 20;
+ sq_conf.num_gs_threads = 20;
+ sq_conf.num_es_threads = 20;
+ sq_conf.num_hs_threads = 20;
+ sq_conf.num_ls_threads = 20;
+ sq_conf.num_ps_stack_entries = 85;
+ sq_conf.num_vs_stack_entries = 85;
+ sq_conf.num_gs_stack_entries = 85;
+ sq_conf.num_es_stack_entries = 85;
+ sq_conf.num_hs_stack_entries = 85;
+ sq_conf.num_ls_stack_entries = 85;
+ break;
+ case CHIP_FAMILY_CYPRESS:
+ case CHIP_FAMILY_HEMLOCK:
+ sq_conf.num_ps_gprs = 93;
+ sq_conf.num_vs_gprs = 46;
+ sq_conf.num_temp_gprs = 4;
+ sq_conf.num_gs_gprs = 31;
+ sq_conf.num_es_gprs = 31;
+ sq_conf.num_hs_gprs = 23;
+ sq_conf.num_ls_gprs = 23;
+ sq_conf.num_ps_threads = 128;
+ sq_conf.num_vs_threads = 20;
+ sq_conf.num_gs_threads = 20;
+ sq_conf.num_es_threads = 20;
+ sq_conf.num_hs_threads = 20;
+ sq_conf.num_ls_threads = 20;
+ sq_conf.num_ps_stack_entries = 85;
+ sq_conf.num_vs_stack_entries = 85;
+ sq_conf.num_gs_stack_entries = 85;
+ sq_conf.num_es_stack_entries = 85;
+ sq_conf.num_hs_stack_entries = 85;
+ sq_conf.num_ls_stack_entries = 85;
+ break;
+ case CHIP_FAMILY_PALM:
+ sq_conf.num_ps_gprs = 93;
+ sq_conf.num_vs_gprs = 46;
+ sq_conf.num_temp_gprs = 4;
+ sq_conf.num_gs_gprs = 31;
+ sq_conf.num_es_gprs = 31;
+ sq_conf.num_hs_gprs = 23;
+ sq_conf.num_ls_gprs = 23;
+ sq_conf.num_ps_threads = 96;
+ sq_conf.num_vs_threads = 16;
+ sq_conf.num_gs_threads = 16;
+ sq_conf.num_es_threads = 16;
+ sq_conf.num_hs_threads = 16;
+ sq_conf.num_ls_threads = 16;
+ sq_conf.num_ps_stack_entries = 42;
+ sq_conf.num_vs_stack_entries = 42;
+ sq_conf.num_gs_stack_entries = 42;
+ sq_conf.num_es_stack_entries = 42;
+ sq_conf.num_hs_stack_entries = 42;
+ sq_conf.num_ls_stack_entries = 42;
+ break;
+ case CHIP_FAMILY_BARTS:
+ sq_conf.num_ps_gprs = 93;
+ sq_conf.num_vs_gprs = 46;
+ sq_conf.num_temp_gprs = 4;
+ sq_conf.num_gs_gprs = 31;
+ sq_conf.num_es_gprs = 31;
+ sq_conf.num_hs_gprs = 23;
+ sq_conf.num_ls_gprs = 23;
+ sq_conf.num_ps_threads = 128;
+ sq_conf.num_vs_threads = 20;
+ sq_conf.num_gs_threads = 20;
+ sq_conf.num_es_threads = 20;
+ sq_conf.num_hs_threads = 20;
+ sq_conf.num_ls_threads = 20;
+ sq_conf.num_ps_stack_entries = 85;
+ sq_conf.num_vs_stack_entries = 85;
+ sq_conf.num_gs_stack_entries = 85;
+ sq_conf.num_es_stack_entries = 85;
+ sq_conf.num_hs_stack_entries = 85;
+ sq_conf.num_ls_stack_entries = 85;
+ break;
+ case CHIP_FAMILY_TURKS:
+ sq_conf.num_ps_gprs = 93;
+ sq_conf.num_vs_gprs = 46;
+ sq_conf.num_temp_gprs = 4;
+ sq_conf.num_gs_gprs = 31;
+ sq_conf.num_es_gprs = 31;
+ sq_conf.num_hs_gprs = 23;
+ sq_conf.num_ls_gprs = 23;
+ sq_conf.num_ps_threads = 128;
+ sq_conf.num_vs_threads = 20;
+ sq_conf.num_gs_threads = 20;
+ sq_conf.num_es_threads = 20;
+ sq_conf.num_hs_threads = 20;
+ sq_conf.num_ls_threads = 20;
+ sq_conf.num_ps_stack_entries = 42;
+ sq_conf.num_vs_stack_entries = 42;
+ sq_conf.num_gs_stack_entries = 42;
+ sq_conf.num_es_stack_entries = 42;
+ sq_conf.num_hs_stack_entries = 42;
+ sq_conf.num_ls_stack_entries = 42;
+ break;
+ case CHIP_FAMILY_CAICOS:
+ sq_conf.num_ps_gprs = 93;
+ sq_conf.num_vs_gprs = 46;
+ sq_conf.num_temp_gprs = 4;
+ sq_conf.num_gs_gprs = 31;
+ sq_conf.num_es_gprs = 31;
+ sq_conf.num_hs_gprs = 23;
+ sq_conf.num_ls_gprs = 23;
+ sq_conf.num_ps_threads = 128;
+ sq_conf.num_vs_threads = 10;
+ sq_conf.num_gs_threads = 10;
+ sq_conf.num_es_threads = 10;
+ sq_conf.num_hs_threads = 10;
+ sq_conf.num_ls_threads = 10;
+ sq_conf.num_ps_stack_entries = 42;
+ sq_conf.num_vs_stack_entries = 42;
+ sq_conf.num_gs_stack_entries = 42;
+ sq_conf.num_es_stack_entries = 42;
+ sq_conf.num_hs_stack_entries = 42;
+ sq_conf.num_ls_stack_entries = 42;
+ break;
+ }
+
+ evergreen_sq_setup(radeon, &sq_conf);
+
+ BEGIN_BATCH(24);
+ EREG(SQ_LDS_ALLOC_PS, 0);
+ EREG(SQ_DYN_GPR_RESOURCE_LIMIT_1, 0);
+
+ PACK0(SQ_ESGS_RING_ITEMSIZE, 6);
+ E32(0);
+ E32(0);
+ E32(0);
+ E32(0);
+ E32(0);
+ E32(0);
+
+ PACK0(SQ_GS_VERT_ITEMSIZE, 4);
+ E32(0);
+ E32(0);
+ E32(0);
+ E32(0);
+
+ PACK0(SQ_VTX_BASE_VTX_LOC, 2);
+ E32(0);
+ E32(0);
+ END_BATCH();
+
+ /* DB */
+ BEGIN_BATCH(3 + 2);
+ EREG(DB_Z_INFO, 0);
+ RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
+ END_BATCH();
+
+ BEGIN_BATCH(3 + 2);
+ EREG(DB_STENCIL_INFO, 0);
+ RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
+ END_BATCH();
+
+ BEGIN_BATCH(3 + 2);
+ EREG(DB_HTILE_DATA_BASE, 0);
+ RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
+ END_BATCH();
+
+ BEGIN_BATCH(49);
+ EREG(DB_DEPTH_CONTROL, 0);
+
+ PACK0(PA_SC_VPORT_ZMIN_0, 2);
+ EFLOAT(0.0); // PA_SC_VPORT_ZMIN_0
+ EFLOAT(1.0); // PA_SC_VPORT_ZMAX_0
+
+ PACK0(DB_RENDER_CONTROL, 5);
+ E32(STENCIL_COMPRESS_DISABLE_bit | DEPTH_COMPRESS_DISABLE_bit); // DB_RENDER_CONTROL
+ E32(0); // DB_COUNT_CONTROL
+ E32(0); // DB_DEPTH_VIEW
+ E32(0x2a); // DB_RENDER_OVERRIDE
+ E32(0); // DB_RENDER_OVERRIDE2
+
+ PACK0(DB_STENCIL_CLEAR, 2);
+ E32(0); // DB_STENCIL_CLEAR
+ E32(0); // DB_DEPTH_CLEAR
+
+ EREG(DB_ALPHA_TO_MASK, ((2 << ALPHA_TO_MASK_OFFSET0_shift) |
+ (2 << ALPHA_TO_MASK_OFFSET1_shift) |
+ (2 << ALPHA_TO_MASK_OFFSET2_shift) |
+ (2 << ALPHA_TO_MASK_OFFSET3_shift)));
+
+ EREG(DB_SHADER_CONTROL, ((EARLY_Z_THEN_LATE_Z << Z_ORDER_shift) |
+ DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */
+
+ // SX
+ EREG(SX_MISC, 0);
+
+ // CB
+ PACK0(SX_ALPHA_TEST_CONTROL, 5);
+ E32(0); // SX_ALPHA_TEST_CONTROL
+ E32(0x00000000); //CB_BLEND_RED
+ E32(0x00000000); //CB_BLEND_GREEN
+ E32(0x00000000); //CB_BLEND_BLUE
+ E32(0x00000000); //CB_BLEND_ALPHA
+
+ EREG(CB_SHADER_MASK, OUTPUT0_ENABLE_mask);
+
+ // SC
+ EREG(PA_SC_WINDOW_OFFSET, ((0 << WINDOW_X_OFFSET_shift) |
+ (0 << WINDOW_Y_OFFSET_shift)));
+ EREG(PA_SC_CLIPRECT_RULE, CLIP_RULE_mask);
+ EREG(PA_SC_EDGERULE, 0xAAAAAAAA);
+ EREG(PA_SU_HARDWARE_SCREEN_OFFSET, 0);
+ END_BATCH();
+
+ /* clip boolean is set to always visible -> doesn't matter */
+ for (i = 0; i < PA_SC_CLIPRECT_0_TL_num; i++)
+ evergreen_set_clip_rect (radeon, i, 0, 0, 8192, 8192);
+
+ for (i = 0; i < PA_SC_VPORT_SCISSOR_0_TL_num; i++)
+ evergreen_set_vport_scissor (radeon, i, 0, 0, 8192, 8192);
+
+ BEGIN_BATCH(57);
+ PACK0(PA_SC_MODE_CNTL_0, 2);
+ E32(0); // PA_SC_MODE_CNTL_0
+ E32(0); // PA_SC_MODE_CNTL_1
+
+ PACK0(PA_SC_LINE_CNTL, 16);
+ E32(0); // PA_SC_LINE_CNTL
+ E32(0); // PA_SC_AA_CONFIG
+ E32(((X_ROUND_TO_EVEN << PA_SU_VTX_CNTL__ROUND_MODE_shift) |
+ PIX_CENTER_bit)); // PA_SU_VTX_CNTL
+ EFLOAT(1.0); // PA_CL_GB_VERT_CLIP_ADJ
+ EFLOAT(1.0); // PA_CL_GB_VERT_DISC_ADJ
+ EFLOAT(1.0); // PA_CL_GB_HORZ_CLIP_ADJ
+ EFLOAT(1.0); // PA_CL_GB_HORZ_DISC_ADJ
+ E32(0); // PA_SC_AA_SAMPLE_LOCS_0
+ E32(0);
+ E32(0);
+ E32(0);
+ E32(0);
+ E32(0);
+ E32(0);
+ E32(0); // PA_SC_AA_SAMPLE_LOCS_7
+ E32(0xFFFFFFFF); // PA_SC_AA_MASK
+
+ // CL
+ PACK0(PA_CL_CLIP_CNTL, 8);
+ E32(CLIP_DISABLE_bit); // PA_CL_CLIP_CNTL
+ E32(FACE_bit); // PA_SU_SC_MODE_CNTL
+ E32(VTX_XY_FMT_bit); // PA_CL_VTE_CNTL
+ E32(0); // PA_CL_VS_OUT_CNTL
+ E32(0); // PA_CL_NANINF_CNTL
+ E32(0); // PA_SU_LINE_STIPPLE_CNTL
+ E32(0); // PA_SU_LINE_STIPPLE_SCALE
+ E32(0); // PA_SU_PRIM_FILTER_CNTL
+
+ // SU
+ PACK0(PA_SU_POLY_OFFSET_DB_FMT_CNTL, 6);
+ E32(0);
+ E32(0);
+ E32(0);
+ E32(0);
+ E32(0);
+ E32(0);
+
+ /* src = semantic id 0; mask = semantic id 1 */
+ EREG(SPI_VS_OUT_ID_0, ((0 << SEMANTIC_0_shift) |
+ (1 << SEMANTIC_1_shift)));
+ PACK0(SPI_PS_INPUT_CNTL_0 + (0 << 2), 2);
+ /* SPI_PS_INPUT_CNTL_0 maps to GPR[0] - load with semantic id 0 */
+ E32(((0 << SEMANTIC_shift) |
+ (0x01 << DEFAULT_VAL_shift)));
+ /* SPI_PS_INPUT_CNTL_1 maps to GPR[1] - load with semantic id 1 */
+ E32(((1 << SEMANTIC_shift) |
+ (0x01 << DEFAULT_VAL_shift)));
+
+ PACK0(SPI_INPUT_Z, 8);
+ E32(0); // SPI_INPUT_Z
+ E32(0); // SPI_FOG_CNTL
+ E32(LINEAR_CENTROID_ENA__X_ON_AT_CENTROID << LINEAR_CENTROID_ENA_shift); // SPI_BARYC_CNTL
+ E32(0); // SPI_PS_IN_CONTROL_2
+ E32(0);
+ E32(0);
+ E32(0);
+ E32(0);
+ END_BATCH();
+
+ // clear FS
+ fs_conf.bo = accel_state->shaders_bo;
+ evergreen_fs_setup(radeon, &fs_conf, RADEON_GEM_DOMAIN_VRAM);
+
+ // VGT
+ BEGIN_BATCH(46);
+
+ PACK0(VGT_MAX_VTX_INDX, 4);
+ E32(0xffffff);
+ E32(0);
+ E32(0);
+ E32(0);
+
+ PACK0(VGT_INSTANCE_STEP_RATE_0, 2);
+ E32(0);
+ E32(0);
+
+ PACK0(VGT_REUSE_OFF, 2);
+ E32(0);
+ E32(0);
+
+ PACK0(PA_SU_POINT_SIZE, 17);
+ E32(0); // PA_SU_POINT_SIZE
+ E32(0); // PA_SU_POINT_MINMAX
+ E32((8 << PA_SU_LINE_CNTL__WIDTH_shift)); /* Line width 1 pixel */ // PA_SU_LINE_CNTL
+ E32(0); // PA_SC_LINE_STIPPLE
+ E32(0); // VGT_OUTPUT_PATH_CNTL
+ E32(0); // VGT_HOS_CNTL
+ E32(0);
+ E32(0);
+ E32(0);
+ E32(0);
+ E32(0);
+ E32(0);
+ E32(0);
+ E32(0);
+ E32(0);
+ E32(0);
+ E32(0); // VGT_GS_MODE
+
+ EREG(VGT_PRIMITIVEID_EN, 0);
+ EREG(VGT_MULTI_PRIM_IB_RESET_EN, 0);
+ EREG(VGT_SHADER_STAGES_EN, 0);
+
+ PACK0(VGT_STRMOUT_CONFIG, 2);
+ E32(0);
+ E32(0);
+ END_BATCH();
+}
+
+
+/*
+ * Commands
+ */
+
+void
+evergreen_draw_auto(struct radeon *radeon, draw_config_t *draw_conf)
+{
+ /*
+  * Queue one auto-indexed draw described by draw_conf.
+  *
+  * Emitted in order: the primitive type register, the index type, the
+  * instance count, and finally a DRAW_INDEX_AUTO packet carrying the
+  * index count and the draw initiator word.  "radeon" is not referenced
+  * directly; presumably the batch macros pick it up by name -- confirm
+  * against their definitions.
+  */
+
+ BEGIN_BATCH(10); /* presumably the dword budget for everything emitted up to END_BATCH() -- confirm */
+ EREG(VGT_PRIMITIVE_TYPE, draw_conf->prim_type);
+ PACK3(IT_INDEX_TYPE, 1);
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+ E32(IT_INDEX_TYPE_SWAP_MODE(ENDIAN_8IN32) | draw_conf->index_type); /* big-endian builds OR in the ENDIAN_8IN32 swap mode */
+#else
+ E32(draw_conf->index_type);
+#endif
+ PACK3(IT_NUM_INSTANCES, 1);
+ E32(draw_conf->num_instances);
+ PACK3(IT_DRAW_INDEX_AUTO, 2); /* two payload dwords follow: index count, then draw initiator */
+ E32(draw_conf->num_indices);
+ E32(draw_conf->vgt_draw_initiator);
+ END_BATCH();
+}
+
+void evergreen_finish_op(struct radeon *radeon, int vtx_size)
+{
+ /*
+  * Close the accel operation that is currently being accumulated.
+  *
+  * vtx_size is the size of one vertex in bytes (it is divided by 4 below
+  * to get dwords).  Nothing happens when no operation is open, i.e. when
+  * vbo.vb_start_op is -1.  When no vertex data was added since the
+  * operation started, radeon_ib_discard() and radeon_cs_flush_indirect()
+  * are called instead of drawing.  Otherwise the bytes written since
+  * vb_start_op are bound as the VS fetch resource, drawn as a RECTLIST,
+  * a surface sync is issued for the destination, and the per-operation
+  * markers are reset.
+  *
+  * NOTE(review): a vtx_size below 4 makes vtx_size_dw zero, and the
+  * num_indices computation would then divide by zero; callers presumably
+  * always pass a multiple of 4 -- confirm.
+  */
+ struct radeon_accel_state *accel_state = &radeon->accel_state;
+ draw_config_t draw_conf;
+ vtx_resource_t vtx_res;
+
+ if (accel_state->vbo.vb_start_op == -1)
+ return;
+
+ CLEAR (draw_conf);
+ CLEAR (vtx_res);
+
+ if (accel_state->vbo.vb_offset == accel_state->vbo.vb_start_op) {
+ radeon_ib_discard(radeon);
+ radeon_cs_flush_indirect(radeon);
+ return;
+ }
+
+ /* Vertex buffer setup: describe the bytes written since the operation started */
+ accel_state->vbo.vb_size = accel_state->vbo.vb_offset - accel_state->vbo.vb_start_op;
+ vtx_res.id = SQ_FETCH_RESOURCE_vs;
+ vtx_res.vtx_size_dw = vtx_size / 4; /* bytes -> dwords */
+ vtx_res.vtx_num_entries = accel_state->vbo.vb_size / 4; /* total dwords of vertex data */
+ vtx_res.vb_addr = accel_state->vbo.vb_start_op;
+ vtx_res.bo = accel_state->vbo.vb_bo;
+ vtx_res.dst_sel_x = SQ_SEL_X;
+ vtx_res.dst_sel_y = SQ_SEL_Y;
+ vtx_res.dst_sel_z = SQ_SEL_Z;
+ vtx_res.dst_sel_w = SQ_SEL_W;
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+ vtx_res.endian = SQ_ENDIAN_8IN32;
+#endif
+ evergreen_set_vtx_resource(radeon, &vtx_res, RADEON_GEM_DOMAIN_GTT);
+
+ /* Draw: auto-generated indices, one instance */
+ draw_conf.prim_type = DI_PT_RECTLIST;
+ draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX;
+ draw_conf.num_instances = 1;
+ draw_conf.num_indices = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw; /* total dwords / dwords per vertex = vertex count */
+ draw_conf.index_type = DI_INDEX_SIZE_16_BIT;
+
+ evergreen_draw_auto(radeon, &draw_conf);
+
+ /* sync dst surface */
+ evergreen_cp_set_surface_sync(radeon, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit),
+ accel_state->dst_size, accel_state->dst_obj.offset,
+ accel_state->dst_obj.bo, 0, accel_state->dst_obj.domain);
+
+ accel_state->vbo.vb_start_op = -1; /* -1 is the "no operation open" value tested at the top */
+ accel_state->cbuf.vb_start_op = -1;
+ accel_state->ib_reset_op = 0;
+
+}
+
+/*
+ * Allocate the buffer object that holds all shader programs.
+ *
+ * The buffer is requested from radeon->bufmgr in the VRAM domain, sized for
+ * nine 512-byte shader slots, and stored in radeon->accel_state.shaders_bo.
+ *
+ * Returns true on success; logs through ErrorF() and returns false when
+ * radeon_bo_open() yields NULL.
+ */
+bool
+EVERGREENAllocShaders(struct radeon *radeon)
+{
+    struct radeon_accel_state *state = &radeon->accel_state;
+    /* 512 bytes per shader for now */
+    const int shader_slot_bytes = 512;
+    const int shader_slot_count = 9;
+    int total_bytes = shader_slot_bytes * shader_slot_count;
+
+    state->shaders_bo = radeon_bo_open(radeon->bufmgr, 0, total_bytes, 0,
+                                       RADEON_GEM_DOMAIN_VRAM, 0);
+    if (state->shaders_bo != NULL)
+        return true;
+
+    ErrorF("Allocating shader failed\n");
+    return false;
+}
+
+
+/*
+ * Fill the shader buffer object with the built-in shader programs.
+ *
+ * accel_state->shaders_bo is mapped, each evergreen_*_vs/ps generator
+ * writes its program at a byte offset that is a multiple of 512, that
+ * offset is recorded in accel_state, and the buffer is unmapped again.
+ *
+ * Returns true on success.  A mapping failure is reported with ErrorF()
+ * and then ends the process through exit(-1); the "return false" after it
+ * is kept from the original but is not reached because exit() does not
+ * return.
+ */
+bool
+EVERGREENLoadShaders(struct radeon *radeon)
+{
+    struct radeon_accel_state *state = &radeon->accel_state;
+    RADEONChipFamily family = radeon->ChipFamily;
+    uint32_t *code;
+    int map_err;
+
+    map_err = radeon_bo_map(state->shaders_bo, 1);
+    if (map_err != 0) {
+        ErrorF("failed to map shader %d\n", map_err);
+        exit(-1);
+        return false;
+    }
+    code = state->shaders_bo->ptr;
+
+    /* Offsets are byte offsets; code[] is indexed in 32-bit words, hence the / 4. */
+
+    /* solid fill, vertex shader */
+    state->solid_vs_offset = 0;
+    evergreen_solid_vs(family, &code[state->solid_vs_offset / 4]);
+
+    /* solid fill, pixel shader */
+    state->solid_ps_offset = 512;
+    evergreen_solid_ps(family, &code[state->solid_ps_offset / 4]);
+
+    /* copy, vertex shader */
+    state->copy_vs_offset = 1024;
+    evergreen_copy_vs(family, &code[state->copy_vs_offset / 4]);
+
+    /* copy, pixel shader */
+    state->copy_ps_offset = 1536;
+    evergreen_copy_ps(family, &code[state->copy_ps_offset / 4]);
+
+#if 0
+    /* composite, vertex shader */
+    state->comp_vs_offset = 2048;
+    evergreen_comp_vs(family, &code[state->comp_vs_offset / 4]);
+
+    /* composite, pixel shader */
+    state->comp_ps_offset = 2560;
+    evergreen_comp_ps(family, &code[state->comp_ps_offset / 4]);
+
+    /* xv, vertex shader */
+    state->xv_vs_offset = 3072;
+    evergreen_xv_vs(family, &code[state->xv_vs_offset / 4]);
+
+    /* xv, pixel shader */
+    state->xv_ps_offset = 3584;
+    evergreen_xv_ps(family, &code[state->xv_ps_offset / 4]);
+#endif
+    radeon_bo_unmap(state->shaders_bo);
+
+    return true;
+}
diff --git a/evergreen_ops.c b/evergreen_ops.c
new file mode 100644
index 0000000..3235dab
--- /dev/null
+++ b/evergreen_ops.c
@@ -0,0 +1,153 @@
+
+#include "radeondemo.h"
+#include "evergreen_reg.h"
+#include "evergreen_state.h"
+/*
+ * Program all GPU state needed for a solid-colour fill into *obj.
+ *
+ * radeon  device context; its accel_state is updated in place.
+ * obj     destination surface, copied into accel_state->dst_obj.
+ * fg      fill colour packed according to the destination depth, as decoded
+ *         at the end of this function: 5:6:5 RGB for 16 bpp, an 8-bit alpha
+ *         value for 8 bpp, and 8:8:8:8 ARGB otherwise.
+ *
+ * Rectangles are added afterwards with evergreen_solid(); the draw itself
+ * is presumably issued by evergreen_finish_op() -- confirm with the caller.
+ * The process exits with status -1 when radeon_cs_space_check() fails.
+ */
+void do_solid_fill_prepare(struct radeon *radeon, struct r600_accel_object *obj, int fg)
+{
+ struct radeon_accel_state *accel_state = &radeon->accel_state;
+ cb_config_t cb_conf;
+ shader_config_t vs_conf, ps_conf;
+ uint32_t a, r, g, b;
+ const_config_t ps_const_conf;
+ float *ps_alu_consts;
+ int ret;
+ /* CLEAR presumably zero-fills each config struct -- see its definition. */
+ CLEAR (cb_conf);
+ CLEAR (vs_conf);
+ CLEAR (ps_conf);
+ CLEAR (ps_const_conf);
+ /* Remember the destination; both source slots are zeroed because a solid fill reads no surface. */
+ accel_state->dst_obj = *obj;
+ memset(&accel_state->src_obj[0], 0, sizeof(struct r600_accel_object));
+ memset(&accel_state->src_obj[1], 0, sizeof(struct r600_accel_object));
+ /* Reset the CS buffer list, add the shader and destination BOs, then run the space check. */
+ radeon_cs_space_reset_bos(radeon->cs);
+ radeon_cs_space_add_persistent_bo(radeon->cs, accel_state->shaders_bo,
+ RADEON_GEM_DOMAIN_VRAM, 0);
+ radeon_cs_space_add_persistent_bo(radeon->cs, accel_state->dst_obj.bo,
+ 0, accel_state->dst_obj.domain);
+ ret = radeon_cs_space_check(radeon->cs);
+ if (ret) {
+ fprintf(stderr,"fail\n");
+ exit(-1);
+ }
+
+ radeon_vbo_check(radeon, &accel_state->vbo, 16);
+ radeon_vbo_check(radeon, &accel_state->cbuf, 256);
+ radeon_cp_start(radeon);
+
+ evergreen_set_default_state(radeon);
+ /* All three scissors cover the full destination surface. */
+ evergreen_set_generic_scissor(radeon, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+ evergreen_set_screen_scissor(radeon, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+ evergreen_set_window_scissor(radeon, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+
+ /* Shader: both programs live in shaders_bo at the offsets recorded by
+  * EVERGREENLoadShaders(); each one is given a 512-byte size. */
+ vs_conf.shader_addr = accel_state->solid_vs_offset;
+ vs_conf.shader_size = 512;
+ vs_conf.num_gprs = 2;
+ vs_conf.stack_size = 0;
+ vs_conf.bo = accel_state->shaders_bo;
+ evergreen_vs_setup(radeon, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
+
+ ps_conf.shader_addr = accel_state->solid_ps_offset;
+ ps_conf.shader_size = 512;
+ ps_conf.num_gprs = 1;
+ ps_conf.stack_size = 0;
+ ps_conf.clamp_consts = 0;
+ ps_conf.export_mode = 2;
+ ps_conf.bo = accel_state->shaders_bo;
+ evergreen_ps_setup(radeon, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
+ /* Colour buffer 0 is the destination surface; its width is taken from the pitch. */
+ cb_conf.id = 0;
+ cb_conf.w = accel_state->dst_obj.pitch;
+ cb_conf.h = accel_state->dst_obj.height;
+ cb_conf.base = accel_state->dst_obj.offset;
+ cb_conf.bo = accel_state->dst_obj.bo;
+ /* Pick the colour format (and, on big-endian builds, the byte swap) from the destination depth. */
+ if (accel_state->dst_obj.bpp == 8) {
+ cb_conf.format = COLOR_8;
+ cb_conf.comp_swap = 3; /* A */
+ } else if (accel_state->dst_obj.bpp == 16) {
+ cb_conf.format = COLOR_5_6_5;
+ cb_conf.comp_swap = 2; /* RGB */
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+ cb_conf.endian = ENDIAN_8IN16;
+#endif
+ } else {
+ cb_conf.format = COLOR_8_8_8_8;
+ cb_conf.comp_swap = 1; /* ARGB */
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+ cb_conf.endian = ENDIAN_8IN32;
+#endif
+ }
+ cb_conf.source_format = EXPORT_4C_16BPC;
+ cb_conf.blend_clamp = 1;
+ /* Render setup: all four channel bits of the plane mask are set */
+ cb_conf.pmask |= 4; /* B */
+ cb_conf.pmask |= 2; /* G */
+ cb_conf.pmask |= 1; /* R */
+ cb_conf.pmask |= 8; /* A */
+ cb_conf.rop = RADEON_ROP3_P;
+ if (accel_state->dst_obj.tiling_flags == 0) { /* NOTE(review): the meaning of array_mode 1 is not visible here -- confirm against the CB register docs */
+ cb_conf.array_mode = 1;
+ cb_conf.non_disp_tiling = 1;
+ }
+ evergreen_set_render_target(radeon, &cb_conf, accel_state->dst_obj.domain);
+
+ evergreen_set_spi(radeon, 0, 0);
+
+ /* PS alu constants: a 256-byte block is requested from the constant
+  * buffer; only its first four floats are written here, holding the fill
+  * colour as R, G, B, A with each channel scaled by its maximum value. */
+ ps_const_conf.size_bytes = 256;
+ ps_const_conf.type = SHADER_TYPE_PS;
+ ps_alu_consts = radeon_vbo_space(radeon, &accel_state->cbuf, 256);
+ ps_const_conf.bo = accel_state->cbuf.vb_bo;
+ ps_const_conf.const_addr = accel_state->cbuf.vb_offset; /* read before radeon_vbo_commit() below, which presumably advances vb_offset */
+ if (accel_state->dst_obj.bpp == 16) {
+ r = (fg >> 11) & 0x1f;
+ g = (fg >> 5) & 0x3f;
+ b = (fg >> 0) & 0x1f;
+ ps_alu_consts[0] = (float)r / 31; /* R */
+ ps_alu_consts[1] = (float)g / 63; /* G */
+ ps_alu_consts[2] = (float)b / 31; /* B */
+ ps_alu_consts[3] = 1.0; /* A */
+ } else if (accel_state->dst_obj.bpp == 8) {
+ a = (fg >> 0) & 0xff;
+ ps_alu_consts[0] = 0.0; /* R */
+ ps_alu_consts[1] = 0.0; /* G */
+ ps_alu_consts[2] = 0.0; /* B */
+ ps_alu_consts[3] = (float)a / 255; /* A */
+ } else {
+ a = (fg >> 24) & 0xff;
+ r = (fg >> 16) & 0xff;
+ g = (fg >> 8) & 0xff;
+ b = (fg >> 0) & 0xff;
+ ps_alu_consts[0] = (float)r / 255; /* R */
+ ps_alu_consts[1] = (float)g / 255; /* G */
+ ps_alu_consts[2] = (float)b / 255; /* B */
+ ps_alu_consts[3] = (float)a / 255; /* A */
+ }
+ radeon_vbo_commit(radeon, &accel_state->cbuf);
+ evergreen_set_alu_consts(radeon, &ps_const_conf, RADEON_GEM_DOMAIN_GTT);
+}
+
+/*
+ * Append one rectangle to the solid fill being built.
+ *
+ * Space is requested from the vertex buffer with a vertex size of 8 bytes
+ * and three corners are written as (x, y) float pairs, in this order:
+ * (x1, y1), (x1, y2), (x2, y2).  The space is then committed.
+ */
+void evergreen_solid(struct radeon *radeon,
+                     int x1, int y1, int x2, int y2)
+{
+    struct radeon_accel_state *state = &radeon->accel_state;
+    float *corner = radeon_vbo_space(radeon, &state->vbo, 8);
+    const float left = (float)x1;
+    const float top = (float)y1;
+    const float right = (float)x2;
+    const float bottom = (float)y2;
+
+    /* (x1, y1) */
+    corner[0] = left;
+    corner[1] = top;
+
+    /* (x1, y2) */
+    corner[2] = left;
+    corner[3] = bottom;
+
+    /* (x2, y2) */
+    corner[4] = right;
+    corner[5] = bottom;
+
+    radeon_vbo_commit(radeon, &state->vbo);
+}
+
diff --git a/evergreen_reg.h b/evergreen_reg.h
new file mode 100644
index 0000000..b08dbf9
--- /dev/null
+++ b/evergreen_reg.h
@@ -0,0 +1,250 @@
+/*
+ * Evergreen Register documentation
+ *
+ * Copyright (C) 2010 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _EVERGREEN_REG_H_
+#define _EVERGREEN_REG_H_
+
+/*
+ * Register definitions
+ */
+
+#include "evergreen_reg_auto.h"
+/*
+ * Shader stage selectors, implicitly numbered 0..6 in declaration order.
+ * do_solid_fill_prepare() in evergreen_ops.c stores SHADER_TYPE_PS in
+ * const_config_t.type for its pixel-shader constants.  The abbreviations
+ * presumably follow AMD's Evergreen stage names (pixel, vertex, geometry,
+ * hull, local, compute, fetch) -- confirm against the ISA documentation.
+ */
+enum {
+ SHADER_TYPE_PS,
+ SHADER_TYPE_VS,
+ SHADER_TYPE_GS,
+ SHADER_TYPE_HS,
+ SHADER_TYPE_LS,
+ SHADER_TYPE_CS,
+ SHADER_TYPE_FS,
+};
+
+
+/*
+ * SET_*_REG offsets + ends.
+ * One *_offset / *_end pair per register class.  The names suggest each
+ * pair bounds the address window served by the matching IT_SET_* packet;
+ * the macros that consume them are not part of this header -- confirm
+ * there how the bounds are applied.
+ */
+enum {
+ SET_CONFIG_REG_offset = 0x00008000,
+ SET_CONFIG_REG_end = 0x0000ac00,
+ SET_CONTEXT_REG_offset = 0x00028000,
+ SET_CONTEXT_REG_end = 0x00029000,
+ SET_RESOURCE_offset = 0x00030000,
+ SET_RESOURCE_end = 0x00038000,
+ SET_SAMPLER_offset = 0x0003c000,
+ SET_SAMPLER_end = 0x0003c600,
+ SET_CTL_CONST_offset = 0x0003cff0,
+ SET_CTL_CONST_end = 0x0003ff0c,
+ SET_LOOP_CONST_offset = 0x0003a200,
+ SET_LOOP_CONST_end = 0x0003a500,
+ SET_BOOL_CONST_offset = 0x0003a500, /* starts exactly where the loop-constant window ends */
+ SET_BOOL_CONST_end = 0x0003a518,
+};
+
+/*
+ * Packet3 commands.
+ * Opcode values passed as the first argument of PACK3(), e.g.
+ * PACK3(IT_INDEX_TYPE, 1) and PACK3(IT_DRAW_INDEX_AUTO, 2) in
+ * evergreen_draw_auto().
+ */
+enum {
+ IT_NOP = 0x10,
+ IT_INDIRECT_BUFFER_END = 0x17,
+ IT_SET_PREDICATION = 0x20,
+ IT_COND_EXEC = 0x22,
+ IT_PRED_EXEC = 0x23,
+ IT_DRAW_INDEX_2 = 0x27,
+ IT_CONTEXT_CONTROL = 0x28,
+ IT_DRAW_INDEX_OFFSET = 0x29,
+ IT_INDEX_TYPE = 0x2A,
+ IT_DRAW_INDEX = 0x2B,
+ IT_DRAW_INDEX_AUTO = 0x2D,
+ IT_DRAW_INDEX_IMMD = 0x2E,
+ IT_NUM_INSTANCES = 0x2F,
+ IT_INDIRECT_BUFFER = 0x32,
+ IT_STRMOUT_BUFFER_UPDATE = 0x34,
+ IT_MEM_SEMAPHORE = 0x39,
+ IT_MPEG_INDEX = 0x3A,
+ IT_WAIT_REG_MEM = 0x3C,
+ IT_MEM_WRITE = 0x3D,
+ IT_SURFACE_SYNC = 0x43,
+ IT_ME_INITIALIZE = 0x44,
+ IT_COND_WRITE = 0x45,
+ IT_EVENT_WRITE = 0x46,
+ IT_EVENT_WRITE_EOP = 0x47,
+ IT_EVENT_WRITE_EOS = 0x48,
+ IT_SET_CONFIG_REG = 0x68, /* 0x68..0x6F: the IT_SET_* family */
+ IT_SET_CONTEXT_REG = 0x69,
+ IT_SET_ALU_CONST = 0x6A,
+ IT_SET_BOOL_CONST = 0x6B,
+ IT_SET_LOOP_CONST = 0x6C,
+ IT_SET_RESOURCE = 0x6D,
+ IT_SET_SAMPLER = 0x6E,
+ IT_SET_CTL_CONST = 0x6F,
+};
+
+/*
+ * IT_WAIT_REG_MEM operation encoding.
+ * A function code in the low bits (IT_WAIT_ALWAYS .. IT_WAIT_GT, values
+ * 0..6) can be OR'ed with bit 4, which is 0 for IT_WAIT_REG and 1 for
+ * IT_WAIT_MEM.  The names suggest a comparison function and a register
+ * versus memory operand -- confirm against the PM4 packet documentation.
+ */
+
+#define IT_WAIT_ALWAYS (0 << 0)
+#define IT_WAIT_LT (1 << 0)
+#define IT_WAIT_LE (2 << 0)
+#define IT_WAIT_EQ (3 << 0)
+#define IT_WAIT_NE (4 << 0)
+#define IT_WAIT_GE (5 << 0)
+#define IT_WAIT_GT (6 << 0)
+#define IT_WAIT_REG (0 << 4)
+#define IT_WAIT_MEM (1 << 4)
+
+#define IT_WAIT_ADDR(x) ((x) >> 2) /* drops the two low bits, i.e. divides x by 4 */
+
+/* IT_INDEX_TYPE: the swap mode is shifted to bit 2 and OR'ed with the index
+ * size, as evergreen_draw_auto() does on big-endian builds */
+#define IT_INDEX_TYPE_SWAP_MODE(x) ((x) << 2)
+
+enum {
+ /*
+  * Register addresses, bit fields, data formats and resource-slot layout
+  * defined here in addition to evergreen_reg_auto.h.  Naming follows
+  * that header: *_bit is a single-bit flag, *_mask / *_shift describe a
+  * multi-bit field.
+  */
+ SQ_LDS_ALLOC_PS = 0x288ec,
+ SQ_DYN_GPR_RESOURCE_LIMIT_1 = 0x28838,
+ SQ_DYN_GPR_CNTL_PS_FLUSH_REQ = 0x8d8c,
+
+ WAIT_UNTIL = 0x8040,
+ WAIT_CP_DMA_IDLE_bit = 1 << 8,
+ WAIT_CMDFIFO_bit = 1 << 10,
+ WAIT_3D_IDLE_bit = 1 << 15,
+ WAIT_3D_IDLECLEAN_bit = 1 << 17,
+ WAIT_EXTERN_SIG_bit = 1 << 19,
+ CMDFIFO_ENTRIES_mask = 0xf << 20,
+ CMDFIFO_ENTRIES_shift = 20,
+
+ CP_COHER_CNTL = 0x85f0, /* flags below are OR'ed by callers, e.g. CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit in evergreen_finish_op() */
+ DEST_BASE_0_ENA_bit = 1 << 0,
+ DEST_BASE_1_ENA_bit = 1 << 1,
+ SO0_DEST_BASE_ENA_bit = 1 << 2,
+ SO1_DEST_BASE_ENA_bit = 1 << 3,
+ SO2_DEST_BASE_ENA_bit = 1 << 4,
+ SO3_DEST_BASE_ENA_bit = 1 << 5,
+ CB0_DEST_BASE_ENA_bit = 1 << 6,
+ CB1_DEST_BASE_ENA_bit = 1 << 7,
+ CB2_DEST_BASE_ENA_bit = 1 << 8,
+ CB3_DEST_BASE_ENA_bit = 1 << 9,
+ CB4_DEST_BASE_ENA_bit = 1 << 10,
+ CB5_DEST_BASE_ENA_bit = 1 << 11,
+ CB6_DEST_BASE_ENA_bit = 1 << 12,
+ CB7_DEST_BASE_ENA_bit = 1 << 13,
+ DB_DEST_BASE_ENA_bit = 1 << 14,
+ CB8_DEST_BASE_ENA_bit = 1 << 15,
+ CB9_DEST_BASE_ENA_bit = 1 << 16,
+ CB10_DEST_BASE_ENA_bit = 1 << 17,
+ CB11_DEST_BASE_ENA_bit = 1 << 18,
+ FULL_CACHE_ENA_bit = 1 << 20,
+ TC_ACTION_ENA_bit = 1 << 23,
+ VC_ACTION_ENA_bit = 1 << 24,
+ CB_ACTION_ENA_bit = 1 << 25,
+ DB_ACTION_ENA_bit = 1 << 26,
+ SH_ACTION_ENA_bit = 1 << 27,
+ SX_ACTION_ENA_bit = 1 << 28,
+ CP_COHER_SIZE = 0x85f4,
+ CP_COHER_BASE = 0x85f8,
+ CP_COHER_STATUS = 0x85fc,
+ MATCHING_GFX_CNTX_mask = 0xff << 0,
+ MATCHING_GFX_CNTX_shift = 0,
+ STATUS_bit = 1 << 31, /* NOTE(review): with a 32-bit int this shifts into the sign bit, which ISO C leaves undefined; it relies on compiler behaviour */
+
+// SQ_VTX_CONSTANT_WORD2_0 = 0x00030008,
+// SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_mask = 0x3f << 20,
+ FMT_INVALID=0, FMT_8, FMT_4_4, FMT_3_3_2, /* data format codes; the explicit "=" values below skip unused codes */
+ FMT_16=5, FMT_16_FLOAT, FMT_8_8,
+ FMT_5_6_5, FMT_6_5_5, FMT_1_5_5_5, FMT_4_4_4_4,
+ FMT_5_5_5_1, FMT_32, FMT_32_FLOAT, FMT_16_16,
+ FMT_16_16_FLOAT=16, FMT_8_24, FMT_8_24_FLOAT, FMT_24_8,
+ FMT_24_8_FLOAT, FMT_10_11_11, FMT_10_11_11_FLOAT, FMT_11_11_10,
+ FMT_11_11_10_FLOAT, FMT_2_10_10_10, FMT_8_8_8_8, FMT_10_10_10_2,
+ FMT_X24_8_32_FLOAT, FMT_32_32, FMT_32_32_FLOAT, FMT_16_16_16_16,
+ FMT_16_16_16_16_FLOAT=32, FMT_32_32_32_32=34, FMT_32_32_32_32_FLOAT,
+ FMT_1 = 37, FMT_GB_GR=39,
+ FMT_BG_RG, FMT_32_AS_8, FMT_32_AS_8_8, FMT_5_9_9_9_SHAREDEXP,
+ FMT_8_8_8, FMT_16_16_16, FMT_16_16_16_FLOAT, FMT_32_32_32,
+ FMT_32_32_32_FLOAT=48,
+
+// High level register file lengths
+ SQ_FETCH_RESOURCE = SQ_TEX_RESOURCE_WORD0_0,
+ SQ_FETCH_RESOURCE_ps_num = 176,
+ SQ_FETCH_RESOURCE_vs_num = 160,
+ SQ_FETCH_RESOURCE_gs_num = 160,
+ SQ_FETCH_RESOURCE_hs_num = 160,
+ SQ_FETCH_RESOURCE_ls_num = 160,
+ SQ_FETCH_RESOURCE_cs_num = 176,
+ SQ_FETCH_RESOURCE_fs_num = 32,
+ SQ_FETCH_RESOURCE_all_num = 1024, /* equals the sum of the seven per-stage counts above */
+ SQ_FETCH_RESOURCE_offset = 32,
+ SQ_FETCH_RESOURCE_ps = 0, // 0...175
+ SQ_FETCH_RESOURCE_vs = SQ_FETCH_RESOURCE_ps + SQ_FETCH_RESOURCE_ps_num, // 176...335; used as vtx_res.id in evergreen_finish_op()
+ SQ_FETCH_RESOURCE_gs = SQ_FETCH_RESOURCE_vs + SQ_FETCH_RESOURCE_vs_num, // 336...495
+ SQ_FETCH_RESOURCE_hs = SQ_FETCH_RESOURCE_gs + SQ_FETCH_RESOURCE_gs_num, // 496...655
+ SQ_FETCH_RESOURCE_ls = SQ_FETCH_RESOURCE_hs + SQ_FETCH_RESOURCE_hs_num, // 656...815
+ SQ_FETCH_RESOURCE_cs = SQ_FETCH_RESOURCE_ls + SQ_FETCH_RESOURCE_ls_num, // 816...991
+ SQ_FETCH_RESOURCE_fs = SQ_FETCH_RESOURCE_cs + SQ_FETCH_RESOURCE_cs_num, // 992...1023
+
+ SQ_TEX_SAMPLER_WORD = SQ_TEX_SAMPLER_WORD0_0,
+ SQ_TEX_SAMPLER_WORD_ps_num = 18,
+ SQ_TEX_SAMPLER_WORD_vs_num = 18,
+ SQ_TEX_SAMPLER_WORD_gs_num = 18,
+ SQ_TEX_SAMPLER_WORD_hs_num = 18,
+ SQ_TEX_SAMPLER_WORD_ls_num = 18,
+ SQ_TEX_SAMPLER_WORD_cs_num = 18,
+ SQ_TEX_SAMPLER_WORD_all_num = 108,
+ SQ_TEX_SAMPLER_WORD_offset = 12,
+ SQ_TEX_SAMPLER_WORD_ps = 0, // 0...17
+ SQ_TEX_SAMPLER_WORD_vs = SQ_TEX_SAMPLER_WORD_ps + SQ_TEX_SAMPLER_WORD_ps_num, // 18...35
+ SQ_TEX_SAMPLER_WORD_gs = SQ_TEX_SAMPLER_WORD_vs + SQ_TEX_SAMPLER_WORD_vs_num, // 36...53
+ SQ_TEX_SAMPLER_WORD_hs = SQ_TEX_SAMPLER_WORD_gs + SQ_TEX_SAMPLER_WORD_gs_num, // 54...71
+ SQ_TEX_SAMPLER_WORD_ls = SQ_TEX_SAMPLER_WORD_hs + SQ_TEX_SAMPLER_WORD_hs_num, // 72...89
+ SQ_TEX_SAMPLER_WORD_cs = SQ_TEX_SAMPLER_WORD_ls + SQ_TEX_SAMPLER_WORD_ls_num, // 90...107
+
+ SQ_LOOP_CONST = SQ_LOOP_CONST_0,
+ SQ_LOOP_CONST_ps_num = 32,
+ SQ_LOOP_CONST_vs_num = 32,
+ SQ_LOOP_CONST_gs_num = 32,
+ SQ_LOOP_CONST_hs_num = 32,
+ SQ_LOOP_CONST_ls_num = 32,
+ SQ_LOOP_CONST_cs_num = 32,
+ SQ_LOOP_CONST_all_num = 192,
+ SQ_LOOP_CONST_offset = 4,
+ SQ_LOOP_CONST_ps = 0, // 0...31
+ SQ_LOOP_CONST_vs = SQ_LOOP_CONST_ps + SQ_LOOP_CONST_ps_num, // 32...63
+ SQ_LOOP_CONST_gs = SQ_LOOP_CONST_vs + SQ_LOOP_CONST_vs_num, // 64...95
+ SQ_LOOP_CONST_hs = SQ_LOOP_CONST_gs + SQ_LOOP_CONST_gs_num, // 96...127
+ SQ_LOOP_CONST_ls = SQ_LOOP_CONST_hs + SQ_LOOP_CONST_hs_num, // 128...159
+ SQ_LOOP_CONST_cs = SQ_LOOP_CONST_ls + SQ_LOOP_CONST_ls_num, // 160...191
+
+ SQ_BOOL_CONST = SQ_BOOL_CONST_0, /* 32 bits each */
+ SQ_BOOL_CONST_ps_num = 1,
+ SQ_BOOL_CONST_vs_num = 1,
+ SQ_BOOL_CONST_gs_num = 1,
+ SQ_BOOL_CONST_hs_num = 1,
+ SQ_BOOL_CONST_ls_num = 1,
+ SQ_BOOL_CONST_cs_num = 1,
+ SQ_BOOL_CONST_all_num = 6,
+ SQ_BOOL_CONST_offset = 4,
+ SQ_BOOL_CONST_ps = 0,
+ SQ_BOOL_CONST_vs = SQ_BOOL_CONST_ps + SQ_BOOL_CONST_ps_num,
+ SQ_BOOL_CONST_gs = SQ_BOOL_CONST_vs + SQ_BOOL_CONST_vs_num,
+ SQ_BOOL_CONST_hs = SQ_BOOL_CONST_gs + SQ_BOOL_CONST_gs_num,
+ SQ_BOOL_CONST_ls = SQ_BOOL_CONST_hs + SQ_BOOL_CONST_hs_num,
+ SQ_BOOL_CONST_cs = SQ_BOOL_CONST_ls + SQ_BOOL_CONST_ls_num,
+
+};
+
+#endif
diff --git a/evergreen_reg_auto.h b/evergreen_reg_auto.h
new file mode 100644
index 0000000..5c61586
--- /dev/null
+++ b/evergreen_reg_auto.h
@@ -0,0 +1,4039 @@
+/*
+ * Evergreen Register documentation
+ *
+ * Copyright (C) 2010 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _EVERGREEN_REG_AUTO_H
+#define _EVERGREEN_REG_AUTO_H
+
+enum {
+
+ VGT_VTX_VECT_EJECT_REG = 0x000088b0,
+ PRIM_COUNT_mask = 0x3ff << 0,
+ PRIM_COUNT_shift = 0,
+ VGT_LAST_COPY_STATE = 0x000088c0,
+ SRC_STATE_ID_mask = 0x07 << 0,
+ SRC_STATE_ID_shift = 0,
+ DST_STATE_ID_mask = 0x07 << 16,
+ DST_STATE_ID_shift = 16,
+ VGT_CACHE_INVALIDATION = 0x000088c4,
+ CACHE_INVALIDATION_mask = 0x03 << 0,
+ CACHE_INVALIDATION_shift = 0,
+ VC_ONLY = 0x00,
+ TC_ONLY = 0x01,
+ VC_AND_TC = 0x02,
+ VS_NO_EXTRA_BUFFER_bit = 1 << 5,
+ AUTO_INVLD_EN_mask = 0x03 << 6,
+ AUTO_INVLD_EN_shift = 6,
+ VGT_GS_VERTEX_REUSE = 0x000088d4,
+ VERT_REUSE_mask = 0x1f << 0,
+ VERT_REUSE_shift = 0,
+ VGT_CNTL_STATUS = 0x000088f0,
+ VGT_OUT_INDX_BUSY_bit = 1 << 0,
+ VGT_OUT_BUSY_bit = 1 << 1,
+ VGT_PT_BUSY_bit = 1 << 2,
+ VGT_TE_BUSY_bit = 1 << 3,
+ VGT_VR_BUSY_bit = 1 << 4,
+ VGT_GRP_BUSY_bit = 1 << 5,
+ VGT_DMA_REQ_BUSY_bit = 1 << 6,
+ VGT_DMA_BUSY_bit = 1 << 7,
+ VGT_GS_BUSY_bit = 1 << 8,
+ VGT_HS_BUSY_bit = 1 << 9,
+ VGT_TE11_BUSY_bit = 1 << 10,
+ VGT_BUSY_bit = 1 << 11,
+ VGT_PRIMITIVE_TYPE = 0x00008958,
+ VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask = 0x3f << 0,
+ VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift = 0,
+ DI_PT_NONE = 0x00,
+ DI_PT_POINTLIST = 0x01,
+ DI_PT_LINELIST = 0x02,
+ DI_PT_LINESTRIP = 0x03,
+ DI_PT_TRILIST = 0x04,
+ DI_PT_TRIFAN = 0x05,
+ DI_PT_TRISTRIP = 0x06,
+ DI_PT_UNUSED_0 = 0x07,
+ DI_PT_UNUSED_1 = 0x08,
+ DI_PT_PATCH = 0x09,
+ DI_PT_LINELIST_ADJ = 0x0a,
+ DI_PT_LINESTRIP_ADJ = 0x0b,
+ DI_PT_TRILIST_ADJ = 0x0c,
+ DI_PT_TRISTRIP_ADJ = 0x0d,
+ DI_PT_UNUSED_3 = 0x0e,
+ DI_PT_UNUSED_4 = 0x0f,
+ DI_PT_TRI_WITH_WFLAGS = 0x10,
+ DI_PT_RECTLIST = 0x11,
+ DI_PT_LINELOOP = 0x12,
+ DI_PT_QUADLIST = 0x13,
+ DI_PT_QUADSTRIP = 0x14,
+ DI_PT_POLYGON = 0x15,
+ DI_PT_2D_COPY_RECT_LIST_V0 = 0x16,
+ DI_PT_2D_COPY_RECT_LIST_V1 = 0x17,
+ DI_PT_2D_COPY_RECT_LIST_V2 = 0x18,
+ DI_PT_2D_COPY_RECT_LIST_V3 = 0x19,
+ DI_PT_2D_FILL_RECT_LIST = 0x1a,
+ DI_PT_2D_LINE_STRIP = 0x1b,
+ DI_PT_2D_TRI_STRIP = 0x1c,
+ VGT_INDEX_TYPE = 0x0000895c,
+ INDEX_TYPE_mask = 0x03 << 0,
+ INDEX_TYPE_shift = 0,
+ DI_INDEX_SIZE_16_BIT = 0x00,
+ DI_INDEX_SIZE_32_BIT = 0x01,
+ VGT_STRMOUT_BUFFER_FILLED_SIZE_0 = 0x00008960,
+ VGT_STRMOUT_BUFFER_FILLED_SIZE_1 = 0x00008964,
+ VGT_STRMOUT_BUFFER_FILLED_SIZE_2 = 0x00008968,
+ VGT_STRMOUT_BUFFER_FILLED_SIZE_3 = 0x0000896c,
+ VGT_NUM_INDICES = 0x00008970,
+ VGT_NUM_INSTANCES = 0x00008974,
+ PA_CL_CNTL_STATUS = 0x00008a10,
+ CL_BUSY_bit = 1 << 31,
+ PA_CL_ENHANCE = 0x00008a14,
+ CLIP_VTX_REORDER_ENA_bit = 1 << 0,
+ NUM_CLIP_SEQ_mask = 0x03 << 1,
+ NUM_CLIP_SEQ_shift = 1,
+ CLIPPED_PRIM_SEQ_STALL_bit = 1 << 3,
+ VE_NAN_PROC_DISABLE_bit = 1 << 4,
+ PA_SU_CNTL_STATUS = 0x00008a50,
+ SU_BUSY_bit = 1 << 31,
+ PA_SU_LINE_STIPPLE_VALUE = 0x00008a60,
+ LINE_STIPPLE_VALUE_mask = 0xffffff << 0,
+ LINE_STIPPLE_VALUE_shift = 0,
+ PA_SC_LINE_STIPPLE_STATE = 0x00008b10,
+ CURRENT_PTR_mask = 0x0f << 0,
+ CURRENT_PTR_shift = 0,
+ CURRENT_COUNT_mask = 0xff << 8,
+ CURRENT_COUNT_shift = 8,
+ SQ_CONFIG = 0x00008c00,
+ VC_ENABLE_bit = 1 << 0,
+ EXPORT_SRC_C_bit = 1 << 1,
+ CS_PRIO_mask = 0x03 << 18,
+ CS_PRIO_shift = 18,
+ LS_PRIO_mask = 0x03 << 20,
+ LS_PRIO_shift = 20,
+ HS_PRIO_mask = 0x03 << 22,
+ HS_PRIO_shift = 22,
+ PS_PRIO_mask = 0x03 << 24,
+ PS_PRIO_shift = 24,
+ VS_PRIO_mask = 0x03 << 26,
+ VS_PRIO_shift = 26,
+ GS_PRIO_mask = 0x03 << 28,
+ GS_PRIO_shift = 28,
+ ES_PRIO_mask = 0x03 << 30,
+ ES_PRIO_shift = 30,
+ SQ_GPR_RESOURCE_MGMT_1 = 0x00008c04,
+ NUM_PS_GPRS_mask = 0xff << 0,
+ NUM_PS_GPRS_shift = 0,
+ NUM_VS_GPRS_mask = 0xff << 16,
+ NUM_VS_GPRS_shift = 16,
+ NUM_CLAUSE_TEMP_GPRS_mask = 0x0f << 28,
+ NUM_CLAUSE_TEMP_GPRS_shift = 28,
+ SQ_GPR_RESOURCE_MGMT_2 = 0x00008c08,
+ NUM_GS_GPRS_mask = 0xff << 0,
+ NUM_GS_GPRS_shift = 0,
+ NUM_ES_GPRS_mask = 0xff << 16,
+ NUM_ES_GPRS_shift = 16,
+ SQ_GPR_RESOURCE_MGMT_3 = 0x00008c0c,
+ NUM_HS_GPRS_mask = 0xff << 0,
+ NUM_HS_GPRS_shift = 0,
+ NUM_LS_GPRS_mask = 0xff << 16,
+ NUM_LS_GPRS_shift = 16,
+ SQ_GLOBAL_GPR_RESOURCE_MGMT_1 = 0x00008c10,
+ PS_GGPR_BASE_mask = 0xff << 0,
+ PS_GGPR_BASE_shift = 0,
+ VS_GGPR_BASE_mask = 0xff << 8,
+ VS_GGPR_BASE_shift = 8,
+ GS_GGPR_BASE_mask = 0xff << 16,
+ GS_GGPR_BASE_shift = 16,
+ ES_GGPR_BASE_mask = 0xff << 24,
+ ES_GGPR_BASE_shift = 24,
+ SQ_GLOBAL_GPR_RESOURCE_MGMT_2 = 0x00008c14,
+ HS_GGPR_BASE_mask = 0xff << 0,
+ HS_GGPR_BASE_shift = 0,
+ LS_GGPR_BASE_mask = 0xff << 8,
+ LS_GGPR_BASE_shift = 8,
+ CS_GGPR_BASE_mask = 0xff << 16,
+ CS_GGPR_BASE_shift = 16,
+ SQ_THREAD_RESOURCE_MGMT = 0x00008c18,
+ NUM_PS_THREADS_mask = 0xff << 0,
+ NUM_PS_THREADS_shift = 0,
+ NUM_VS_THREADS_mask = 0xff << 8,
+ NUM_VS_THREADS_shift = 8,
+ NUM_GS_THREADS_mask = 0xff << 16,
+ NUM_GS_THREADS_shift = 16,
+ NUM_ES_THREADS_mask = 0xff << 24,
+ NUM_ES_THREADS_shift = 24,
+ SQ_THREAD_RESOURCE_MGMT_2 = 0x00008c1c,
+ NUM_HS_THREADS_mask = 0xff << 0,
+ NUM_HS_THREADS_shift = 0,
+ NUM_LS_THREADS_mask = 0xff << 8,
+ NUM_LS_THREADS_shift = 8,
+ SQ_STACK_RESOURCE_MGMT_1 = 0x00008c20,
+ NUM_PS_STACK_ENTRIES_mask = 0xfff << 0,
+ NUM_PS_STACK_ENTRIES_shift = 0,
+ NUM_VS_STACK_ENTRIES_mask = 0xfff << 16,
+ NUM_VS_STACK_ENTRIES_shift = 16,
+ SQ_STACK_RESOURCE_MGMT_2 = 0x00008c24,
+ NUM_GS_STACK_ENTRIES_mask = 0xfff << 0,
+ NUM_GS_STACK_ENTRIES_shift = 0,
+ NUM_ES_STACK_ENTRIES_mask = 0xfff << 16,
+ NUM_ES_STACK_ENTRIES_shift = 16,
+ SQ_STACK_RESOURCE_MGMT_3 = 0x00008c28,
+ NUM_HS_STACK_ENTRIES_mask = 0xfff << 0,
+ NUM_HS_STACK_ENTRIES_shift = 0,
+ NUM_LS_STACK_ENTRIES_mask = 0xfff << 16,
+ NUM_LS_STACK_ENTRIES_shift = 16,
+ SQ_ESGS_RING_BASE = 0x00008c40,
+ SQ_ESGS_RING_SIZE = 0x00008c44,
+ SQ_GSVS_RING_BASE = 0x00008c48,
+ SQ_GSVS_RING_SIZE = 0x00008c4c,
+ SQ_ESTMP_RING_BASE = 0x00008c50,
+ SQ_ESTMP_RING_SIZE = 0x00008c54,
+ SQ_GSTMP_RING_BASE = 0x00008c58,
+ SQ_GSTMP_RING_SIZE = 0x00008c5c,
+ SQ_VSTMP_RING_BASE = 0x00008c60,
+ SQ_VSTMP_RING_SIZE = 0x00008c64,
+ SQ_PSTMP_RING_BASE = 0x00008c68,
+ SQ_PSTMP_RING_SIZE = 0x00008c6c,
+ SQ_CONST_MEM_BASE = 0x00008df8,
+ SQ_ALU_WORD1_OP3 = 0x00008dfc,
+ SRC2_SEL_mask = 0x1ff << 0,
+ SRC2_SEL_shift = 0,
+ SQ_ALU_SRC_LDS_OQ_A = 0xdb,
+ SQ_ALU_SRC_LDS_OQ_B = 0xdc,
+ SQ_ALU_SRC_LDS_OQ_A_POP = 0xdd,
+ SQ_ALU_SRC_LDS_OQ_B_POP = 0xde,
+ SQ_ALU_SRC_LDS_DIRECT_A = 0xdf,
+ SQ_ALU_SRC_LDS_DIRECT_B = 0xe0,
+ SQ_ALU_SRC_TIME_HI = 0xe3,
+ SQ_ALU_SRC_TIME_LO = 0xe4,
+ SQ_ALU_SRC_MASK_HI = 0xe5,
+ SQ_ALU_SRC_MASK_LO = 0xe6,
+ SQ_ALU_SRC_HW_WAVE_ID = 0xe7,
+ SQ_ALU_SRC_SIMD_ID = 0xe8,
+ SQ_ALU_SRC_SE_ID = 0xe9,
+ SQ_ALU_SRC_HW_THREADGRP_ID = 0xea,
+ SQ_ALU_SRC_WAVE_ID_IN_GRP = 0xeb,
+ SQ_ALU_SRC_NUM_THREADGRP_WAVES = 0xec,
+ SQ_ALU_SRC_HW_ALU_ODD = 0xed,
+ SQ_ALU_SRC_LOOP_IDX = 0xee,
+ SQ_ALU_SRC_PARAM_BASE_ADDR = 0xf0,
+ SQ_ALU_SRC_NEW_PRIM_MASK = 0xf1,
+ SQ_ALU_SRC_PRIM_MASK_HI = 0xf2,
+ SQ_ALU_SRC_PRIM_MASK_LO = 0xf3,
+ SQ_ALU_SRC_1_DBL_L = 0xf4,
+ SQ_ALU_SRC_1_DBL_M = 0xf5,
+ SQ_ALU_SRC_0_5_DBL_L = 0xf6,
+ SQ_ALU_SRC_0_5_DBL_M = 0xf7,
+ SQ_ALU_SRC_0 = 0xf8,
+ SQ_ALU_SRC_1 = 0xf9,
+ SQ_ALU_SRC_1_INT = 0xfa,
+ SQ_ALU_SRC_M_1_INT = 0xfb,
+ SQ_ALU_SRC_0_5 = 0xfc,
+ SQ_ALU_SRC_LITERAL = 0xfd,
+ SQ_ALU_SRC_PV = 0xfe,
+ SQ_ALU_SRC_PS = 0xff,
+ SRC2_REL_bit = 1 << 9,
+ SRC2_CHAN_mask = 0x03 << 10,
+ SRC2_CHAN_shift = 10,
+ SQ_CHAN_X = 0x00,
+ SQ_CHAN_Y = 0x01,
+ SQ_CHAN_Z = 0x02,
+ SQ_CHAN_W = 0x03,
+ SRC2_NEG_bit = 1 << 12,
+ SQ_ALU_WORD1_OP3__ALU_INST_mask = 0x1f << 13,
+ SQ_ALU_WORD1_OP3__ALU_INST_shift = 13,
+ SQ_OP3_INST_BFE_UINT = 0x04,
+ SQ_OP3_INST_BFE_INT = 0x05,
+ SQ_OP3_INST_BFI_INT = 0x06,
+ SQ_OP3_INST_FMA = 0x07,
+ SQ_OP3_INST_CNDNE_64 = 0x09,
+ SQ_OP3_INST_FMA_64 = 0x0a,
+ SQ_OP3_INST_LERP_UINT = 0x0b,
+ SQ_OP3_INST_BIT_ALIGN_INT = 0x0c,
+ SQ_OP3_INST_BYTE_ALIGN_INT = 0x0d,
+ SQ_OP3_INST_SAD_ACCUM_UINT = 0x0e,
+ SQ_OP3_INST_SAD_ACCUM_HI_UINT = 0x0f,
+ SQ_OP3_INST_MULADD_UINT24 = 0x10,
+ SQ_OP3_INST_LDS_IDX_OP = 0x11,
+ SQ_OP3_INST_MULADD = 0x14,
+ SQ_OP3_INST_MULADD_M2 = 0x15,
+ SQ_OP3_INST_MULADD_M4 = 0x16,
+ SQ_OP3_INST_MULADD_D2 = 0x17,
+ SQ_OP3_INST_MULADD_IEEE = 0x18,
+ SQ_OP3_INST_CNDE = 0x19,
+ SQ_OP3_INST_CNDGT = 0x1a,
+ SQ_OP3_INST_CNDGE = 0x1b,
+ SQ_OP3_INST_CNDE_INT = 0x1c,
+ SQ_OP3_INST_CNDGT_INT = 0x1d,
+ SQ_OP3_INST_CNDGE_INT = 0x1e,
+ SQ_OP3_INST_MUL_LIT = 0x1f,
+ SQ_ALU_WORD1_LDS_DIRECT_LITERAL_LO = 0x00008dfc,
+ OFFSET_A_mask = 0x1fff << 0,
+ OFFSET_A_shift = 0,
+ STRIDE_A_mask = 0x7f << 13,
+ STRIDE_A_shift = 13,
+ THREAD_REL_A_bit = 1 << 22,
+ SQ_TEX_WORD2 = 0x00008dfc,
+ OFFSET_X_mask = 0x1f << 0,
+ OFFSET_X_shift = 0,
+ OFFSET_Y_mask = 0x1f << 5,
+ OFFSET_Y_shift = 5,
+ OFFSET_Z_mask = 0x1f << 10,
+ OFFSET_Z_shift = 10,
+ SAMPLER_ID_mask = 0x1f << 15,
+ SAMPLER_ID_shift = 15,
+ SQ_TEX_WORD2__SRC_SEL_X_mask = 0x07 << 20,
+ SQ_TEX_WORD2__SRC_SEL_X_shift = 20,
+ SQ_SEL_X = 0x00,
+ SQ_SEL_Y = 0x01,
+ SQ_SEL_Z = 0x02,
+ SQ_SEL_W = 0x03,
+ SQ_SEL_0 = 0x04,
+ SQ_SEL_1 = 0x05,
+ SRC_SEL_Y_mask = 0x07 << 23,
+ SRC_SEL_Y_shift = 23,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+ SRC_SEL_Z_mask = 0x07 << 26,
+ SRC_SEL_Z_shift = 26,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+ SRC_SEL_W_mask = 0x07 << 29,
+ SRC_SEL_W_shift = 29,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+ SQ_CF_ALLOC_EXPORT_WORD1 = 0x00008dfc,
+ BURST_COUNT_mask = 0x0f << 16,
+ BURST_COUNT_shift = 16,
+ VALID_PIXEL_MODE_bit = 1 << 20,
+ END_OF_PROGRAM_bit = 1 << 21,
+ SQ_CF_ALLOC_EXPORT_WORD1__CF_INST_mask = 0xff << 22,
+ SQ_CF_ALLOC_EXPORT_WORD1__CF_INST_shift = 22,
+ SQ_CF_INST_MEM_STREAM0_BUF0 = 0x40,
+ SQ_CF_INST_MEM_STREAM0_BUF1 = 0x41,
+ SQ_CF_INST_MEM_STREAM0_BUF2 = 0x42,
+ SQ_CF_INST_MEM_STREAM0_BUF3 = 0x43,
+ SQ_CF_INST_MEM_STREAM1_BUF0 = 0x44,
+ SQ_CF_INST_MEM_STREAM1_BUF1 = 0x45,
+ SQ_CF_INST_MEM_STREAM1_BUF2 = 0x46,
+ SQ_CF_INST_MEM_STREAM1_BUF3 = 0x47,
+ SQ_CF_INST_MEM_STREAM2_BUF0 = 0x48,
+ SQ_CF_INST_MEM_STREAM2_BUF1 = 0x49,
+ SQ_CF_INST_MEM_STREAM2_BUF2 = 0x4a,
+ SQ_CF_INST_MEM_STREAM2_BUF3 = 0x4b,
+ SQ_CF_INST_MEM_STREAM3_BUF0 = 0x4c,
+ SQ_CF_INST_MEM_STREAM3_BUF1 = 0x4d,
+ SQ_CF_INST_MEM_STREAM3_BUF2 = 0x4e,
+ SQ_CF_INST_MEM_STREAM3_BUF3 = 0x4f,
+ SQ_CF_INST_MEM_SCRATCH = 0x50,
+ SQ_CF_INST_MEM_RING = 0x52,
+ SQ_CF_INST_EXPORT = 0x53,
+ SQ_CF_INST_EXPORT_DONE = 0x54,
+ SQ_CF_INST_MEM_EXPORT = 0x55,
+ SQ_CF_INST_MEM_RAT = 0x56,
+ SQ_CF_INST_MEM_RAT_CACHELESS = 0x57,
+ SQ_CF_INST_MEM_RING1 = 0x58,
+ SQ_CF_INST_MEM_RING2 = 0x59,
+ SQ_CF_INST_MEM_RING3 = 0x5a,
+ SQ_CF_INST_MEM_EXPORT_COMBINED = 0x5b,
+ SQ_CF_INST_MEM_RAT_COMBINED_CACHELESS = 0x5c,
+ MARK_bit = 1 << 30,
+ BARRIER_bit = 1 << 31,
+ SQ_CF_ALU_WORD1 = 0x00008dfc,
+ KCACHE_MODE1_mask = 0x03 << 0,
+ KCACHE_MODE1_shift = 0,
+ SQ_CF_KCACHE_NOP = 0x00,
+ SQ_CF_KCACHE_LOCK_1 = 0x01,
+ SQ_CF_KCACHE_LOCK_2 = 0x02,
+ SQ_CF_KCACHE_LOCK_LOOP_INDEX = 0x03,
+ KCACHE_ADDR0_mask = 0xff << 2,
+ KCACHE_ADDR0_shift = 2,
+ KCACHE_ADDR1_mask = 0xff << 10,
+ KCACHE_ADDR1_shift = 10,
+ SQ_CF_ALU_WORD1__COUNT_mask = 0x7f << 18,
+ SQ_CF_ALU_WORD1__COUNT_shift = 18,
+ SQ_CF_ALU_WORD1__ALT_CONST_bit = 1 << 25,
+ SQ_CF_ALU_WORD1__CF_INST_mask = 0x0f << 26,
+ SQ_CF_ALU_WORD1__CF_INST_shift = 26,
+ SQ_CF_INST_ALU = 0x08,
+ SQ_CF_INST_ALU_PUSH_BEFORE = 0x09,
+ SQ_CF_INST_ALU_POP_AFTER = 0x0a,
+ SQ_CF_INST_ALU_POP2_AFTER = 0x0b,
+ SQ_CF_INST_ALU_EXTENDED = 0x0c,
+ SQ_CF_INST_ALU_CONTINUE = 0x0d,
+ SQ_CF_INST_ALU_BREAK = 0x0e,
+ SQ_CF_INST_ALU_ELSE_AFTER = 0x0f,
+ WHOLE_QUAD_MODE_bit = 1 << 30,
+/* BARRIER_bit = 1 << 31, */
+ SQ_TEX_WORD1 = 0x00008dfc,
+ SQ_TEX_WORD1__DST_GPR_mask = 0x7f << 0,
+ SQ_TEX_WORD1__DST_GPR_shift = 0,
+ SQ_TEX_WORD1__DST_REL_bit = 1 << 7,
+ SQ_TEX_WORD1__DST_SEL_X_mask = 0x07 << 9,
+ SQ_TEX_WORD1__DST_SEL_X_shift = 9,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+ SQ_SEL_MASK = 0x07,
+ SQ_TEX_WORD1__DST_SEL_Y_mask = 0x07 << 12,
+ SQ_TEX_WORD1__DST_SEL_Y_shift = 12,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SQ_TEX_WORD1__DST_SEL_Z_mask = 0x07 << 15,
+ SQ_TEX_WORD1__DST_SEL_Z_shift = 15,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SQ_TEX_WORD1__DST_SEL_W_mask = 0x07 << 18,
+ SQ_TEX_WORD1__DST_SEL_W_shift = 18,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SQ_TEX_WORD1__LOD_BIAS_mask = 0x7f << 21,
+ SQ_TEX_WORD1__LOD_BIAS_shift = 21,
+ COORD_TYPE_X_bit = 1 << 28,
+ COORD_TYPE_Y_bit = 1 << 29,
+ COORD_TYPE_Z_bit = 1 << 30,
+ COORD_TYPE_W_bit = 1 << 31,
+ SQ_VTX_WORD0 = 0x00008dfc,
+ VTX_INST_mask = 0x1f << 0,
+ VTX_INST_shift = 0,
+ SQ_VTX_INST_FETCH = 0x00,
+ SQ_VTX_INST_SEMANTIC = 0x01,
+ SQ_VTX_INST_GET_BUFFER_RESINFO = 0x0e,
+ FETCH_TYPE_mask = 0x03 << 5,
+ FETCH_TYPE_shift = 5,
+ SQ_VTX_FETCH_VERTEX_DATA = 0x00,
+ SQ_VTX_FETCH_INSTANCE_DATA = 0x01,
+ SQ_VTX_FETCH_NO_INDEX_OFFSET = 0x02,
+ FETCH_WHOLE_QUAD_bit = 1 << 7,
+ BUFFER_ID_mask = 0xff << 8,
+ BUFFER_ID_shift = 8,
+ SQ_VTX_WORD0__SRC_GPR_mask = 0x7f << 16,
+ SQ_VTX_WORD0__SRC_GPR_shift = 16,
+ SRC_REL_bit = 1 << 23,
+ SQ_VTX_WORD0__SRC_SEL_X_mask = 0x03 << 24,
+ SQ_VTX_WORD0__SRC_SEL_X_shift = 24,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+ MEGA_FETCH_COUNT_mask = 0x3f << 26,
+ MEGA_FETCH_COUNT_shift = 26,
+ SQ_CF_ALLOC_EXPORT_WORD1_SWIZ = 0x00008dfc,
+ SEL_X_mask = 0x07 << 0,
+ SEL_X_shift = 0,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SEL_Y_mask = 0x07 << 3,
+ SEL_Y_shift = 3,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SEL_Z_mask = 0x07 << 6,
+ SEL_Z_shift = 6,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SEL_W_mask = 0x07 << 9,
+ SEL_W_shift = 9,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SQ_MEM_RD_WORD0 = 0x00008dfc,
+ MEM_INST_mask = 0x1f << 0,
+ MEM_INST_shift = 0,
+ SQ_MEM_INST_MEM = 0x02,
+ SQ_MEM_RD_WORD0__ELEM_SIZE_mask = 0x03 << 5,
+ SQ_MEM_RD_WORD0__ELEM_SIZE_shift = 5,
+/* FETCH_WHOLE_QUAD_bit = 1 << 7, */
+ MEM_OP_mask = 0x07 << 8,
+ MEM_OP_shift = 8,
+ SQ_MEM_OP_RD_SCRATCH = 0x00,
+ SQ_MEM_OP_RD_SCATTER = 0x02,
+ SQ_MEM_OP_GDS = 0x04,
+ SQ_MEM_OP_TF_WRITE = 0x05,
+ SQ_MEM_RD_WORD0__UNCACHED_bit = 1 << 11,
+ INDEXED_bit = 1 << 12,
+ SQ_MEM_RD_WORD0__SRC_GPR_mask = 0x7f << 16,
+ SQ_MEM_RD_WORD0__SRC_GPR_shift = 16,
+/* SRC_REL_bit = 1 << 23, */
+ SQ_MEM_RD_WORD0__SRC_SEL_X_mask = 0x03 << 24,
+ SQ_MEM_RD_WORD0__SRC_SEL_X_shift = 24,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+ BURST_CNT_mask = 0x0f << 26,
+ BURST_CNT_shift = 26,
+ SQ_ALU_WORD1 = 0x00008dfc,
+ SQ_ALU_WORD1__ENCODING_mask = 0x07 << 15,
+ SQ_ALU_WORD1__ENCODING_shift = 15,
+ BANK_SWIZZLE_mask = 0x07 << 18,
+ BANK_SWIZZLE_shift = 18,
+ SQ_ALU_VEC_012 = 0x00,
+ SQ_ALU_VEC_021 = 0x01,
+ SQ_ALU_VEC_120 = 0x02,
+ SQ_ALU_VEC_102 = 0x03,
+ SQ_ALU_VEC_201 = 0x04,
+ SQ_ALU_VEC_210 = 0x05,
+ SQ_ALU_WORD1__DST_GPR_mask = 0x7f << 21,
+ SQ_ALU_WORD1__DST_GPR_shift = 21,
+ SQ_ALU_WORD1__DST_REL_bit = 1 << 28,
+ DST_CHAN_mask = 0x03 << 29,
+ DST_CHAN_shift = 29,
+ CHAN_X = 0x00,
+ CHAN_Y = 0x01,
+ CHAN_Z = 0x02,
+ CHAN_W = 0x03,
+ SQ_ALU_WORD1__CLAMP_bit = 1 << 31,
+ SQ_CF_ALU_WORD0_EXT = 0x00008dfc,
+ KCACHE_BANK_INDEX_MODE0_mask = 0x03 << 4,
+ KCACHE_BANK_INDEX_MODE0_shift = 4,
+ SQ_CF_INDEX_NONE = 0x00,
+ SQ_CF_INDEX_0 = 0x01,
+ SQ_CF_INDEX_1 = 0x02,
+ SQ_CF_INVALID = 0x03,
+ KCACHE_BANK_INDEX_MODE1_mask = 0x03 << 6,
+ KCACHE_BANK_INDEX_MODE1_shift = 6,
+/* SQ_CF_INDEX_NONE = 0x00, */
+/* SQ_CF_INDEX_0 = 0x01, */
+/* SQ_CF_INDEX_1 = 0x02, */
+/* SQ_CF_INVALID = 0x03, */
+ KCACHE_BANK_INDEX_MODE2_mask = 0x03 << 8,
+ KCACHE_BANK_INDEX_MODE2_shift = 8,
+/* SQ_CF_INDEX_NONE = 0x00, */
+/* SQ_CF_INDEX_0 = 0x01, */
+/* SQ_CF_INDEX_1 = 0x02, */
+/* SQ_CF_INVALID = 0x03, */
+ KCACHE_BANK_INDEX_MODE3_mask = 0x03 << 10,
+ KCACHE_BANK_INDEX_MODE3_shift = 10,
+/* SQ_CF_INDEX_NONE = 0x00, */
+/* SQ_CF_INDEX_0 = 0x01, */
+/* SQ_CF_INDEX_1 = 0x02, */
+/* SQ_CF_INVALID = 0x03, */
+ KCACHE_BANK2_mask = 0x0f << 22,
+ KCACHE_BANK2_shift = 22,
+ KCACHE_BANK3_mask = 0x0f << 26,
+ KCACHE_BANK3_shift = 26,
+ KCACHE_MODE2_mask = 0x03 << 30,
+ KCACHE_MODE2_shift = 30,
+/* SQ_CF_KCACHE_NOP = 0x00, */
+/* SQ_CF_KCACHE_LOCK_1 = 0x01, */
+/* SQ_CF_KCACHE_LOCK_2 = 0x02, */
+/* SQ_CF_KCACHE_LOCK_LOOP_INDEX = 0x03, */
+ SQ_ALU_WORD0_LDS_IDX_OP = 0x00008dfc,
+ SRC0_SEL_mask = 0x1ff << 0,
+ SRC0_SEL_shift = 0,
+/* SQ_ALU_SRC_LDS_OQ_A = 0xdb, */
+/* SQ_ALU_SRC_LDS_OQ_B = 0xdc, */
+/* SQ_ALU_SRC_LDS_OQ_A_POP = 0xdd, */
+/* SQ_ALU_SRC_LDS_OQ_B_POP = 0xde, */
+/* SQ_ALU_SRC_LDS_DIRECT_A = 0xdf, */
+/* SQ_ALU_SRC_LDS_DIRECT_B = 0xe0, */
+/* SQ_ALU_SRC_TIME_HI = 0xe3, */
+/* SQ_ALU_SRC_TIME_LO = 0xe4, */
+/* SQ_ALU_SRC_MASK_HI = 0xe5, */
+/* SQ_ALU_SRC_MASK_LO = 0xe6, */
+/* SQ_ALU_SRC_HW_WAVE_ID = 0xe7, */
+/* SQ_ALU_SRC_SIMD_ID = 0xe8, */
+/* SQ_ALU_SRC_SE_ID = 0xe9, */
+/* SQ_ALU_SRC_HW_THREADGRP_ID = 0xea, */
+/* SQ_ALU_SRC_WAVE_ID_IN_GRP = 0xeb, */
+/* SQ_ALU_SRC_NUM_THREADGRP_WAVES = 0xec, */
+/* SQ_ALU_SRC_HW_ALU_ODD = 0xed, */
+/* SQ_ALU_SRC_LOOP_IDX = 0xee, */
+/* SQ_ALU_SRC_PARAM_BASE_ADDR = 0xf0, */
+/* SQ_ALU_SRC_NEW_PRIM_MASK = 0xf1, */
+/* SQ_ALU_SRC_PRIM_MASK_HI = 0xf2, */
+/* SQ_ALU_SRC_PRIM_MASK_LO = 0xf3, */
+/* SQ_ALU_SRC_1_DBL_L = 0xf4, */
+/* SQ_ALU_SRC_1_DBL_M = 0xf5, */
+/* SQ_ALU_SRC_0_5_DBL_L = 0xf6, */
+/* SQ_ALU_SRC_0_5_DBL_M = 0xf7, */
+/* SQ_ALU_SRC_0 = 0xf8, */
+/* SQ_ALU_SRC_1 = 0xf9, */
+/* SQ_ALU_SRC_1_INT = 0xfa, */
+/* SQ_ALU_SRC_M_1_INT = 0xfb, */
+/* SQ_ALU_SRC_0_5 = 0xfc, */
+/* SQ_ALU_SRC_LITERAL = 0xfd, */
+/* SQ_ALU_SRC_PV = 0xfe, */
+/* SQ_ALU_SRC_PS = 0xff, */
+ SRC0_REL_bit = 1 << 9,
+ SRC0_CHAN_mask = 0x03 << 10,
+ SRC0_CHAN_shift = 10,
+/* SQ_CHAN_X = 0x00, */
+/* SQ_CHAN_Y = 0x01, */
+/* SQ_CHAN_Z = 0x02, */
+/* SQ_CHAN_W = 0x03, */
+ IDX_OFFSET_4_bit = 1 << 12,
+ SRC1_SEL_mask = 0x1ff << 13,
+ SRC1_SEL_shift = 13,
+/* SQ_ALU_SRC_LDS_OQ_A = 0xdb, */
+/* SQ_ALU_SRC_LDS_OQ_B = 0xdc, */
+/* SQ_ALU_SRC_LDS_OQ_A_POP = 0xdd, */
+/* SQ_ALU_SRC_LDS_OQ_B_POP = 0xde, */
+/* SQ_ALU_SRC_LDS_DIRECT_A = 0xdf, */
+/* SQ_ALU_SRC_LDS_DIRECT_B = 0xe0, */
+/* SQ_ALU_SRC_TIME_HI = 0xe3, */
+/* SQ_ALU_SRC_TIME_LO = 0xe4, */
+/* SQ_ALU_SRC_MASK_HI = 0xe5, */
+/* SQ_ALU_SRC_MASK_LO = 0xe6, */
+/* SQ_ALU_SRC_HW_WAVE_ID = 0xe7, */
+/* SQ_ALU_SRC_SIMD_ID = 0xe8, */
+/* SQ_ALU_SRC_SE_ID = 0xe9, */
+/* SQ_ALU_SRC_HW_THREADGRP_ID = 0xea, */
+/* SQ_ALU_SRC_WAVE_ID_IN_GRP = 0xeb, */
+/* SQ_ALU_SRC_NUM_THREADGRP_WAVES = 0xec, */
+/* SQ_ALU_SRC_HW_ALU_ODD = 0xed, */
+/* SQ_ALU_SRC_LOOP_IDX = 0xee, */
+/* SQ_ALU_SRC_PARAM_BASE_ADDR = 0xf0, */
+/* SQ_ALU_SRC_NEW_PRIM_MASK = 0xf1, */
+/* SQ_ALU_SRC_PRIM_MASK_HI = 0xf2, */
+/* SQ_ALU_SRC_PRIM_MASK_LO = 0xf3, */
+/* SQ_ALU_SRC_1_DBL_L = 0xf4, */
+/* SQ_ALU_SRC_1_DBL_M = 0xf5, */
+/* SQ_ALU_SRC_0_5_DBL_L = 0xf6, */
+/* SQ_ALU_SRC_0_5_DBL_M = 0xf7, */
+/* SQ_ALU_SRC_0 = 0xf8, */
+/* SQ_ALU_SRC_1 = 0xf9, */
+/* SQ_ALU_SRC_1_INT = 0xfa, */
+/* SQ_ALU_SRC_M_1_INT = 0xfb, */
+/* SQ_ALU_SRC_0_5 = 0xfc, */
+/* SQ_ALU_SRC_LITERAL = 0xfd, */
+/* SQ_ALU_SRC_PV = 0xfe, */
+/* SQ_ALU_SRC_PS = 0xff, */
+ SRC1_REL_bit = 1 << 22,
+ SRC1_CHAN_mask = 0x03 << 23,
+ SRC1_CHAN_shift = 23,
+/* SQ_CHAN_X = 0x00, */
+/* SQ_CHAN_Y = 0x01, */
+/* SQ_CHAN_Z = 0x02, */
+/* SQ_CHAN_W = 0x03, */
+ IDX_OFFSET_5_bit = 1 << 25,
+ INDEX_MODE_mask = 0x07 << 26,
+ INDEX_MODE_shift = 26,
+ SQ_INDEX_AR_X = 0x00,
+ SQ_INDEX_LOOP = 0x04,
+ SQ_INDEX_GLOBAL = 0x05,
+ SQ_INDEX_GLOBAL_AR_X = 0x06,
+ PRED_SEL_mask = 0x03 << 29,
+ PRED_SEL_shift = 29,
+ SQ_PRED_SEL_OFF = 0x00,
+ SQ_PRED_SEL_ZERO = 0x02,
+ SQ_PRED_SEL_ONE = 0x03,
+ LAST_bit = 1 << 31,
+ SQ_MEM_GDS_WORD2 = 0x00008dfc,
+ SQ_MEM_GDS_WORD2__DST_SEL_X_mask = 0x07 << 0,
+ SQ_MEM_GDS_WORD2__DST_SEL_X_shift = 0,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SQ_MEM_GDS_WORD2__DST_SEL_Y_mask = 0x07 << 3,
+ SQ_MEM_GDS_WORD2__DST_SEL_Y_shift = 3,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SQ_MEM_GDS_WORD2__DST_SEL_Z_mask = 0x07 << 6,
+ SQ_MEM_GDS_WORD2__DST_SEL_Z_shift = 6,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SQ_MEM_GDS_WORD2__DST_SEL_W_mask = 0x07 << 9,
+ SQ_MEM_GDS_WORD2__DST_SEL_W_shift = 9,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SQ_CF_ALLOC_EXPORT_WORD0_RAT = 0x00008dfc,
+ RAT_ID_mask = 0x0f << 0,
+ RAT_ID_shift = 0,
+ RAT_INST_mask = 0x3f << 4,
+ RAT_INST_shift = 4,
+ SQ_EXPORT_RAT_INST_NOP = 0x00,
+ SQ_EXPORT_RAT_INST_STORE_TYPED = 0x01,
+ SQ_EXPORT_RAT_INST_STORE_RAW = 0x02,
+ SQ_EXPORT_RAT_INST_STORE_RAW_FDENORM = 0x03,
+ SQ_EXPORT_RAT_INST_CMPXCHG_INT = 0x04,
+ SQ_EXPORT_RAT_INST_CMPXCHG_FLT = 0x05,
+ SQ_EXPORT_RAT_INST_CMPXCHG_FDENORM = 0x06,
+ SQ_EXPORT_RAT_INST_ADD = 0x07,
+ SQ_EXPORT_RAT_INST_SUB = 0x08,
+ SQ_EXPORT_RAT_INST_RSUB = 0x09,
+ SQ_EXPORT_RAT_INST_MIN_INT = 0x0a,
+ SQ_EXPORT_RAT_INST_MIN_UINT = 0x0b,
+ SQ_EXPORT_RAT_INST_MAX_INT = 0x0c,
+ SQ_EXPORT_RAT_INST_MAX_UINT = 0x0d,
+ SQ_EXPORT_RAT_INST_AND = 0x0e,
+ SQ_EXPORT_RAT_INST_OR = 0x0f,
+ SQ_EXPORT_RAT_INST_XOR = 0x10,
+ SQ_EXPORT_RAT_INST_MSKOR = 0x11,
+ SQ_EXPORT_RAT_INST_INC_UINT = 0x12,
+ SQ_EXPORT_RAT_INST_DEC_UINT = 0x13,
+ SQ_EXPORT_RAT_INST_NOP_RTN = 0x20,
+ SQ_EXPORT_RAT_INST_XCHG_RTN = 0x22,
+ SQ_EXPORT_RAT_INST_XCHG_FDENORM_RTN = 0x23,
+ SQ_EXPORT_RAT_INST_CMPXCHG_INT_RTN = 0x24,
+ SQ_EXPORT_RAT_INST_CMPXCHG_FLT_RTN = 0x25,
+ SQ_EXPORT_RAT_INST_CMPXCHG_FDENORM_RTN = 0x26,
+ SQ_EXPORT_RAT_INST_ADD_RTN = 0x27,
+ SQ_EXPORT_RAT_INST_SUB_RTN = 0x28,
+ SQ_EXPORT_RAT_INST_RSUB_RTN = 0x29,
+ SQ_EXPORT_RAT_INST_MIN_INT_RTN = 0x2a,
+ SQ_EXPORT_RAT_INST_MIN_UINT_RTN = 0x2b,
+ SQ_EXPORT_RAT_INST_MAX_INT_RTN = 0x2c,
+ SQ_EXPORT_RAT_INST_MAX_UINT_RTN = 0x2d,
+ SQ_EXPORT_RAT_INST_AND_RTN = 0x2e,
+ SQ_EXPORT_RAT_INST_OR_RTN = 0x2f,
+ SQ_EXPORT_RAT_INST_XOR_RTN = 0x30,
+ SQ_EXPORT_RAT_INST_MSKOR_RTN = 0x31,
+ SQ_EXPORT_RAT_INST_INC_UINT_RTN = 0x32,
+ SQ_EXPORT_RAT_INST_DEC_UINT_RTN = 0x33,
+ RAT_INDEX_MODE_mask = 0x03 << 11,
+ RAT_INDEX_MODE_shift = 11,
+/* SQ_CF_INDEX_NONE = 0x00, */
+/* SQ_CF_INDEX_0 = 0x01, */
+/* SQ_CF_INDEX_1 = 0x02, */
+/* SQ_CF_INVALID = 0x03, */
+ SQ_CF_ALLOC_EXPORT_WORD0_RAT__TYPE_mask = 0x03 << 13,
+ SQ_CF_ALLOC_EXPORT_WORD0_RAT__TYPE_shift = 13,
+ SQ_EXPORT_PIXEL = 0x00,
+ SQ_EXPORT_POS = 0x01,
+ SQ_EXPORT_PARAM = 0x02,
+ X_UNUSED_FOR_SX_EXPORTS = 0x03,
+ RW_GPR_mask = 0x7f << 15,
+ RW_GPR_shift = 15,
+ RW_REL_bit = 1 << 22,
+ INDEX_GPR_mask = 0x7f << 23,
+ INDEX_GPR_shift = 23,
+ SQ_CF_ALLOC_EXPORT_WORD0_RAT__ELEM_SIZE_mask = 0x03 << 30,
+ SQ_CF_ALLOC_EXPORT_WORD0_RAT__ELEM_SIZE_shift = 30,
+ SQ_CF_ALU_WORD0 = 0x00008dfc,
+ SQ_CF_ALU_WORD0__ADDR_mask = 0x3fffff << 0,
+ SQ_CF_ALU_WORD0__ADDR_shift = 0,
+ KCACHE_BANK0_mask = 0x0f << 22,
+ KCACHE_BANK0_shift = 22,
+ KCACHE_BANK1_mask = 0x0f << 26,
+ KCACHE_BANK1_shift = 26,
+ KCACHE_MODE0_mask = 0x03 << 30,
+ KCACHE_MODE0_shift = 30,
+/* SQ_CF_KCACHE_NOP = 0x00, */
+/* SQ_CF_KCACHE_LOCK_1 = 0x01, */
+/* SQ_CF_KCACHE_LOCK_2 = 0x02, */
+/* SQ_CF_KCACHE_LOCK_LOOP_INDEX = 0x03, */
+ SQ_MEM_GDS_WORD1 = 0x00008dfc,
+ SQ_MEM_GDS_WORD1__DST_GPR_mask = 0x7f << 0,
+ SQ_MEM_GDS_WORD1__DST_GPR_shift = 0,
+ DST_REL_MODE_mask = 0x03 << 7,
+ DST_REL_MODE_shift = 7,
+ SQ_REL_NONE = 0x00,
+ SQ_REL_LOOP = 0x01,
+ SQ_REL_GLOBAL = 0x02,
+ GDS_OP_mask = 0x3f << 9,
+ GDS_OP_shift = 9,
+ SQ_DS_INST_ADD = 0x00,
+ SQ_DS_INST_SUB = 0x01,
+ SQ_DS_INST_RSUB = 0x02,
+ SQ_DS_INST_INC = 0x03,
+ SQ_DS_INST_DEC = 0x04,
+ SQ_DS_INST_MIN_INT = 0x05,
+ SQ_DS_INST_MAX_INT = 0x06,
+ SQ_DS_INST_MIN_UINT = 0x07,
+ SQ_DS_INST_MAX_UINT = 0x08,
+ SQ_DS_INST_AND = 0x09,
+ SQ_DS_INST_OR = 0x0a,
+ SQ_DS_INST_XOR = 0x0b,
+ SQ_DS_INST_MSKOR = 0x0c,
+ SQ_DS_INST_WRITE = 0x0d,
+ SQ_DS_INST_WRITE_REL = 0x0e,
+ SQ_DS_INST_WRITE2 = 0x0f,
+ SQ_DS_INST_CMP_STORE = 0x10,
+ SQ_DS_INST_CMP_STORE_SPF = 0x11,
+ SQ_DS_INST_BYTE_WRITE = 0x12,
+ SQ_DS_INST_SHORT_WRITE = 0x13,
+ SQ_DS_INST_ADD_RET = 0x20,
+ SQ_DS_INST_SUB_RET = 0x21,
+ SQ_DS_INST_RSUB_RET = 0x22,
+ SQ_DS_INST_INC_RET = 0x23,
+ SQ_DS_INST_DEC_RET = 0x24,
+ SQ_DS_INST_MIN_INT_RET = 0x25,
+ SQ_DS_INST_MAX_INT_RET = 0x26,
+ SQ_DS_INST_MIN_UINT_RET = 0x27,
+ SQ_DS_INST_MAX_UINT_RET = 0x28,
+ SQ_DS_INST_AND_RET = 0x29,
+ SQ_DS_INST_OR_RET = 0x2a,
+ SQ_DS_INST_XOR_RET = 0x2b,
+ SQ_DS_INST_MSKOR_RET = 0x2c,
+ SQ_DS_INST_XCHG_RET = 0x2d,
+ SQ_DS_INST_XCHG_REL_RET = 0x2e,
+ SQ_DS_INST_XCHG2_RET = 0x2f,
+ SQ_DS_INST_CMP_XCHG_RET = 0x30,
+ SQ_DS_INST_CMP_XCHG_SPF_RET = 0x31,
+ SQ_DS_INST_READ_RET = 0x32,
+ SQ_DS_INST_READ_REL_RET = 0x33,
+ SQ_DS_INST_READ2_RET = 0x34,
+ SQ_DS_INST_READWRITE_RET = 0x35,
+ SQ_DS_INST_BYTE_READ_RET = 0x36,
+ SQ_DS_INST_UBYTE_READ_RET = 0x37,
+ SQ_DS_INST_SHORT_READ_RET = 0x38,
+ SQ_DS_INST_USHORT_READ_RET = 0x39,
+ SQ_DS_INST_ATOMIC_ORDERED_ALLOC_RET = 0x3f,
+ DS_OFFSET_mask = 0x7f << 16,
+ DS_OFFSET_shift = 16,
+ UAV_INDEX_MODE_mask = 0x03 << 24,
+ UAV_INDEX_MODE_shift = 24,
+/* SQ_CF_INDEX_NONE = 0x00, */
+/* SQ_CF_INDEX_0 = 0x01, */
+/* SQ_CF_INDEX_1 = 0x02, */
+/* SQ_CF_INVALID = 0x03, */
+ UAV_ID_mask = 0x0f << 26,
+ UAV_ID_shift = 26,
+ ALLOC_CONSUME_bit = 1 << 30,
+ BCAST_FIRST_REQ_bit = 1 << 31,
+ SQ_MEM_RD_WORD2 = 0x00008dfc,
+ ARRAY_BASE_mask = 0x1fff << 0,
+ ARRAY_BASE_shift = 0,
+ SQ_MEM_RD_WORD2__ENDIAN_SWAP_mask = 0x03 << 16,
+ SQ_MEM_RD_WORD2__ENDIAN_SWAP_shift = 16,
+ SQ_ENDIAN_NONE = 0x00,
+ SQ_ENDIAN_8IN16 = 0x01,
+ SQ_ENDIAN_8IN32 = 0x02,
+ SQ_MEM_RD_WORD2__ARRAY_SIZE_mask = 0xfff << 20,
+ SQ_MEM_RD_WORD2__ARRAY_SIZE_shift = 20,
+ SQ_CF_ALU_WORD1_EXT = 0x00008dfc,
+ KCACHE_MODE3_mask = 0x03 << 0,
+ KCACHE_MODE3_shift = 0,
+/* SQ_CF_KCACHE_NOP = 0x00, */
+/* SQ_CF_KCACHE_LOCK_1 = 0x01, */
+/* SQ_CF_KCACHE_LOCK_2 = 0x02, */
+/* SQ_CF_KCACHE_LOCK_LOOP_INDEX = 0x03, */
+ KCACHE_ADDR2_mask = 0xff << 2,
+ KCACHE_ADDR2_shift = 2,
+ KCACHE_ADDR3_mask = 0xff << 10,
+ KCACHE_ADDR3_shift = 10,
+ SQ_CF_ALU_WORD1_EXT__CF_INST_mask = 0x0f << 26,
+ SQ_CF_ALU_WORD1_EXT__CF_INST_shift = 26,
+/* SQ_CF_INST_ALU = 0x08, */
+/* SQ_CF_INST_ALU_PUSH_BEFORE = 0x09, */
+/* SQ_CF_INST_ALU_POP_AFTER = 0x0a, */
+/* SQ_CF_INST_ALU_POP2_AFTER = 0x0b, */
+/* SQ_CF_INST_ALU_EXTENDED = 0x0c, */
+/* SQ_CF_INST_ALU_CONTINUE = 0x0d, */
+/* SQ_CF_INST_ALU_BREAK = 0x0e, */
+/* SQ_CF_INST_ALU_ELSE_AFTER = 0x0f, */
+/* BARRIER_bit = 1 << 31, */
+ SQ_CF_GWS_WORD0 = 0x00008dfc,
+ VALUE_mask = 0x3ff << 0,
+ VALUE_shift = 0,
+ RESOURCE_mask = 0x1f << 16,
+ RESOURCE_shift = 16,
+ SIGN_bit = 1 << 25,
+ VAL_INDEX_MODE_mask = 0x03 << 26,
+ VAL_INDEX_MODE_shift = 26,
+ SQ_GWS_INDEX_NONE = 0x00,
+ SQ_GWS_INDEX_0 = 0x01,
+ SQ_GWS_INDEX_1 = 0x02,
+ SQ_GWS_INDEX_MIX = 0x03,
+ RSRC_INDEX_MODE_mask = 0x03 << 28,
+ RSRC_INDEX_MODE_shift = 28,
+/* SQ_CF_INDEX_NONE = 0x00, */
+/* SQ_CF_INDEX_0 = 0x01, */
+/* SQ_CF_INDEX_1 = 0x02, */
+/* SQ_CF_INVALID = 0x03, */
+ GWS_OPCODE_mask = 0x03 << 30,
+ GWS_OPCODE_shift = 30,
+ SQ_GWS_SEMA_V = 0x00,
+ SQ_GWS_SEMA_P = 0x01,
+ SQ_GWS_BARRIER = 0x02,
+ SQ_GWS_INIT = 0x03,
+ SQ_VTX_WORD2 = 0x00008dfc,
+ SQ_VTX_WORD2__OFFSET_mask = 0xffff << 0,
+ SQ_VTX_WORD2__OFFSET_shift = 0,
+ SQ_VTX_WORD2__ENDIAN_SWAP_mask = 0x03 << 16,
+ SQ_VTX_WORD2__ENDIAN_SWAP_shift = 16,
+/* SQ_ENDIAN_NONE = 0x00, */
+/* SQ_ENDIAN_8IN16 = 0x01, */
+/* SQ_ENDIAN_8IN32 = 0x02, */
+ CONST_BUF_NO_STRIDE_bit = 1 << 18,
+ MEGA_FETCH_bit = 1 << 19,
+ SQ_VTX_WORD2__ALT_CONST_bit = 1 << 20,
+ BUFFER_INDEX_MODE_mask = 0x03 << 21,
+ BUFFER_INDEX_MODE_shift = 21,
+/* SQ_CF_INDEX_NONE = 0x00, */
+/* SQ_CF_INDEX_0 = 0x01, */
+/* SQ_CF_INDEX_1 = 0x02, */
+/* SQ_CF_INVALID = 0x03, */
+ SQ_CF_ALLOC_EXPORT_WORD1_BUF = 0x00008dfc,
+ SQ_CF_ALLOC_EXPORT_WORD1_BUF__ARRAY_SIZE_mask = 0xfff << 0,
+ SQ_CF_ALLOC_EXPORT_WORD1_BUF__ARRAY_SIZE_shift = 0,
+ COMP_MASK_mask = 0x0f << 12,
+ COMP_MASK_shift = 12,
+ SQ_CF_WORD0 = 0x00008dfc,
+ SQ_CF_WORD0__ADDR_mask = 0xffffff << 0,
+ SQ_CF_WORD0__ADDR_shift = 0,
+ JUMPTABLE_SEL_mask = 0x07 << 24,
+ JUMPTABLE_SEL_shift = 24,
+ SQ_CF_JUMPTABLE_SEL_CONST_A = 0x00,
+ SQ_CF_JUMPTABLE_SEL_CONST_B = 0x01,
+ SQ_CF_JUMPTABLE_SEL_CONST_C = 0x02,
+ SQ_CF_JUMPTABLE_SEL_CONST_D = 0x03,
+ SQ_CF_JUMPTABLE_SEL_INDEX_0 = 0x04,
+ SQ_CF_JUMPTABLE_SEL_INDEX_1 = 0x05,
+ SQ_CF_ALLOC_EXPORT_WORD0 = 0x00008dfc,
+/* ARRAY_BASE_mask = 0x1fff << 0, */
+/* ARRAY_BASE_shift = 0, */
+ SQ_CF_ALLOC_EXPORT_WORD0__TYPE_mask = 0x03 << 13,
+ SQ_CF_ALLOC_EXPORT_WORD0__TYPE_shift = 13,
+/* SQ_EXPORT_PIXEL = 0x00, */
+/* SQ_EXPORT_POS = 0x01, */
+/* SQ_EXPORT_PARAM = 0x02, */
+/* X_UNUSED_FOR_SX_EXPORTS = 0x03, */
+/* RW_GPR_mask = 0x7f << 15, */
+/* RW_GPR_shift = 15, */
+/* RW_REL_bit = 1 << 22, */
+/* INDEX_GPR_mask = 0x7f << 23, */
+/* INDEX_GPR_shift = 23, */
+ SQ_CF_ALLOC_EXPORT_WORD0__ELEM_SIZE_mask = 0x03 << 30,
+ SQ_CF_ALLOC_EXPORT_WORD0__ELEM_SIZE_shift = 30,
+ SQ_MEM_GDS_WORD0 = 0x00008dfc,
+/* MEM_INST_mask = 0x1f << 0, */
+/* MEM_INST_shift = 0, */
+/* SQ_MEM_INST_MEM = 0x02, */
+/* MEM_OP_mask = 0x07 << 8, */
+/* MEM_OP_shift = 8, */
+/* SQ_MEM_OP_RD_SCRATCH = 0x00, */
+/* SQ_MEM_OP_RD_SCATTER = 0x02, */
+/* SQ_MEM_OP_GDS = 0x04, */
+/* SQ_MEM_OP_TF_WRITE = 0x05, */
+ SQ_MEM_GDS_WORD0__SRC_GPR_mask = 0x7f << 11,
+ SQ_MEM_GDS_WORD0__SRC_GPR_shift = 11,
+ SRC_REL_MODE_mask = 0x03 << 18,
+ SRC_REL_MODE_shift = 18,
+/* SQ_REL_NONE = 0x00, */
+/* SQ_REL_LOOP = 0x01, */
+/* SQ_REL_GLOBAL = 0x02, */
+ SQ_MEM_GDS_WORD0__SRC_SEL_X_mask = 0x07 << 20,
+ SQ_MEM_GDS_WORD0__SRC_SEL_X_shift = 20,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SRC_SEL_Y_mask = 0x07 << 23, */
+/* SRC_SEL_Y_shift = 23, */
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SRC_SEL_Z_mask = 0x07 << 26, */
+/* SRC_SEL_Z_shift = 26, */
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+ SQ_ALU_WORD1_LDS_DIRECT_LITERAL_HI = 0x00008dfc,
+ OFFSET_B_mask = 0x1fff << 0,
+ OFFSET_B_shift = 0,
+ STRIDE_B_mask = 0x7f << 13,
+ STRIDE_B_shift = 13,
+ THREAD_REL_B_bit = 1 << 22,
+ DIRECT_READ_32_bit = 1 << 31,
+ SQ_VTX_WORD1 = 0x00008dfc,
+ SQ_VTX_WORD1__DST_SEL_X_mask = 0x07 << 9,
+ SQ_VTX_WORD1__DST_SEL_X_shift = 9,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SQ_VTX_WORD1__DST_SEL_Y_mask = 0x07 << 12,
+ SQ_VTX_WORD1__DST_SEL_Y_shift = 12,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SQ_VTX_WORD1__DST_SEL_Z_mask = 0x07 << 15,
+ SQ_VTX_WORD1__DST_SEL_Z_shift = 15,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SQ_VTX_WORD1__DST_SEL_W_mask = 0x07 << 18,
+ SQ_VTX_WORD1__DST_SEL_W_shift = 18,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ USE_CONST_FIELDS_bit = 1 << 21,
+ SQ_VTX_WORD1__DATA_FORMAT_mask = 0x3f << 22,
+ SQ_VTX_WORD1__DATA_FORMAT_shift = 22,
+ SQ_VTX_WORD1__NUM_FORMAT_ALL_mask = 0x03 << 28,
+ SQ_VTX_WORD1__NUM_FORMAT_ALL_shift = 28,
+ SQ_NUM_FORMAT_NORM = 0x00,
+ SQ_NUM_FORMAT_INT = 0x01,
+ SQ_NUM_FORMAT_SCALED = 0x02,
+ SQ_VTX_WORD1__FORMAT_COMP_ALL_bit = 1 << 30,
+ SQ_VTX_WORD1__SRF_MODE_ALL_bit = 1 << 31,
+ SQ_ALU_WORD1_OP2 = 0x00008dfc,
+ SRC0_ABS_bit = 1 << 0,
+ SRC1_ABS_bit = 1 << 1,
+ UPDATE_EXECUTE_MASK_bit = 1 << 2,
+ UPDATE_PRED_bit = 1 << 3,
+ WRITE_MASK_bit = 1 << 4,
+ OMOD_mask = 0x03 << 5,
+ OMOD_shift = 5,
+ SQ_ALU_OMOD_OFF = 0x00,
+ SQ_ALU_OMOD_M2 = 0x01,
+ SQ_ALU_OMOD_M4 = 0x02,
+ SQ_ALU_OMOD_D2 = 0x03,
+ SQ_ALU_WORD1_OP2__ALU_INST_mask = 0x7ff << 7,
+ SQ_ALU_WORD1_OP2__ALU_INST_shift = 7,
+ SQ_OP2_INST_ADD = 0x00,
+ SQ_OP2_INST_MUL = 0x01,
+ SQ_OP2_INST_MUL_IEEE = 0x02,
+ SQ_OP2_INST_MAX = 0x03,
+ SQ_OP2_INST_MIN = 0x04,
+ SQ_OP2_INST_MAX_DX10 = 0x05,
+ SQ_OP2_INST_MIN_DX10 = 0x06,
+ SQ_OP2_INST_SETE = 0x08,
+ SQ_OP2_INST_SETGT = 0x09,
+ SQ_OP2_INST_SETGE = 0x0a,
+ SQ_OP2_INST_SETNE = 0x0b,
+ SQ_OP2_INST_SETE_DX10 = 0x0c,
+ SQ_OP2_INST_SETGT_DX10 = 0x0d,
+ SQ_OP2_INST_SETGE_DX10 = 0x0e,
+ SQ_OP2_INST_SETNE_DX10 = 0x0f,
+ SQ_OP2_INST_FRACT = 0x10,
+ SQ_OP2_INST_TRUNC = 0x11,
+ SQ_OP2_INST_CEIL = 0x12,
+ SQ_OP2_INST_RNDNE = 0x13,
+ SQ_OP2_INST_FLOOR = 0x14,
+ SQ_OP2_INST_ASHR_INT = 0x15,
+ SQ_OP2_INST_LSHR_INT = 0x16,
+ SQ_OP2_INST_LSHL_INT = 0x17,
+ SQ_OP2_INST_MOV = 0x19,
+ SQ_OP2_INST_NOP = 0x1a,
+ SQ_OP2_INST_PRED_SETGT_UINT = 0x1e,
+ SQ_OP2_INST_PRED_SETGE_UINT = 0x1f,
+ SQ_OP2_INST_PRED_SETE = 0x20,
+ SQ_OP2_INST_PRED_SETGT = 0x21,
+ SQ_OP2_INST_PRED_SETGE = 0x22,
+ SQ_OP2_INST_PRED_SETNE = 0x23,
+ SQ_OP2_INST_PRED_SET_INV = 0x24,
+ SQ_OP2_INST_PRED_SET_POP = 0x25,
+ SQ_OP2_INST_PRED_SET_CLR = 0x26,
+ SQ_OP2_INST_PRED_SET_RESTORE = 0x27,
+ SQ_OP2_INST_PRED_SETE_PUSH = 0x28,
+ SQ_OP2_INST_PRED_SETGT_PUSH = 0x29,
+ SQ_OP2_INST_PRED_SETGE_PUSH = 0x2a,
+ SQ_OP2_INST_PRED_SETNE_PUSH = 0x2b,
+ SQ_OP2_INST_KILLE = 0x2c,
+ SQ_OP2_INST_KILLGT = 0x2d,
+ SQ_OP2_INST_KILLGE = 0x2e,
+ SQ_OP2_INST_KILLNE = 0x2f,
+ SQ_OP2_INST_AND_INT = 0x30,
+ SQ_OP2_INST_OR_INT = 0x31,
+ SQ_OP2_INST_XOR_INT = 0x32,
+ SQ_OP2_INST_NOT_INT = 0x33,
+ SQ_OP2_INST_ADD_INT = 0x34,
+ SQ_OP2_INST_SUB_INT = 0x35,
+ SQ_OP2_INST_MAX_INT = 0x36,
+ SQ_OP2_INST_MIN_INT = 0x37,
+ SQ_OP2_INST_MAX_UINT = 0x38,
+ SQ_OP2_INST_MIN_UINT = 0x39,
+ SQ_OP2_INST_SETE_INT = 0x3a,
+ SQ_OP2_INST_SETGT_INT = 0x3b,
+ SQ_OP2_INST_SETGE_INT = 0x3c,
+ SQ_OP2_INST_SETNE_INT = 0x3d,
+ SQ_OP2_INST_SETGT_UINT = 0x3e,
+ SQ_OP2_INST_SETGE_UINT = 0x3f,
+ SQ_OP2_INST_KILLGT_UINT = 0x40,
+ SQ_OP2_INST_KILLGE_UINT = 0x41,
+ SQ_OP2_INST_PRED_SETE_INT = 0x42,
+ SQ_OP2_INST_PRED_SETGT_INT = 0x43,
+ SQ_OP2_INST_PRED_SETGE_INT = 0x44,
+ SQ_OP2_INST_PRED_SETNE_INT = 0x45,
+ SQ_OP2_INST_KILLE_INT = 0x46,
+ SQ_OP2_INST_KILLGT_INT = 0x47,
+ SQ_OP2_INST_KILLGE_INT = 0x48,
+ SQ_OP2_INST_KILLNE_INT = 0x49,
+ SQ_OP2_INST_PRED_SETE_PUSH_INT = 0x4a,
+ SQ_OP2_INST_PRED_SETGT_PUSH_INT = 0x4b,
+ SQ_OP2_INST_PRED_SETGE_PUSH_INT = 0x4c,
+ SQ_OP2_INST_PRED_SETNE_PUSH_INT = 0x4d,
+ SQ_OP2_INST_PRED_SETLT_PUSH_INT = 0x4e,
+ SQ_OP2_INST_PRED_SETLE_PUSH_INT = 0x4f,
+ SQ_OP2_INST_FLT_TO_INT = 0x50,
+ SQ_OP2_INST_BFREV_INT = 0x51,
+ SQ_OP2_INST_ADDC_UINT = 0x52,
+ SQ_OP2_INST_SUBB_UINT = 0x53,
+ SQ_OP2_INST_GROUP_BARRIER = 0x54,
+ SQ_OP2_INST_GROUP_SEQ_BEGIN = 0x55,
+ SQ_OP2_INST_GROUP_SEQ_END = 0x56,
+ SQ_OP2_INST_SET_MODE = 0x57,
+ SQ_OP2_INST_SET_CF_IDX0 = 0x58,
+ SQ_OP2_INST_SET_CF_IDX1 = 0x59,
+ SQ_OP2_INST_SET_LDS_SIZE = 0x5a,
+ SQ_OP2_INST_EXP_IEEE = 0x81,
+ SQ_OP2_INST_LOG_CLAMPED = 0x82,
+ SQ_OP2_INST_LOG_IEEE = 0x83,
+ SQ_OP2_INST_RECIP_CLAMPED = 0x84,
+ SQ_OP2_INST_RECIP_FF = 0x85,
+ SQ_OP2_INST_RECIP_IEEE = 0x86,
+ SQ_OP2_INST_RECIPSQRT_CLAMPED = 0x87,
+ SQ_OP2_INST_RECIPSQRT_FF = 0x88,
+ SQ_OP2_INST_RECIPSQRT_IEEE = 0x89,
+ SQ_OP2_INST_SQRT_IEEE = 0x8a,
+ SQ_OP2_INST_SIN = 0x8d,
+ SQ_OP2_INST_COS = 0x8e,
+ SQ_OP2_INST_MULLO_INT = 0x8f,
+ SQ_OP2_INST_MULHI_INT = 0x90,
+ SQ_OP2_INST_MULLO_UINT = 0x91,
+ SQ_OP2_INST_MULHI_UINT = 0x92,
+ SQ_OP2_INST_RECIP_INT = 0x93,
+ SQ_OP2_INST_RECIP_UINT = 0x94,
+ SQ_OP2_INST_RECIP_64 = 0x95,
+ SQ_OP2_INST_RECIP_CLAMPED_64 = 0x96,
+ SQ_OP2_INST_RECIPSQRT_64 = 0x97,
+ SQ_OP2_INST_RECIPSQRT_CLAMPED_64 = 0x98,
+ SQ_OP2_INST_SQRT_64 = 0x99,
+ SQ_OP2_INST_FLT_TO_UINT = 0x9a,
+ SQ_OP2_INST_INT_TO_FLT = 0x9b,
+ SQ_OP2_INST_UINT_TO_FLT = 0x9c,
+ SQ_OP2_INST_BFM_INT = 0xa0,
+ SQ_OP2_INST_FLT32_TO_FLT16 = 0xa2,
+ SQ_OP2_INST_FLT16_TO_FLT32 = 0xa3,
+ SQ_OP2_INST_UBYTE0_FLT = 0xa4,
+ SQ_OP2_INST_UBYTE1_FLT = 0xa5,
+ SQ_OP2_INST_UBYTE2_FLT = 0xa6,
+ SQ_OP2_INST_UBYTE3_FLT = 0xa7,
+ SQ_OP2_INST_BCNT_INT = 0xaa,
+ SQ_OP2_INST_FFBH_UINT = 0xab,
+ SQ_OP2_INST_FFBL_INT = 0xac,
+ SQ_OP2_INST_FFBH_INT = 0xad,
+ SQ_OP2_INST_FLT_TO_UINT4 = 0xae,
+ SQ_OP2_INST_DOT_IEEE = 0xaf,
+ SQ_OP2_INST_FLT_TO_INT_RPI = 0xb0,
+ SQ_OP2_INST_FLT_TO_INT_FLOOR = 0xb1,
+ SQ_OP2_INST_MULHI_UINT24 = 0xb2,
+ SQ_OP2_INST_MBCNT_32HI_INT = 0xb3,
+ SQ_OP2_INST_OFFSET_TO_FLT = 0xb4,
+ SQ_OP2_INST_MUL_UINT24 = 0xb5,
+ SQ_OP2_INST_BCNT_ACCUM_PREV_INT = 0xb6,
+ SQ_OP2_INST_MBCNT_32LO_ACCUM_PREV_INT = 0xb7,
+ SQ_OP2_INST_SETE_64 = 0xb8,
+ SQ_OP2_INST_SETNE_64 = 0xb9,
+ SQ_OP2_INST_SETGT_64 = 0xba,
+ SQ_OP2_INST_SETGE_64 = 0xbb,
+ SQ_OP2_INST_MIN_64 = 0xbc,
+ SQ_OP2_INST_MAX_64 = 0xbd,
+ SQ_OP2_INST_DOT4 = 0xbe,
+ SQ_OP2_INST_DOT4_IEEE = 0xbf,
+ SQ_OP2_INST_CUBE = 0xc0,
+ SQ_OP2_INST_MAX4 = 0xc1,
+ SQ_OP2_INST_FREXP_64 = 0xc4,
+ SQ_OP2_INST_LDEXP_64 = 0xc5,
+ SQ_OP2_INST_FRACT_64 = 0xc6,
+ SQ_OP2_INST_PRED_SETGT_64 = 0xc7,
+ SQ_OP2_INST_PRED_SETE_64 = 0xc8,
+ SQ_OP2_INST_PRED_SETGE_64 = 0xc9,
+ SQ_OP2_INST_MUL_64 = 0xca,
+ SQ_OP2_INST_ADD_64 = 0xcb,
+ SQ_OP2_INST_MOVA_INT = 0xcc,
+ SQ_OP2_INST_FLT64_TO_FLT32 = 0xcd,
+ SQ_OP2_INST_FLT32_TO_FLT64 = 0xce,
+ SQ_OP2_INST_SAD_ACCUM_PREV_UINT = 0xcf,
+ SQ_OP2_INST_DOT = 0xd0,
+ SQ_OP2_INST_MUL_PREV = 0xd1,
+ SQ_OP2_INST_MUL_IEEE_PREV = 0xd2,
+ SQ_OP2_INST_ADD_PREV = 0xd3,
+ SQ_OP2_INST_MULADD_PREV = 0xd4,
+ SQ_OP2_INST_MULADD_IEEE_PREV = 0xd5,
+ SQ_OP2_INST_INTERP_XY = 0xd6,
+ SQ_OP2_INST_INTERP_ZW = 0xd7,
+ SQ_OP2_INST_INTERP_X = 0xd8,
+ SQ_OP2_INST_INTERP_Z = 0xd9,
+ SQ_OP2_INST_STORE_FLAGS = 0xda,
+ SQ_OP2_INST_LOAD_STORE_FLAGS = 0xdb,
+ SQ_OP2_INST_INTERP_LOAD_P0 = 0xe0,
+ SQ_OP2_INST_INTERP_LOAD_P10 = 0xe1,
+ SQ_OP2_INST_INTERP_LOAD_P20 = 0xe2,
+ SQ_CF_WORD1 = 0x00008dfc,
+ POP_COUNT_mask = 0x07 << 0,
+ POP_COUNT_shift = 0,
+ CF_CONST_mask = 0x1f << 3,
+ CF_CONST_shift = 3,
+ COND_mask = 0x03 << 8,
+ COND_shift = 8,
+ SQ_CF_COND_ACTIVE = 0x00,
+ SQ_CF_COND_FALSE = 0x01,
+ SQ_CF_COND_BOOL = 0x02,
+ SQ_CF_COND_NOT_BOOL = 0x03,
+ SQ_CF_WORD1__COUNT_mask = 0x3f << 10,
+ SQ_CF_WORD1__COUNT_shift = 10,
+/* VALID_PIXEL_MODE_bit = 1 << 20, */
+/* END_OF_PROGRAM_bit = 1 << 21, */
+ SQ_CF_WORD1__CF_INST_mask = 0xff << 22,
+ SQ_CF_WORD1__CF_INST_shift = 22,
+ SQ_CF_INST_NOP = 0x00,
+ SQ_CF_INST_TC = 0x01,
+ SQ_CF_INST_VC = 0x02,
+ SQ_CF_INST_GDS = 0x03,
+ SQ_CF_INST_LOOP_START = 0x04,
+ SQ_CF_INST_LOOP_END = 0x05,
+ SQ_CF_INST_LOOP_START_DX10 = 0x06,
+ SQ_CF_INST_LOOP_START_NO_AL = 0x07,
+ SQ_CF_INST_LOOP_CONTINUE = 0x08,
+ SQ_CF_INST_LOOP_BREAK = 0x09,
+ SQ_CF_INST_JUMP = 0x0a,
+ SQ_CF_INST_PUSH = 0x0b,
+ SQ_CF_INST_ELSE = 0x0d,
+ SQ_CF_INST_POP = 0x0e,
+ SQ_CF_INST_CALL = 0x12,
+ SQ_CF_INST_CALL_FS = 0x13,
+ SQ_CF_INST_RETURN = 0x14,
+ SQ_CF_INST_EMIT_VERTEX = 0x15,
+ SQ_CF_INST_EMIT_CUT_VERTEX = 0x16,
+ SQ_CF_INST_CUT_VERTEX = 0x17,
+ SQ_CF_INST_KILL = 0x18,
+ SQ_CF_INST_WAIT_ACK = 0x1a,
+ SQ_CF_INST_TC_ACK = 0x1b,
+ SQ_CF_INST_VC_ACK = 0x1c,
+ SQ_CF_INST_JUMPTABLE = 0x1d,
+ SQ_CF_INST_GLOBAL_WAVE_SYNC = 0x1e,
+ SQ_CF_INST_HALT = 0x1f,
+/* WHOLE_QUAD_MODE_bit = 1 << 30, */
+/* BARRIER_bit = 1 << 31, */
+ SQ_VTX_WORD1_SEM = 0x00008dfc,
+ SEMANTIC_ID_mask = 0xff << 0,
+ SEMANTIC_ID_shift = 0,
+ SQ_TEX_WORD0 = 0x00008dfc,
+ TEX_INST_mask = 0x1f << 0,
+ TEX_INST_shift = 0,
+ SQ_TEX_INST_LD = 0x03,
+ SQ_TEX_INST_GET_TEXTURE_RESINFO = 0x04,
+ SQ_TEX_INST_GET_NUMBER_OF_SAMPLES = 0x05,
+ SQ_TEX_INST_GET_LOD = 0x06,
+ SQ_TEX_INST_GET_GRADIENTS_H = 0x07,
+ SQ_TEX_INST_GET_GRADIENTS_V = 0x08,
+ SQ_TEX_INST_SET_TEXTURE_OFFSETS = 0x09,
+ SQ_TEX_INST_KEEP_GRADIENTS = 0x0a,
+ SQ_TEX_INST_SET_GRADIENTS_H = 0x0b,
+ SQ_TEX_INST_SET_GRADIENTS_V = 0x0c,
+ SQ_TEX_INST_PASS = 0x0d,
+ SQ_TEX_INST_SAMPLE = 0x10,
+ SQ_TEX_INST_SAMPLE_L = 0x11,
+ SQ_TEX_INST_SAMPLE_LB = 0x12,
+ SQ_TEX_INST_SAMPLE_LZ = 0x13,
+ SQ_TEX_INST_SAMPLE_G = 0x14,
+ SQ_TEX_INST_GATHER4 = 0x15,
+ SQ_TEX_INST_SAMPLE_G_LB = 0x16,
+ SQ_TEX_INST_GATHER4_O = 0x17,
+ SQ_TEX_INST_SAMPLE_C = 0x18,
+ SQ_TEX_INST_SAMPLE_C_L = 0x19,
+ SQ_TEX_INST_SAMPLE_C_LB = 0x1a,
+ SQ_TEX_INST_SAMPLE_C_LZ = 0x1b,
+ SQ_TEX_INST_SAMPLE_C_G = 0x1c,
+ SQ_TEX_INST_GATHER4_C = 0x1d,
+ SQ_TEX_INST_SAMPLE_C_G_LB = 0x1e,
+ SQ_TEX_INST_GATHER4_C_O = 0x1f,
+ INST_MOD_mask = 0x03 << 5,
+ INST_MOD_shift = 5,
+/* FETCH_WHOLE_QUAD_bit = 1 << 7, */
+ RESOURCE_ID_mask = 0xff << 8,
+ RESOURCE_ID_shift = 8,
+ SQ_TEX_WORD0__SRC_GPR_mask = 0x7f << 16,
+ SQ_TEX_WORD0__SRC_GPR_shift = 16,
+/* SRC_REL_bit = 1 << 23, */
+ SQ_TEX_WORD0__ALT_CONST_bit = 1 << 24,
+ RESOURCE_INDEX_MODE_mask = 0x03 << 25,
+ RESOURCE_INDEX_MODE_shift = 25,
+/* SQ_CF_INDEX_NONE = 0x00, */
+/* SQ_CF_INDEX_0 = 0x01, */
+/* SQ_CF_INDEX_1 = 0x02, */
+/* SQ_CF_INVALID = 0x03, */
+ SAMPLER_INDEX_MODE_mask = 0x03 << 27,
+ SAMPLER_INDEX_MODE_shift = 27,
+/* SQ_CF_INDEX_NONE = 0x00, */
+/* SQ_CF_INDEX_0 = 0x01, */
+/* SQ_CF_INDEX_1 = 0x02, */
+/* SQ_CF_INVALID = 0x03, */
+ SQ_VTX_WORD1_GPR = 0x00008dfc,
+ SQ_VTX_WORD1_GPR__DST_GPR_mask = 0x7f << 0,
+ SQ_VTX_WORD1_GPR__DST_GPR_shift = 0,
+ SQ_VTX_WORD1_GPR__DST_REL_bit = 1 << 7,
+ SQ_ALU_WORD1_LDS_IDX_OP = 0x00008dfc,
+/* SRC2_SEL_mask = 0x1ff << 0, */
+/* SRC2_SEL_shift = 0, */
+/* SQ_ALU_SRC_LDS_OQ_A = 0xdb, */
+/* SQ_ALU_SRC_LDS_OQ_B = 0xdc, */
+/* SQ_ALU_SRC_LDS_OQ_A_POP = 0xdd, */
+/* SQ_ALU_SRC_LDS_OQ_B_POP = 0xde, */
+/* SQ_ALU_SRC_LDS_DIRECT_A = 0xdf, */
+/* SQ_ALU_SRC_LDS_DIRECT_B = 0xe0, */
+/* SQ_ALU_SRC_TIME_HI = 0xe3, */
+/* SQ_ALU_SRC_TIME_LO = 0xe4, */
+/* SQ_ALU_SRC_MASK_HI = 0xe5, */
+/* SQ_ALU_SRC_MASK_LO = 0xe6, */
+/* SQ_ALU_SRC_HW_WAVE_ID = 0xe7, */
+/* SQ_ALU_SRC_SIMD_ID = 0xe8, */
+/* SQ_ALU_SRC_SE_ID = 0xe9, */
+/* SQ_ALU_SRC_HW_THREADGRP_ID = 0xea, */
+/* SQ_ALU_SRC_WAVE_ID_IN_GRP = 0xeb, */
+/* SQ_ALU_SRC_NUM_THREADGRP_WAVES = 0xec, */
+/* SQ_ALU_SRC_HW_ALU_ODD = 0xed, */
+/* SQ_ALU_SRC_LOOP_IDX = 0xee, */
+/* SQ_ALU_SRC_PARAM_BASE_ADDR = 0xf0, */
+/* SQ_ALU_SRC_NEW_PRIM_MASK = 0xf1, */
+/* SQ_ALU_SRC_PRIM_MASK_HI = 0xf2, */
+/* SQ_ALU_SRC_PRIM_MASK_LO = 0xf3, */
+/* SQ_ALU_SRC_1_DBL_L = 0xf4, */
+/* SQ_ALU_SRC_1_DBL_M = 0xf5, */
+/* SQ_ALU_SRC_0_5_DBL_L = 0xf6, */
+/* SQ_ALU_SRC_0_5_DBL_M = 0xf7, */
+/* SQ_ALU_SRC_0 = 0xf8, */
+/* SQ_ALU_SRC_1 = 0xf9, */
+/* SQ_ALU_SRC_1_INT = 0xfa, */
+/* SQ_ALU_SRC_M_1_INT = 0xfb, */
+/* SQ_ALU_SRC_0_5 = 0xfc, */
+/* SQ_ALU_SRC_LITERAL = 0xfd, */
+/* SQ_ALU_SRC_PV = 0xfe, */
+/* SQ_ALU_SRC_PS = 0xff, */
+/* SRC2_REL_bit = 1 << 9, */
+/* SRC2_CHAN_mask = 0x03 << 10, */
+/* SRC2_CHAN_shift = 10, */
+/* SQ_CHAN_X = 0x00, */
+/* SQ_CHAN_Y = 0x01, */
+/* SQ_CHAN_Z = 0x02, */
+/* SQ_CHAN_W = 0x03, */
+ IDX_OFFSET_1_bit = 1 << 12,
+ SQ_ALU_WORD1_LDS_IDX_OP__ALU_INST_mask = 0x1f << 13,
+ SQ_ALU_WORD1_LDS_IDX_OP__ALU_INST_shift = 13,
+/* SQ_OP3_INST_BFE_UINT = 0x04, */
+/* SQ_OP3_INST_BFE_INT = 0x05, */
+/* SQ_OP3_INST_BFI_INT = 0x06, */
+/* SQ_OP3_INST_FMA = 0x07, */
+/* SQ_OP3_INST_CNDNE_64 = 0x09, */
+/* SQ_OP3_INST_FMA_64 = 0x0a, */
+/* SQ_OP3_INST_LERP_UINT = 0x0b, */
+/* SQ_OP3_INST_BIT_ALIGN_INT = 0x0c, */
+/* SQ_OP3_INST_BYTE_ALIGN_INT = 0x0d, */
+/* SQ_OP3_INST_SAD_ACCUM_UINT = 0x0e, */
+/* SQ_OP3_INST_SAD_ACCUM_HI_UINT = 0x0f, */
+/* SQ_OP3_INST_MULADD_UINT24 = 0x10, */
+/* SQ_OP3_INST_LDS_IDX_OP = 0x11, */
+/* SQ_OP3_INST_MULADD = 0x14, */
+/* SQ_OP3_INST_MULADD_M2 = 0x15, */
+/* SQ_OP3_INST_MULADD_M4 = 0x16, */
+/* SQ_OP3_INST_MULADD_D2 = 0x17, */
+/* SQ_OP3_INST_MULADD_IEEE = 0x18, */
+/* SQ_OP3_INST_CNDE = 0x19, */
+/* SQ_OP3_INST_CNDGT = 0x1a, */
+/* SQ_OP3_INST_CNDGE = 0x1b, */
+/* SQ_OP3_INST_CNDE_INT = 0x1c, */
+/* SQ_OP3_INST_CNDGT_INT = 0x1d, */
+/* SQ_OP3_INST_CNDGE_INT = 0x1e, */
+/* SQ_OP3_INST_MUL_LIT = 0x1f, */
+/* BANK_SWIZZLE_mask = 0x07 << 18, */
+/* BANK_SWIZZLE_shift = 18, */
+/* SQ_ALU_VEC_012 = 0x00, */
+/* SQ_ALU_VEC_021 = 0x01, */
+/* SQ_ALU_VEC_120 = 0x02, */
+/* SQ_ALU_VEC_102 = 0x03, */
+/* SQ_ALU_VEC_201 = 0x04, */
+/* SQ_ALU_VEC_210 = 0x05, */
+ LDS_OP_mask = 0x3f << 21,
+ LDS_OP_shift = 21,
+/* SQ_DS_INST_ADD = 0x00, */
+/* SQ_DS_INST_SUB = 0x01, */
+/* SQ_DS_INST_RSUB = 0x02, */
+/* SQ_DS_INST_INC = 0x03, */
+/* SQ_DS_INST_DEC = 0x04, */
+/* SQ_DS_INST_MIN_INT = 0x05, */
+/* SQ_DS_INST_MAX_INT = 0x06, */
+/* SQ_DS_INST_MIN_UINT = 0x07, */
+/* SQ_DS_INST_MAX_UINT = 0x08, */
+/* SQ_DS_INST_AND = 0x09, */
+/* SQ_DS_INST_OR = 0x0a, */
+/* SQ_DS_INST_XOR = 0x0b, */
+/* SQ_DS_INST_MSKOR = 0x0c, */
+/* SQ_DS_INST_WRITE = 0x0d, */
+/* SQ_DS_INST_WRITE_REL = 0x0e, */
+/* SQ_DS_INST_WRITE2 = 0x0f, */
+/* SQ_DS_INST_CMP_STORE = 0x10, */
+/* SQ_DS_INST_CMP_STORE_SPF = 0x11, */
+/* SQ_DS_INST_BYTE_WRITE = 0x12, */
+/* SQ_DS_INST_SHORT_WRITE = 0x13, */
+/* SQ_DS_INST_ADD_RET = 0x20, */
+/* SQ_DS_INST_SUB_RET = 0x21, */
+/* SQ_DS_INST_RSUB_RET = 0x22, */
+/* SQ_DS_INST_INC_RET = 0x23, */
+/* SQ_DS_INST_DEC_RET = 0x24, */
+/* SQ_DS_INST_MIN_INT_RET = 0x25, */
+/* SQ_DS_INST_MAX_INT_RET = 0x26, */
+/* SQ_DS_INST_MIN_UINT_RET = 0x27, */
+/* SQ_DS_INST_MAX_UINT_RET = 0x28, */
+/* SQ_DS_INST_AND_RET = 0x29, */
+/* SQ_DS_INST_OR_RET = 0x2a, */
+/* SQ_DS_INST_XOR_RET = 0x2b, */
+/* SQ_DS_INST_MSKOR_RET = 0x2c, */
+/* SQ_DS_INST_XCHG_RET = 0x2d, */
+/* SQ_DS_INST_XCHG_REL_RET = 0x2e, */
+/* SQ_DS_INST_XCHG2_RET = 0x2f, */
+/* SQ_DS_INST_CMP_XCHG_RET = 0x30, */
+/* SQ_DS_INST_CMP_XCHG_SPF_RET = 0x31, */
+/* SQ_DS_INST_READ_RET = 0x32, */
+/* SQ_DS_INST_READ_REL_RET = 0x33, */
+/* SQ_DS_INST_READ2_RET = 0x34, */
+/* SQ_DS_INST_READWRITE_RET = 0x35, */
+/* SQ_DS_INST_BYTE_READ_RET = 0x36, */
+/* SQ_DS_INST_UBYTE_READ_RET = 0x37, */
+/* SQ_DS_INST_SHORT_READ_RET = 0x38, */
+/* SQ_DS_INST_USHORT_READ_RET = 0x39, */
+/* SQ_DS_INST_ATOMIC_ORDERED_ALLOC_RET = 0x3f, */
+ IDX_OFFSET_0_bit = 1 << 27,
+ IDX_OFFSET_2_bit = 1 << 28,
+/* DST_CHAN_mask = 0x03 << 29, */
+/* DST_CHAN_shift = 29, */
+/* CHAN_X = 0x00, */
+/* CHAN_Y = 0x01, */
+/* CHAN_Z = 0x02, */
+/* CHAN_W = 0x03, */
+ IDX_OFFSET_3_bit = 1 << 31,
+ SQ_CF_ENCODING_WORD1 = 0x00008dfc,
+ SQ_CF_ENCODING_WORD1__ENCODING_mask = 0x03 << 28,
+ SQ_CF_ENCODING_WORD1__ENCODING_shift = 28,
+ SQ_CF_ENCODING_INST_CF = 0x00,
+ SQ_CF_ENCODING_INST_ALLOC_EXPORT = 0x01,
+ SQ_CF_ENCODING_INST_ALU0 = 0x02,
+ SQ_CF_ENCODING_INST_ALU1 = 0x03,
+ SQ_ALU_WORD0 = 0x00008dfc,
+/* SRC0_SEL_mask = 0x1ff << 0, */
+/* SRC0_SEL_shift = 0, */
+/* SQ_ALU_SRC_LDS_OQ_A = 0xdb, */
+/* SQ_ALU_SRC_LDS_OQ_B = 0xdc, */
+/* SQ_ALU_SRC_LDS_OQ_A_POP = 0xdd, */
+/* SQ_ALU_SRC_LDS_OQ_B_POP = 0xde, */
+/* SQ_ALU_SRC_LDS_DIRECT_A = 0xdf, */
+/* SQ_ALU_SRC_LDS_DIRECT_B = 0xe0, */
+/* SQ_ALU_SRC_TIME_HI = 0xe3, */
+/* SQ_ALU_SRC_TIME_LO = 0xe4, */
+/* SQ_ALU_SRC_MASK_HI = 0xe5, */
+/* SQ_ALU_SRC_MASK_LO = 0xe6, */
+/* SQ_ALU_SRC_HW_WAVE_ID = 0xe7, */
+/* SQ_ALU_SRC_SIMD_ID = 0xe8, */
+/* SQ_ALU_SRC_SE_ID = 0xe9, */
+/* SQ_ALU_SRC_HW_THREADGRP_ID = 0xea, */
+/* SQ_ALU_SRC_WAVE_ID_IN_GRP = 0xeb, */
+/* SQ_ALU_SRC_NUM_THREADGRP_WAVES = 0xec, */
+/* SQ_ALU_SRC_HW_ALU_ODD = 0xed, */
+/* SQ_ALU_SRC_LOOP_IDX = 0xee, */
+/* SQ_ALU_SRC_PARAM_BASE_ADDR = 0xf0, */
+/* SQ_ALU_SRC_NEW_PRIM_MASK = 0xf1, */
+/* SQ_ALU_SRC_PRIM_MASK_HI = 0xf2, */
+/* SQ_ALU_SRC_PRIM_MASK_LO = 0xf3, */
+/* SQ_ALU_SRC_1_DBL_L = 0xf4, */
+/* SQ_ALU_SRC_1_DBL_M = 0xf5, */
+/* SQ_ALU_SRC_0_5_DBL_L = 0xf6, */
+/* SQ_ALU_SRC_0_5_DBL_M = 0xf7, */
+/* SQ_ALU_SRC_0 = 0xf8, */
+/* SQ_ALU_SRC_1 = 0xf9, */
+/* SQ_ALU_SRC_1_INT = 0xfa, */
+/* SQ_ALU_SRC_M_1_INT = 0xfb, */
+/* SQ_ALU_SRC_0_5 = 0xfc, */
+/* SQ_ALU_SRC_LITERAL = 0xfd, */
+/* SQ_ALU_SRC_PV = 0xfe, */
+/* SQ_ALU_SRC_PS = 0xff, */
+/* SRC0_REL_bit = 1 << 9, */
+/* SRC0_CHAN_mask = 0x03 << 10, */
+/* SRC0_CHAN_shift = 10, */
+/* SQ_CHAN_X = 0x00, */
+/* SQ_CHAN_Y = 0x01, */
+/* SQ_CHAN_Z = 0x02, */
+/* SQ_CHAN_W = 0x03, */
+ SRC0_NEG_bit = 1 << 12,
+/* SRC1_SEL_mask = 0x1ff << 13, */
+/* SRC1_SEL_shift = 13, */
+/* SQ_ALU_SRC_LDS_OQ_A = 0xdb, */
+/* SQ_ALU_SRC_LDS_OQ_B = 0xdc, */
+/* SQ_ALU_SRC_LDS_OQ_A_POP = 0xdd, */
+/* SQ_ALU_SRC_LDS_OQ_B_POP = 0xde, */
+/* SQ_ALU_SRC_LDS_DIRECT_A = 0xdf, */
+/* SQ_ALU_SRC_LDS_DIRECT_B = 0xe0, */
+/* SQ_ALU_SRC_TIME_HI = 0xe3, */
+/* SQ_ALU_SRC_TIME_LO = 0xe4, */
+/* SQ_ALU_SRC_MASK_HI = 0xe5, */
+/* SQ_ALU_SRC_MASK_LO = 0xe6, */
+/* SQ_ALU_SRC_HW_WAVE_ID = 0xe7, */
+/* SQ_ALU_SRC_SIMD_ID = 0xe8, */
+/* SQ_ALU_SRC_SE_ID = 0xe9, */
+/* SQ_ALU_SRC_HW_THREADGRP_ID = 0xea, */
+/* SQ_ALU_SRC_WAVE_ID_IN_GRP = 0xeb, */
+/* SQ_ALU_SRC_NUM_THREADGRP_WAVES = 0xec, */
+/* SQ_ALU_SRC_HW_ALU_ODD = 0xed, */
+/* SQ_ALU_SRC_LOOP_IDX = 0xee, */
+/* SQ_ALU_SRC_PARAM_BASE_ADDR = 0xf0, */
+/* SQ_ALU_SRC_NEW_PRIM_MASK = 0xf1, */
+/* SQ_ALU_SRC_PRIM_MASK_HI = 0xf2, */
+/* SQ_ALU_SRC_PRIM_MASK_LO = 0xf3, */
+/* SQ_ALU_SRC_1_DBL_L = 0xf4, */
+/* SQ_ALU_SRC_1_DBL_M = 0xf5, */
+/* SQ_ALU_SRC_0_5_DBL_L = 0xf6, */
+/* SQ_ALU_SRC_0_5_DBL_M = 0xf7, */
+/* SQ_ALU_SRC_0 = 0xf8, */
+/* SQ_ALU_SRC_1 = 0xf9, */
+/* SQ_ALU_SRC_1_INT = 0xfa, */
+/* SQ_ALU_SRC_M_1_INT = 0xfb, */
+/* SQ_ALU_SRC_0_5 = 0xfc, */
+/* SQ_ALU_SRC_LITERAL = 0xfd, */
+/* SQ_ALU_SRC_PV = 0xfe, */
+/* SQ_ALU_SRC_PS = 0xff, */
+/* SRC1_REL_bit = 1 << 22, */
+/* SRC1_CHAN_mask = 0x03 << 23, */
+/* SRC1_CHAN_shift = 23, */
+/* SQ_CHAN_X = 0x00, */
+/* SQ_CHAN_Y = 0x01, */
+/* SQ_CHAN_Z = 0x02, */
+/* SQ_CHAN_W = 0x03, */
+ SRC1_NEG_bit = 1 << 25,
+/* INDEX_MODE_mask = 0x07 << 26, */
+/* INDEX_MODE_shift = 26, */
+/* SQ_INDEX_AR_X = 0x00, */
+/* SQ_INDEX_LOOP = 0x04, */
+/* SQ_INDEX_GLOBAL = 0x05, */
+/* SQ_INDEX_GLOBAL_AR_X = 0x06, */
+/* PRED_SEL_mask = 0x03 << 29, */
+/* PRED_SEL_shift = 29, */
+/* SQ_PRED_SEL_OFF = 0x00, */
+/* SQ_PRED_SEL_ZERO = 0x02, */
+/* SQ_PRED_SEL_ONE = 0x03, */
+/* LAST_bit = 1 << 31, */
+ SQ_MEM_RD_WORD1 = 0x00008dfc,
+ SQ_MEM_RD_WORD1__DST_GPR_mask = 0x7f << 0,
+ SQ_MEM_RD_WORD1__DST_GPR_shift = 0,
+ SQ_MEM_RD_WORD1__DST_REL_bit = 1 << 7,
+ SQ_MEM_RD_WORD1__DST_SEL_X_mask = 0x07 << 9,
+ SQ_MEM_RD_WORD1__DST_SEL_X_shift = 9,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SQ_MEM_RD_WORD1__DST_SEL_Y_mask = 0x07 << 12,
+ SQ_MEM_RD_WORD1__DST_SEL_Y_shift = 12,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SQ_MEM_RD_WORD1__DST_SEL_Z_mask = 0x07 << 15,
+ SQ_MEM_RD_WORD1__DST_SEL_Z_shift = 15,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SQ_MEM_RD_WORD1__DST_SEL_W_mask = 0x07 << 18,
+ SQ_MEM_RD_WORD1__DST_SEL_W_shift = 18,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SQ_MEM_RD_WORD1__DATA_FORMAT_mask = 0x3f << 22,
+ SQ_MEM_RD_WORD1__DATA_FORMAT_shift = 22,
+ SQ_MEM_RD_WORD1__NUM_FORMAT_ALL_mask = 0x03 << 28,
+ SQ_MEM_RD_WORD1__NUM_FORMAT_ALL_shift = 28,
+/* SQ_NUM_FORMAT_NORM = 0x00, */
+/* SQ_NUM_FORMAT_INT = 0x01, */
+/* SQ_NUM_FORMAT_SCALED = 0x02, */
+ SQ_MEM_RD_WORD1__FORMAT_COMP_ALL_bit = 1 << 30,
+ SQ_MEM_RD_WORD1__SRF_MODE_ALL_bit = 1 << 31,
+ SQ_LSTMP_RING_BASE = 0x00008e10,
+ SQ_LSTMP_RING_SIZE = 0x00008e14,
+ SQ_HSTMP_RING_BASE = 0x00008e18,
+ SQ_HSTMP_RING_SIZE = 0x00008e1c,
+ SX_EXPORT_BUFFER_SIZES = 0x0000900c,
+ COLOR_BUFFER_SIZE_mask = 0xff << 0,
+ COLOR_BUFFER_SIZE_shift = 0,
+ POSITION_BUFFER_SIZE_mask = 0xff << 8,
+ POSITION_BUFFER_SIZE_shift = 8,
+ SMX_BUFFER_SIZE_mask = 0xff << 16,
+ SMX_BUFFER_SIZE_shift = 16,
+ SX_MEMORY_EXPORT_BASE = 0x00009010,
+ SX_MEMORY_EXPORT_SIZE = 0x00009014,
+ SPI_CONFIG_CNTL = 0x00009100,
+ GPR_WRITE_PRIORITY_mask = 0x3ffff << 0,
+ GPR_WRITE_PRIORITY_shift = 0,
+ SPI_CONFIG_CNTL_1 = 0x0000913c,
+ VTX_DONE_DELAY_mask = 0x0f << 0,
+ VTX_DONE_DELAY_shift = 0,
+ X_DELAY_14_CLKS = 0x00,
+ X_DELAY_16_CLKS = 0x01,
+ X_DELAY_18_CLKS = 0x02,
+ X_DELAY_20_CLKS = 0x03,
+ X_DELAY_22_CLKS = 0x04,
+ X_DELAY_24_CLKS = 0x05,
+ X_DELAY_26_CLKS = 0x06,
+ X_DELAY_28_CLKS = 0x07,
+ X_DELAY_30_CLKS = 0x08,
+ X_DELAY_32_CLKS = 0x09,
+ X_DELAY_34_CLKS = 0x0a,
+ X_DELAY_4_CLKS = 0x0b,
+ X_DELAY_6_CLKS = 0x0c,
+ X_DELAY_8_CLKS = 0x0d,
+ X_DELAY_10_CLKS = 0x0e,
+ X_DELAY_12_CLKS = 0x0f,
+ INTERP_ONE_PRIM_PER_ROW_bit = 1 << 4,
+ BC_OPTIMIZE_DISABLE_bit = 1 << 5,
+ PC_LIMIT_ENABLE_bit = 1 << 6,
+ PC_LIMIT_STRICT_bit = 1 << 7,
+ PC_LIMIT_SIZE_mask = 0xffff << 16,
+ PC_LIMIT_SIZE_shift = 16,
+ TD_CNTL = 0x00009494,
+ SYNC_PHASE_SH_mask = 0x03 << 0,
+ SYNC_PHASE_SH_shift = 0,
+ PAD_STALL_EN_bit = 1 << 8,
+ GATHER4_FLOAT_MODE_bit = 1 << 16,
+ TD_STATUS = 0x00009498,
+ BUSY_bit = 1 << 31,
+ TA_CNTL_AUX = 0x00009508,
+ TA_CNTL_AUX__DISABLE_CUBE_WRAP_bit = 1 << 0,
+ DISABLE_CUBE_ANISO_bit = 1 << 1,
+ GETLOD_SELECT_mask = 0x03 << 2,
+ GETLOD_SELECT_shift = 2,
+ X_SAMPLER_AND_RESOURCE_CLAMPED_LOD_IN_RESOURCE= 0x00,
+ DISABLE_IDLE_STALL_bit = 1 << 4,
+ TEX_COORD_PRECISION_bit = 1 << 28,
+ LOD_LOG2_TRUNC_bit = 1 << 29,
+ DB_ZPASS_COUNT_LOW = 0x00009870,
+ DB_ZPASS_COUNT_HI = 0x00009874,
+ COUNT_HI_mask = 0x7fffffff << 0,
+ COUNT_HI_shift = 0,
+ TD_PS_BORDER_COLOR_INDEX = 0x0000a400,
+ INDEX_mask = 0x1f << 0,
+ INDEX_shift = 0,
+ TD_PS_BORDER_COLOR_RED = 0x0000a404,
+ TD_PS_BORDER_COLOR_GREEN = 0x0000a408,
+ TD_PS_BORDER_COLOR_BLUE = 0x0000a40c,
+ TD_PS_BORDER_COLOR_ALPHA = 0x0000a410,
+ TD_VS_BORDER_COLOR_INDEX = 0x0000a414,
+/* INDEX_mask = 0x1f << 0, */
+/* INDEX_shift = 0, */
+ TD_VS_BORDER_COLOR_RED = 0x0000a418,
+ TD_VS_BORDER_COLOR_GREEN = 0x0000a41c,
+ TD_VS_BORDER_COLOR_BLUE = 0x0000a420,
+ TD_VS_BORDER_COLOR_ALPHA = 0x0000a424,
+ TD_GS_BORDER_COLOR_INDEX = 0x0000a428,
+/* INDEX_mask = 0x1f << 0, */
+/* INDEX_shift = 0, */
+ TD_GS_BORDER_COLOR_RED = 0x0000a42c,
+ TD_GS_BORDER_COLOR_GREEN = 0x0000a430,
+ TD_GS_BORDER_COLOR_BLUE = 0x0000a434,
+ TD_GS_BORDER_COLOR_ALPHA = 0x0000a438,
+ TD_HS_BORDER_COLOR_INDEX = 0x0000a43c,
+/* INDEX_mask = 0x1f << 0, */
+/* INDEX_shift = 0, */
+ TD_HS_BORDER_COLOR_RED = 0x0000a440,
+ TD_HS_BORDER_COLOR_GREEN = 0x0000a444,
+ TD_HS_BORDER_COLOR_BLUE = 0x0000a448,
+ TD_HS_BORDER_COLOR_ALPHA = 0x0000a44c,
+ TD_LS_BORDER_COLOR_INDEX = 0x0000a450,
+/* INDEX_mask = 0x1f << 0, */
+/* INDEX_shift = 0, */
+ TD_LS_BORDER_COLOR_RED = 0x0000a454,
+ TD_LS_BORDER_COLOR_GREEN = 0x0000a458,
+ TD_LS_BORDER_COLOR_BLUE = 0x0000a45c,
+ TD_LS_BORDER_COLOR_ALPHA = 0x0000a460,
+ TD_CS_BORDER_COLOR_INDEX = 0x0000a464,
+/* INDEX_mask = 0x1f << 0, */
+/* INDEX_shift = 0, */
+ TD_CS_BORDER_COLOR_RED = 0x0000a468,
+ TD_CS_BORDER_COLOR_GREEN = 0x0000a46c,
+ TD_CS_BORDER_COLOR_BLUE = 0x0000a470,
+ TD_CS_BORDER_COLOR_ALPHA = 0x0000a474,
+ DB_RENDER_CONTROL = 0x00028000,
+ DEPTH_CLEAR_ENABLE_bit = 1 << 0,
+ STENCIL_CLEAR_ENABLE_bit = 1 << 1,
+ DEPTH_COPY_bit = 1 << 2,
+ STENCIL_COPY_bit = 1 << 3,
+ RESUMMARIZE_ENABLE_bit = 1 << 4,
+ STENCIL_COMPRESS_DISABLE_bit = 1 << 5,
+ DEPTH_COMPRESS_DISABLE_bit = 1 << 6,
+ COPY_CENTROID_bit = 1 << 7,
+ COPY_SAMPLE_mask = 0x07 << 8,
+ COPY_SAMPLE_shift = 8,
+ COLOR_DISABLE_bit = 1 << 12,
+ DB_COUNT_CONTROL = 0x00028004,
+ ZPASS_INCREMENT_DISABLE_bit = 1 << 0,
+ PERFECT_ZPASS_COUNTS_bit = 1 << 1,
+ DB_DEPTH_VIEW = 0x00028008,
+ SLICE_START_mask = 0x7ff << 0,
+ SLICE_START_shift = 0,
+ SLICE_MAX_mask = 0x7ff << 13,
+ SLICE_MAX_shift = 13,
+ Z_READ_ONLY_bit = 1 << 24,
+ STENCIL_READ_ONLY_bit = 1 << 25,
+ DB_RENDER_OVERRIDE = 0x0002800c,
+ FORCE_HIZ_ENABLE_mask = 0x03 << 0,
+ FORCE_HIZ_ENABLE_shift = 0,
+ FORCE_OFF = 0x00,
+ FORCE_ENABLE = 0x01,
+ FORCE_DISABLE = 0x02,
+ FORCE_RESERVED = 0x03,
+ FORCE_HIS_ENABLE0_mask = 0x03 << 2,
+ FORCE_HIS_ENABLE0_shift = 2,
+/* FORCE_OFF = 0x00, */
+/* FORCE_ENABLE = 0x01, */
+/* FORCE_DISABLE = 0x02, */
+/* FORCE_RESERVED = 0x03, */
+ FORCE_HIS_ENABLE1_mask = 0x03 << 4,
+ FORCE_HIS_ENABLE1_shift = 4,
+/* FORCE_OFF = 0x00, */
+/* FORCE_ENABLE = 0x01, */
+/* FORCE_DISABLE = 0x02, */
+/* FORCE_RESERVED = 0x03, */
+ FORCE_SHADER_Z_ORDER_bit = 1 << 6,
+ FAST_Z_DISABLE_bit = 1 << 7,
+ FAST_STENCIL_DISABLE_bit = 1 << 8,
+ NOOP_CULL_DISABLE_bit = 1 << 9,
+ FORCE_COLOR_KILL_bit = 1 << 10,
+ FORCE_Z_READ_bit = 1 << 11,
+ FORCE_STENCIL_READ_bit = 1 << 12,
+ FORCE_FULL_Z_RANGE_mask = 0x03 << 13,
+ FORCE_FULL_Z_RANGE_shift = 13,
+/* FORCE_OFF = 0x00, */
+/* FORCE_ENABLE = 0x01, */
+/* FORCE_DISABLE = 0x02, */
+/* FORCE_RESERVED = 0x03, */
+ FORCE_QC_SMASK_CONFLICT_bit = 1 << 15,
+ DISABLE_VIEWPORT_CLAMP_bit = 1 << 16,
+ IGNORE_SC_ZRANGE_bit = 1 << 17,
+ DISABLE_FULLY_COVERED_bit = 1 << 18,
+ FORCE_Z_LIMIT_SUMM_mask = 0x03 << 19,
+ FORCE_Z_LIMIT_SUMM_shift = 19,
+ FORCE_SUMM_OFF = 0x00,
+ FORCE_SUMM_MINZ = 0x01,
+ FORCE_SUMM_MAXZ = 0x02,
+ FORCE_SUMM_BOTH = 0x03,
+ MAX_TILES_IN_DTT_mask = 0x1f << 21,
+ MAX_TILES_IN_DTT_shift = 21,
+ DISABLE_PIXEL_RATE_TILES_bit = 1 << 26,
+ FORCE_Z_DIRTY_bit = 1 << 27,
+ FORCE_STENCIL_DIRTY_bit = 1 << 28,
+ FORCE_Z_VALID_bit = 1 << 29,
+ FORCE_STENCIL_VALID_bit = 1 << 30,
+ PRESERVE_COMPRESSION_bit = 1 << 31,
+ DB_RENDER_OVERRIDE2 = 0x00028010,
+ PARTIAL_SQUAD_LAUNCH_CONTROL_mask = 0x03 << 0,
+ PARTIAL_SQUAD_LAUNCH_CONTROL_shift = 0,
+ PSLC_AUTO = 0x00,
+ PSLC_ON_HANG_ONLY = 0x01,
+ PSLC_ASAP = 0x02,
+ PSLC_COUNTDOWN = 0x03,
+ PARTIAL_SQUAD_LAUNCH_COUNTDOWN_mask = 0x07 << 2,
+ PARTIAL_SQUAD_LAUNCH_COUNTDOWN_shift = 2,
+ DISABLE_ZMASK_EXPCLEAR_OPTIMIZATIO_bit = 1 << 5,
+ DB_HTILE_DATA_BASE = 0x00028014,
+ DB_STENCIL_CLEAR = 0x00028028,
+ DB_STENCIL_CLEAR__CLEAR_mask = 0xff << 0,
+ DB_STENCIL_CLEAR__CLEAR_shift = 0,
+ MIN_mask = 0xff << 16,
+ MIN_shift = 16,
+ DB_DEPTH_CLEAR = 0x0002802c,
+ PA_SC_SCREEN_SCISSOR_TL = 0x00028030,
+ PA_SC_SCREEN_SCISSOR_TL__TL_X_mask = 0xffff << 0,
+ PA_SC_SCREEN_SCISSOR_TL__TL_X_shift = 0,
+ PA_SC_SCREEN_SCISSOR_TL__TL_Y_mask = 0xffff << 16,
+ PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift = 16,
+ PA_SC_SCREEN_SCISSOR_BR = 0x00028034,
+ PA_SC_SCREEN_SCISSOR_BR__BR_X_mask = 0xffff << 0,
+ PA_SC_SCREEN_SCISSOR_BR__BR_X_shift = 0,
+ PA_SC_SCREEN_SCISSOR_BR__BR_Y_mask = 0xffff << 16,
+ PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift = 16,
+ DB_Z_INFO = 0x00028040,
+ DB_Z_INFO__FORMAT_mask = 0x03 << 0,
+ DB_Z_INFO__FORMAT_shift = 0,
+ Z_INVALID = 0x00,
+ Z_16 = 0x01,
+ Z_24 = 0x02,
+ Z_32_FLOAT = 0x03,
+ DB_Z_INFO__ARRAY_MODE_mask = 0x0f << 4,
+ DB_Z_INFO__ARRAY_MODE_shift = 4,
+ ARRAY_LINEAR_GENERAL = 0x00,
+ ARRAY_LINEAR_ALIGNED = 0x01,
+ ARRAY_1D_TILED_THIN1 = 0x02,
+ ARRAY_2D_TILED_THIN1 = 0x04,
+ DB_Z_INFO__TILE_SPLIT_mask = 0x07 << 8,
+ DB_Z_INFO__TILE_SPLIT_shift = 8,
+ ADDR_SURF_TILE_SPLIT_64B = 0x00,
+ ADDR_SURF_TILE_SPLIT_128B = 0x01,
+ ADDR_SURF_TILE_SPLIT_256B = 0x02,
+ ADDR_SURF_TILE_SPLIT_512B = 0x03,
+ ADDR_SURF_TILE_SPLIT_1KB = 0x04,
+ ADDR_SURF_TILE_SPLIT_2KB = 0x05,
+ ADDR_SURF_TILE_SPLIT_4KB = 0x06,
+ DB_Z_INFO__NUM_BANKS_mask = 0x03 << 12,
+ DB_Z_INFO__NUM_BANKS_shift = 12,
+ ADDR_SURF_2_BANK = 0x00,
+ ADDR_SURF_4_BANK = 0x01,
+ ADDR_SURF_8_BANK = 0x02,
+ ADDR_SURF_16_BANK = 0x03,
+ DB_Z_INFO__BANK_WIDTH_mask = 0x03 << 16,
+ DB_Z_INFO__BANK_WIDTH_shift = 16,
+ ADDR_SURF_BANK_WIDTH_1 = 0x00,
+ ADDR_SURF_BANK_WIDTH_2 = 0x01,
+ ADDR_SURF_BANK_WIDTH_4 = 0x02,
+ ADDR_SURF_BANK_WIDTH_8 = 0x03,
+ DB_Z_INFO__BANK_HEIGHT_mask = 0x03 << 20,
+ DB_Z_INFO__BANK_HEIGHT_shift = 20,
+ ADDR_SURF_BANK_HEIGHT_1 = 0x00,
+ ADDR_SURF_BANK_HEIGHT_2 = 0x01,
+ ADDR_SURF_BANK_HEIGHT_4 = 0x02,
+ ADDR_SURF_BANK_HEIGHT_8 = 0x03,
+ DB_Z_INFO__MACRO_TILE_ASPECT_mask = 0x03 << 24,
+ DB_Z_INFO__MACRO_TILE_ASPECT_shift = 24,
+ ADDR_SURF_MACRO_ASPECT_1 = 0x00,
+ ADDR_SURF_MACRO_ASPECT_2 = 0x01,
+ ADDR_SURF_MACRO_ASPECT_4 = 0x02,
+ ADDR_SURF_MACRO_ASPECT_8 = 0x03,
+ ALLOW_EXPCLEAR_bit = 1 << 27,
+ READ_SIZE_bit = 1 << 28,
+ TILE_SURFACE_ENABLE_bit = 1 << 29,
+ DB_Z_INFO__TILE_COMPACT_bit = 1 << 30,
+ ZRANGE_PRECISION_bit = 1 << 31,
+ DB_STENCIL_INFO = 0x00028044,
+ DB_STENCIL_INFO__FORMAT_bit = 1 << 0,
+ DB_STENCIL_INFO__TILE_SPLIT_mask = 0x07 << 8,
+ DB_STENCIL_INFO__TILE_SPLIT_shift = 8,
+/* ADDR_SURF_TILE_SPLIT_64B = 0x00, */
+/* ADDR_SURF_TILE_SPLIT_128B = 0x01, */
+/* ADDR_SURF_TILE_SPLIT_256B = 0x02, */
+/* ADDR_SURF_TILE_SPLIT_512B = 0x03, */
+/* ADDR_SURF_TILE_SPLIT_1KB = 0x04, */
+/* ADDR_SURF_TILE_SPLIT_2KB = 0x05, */
+/* ADDR_SURF_TILE_SPLIT_4KB = 0x06, */
+ DB_Z_READ_BASE = 0x00028048,
+ DB_STENCIL_READ_BASE = 0x0002804c,
+ DB_Z_WRITE_BASE = 0x00028050,
+ DB_STENCIL_WRITE_BASE = 0x00028054,
+ DB_DEPTH_SIZE = 0x00028058,
+ PITCH_TILE_MAX_mask = 0x7ff << 0,
+ PITCH_TILE_MAX_shift = 0,
+ HEIGHT_TILE_MAX_mask = 0x7ff << 11,
+ HEIGHT_TILE_MAX_shift = 11,
+ DB_DEPTH_SLICE = 0x0002805c,
+ SLICE_TILE_MAX_mask = 0x3fffff << 0,
+ SLICE_TILE_MAX_shift = 0,
+ SQ_ALU_CONST_BUFFER_SIZE_PS_0 = 0x00028140,
+ SQ_ALU_CONST_BUFFER_SIZE_PS_0_num = 16,
+ SQ_ALU_CONST_BUFFER_SIZE_PS_0__DATA_mask = 0x1ff << 0,
+ SQ_ALU_CONST_BUFFER_SIZE_PS_0__DATA_shift = 0,
+ SQ_ALU_CONST_BUFFER_SIZE_VS_0 = 0x00028180,
+ SQ_ALU_CONST_BUFFER_SIZE_VS_0_num = 16,
+ SQ_ALU_CONST_BUFFER_SIZE_VS_0__DATA_mask = 0x1ff << 0,
+ SQ_ALU_CONST_BUFFER_SIZE_VS_0__DATA_shift = 0,
+ SQ_ALU_CONST_BUFFER_SIZE_GS_0 = 0x000281c0,
+ SQ_ALU_CONST_BUFFER_SIZE_GS_0_num = 16,
+ SQ_ALU_CONST_BUFFER_SIZE_GS_0__DATA_mask = 0x1ff << 0,
+ SQ_ALU_CONST_BUFFER_SIZE_GS_0__DATA_shift = 0,
+ PA_SC_WINDOW_OFFSET = 0x00028200,
+ WINDOW_X_OFFSET_mask = 0xffff << 0,
+ WINDOW_X_OFFSET_shift = 0,
+ WINDOW_Y_OFFSET_mask = 0xffff << 16,
+ WINDOW_Y_OFFSET_shift = 16,
+ PA_SC_WINDOW_SCISSOR_TL = 0x00028204,
+ PA_SC_WINDOW_SCISSOR_TL__TL_X_mask = 0x7fff << 0,
+ PA_SC_WINDOW_SCISSOR_TL__TL_X_shift = 0,
+ PA_SC_WINDOW_SCISSOR_TL__TL_Y_mask = 0x7fff << 16,
+ PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift = 16,
+ WINDOW_OFFSET_DISABLE_bit = 1 << 31,
+ PA_SC_WINDOW_SCISSOR_BR = 0x00028208,
+ PA_SC_WINDOW_SCISSOR_BR__BR_X_mask = 0x7fff << 0,
+ PA_SC_WINDOW_SCISSOR_BR__BR_X_shift = 0,
+ PA_SC_WINDOW_SCISSOR_BR__BR_Y_mask = 0x7fff << 16,
+ PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift = 16,
+ PA_SC_CLIPRECT_RULE = 0x0002820c,
+ CLIP_RULE_mask = 0xffff << 0,
+ CLIP_RULE_shift = 0,
+ PA_SC_CLIPRECT_0_TL = 0x00028210,
+ PA_SC_CLIPRECT_0_TL_num = 4,
+ PA_SC_CLIPRECT_0_TL_offset = 8,
+ PA_SC_CLIPRECT_0_TL__TL_X_mask = 0x7fff << 0,
+ PA_SC_CLIPRECT_0_TL__TL_X_shift = 0,
+ PA_SC_CLIPRECT_0_TL__TL_Y_mask = 0x7fff << 16,
+ PA_SC_CLIPRECT_0_TL__TL_Y_shift = 16,
+ PA_SC_CLIPRECT_0_BR = 0x00028214,
+ PA_SC_CLIPRECT_0_BR_num = 4,
+ PA_SC_CLIPRECT_0_BR_offset = 8,
+ PA_SC_CLIPRECT_0_BR__BR_X_mask = 0x7fff << 0,
+ PA_SC_CLIPRECT_0_BR__BR_X_shift = 0,
+ PA_SC_CLIPRECT_0_BR__BR_Y_mask = 0x7fff << 16,
+ PA_SC_CLIPRECT_0_BR__BR_Y_shift = 16,
+ PA_SC_EDGERULE = 0x00028230,
+ ER_TRI_mask = 0x0f << 0,
+ ER_TRI_shift = 0,
+ ER_POINT_mask = 0x0f << 4,
+ ER_POINT_shift = 4,
+ ER_RECT_mask = 0x0f << 8,
+ ER_RECT_shift = 8,
+ ER_LINE_LR_mask = 0x3f << 12,
+ ER_LINE_LR_shift = 12,
+ ER_LINE_RL_mask = 0x3f << 18,
+ ER_LINE_RL_shift = 18,
+ ER_LINE_TB_mask = 0x0f << 24,
+ ER_LINE_TB_shift = 24,
+ ER_LINE_BT_mask = 0x0f << 28,
+ ER_LINE_BT_shift = 28,
+ PA_SU_HARDWARE_SCREEN_OFFSET = 0x00028234,
+ HW_SCREEN_OFFSET_X_mask = 0x1f << 0,
+ HW_SCREEN_OFFSET_X_shift = 0,
+ HW_SCREEN_OFFSET_Y_mask = 0x1f << 8,
+ HW_SCREEN_OFFSET_Y_shift = 8,
+ CB_TARGET_MASK = 0x00028238,
+ TARGET0_ENABLE_mask = 0x0f << 0,
+ TARGET0_ENABLE_shift = 0,
+ TARGET1_ENABLE_mask = 0x0f << 4,
+ TARGET1_ENABLE_shift = 4,
+ TARGET2_ENABLE_mask = 0x0f << 8,
+ TARGET2_ENABLE_shift = 8,
+ TARGET3_ENABLE_mask = 0x0f << 12,
+ TARGET3_ENABLE_shift = 12,
+ TARGET4_ENABLE_mask = 0x0f << 16,
+ TARGET4_ENABLE_shift = 16,
+ TARGET5_ENABLE_mask = 0x0f << 20,
+ TARGET5_ENABLE_shift = 20,
+ TARGET6_ENABLE_mask = 0x0f << 24,
+ TARGET6_ENABLE_shift = 24,
+ TARGET7_ENABLE_mask = 0x0f << 28,
+ TARGET7_ENABLE_shift = 28,
+ CB_SHADER_MASK = 0x0002823c,
+ OUTPUT0_ENABLE_mask = 0x0f << 0,
+ OUTPUT0_ENABLE_shift = 0,
+ OUTPUT1_ENABLE_mask = 0x0f << 4,
+ OUTPUT1_ENABLE_shift = 4,
+ OUTPUT2_ENABLE_mask = 0x0f << 8,
+ OUTPUT2_ENABLE_shift = 8,
+ OUTPUT3_ENABLE_mask = 0x0f << 12,
+ OUTPUT3_ENABLE_shift = 12,
+ OUTPUT4_ENABLE_mask = 0x0f << 16,
+ OUTPUT4_ENABLE_shift = 16,
+ OUTPUT5_ENABLE_mask = 0x0f << 20,
+ OUTPUT5_ENABLE_shift = 20,
+ OUTPUT6_ENABLE_mask = 0x0f << 24,
+ OUTPUT6_ENABLE_shift = 24,
+ OUTPUT7_ENABLE_mask = 0x0f << 28,
+ OUTPUT7_ENABLE_shift = 28,
+ PA_SC_GENERIC_SCISSOR_TL = 0x00028240,
+ PA_SC_GENERIC_SCISSOR_TL__TL_X_mask = 0x7fff << 0,
+ PA_SC_GENERIC_SCISSOR_TL__TL_X_shift = 0,
+ PA_SC_GENERIC_SCISSOR_TL__TL_Y_mask = 0x7fff << 16,
+ PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift = 16,
+/* WINDOW_OFFSET_DISABLE_bit = 1 << 31, */
+ PA_SC_GENERIC_SCISSOR_BR = 0x00028244,
+ PA_SC_GENERIC_SCISSOR_BR__BR_X_mask = 0x7fff << 0,
+ PA_SC_GENERIC_SCISSOR_BR__BR_X_shift = 0,
+ PA_SC_GENERIC_SCISSOR_BR__BR_Y_mask = 0x7fff << 16,
+ PA_SC_GENERIC_SCISSOR_BR__BR_Y_shift = 16,
+ PA_SC_VPORT_SCISSOR_0_TL = 0x00028250,
+ PA_SC_VPORT_SCISSOR_0_TL_num = 16,
+ PA_SC_VPORT_SCISSOR_0_TL_offset = 8,
+ PA_SC_VPORT_SCISSOR_0_TL__TL_X_mask = 0x7fff << 0,
+ PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift = 0,
+ PA_SC_VPORT_SCISSOR_0_TL__TL_Y_mask = 0x7fff << 16,
+ PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift = 16,
+/* WINDOW_OFFSET_DISABLE_bit = 1 << 31, */
+ PA_SC_VPORT_SCISSOR_0_BR = 0x00028254,
+ PA_SC_VPORT_SCISSOR_0_BR_num = 16,
+ PA_SC_VPORT_SCISSOR_0_BR_offset = 8,
+ PA_SC_VPORT_SCISSOR_0_BR__BR_X_mask = 0x7fff << 0,
+ PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift = 0,
+ PA_SC_VPORT_SCISSOR_0_BR__BR_Y_mask = 0x7fff << 16,
+ PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift = 16,
+ PA_SC_VPORT_ZMIN_0 = 0x000282d0,
+ PA_SC_VPORT_ZMIN_0_num = 16,
+ PA_SC_VPORT_ZMIN_0_offset = 8,
+ PA_SC_VPORT_ZMAX_0 = 0x000282d4,
+ PA_SC_VPORT_ZMAX_0_num = 16,
+ PA_SC_VPORT_ZMAX_0_offset = 8,
+ SX_MISC = 0x00028350,
+ MULTIPASS_bit = 1 << 0,
+ SQ_VTX_SEMANTIC_0 = 0x00028380,
+ SQ_VTX_SEMANTIC_0_num = 32,
+/* SEMANTIC_ID_mask = 0xff << 0, */
+/* SEMANTIC_ID_shift = 0, */
+ VGT_MAX_VTX_INDX = 0x00028400,
+ VGT_MIN_VTX_INDX = 0x00028404,
+ VGT_INDX_OFFSET = 0x00028408,
+ VGT_MULTI_PRIM_IB_RESET_INDX = 0x0002840c,
+ SX_ALPHA_TEST_CONTROL = 0x00028410,
+ ALPHA_FUNC_mask = 0x07 << 0,
+ ALPHA_FUNC_shift = 0,
+ REF_NEVER = 0x00,
+ REF_LESS = 0x01,
+ REF_EQUAL = 0x02,
+ REF_LEQUAL = 0x03,
+ REF_GREATER = 0x04,
+ REF_NOTEQUAL = 0x05,
+ REF_GEQUAL = 0x06,
+ REF_ALWAYS = 0x07,
+ ALPHA_TEST_ENABLE_bit = 1 << 3,
+ ALPHA_TEST_BYPASS_bit = 1 << 8,
+ CB_BLEND_RED = 0x00028414,
+ CB_BLEND_GREEN = 0x00028418,
+ CB_BLEND_BLUE = 0x0002841c,
+ CB_BLEND_ALPHA = 0x00028420,
+ DB_STENCILREFMASK = 0x00028430,
+ STENCILREF_mask = 0xff << 0,
+ STENCILREF_shift = 0,
+ STENCILMASK_mask = 0xff << 8,
+ STENCILMASK_shift = 8,
+ STENCILWRITEMASK_mask = 0xff << 16,
+ STENCILWRITEMASK_shift = 16,
+ DB_STENCILREFMASK_BF = 0x00028434,
+ STENCILREF_BF_mask = 0xff << 0,
+ STENCILREF_BF_shift = 0,
+ STENCILMASK_BF_mask = 0xff << 8,
+ STENCILMASK_BF_shift = 8,
+ STENCILWRITEMASK_BF_mask = 0xff << 16,
+ STENCILWRITEMASK_BF_shift = 16,
+ SX_ALPHA_REF = 0x00028438,
+ PA_CL_VPORT_XSCALE_0 = 0x0002843c,
+ PA_CL_VPORT_XSCALE_0_num = 16,
+ PA_CL_VPORT_XSCALE_0_offset = 24,
+ PA_CL_VPORT_XOFFSET_0 = 0x00028440,
+ PA_CL_VPORT_XOFFSET_0_num = 16,
+ PA_CL_VPORT_XOFFSET_0_offset = 24,
+ PA_CL_VPORT_YSCALE_0 = 0x00028444,
+ PA_CL_VPORT_YSCALE_0_num = 16,
+ PA_CL_VPORT_YSCALE_0_offset = 24,
+ PA_CL_VPORT_YOFFSET_0 = 0x00028448,
+ PA_CL_VPORT_YOFFSET_0_num = 16,
+ PA_CL_VPORT_YOFFSET_0_offset = 24,
+ PA_CL_VPORT_ZSCALE_0 = 0x0002844c,
+ PA_CL_VPORT_ZSCALE_0_num = 16,
+ PA_CL_VPORT_ZSCALE_0_offset = 24,
+ PA_CL_VPORT_ZOFFSET_0 = 0x00028450,
+ PA_CL_VPORT_ZOFFSET_0_num = 16,
+ PA_CL_VPORT_ZOFFSET_0_offset = 24,
+ PA_CL_UCP_0_X = 0x000285bc,
+ PA_CL_UCP_0_X_num = 6,
+ PA_CL_UCP_0_X_offset = 16,
+ PA_CL_UCP_0_Y = 0x000285c0,
+ PA_CL_UCP_0_Y_num = 6,
+ PA_CL_UCP_0_Y_offset = 16,
+ PA_CL_UCP_0_Z = 0x000285c4,
+ PA_CL_UCP_0_Z_num = 6,
+ PA_CL_UCP_0_Z_offset = 16,
+ PA_CL_UCP_0_W = 0x000285c8,
+ PA_CL_UCP_0_W_num = 6,
+ PA_CL_UCP_0_W_offset = 16,
+ SPI_VS_OUT_ID_0 = 0x0002861c,
+ SPI_VS_OUT_ID_0_num = 10,
+ SEMANTIC_0_mask = 0xff << 0,
+ SEMANTIC_0_shift = 0,
+ SEMANTIC_1_mask = 0xff << 8,
+ SEMANTIC_1_shift = 8,
+ SEMANTIC_2_mask = 0xff << 16,
+ SEMANTIC_2_shift = 16,
+ SEMANTIC_3_mask = 0xff << 24,
+ SEMANTIC_3_shift = 24,
+ SPI_PS_INPUT_CNTL_0 = 0x00028644,
+ SPI_PS_INPUT_CNTL_0_num = 32,
+ SEMANTIC_mask = 0xff << 0,
+ SEMANTIC_shift = 0,
+ DEFAULT_VAL_mask = 0x03 << 8,
+ DEFAULT_VAL_shift = 8,
+ X_0_0F = 0x00,
+ FLAT_SHADE_bit = 1 << 10,
+ CYL_WRAP_mask = 0x0f << 13,
+ CYL_WRAP_shift = 13,
+ PT_SPRITE_TEX_bit = 1 << 17,
+ SPI_VS_OUT_CONFIG = 0x000286c4,
+ VS_PER_COMPONENT_bit = 1 << 0,
+ VS_EXPORT_COUNT_mask = 0x1f << 1,
+ VS_EXPORT_COUNT_shift = 1,
+ VS_EXPORTS_FOG_bit = 1 << 8,
+ VS_OUT_FOG_VEC_ADDR_mask = 0x1f << 9,
+ VS_OUT_FOG_VEC_ADDR_shift = 9,
+ SPI_PS_IN_CONTROL_0 = 0x000286cc,
+ NUM_INTERP_mask = 0x3f << 0,
+ NUM_INTERP_shift = 0,
+ POSITION_ENA_bit = 1 << 8,
+ POSITION_CENTROID_bit = 1 << 9,
+ POSITION_ADDR_mask = 0x1f << 10,
+ POSITION_ADDR_shift = 10,
+ PARAM_GEN_mask = 0x0f << 15,
+ PARAM_GEN_shift = 15,
+ PERSP_GRADIENT_ENA_bit = 1 << 28,
+ LINEAR_GRADIENT_ENA_bit = 1 << 29,
+ POSITION_SAMPLE_bit = 1 << 30,
+ SPI_PS_IN_CONTROL_1 = 0x000286d0,
+ FRONT_FACE_ENA_bit = 1 << 8,
+ FRONT_FACE_ALL_BITS_bit = 1 << 11,
+ FRONT_FACE_ADDR_mask = 0x1f << 12,
+ FRONT_FACE_ADDR_shift = 12,
+ FOG_ADDR_mask = 0x7f << 17,
+ FOG_ADDR_shift = 17,
+ FIXED_PT_POSITION_ENA_bit = 1 << 24,
+ FIXED_PT_POSITION_ADDR_mask = 0x1f << 25,
+ FIXED_PT_POSITION_ADDR_shift = 25,
+ POSITION_ULC_bit = 1 << 30,
+ SPI_INTERP_CONTROL_0 = 0x000286d4,
+ FLAT_SHADE_ENA_bit = 1 << 0,
+ PNT_SPRITE_ENA_bit = 1 << 1,
+ PNT_SPRITE_OVRD_X_mask = 0x07 << 2,
+ PNT_SPRITE_OVRD_X_shift = 2,
+ SPI_PNT_SPRITE_SEL_0 = 0x00,
+ SPI_PNT_SPRITE_SEL_1 = 0x01,
+ SPI_PNT_SPRITE_SEL_S = 0x02,
+ SPI_PNT_SPRITE_SEL_T = 0x03,
+ SPI_PNT_SPRITE_SEL_NONE = 0x04,
+ PNT_SPRITE_OVRD_Y_mask = 0x07 << 5,
+ PNT_SPRITE_OVRD_Y_shift = 5,
+/* SPI_PNT_SPRITE_SEL_0 = 0x00, */
+/* SPI_PNT_SPRITE_SEL_1 = 0x01, */
+/* SPI_PNT_SPRITE_SEL_S = 0x02, */
+/* SPI_PNT_SPRITE_SEL_T = 0x03, */
+/* SPI_PNT_SPRITE_SEL_NONE = 0x04, */
+ PNT_SPRITE_OVRD_Z_mask = 0x07 << 8,
+ PNT_SPRITE_OVRD_Z_shift = 8,
+/* SPI_PNT_SPRITE_SEL_0 = 0x00, */
+/* SPI_PNT_SPRITE_SEL_1 = 0x01, */
+/* SPI_PNT_SPRITE_SEL_S = 0x02, */
+/* SPI_PNT_SPRITE_SEL_T = 0x03, */
+/* SPI_PNT_SPRITE_SEL_NONE = 0x04, */
+ PNT_SPRITE_OVRD_W_mask = 0x07 << 11,
+ PNT_SPRITE_OVRD_W_shift = 11,
+/* SPI_PNT_SPRITE_SEL_0 = 0x00, */
+/* SPI_PNT_SPRITE_SEL_1 = 0x01, */
+/* SPI_PNT_SPRITE_SEL_S = 0x02, */
+/* SPI_PNT_SPRITE_SEL_T = 0x03, */
+/* SPI_PNT_SPRITE_SEL_NONE = 0x04, */
+ PNT_SPRITE_TOP_1_bit = 1 << 14,
+ SPI_INPUT_Z = 0x000286d8,
+ PROVIDE_Z_TO_SPI_bit = 1 << 0,
+ SPI_FOG_CNTL = 0x000286dc,
+ PASS_FOG_THROUGH_PS_bit = 1 << 0,
+ SPI_BARYC_CNTL = 0x000286e0,
+ PERSP_CENTER_ENA_mask = 0x03 << 0,
+ PERSP_CENTER_ENA_shift = 0,
+ X_OFF = 0x00,
+ PERSP_CENTER_ENA__X_ON_AT_CENTER = 0x01,
+ PERSP_CENTER_ENA__X_ON_AT_CENTROID = 0x02,
+ PERSP_CENTROID_ENA_mask = 0x03 << 4,
+ PERSP_CENTROID_ENA_shift = 4,
+/* X_OFF = 0x00, */
+ PERSP_CENTROID_ENA__X_ON_AT_CENTROID = 0x01,
+ PERSP_CENTROID_ENA__X_ON_AT_CENTER = 0x02,
+ PERSP_SAMPLE_ENA_mask = 0x03 << 8,
+ PERSP_SAMPLE_ENA_shift = 8,
+/* X_OFF = 0x00, */
+ PERSP_PULL_MODEL_ENA_mask = 0x03 << 12,
+ PERSP_PULL_MODEL_ENA_shift = 12,
+/* X_OFF = 0x00, */
+ LINEAR_CENTER_ENA_mask = 0x03 << 16,
+ LINEAR_CENTER_ENA_shift = 16,
+/* X_OFF = 0x00, */
+ LINEAR_CENTER_ENA__X_ON_AT_CENTER = 0x01,
+ LINEAR_CENTER_ENA__X_ON_AT_CENTROID = 0x02,
+ LINEAR_CENTROID_ENA_mask = 0x03 << 20,
+ LINEAR_CENTROID_ENA_shift = 20,
+/* X_OFF = 0x00, */
+ LINEAR_CENTROID_ENA__X_ON_AT_CENTROID = 0x01,
+ LINEAR_CENTROID_ENA__X_ON_AT_CENTER = 0x02,
+ LINEAR_SAMPLE_ENA_mask = 0x03 << 24,
+ LINEAR_SAMPLE_ENA_shift = 24,
+/* X_OFF = 0x00, */
+ SPI_PS_IN_CONTROL_2 = 0x000286e4,
+ LINE_STIPPLE_TEX_ADDR_mask = 0xff << 0,
+ LINE_STIPPLE_TEX_ADDR_shift = 0,
+ LINE_STIPPLE_TEX_ENA_bit = 1 << 8,
+ CB_BLEND0_CONTROL = 0x00028780,
+ CB_BLEND0_CONTROL_num = 8,
+ COLOR_SRCBLEND_mask = 0x1f << 0,
+ COLOR_SRCBLEND_shift = 0,
+ BLEND_ZERO = 0x00,
+ BLEND_ONE = 0x01,
+ BLEND_SRC_COLOR = 0x02,
+ BLEND_ONE_MINUS_SRC_COLOR = 0x03,
+ BLEND_SRC_ALPHA = 0x04,
+ BLEND_ONE_MINUS_SRC_ALPHA = 0x05,
+ BLEND_DST_ALPHA = 0x06,
+ BLEND_ONE_MINUS_DST_ALPHA = 0x07,
+ BLEND_DST_COLOR = 0x08,
+ BLEND_ONE_MINUS_DST_COLOR = 0x09,
+ BLEND_SRC_ALPHA_SATURATE = 0x0a,
+ BLEND_BOTH_SRC_ALPHA = 0x0b,
+ BLEND_BOTH_INV_SRC_ALPHA = 0x0c,
+ BLEND_CONSTANT_COLOR = 0x0d,
+ BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0e,
+ BLEND_SRC1_COLOR = 0x0f,
+ BLEND_INV_SRC1_COLOR = 0x10,
+ BLEND_SRC1_ALPHA = 0x11,
+ BLEND_INV_SRC1_ALPHA = 0x12,
+ BLEND_CONSTANT_ALPHA = 0x13,
+ BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x14,
+ COLOR_COMB_FCN_mask = 0x07 << 5,
+ COLOR_COMB_FCN_shift = 5,
+ COMB_DST_PLUS_SRC = 0x00,
+ COMB_SRC_MINUS_DST = 0x01,
+ COMB_MIN_DST_SRC = 0x02,
+ COMB_MAX_DST_SRC = 0x03,
+ COMB_DST_MINUS_SRC = 0x04,
+ COLOR_DESTBLEND_mask = 0x1f << 8,
+ COLOR_DESTBLEND_shift = 8,
+/* BLEND_ZERO = 0x00, */
+/* BLEND_ONE = 0x01, */
+/* BLEND_SRC_COLOR = 0x02, */
+/* BLEND_ONE_MINUS_SRC_COLOR = 0x03, */
+/* BLEND_SRC_ALPHA = 0x04, */
+/* BLEND_ONE_MINUS_SRC_ALPHA = 0x05, */
+/* BLEND_DST_ALPHA = 0x06, */
+/* BLEND_ONE_MINUS_DST_ALPHA = 0x07, */
+/* BLEND_DST_COLOR = 0x08, */
+/* BLEND_ONE_MINUS_DST_COLOR = 0x09, */
+/* BLEND_SRC_ALPHA_SATURATE = 0x0a, */
+/* BLEND_BOTH_SRC_ALPHA = 0x0b, */
+/* BLEND_BOTH_INV_SRC_ALPHA = 0x0c, */
+/* BLEND_CONSTANT_COLOR = 0x0d, */
+/* BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0e, */
+/* BLEND_SRC1_COLOR = 0x0f, */
+/* BLEND_INV_SRC1_COLOR = 0x10, */
+/* BLEND_SRC1_ALPHA = 0x11, */
+/* BLEND_INV_SRC1_ALPHA = 0x12, */
+/* BLEND_CONSTANT_ALPHA = 0x13, */
+/* BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x14, */
+ ALPHA_SRCBLEND_mask = 0x1f << 16,
+ ALPHA_SRCBLEND_shift = 16,
+/* BLEND_ZERO = 0x00, */
+/* BLEND_ONE = 0x01, */
+/* BLEND_SRC_COLOR = 0x02, */
+/* BLEND_ONE_MINUS_SRC_COLOR = 0x03, */
+/* BLEND_SRC_ALPHA = 0x04, */
+/* BLEND_ONE_MINUS_SRC_ALPHA = 0x05, */
+/* BLEND_DST_ALPHA = 0x06, */
+/* BLEND_ONE_MINUS_DST_ALPHA = 0x07, */
+/* BLEND_DST_COLOR = 0x08, */
+/* BLEND_ONE_MINUS_DST_COLOR = 0x09, */
+/* BLEND_SRC_ALPHA_SATURATE = 0x0a, */
+/* BLEND_BOTH_SRC_ALPHA = 0x0b, */
+/* BLEND_BOTH_INV_SRC_ALPHA = 0x0c, */
+/* BLEND_CONSTANT_COLOR = 0x0d, */
+/* BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0e, */
+/* BLEND_SRC1_COLOR = 0x0f, */
+/* BLEND_INV_SRC1_COLOR = 0x10, */
+/* BLEND_SRC1_ALPHA = 0x11, */
+/* BLEND_INV_SRC1_ALPHA = 0x12, */
+/* BLEND_CONSTANT_ALPHA = 0x13, */
+/* BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x14, */
+ ALPHA_COMB_FCN_mask = 0x07 << 21,
+ ALPHA_COMB_FCN_shift = 21,
+/* COMB_DST_PLUS_SRC = 0x00, */
+/* COMB_SRC_MINUS_DST = 0x01, */
+/* COMB_MIN_DST_SRC = 0x02, */
+/* COMB_MAX_DST_SRC = 0x03, */
+/* COMB_DST_MINUS_SRC = 0x04, */
+ ALPHA_DESTBLEND_mask = 0x1f << 24,
+ ALPHA_DESTBLEND_shift = 24,
+/* BLEND_ZERO = 0x00, */
+/* BLEND_ONE = 0x01, */
+/* BLEND_SRC_COLOR = 0x02, */
+/* BLEND_ONE_MINUS_SRC_COLOR = 0x03, */
+/* BLEND_SRC_ALPHA = 0x04, */
+/* BLEND_ONE_MINUS_SRC_ALPHA = 0x05, */
+/* BLEND_DST_ALPHA = 0x06, */
+/* BLEND_ONE_MINUS_DST_ALPHA = 0x07, */
+/* BLEND_DST_COLOR = 0x08, */
+/* BLEND_ONE_MINUS_DST_COLOR = 0x09, */
+/* BLEND_SRC_ALPHA_SATURATE = 0x0a, */
+/* BLEND_BOTH_SRC_ALPHA = 0x0b, */
+/* BLEND_BOTH_INV_SRC_ALPHA = 0x0c, */
+/* BLEND_CONSTANT_COLOR = 0x0d, */
+/* BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0e, */
+/* BLEND_SRC1_COLOR = 0x0f, */
+/* BLEND_INV_SRC1_COLOR = 0x10, */
+/* BLEND_SRC1_ALPHA = 0x11, */
+/* BLEND_INV_SRC1_ALPHA = 0x12, */
+/* BLEND_CONSTANT_ALPHA = 0x13, */
+/* BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x14, */
+ SEPARATE_ALPHA_BLEND_bit = 1 << 29,
+ CB_BLEND0_CONTROL__ENABLE_bit = 1 << 30,
+ PA_CL_POINT_X_RAD = 0x000287d4,
+ PA_CL_POINT_Y_RAD = 0x000287d8,
+ PA_CL_POINT_SIZE = 0x000287dc,
+ PA_CL_POINT_CULL_RAD = 0x000287e0,
+ VGT_DMA_BASE_HI = 0x000287e4,
+ VGT_DMA_BASE_HI__BASE_ADDR_mask = 0xff << 0,
+ VGT_DMA_BASE_HI__BASE_ADDR_shift = 0,
+ VGT_DMA_BASE = 0x000287e8,
+ VGT_DRAW_INITIATOR = 0x000287f0,
+ SOURCE_SELECT_mask = 0x03 << 0,
+ SOURCE_SELECT_shift = 0,
+ DI_SRC_SEL_DMA = 0x00,
+ DI_SRC_SEL_IMMEDIATE = 0x01,
+ DI_SRC_SEL_AUTO_INDEX = 0x02,
+ DI_SRC_SEL_RESERVED = 0x03,
+ MAJOR_MODE_mask = 0x03 << 2,
+ MAJOR_MODE_shift = 2,
+ DI_MAJOR_MODE_0 = 0x00,
+ DI_MAJOR_MODE_1 = 0x01,
+ NOT_EOP_bit = 1 << 5,
+ USE_OPAQUE_bit = 1 << 6,
+ VGT_IMMED_DATA = 0x000287f4,
+ VGT_EVENT_ADDRESS_REG = 0x000287f8,
+ ADDRESS_LOW_mask = 0xfffffff << 0,
+ ADDRESS_LOW_shift = 0,
+ DB_DEPTH_CONTROL = 0x00028800,
+ STENCIL_ENABLE_bit = 1 << 0,
+ Z_ENABLE_bit = 1 << 1,
+ Z_WRITE_ENABLE_bit = 1 << 2,
+ ZFUNC_mask = 0x07 << 4,
+ ZFUNC_shift = 4,
+ FRAG_NEVER = 0x00,
+ FRAG_LESS = 0x01,
+ FRAG_EQUAL = 0x02,
+ FRAG_LEQUAL = 0x03,
+ FRAG_GREATER = 0x04,
+ FRAG_NOTEQUAL = 0x05,
+ FRAG_GEQUAL = 0x06,
+ FRAG_ALWAYS = 0x07,
+ BACKFACE_ENABLE_bit = 1 << 7,
+ STENCILFUNC_mask = 0x07 << 8,
+ STENCILFUNC_shift = 8,
+/* REF_NEVER = 0x00, */
+/* REF_LESS = 0x01, */
+/* REF_EQUAL = 0x02, */
+/* REF_LEQUAL = 0x03, */
+/* REF_GREATER = 0x04, */
+/* REF_NOTEQUAL = 0x05, */
+/* REF_GEQUAL = 0x06, */
+/* REF_ALWAYS = 0x07, */
+ STENCILFAIL_mask = 0x07 << 11,
+ STENCILFAIL_shift = 11,
+ STENCIL_KEEP = 0x00,
+ STENCIL_ZERO = 0x01,
+ STENCIL_REPLACE = 0x02,
+ STENCIL_INCR_CLAMP = 0x03,
+ STENCIL_DECR_CLAMP = 0x04,
+ STENCIL_INVERT = 0x05,
+ STENCIL_INCR_WRAP = 0x06,
+ STENCIL_DECR_WRAP = 0x07,
+ STENCILZPASS_mask = 0x07 << 14,
+ STENCILZPASS_shift = 14,
+/* STENCIL_KEEP = 0x00, */
+/* STENCIL_ZERO = 0x01, */
+/* STENCIL_REPLACE = 0x02, */
+/* STENCIL_INCR_CLAMP = 0x03, */
+/* STENCIL_DECR_CLAMP = 0x04, */
+/* STENCIL_INVERT = 0x05, */
+/* STENCIL_INCR_WRAP = 0x06, */
+/* STENCIL_DECR_WRAP = 0x07, */
+ STENCILZFAIL_mask = 0x07 << 17,
+ STENCILZFAIL_shift = 17,
+/* STENCIL_KEEP = 0x00, */
+/* STENCIL_ZERO = 0x01, */
+/* STENCIL_REPLACE = 0x02, */
+/* STENCIL_INCR_CLAMP = 0x03, */
+/* STENCIL_DECR_CLAMP = 0x04, */
+/* STENCIL_INVERT = 0x05, */
+/* STENCIL_INCR_WRAP = 0x06, */
+/* STENCIL_DECR_WRAP = 0x07, */
+ STENCILFUNC_BF_mask = 0x07 << 20,
+ STENCILFUNC_BF_shift = 20,
+/* REF_NEVER = 0x00, */
+/* REF_LESS = 0x01, */
+/* REF_EQUAL = 0x02, */
+/* REF_LEQUAL = 0x03, */
+/* REF_GREATER = 0x04, */
+/* REF_NOTEQUAL = 0x05, */
+/* REF_GEQUAL = 0x06, */
+/* REF_ALWAYS = 0x07, */
+ STENCILFAIL_BF_mask = 0x07 << 23,
+ STENCILFAIL_BF_shift = 23,
+/* STENCIL_KEEP = 0x00, */
+/* STENCIL_ZERO = 0x01, */
+/* STENCIL_REPLACE = 0x02, */
+/* STENCIL_INCR_CLAMP = 0x03, */
+/* STENCIL_DECR_CLAMP = 0x04, */
+/* STENCIL_INVERT = 0x05, */
+/* STENCIL_INCR_WRAP = 0x06, */
+/* STENCIL_DECR_WRAP = 0x07, */
+ STENCILZPASS_BF_mask = 0x07 << 26,
+ STENCILZPASS_BF_shift = 26,
+/* STENCIL_KEEP = 0x00, */
+/* STENCIL_ZERO = 0x01, */
+/* STENCIL_REPLACE = 0x02, */
+/* STENCIL_INCR_CLAMP = 0x03, */
+/* STENCIL_DECR_CLAMP = 0x04, */
+/* STENCIL_INVERT = 0x05, */
+/* STENCIL_INCR_WRAP = 0x06, */
+/* STENCIL_DECR_WRAP = 0x07, */
+ STENCILZFAIL_BF_mask = 0x07 << 29,
+ STENCILZFAIL_BF_shift = 29,
+/* STENCIL_KEEP = 0x00, */
+/* STENCIL_ZERO = 0x01, */
+/* STENCIL_REPLACE = 0x02, */
+/* STENCIL_INCR_CLAMP = 0x03, */
+/* STENCIL_DECR_CLAMP = 0x04, */
+/* STENCIL_INVERT = 0x05, */
+/* STENCIL_INCR_WRAP = 0x06, */
+/* STENCIL_DECR_WRAP = 0x07, */
+ CB_COLOR_CONTROL = 0x00028808,
+ DEGAMMA_ENABLE_bit = 1 << 3,
+ CB_COLOR_CONTROL__MODE_mask = 0x07 << 4,
+ CB_COLOR_CONTROL__MODE_shift = 4,
+ CB_DISABLE = 0x00,
+ CB_NORMAL = 0x01,
+ CB_ELIMINATE_FAST_CLEAR = 0x02,
+ CB_RESOLVE = 0x03,
+ CB_DECOMPRESS = 0x04,
+ CB_FMASK_DECOMPRESS = 0x05,
+ ROP3_mask = 0xff << 16,
+ ROP3_shift = 16,
+ DB_SHADER_CONTROL = 0x0002880c,
+ Z_EXPORT_ENABLE_bit = 1 << 0,
+ STENCIL_REF_EXPORT_ENABLE_bit = 1 << 1,
+ Z_ORDER_mask = 0x03 << 4,
+ Z_ORDER_shift = 4,
+ LATE_Z = 0x00,
+ EARLY_Z_THEN_LATE_Z = 0x01,
+ RE_Z = 0x02,
+ EARLY_Z_THEN_RE_Z = 0x03,
+ KILL_ENABLE_bit = 1 << 6,
+ COVERAGE_TO_MASK_ENABLE_bit = 1 << 7,
+ MASK_EXPORT_ENABLE_bit = 1 << 8,
+ DUAL_EXPORT_ENABLE_bit = 1 << 9,
+ EXEC_ON_HIER_FAIL_bit = 1 << 10,
+ EXEC_ON_NOOP_bit = 1 << 11,
+ ALPHA_TO_MASK_DISABLE_bit = 1 << 12,
+ DB_SOURCE_FORMAT_mask = 0x03 << 13,
+ DB_SOURCE_FORMAT_shift = 13,
+ EXPORT_DB_FULL = 0x00,
+ EXPORT_DB_FOUR16 = 0x01,
+ EXPORT_DB_TWO = 0x02,
+ DEPTH_BEFORE_SHADER_bit = 1 << 15,
+ CONSERVATIVE_Z_EXPORT_mask = 0x03 << 16,
+ CONSERVATIVE_Z_EXPORT_shift = 16,
+ EXPORT_ANY_Z = 0x00,
+ EXPORT_LESS_THAN_Z = 0x01,
+ EXPORT_GREATER_THAN_Z = 0x02,
+ EXPORT_RESERVED = 0x03,
+ PA_CL_CLIP_CNTL = 0x00028810,
+ UCP_ENA_0_bit = 1 << 0,
+ UCP_ENA_1_bit = 1 << 1,
+ UCP_ENA_2_bit = 1 << 2,
+ UCP_ENA_3_bit = 1 << 3,
+ UCP_ENA_4_bit = 1 << 4,
+ UCP_ENA_5_bit = 1 << 5,
+ PS_UCP_Y_SCALE_NEG_bit = 1 << 13,
+ PS_UCP_MODE_mask = 0x03 << 14,
+ PS_UCP_MODE_shift = 14,
+ CLIP_DISABLE_bit = 1 << 16,
+ UCP_CULL_ONLY_ENA_bit = 1 << 17,
+ BOUNDARY_EDGE_FLAG_ENA_bit = 1 << 18,
+ DX_CLIP_SPACE_DEF_bit = 1 << 19,
+ DIS_CLIP_ERR_DETECT_bit = 1 << 20,
+ VTX_KILL_OR_bit = 1 << 21,
+ DX_RASTERIZATION_KILL_bit = 1 << 22,
+ DX_LINEAR_ATTR_CLIP_ENA_bit = 1 << 24,
+ VTE_VPORT_PROVOKE_DISABLE_bit = 1 << 25,
+ ZCLIP_NEAR_DISABLE_bit = 1 << 26,
+ ZCLIP_FAR_DISABLE_bit = 1 << 27,
+ PA_SU_SC_MODE_CNTL = 0x00028814,
+ CULL_FRONT_bit = 1 << 0,
+ CULL_BACK_bit = 1 << 1,
+ FACE_bit = 1 << 2,
+ POLY_MODE_mask = 0x03 << 3,
+ POLY_MODE_shift = 3,
+ X_DISABLE_POLY_MODE = 0x00,
+ X_DUAL_MODE = 0x01,
+ POLYMODE_FRONT_PTYPE_mask = 0x07 << 5,
+ POLYMODE_FRONT_PTYPE_shift = 5,
+ X_DRAW_POINTS = 0x00,
+ X_DRAW_LINES = 0x01,
+ X_DRAW_TRIANGLES = 0x02,
+ POLYMODE_BACK_PTYPE_mask = 0x07 << 8,
+ POLYMODE_BACK_PTYPE_shift = 8,
+/* X_DRAW_POINTS = 0x00, */
+/* X_DRAW_LINES = 0x01, */
+/* X_DRAW_TRIANGLES = 0x02, */
+ POLY_OFFSET_FRONT_ENABLE_bit = 1 << 11,
+ POLY_OFFSET_BACK_ENABLE_bit = 1 << 12,
+ POLY_OFFSET_PARA_ENABLE_bit = 1 << 13,
+ VTX_WINDOW_OFFSET_ENABLE_bit = 1 << 16,
+ PROVOKING_VTX_LAST_bit = 1 << 19,
+ PERSP_CORR_DIS_bit = 1 << 20,
+ MULTI_PRIM_IB_ENA_bit = 1 << 21,
+ PA_CL_VTE_CNTL = 0x00028818,
+ VPORT_X_SCALE_ENA_bit = 1 << 0,
+ VPORT_X_OFFSET_ENA_bit = 1 << 1,
+ VPORT_Y_SCALE_ENA_bit = 1 << 2,
+ VPORT_Y_OFFSET_ENA_bit = 1 << 3,
+ VPORT_Z_SCALE_ENA_bit = 1 << 4,
+ VPORT_Z_OFFSET_ENA_bit = 1 << 5,
+ VTX_XY_FMT_bit = 1 << 8,
+ VTX_Z_FMT_bit = 1 << 9,
+ VTX_W0_FMT_bit = 1 << 10,
+ PA_CL_VS_OUT_CNTL = 0x0002881c,
+ CLIP_DIST_ENA_0_bit = 1 << 0,
+ CLIP_DIST_ENA_1_bit = 1 << 1,
+ CLIP_DIST_ENA_2_bit = 1 << 2,
+ CLIP_DIST_ENA_3_bit = 1 << 3,
+ CLIP_DIST_ENA_4_bit = 1 << 4,
+ CLIP_DIST_ENA_5_bit = 1 << 5,
+ CLIP_DIST_ENA_6_bit = 1 << 6,
+ CLIP_DIST_ENA_7_bit = 1 << 7,
+ CULL_DIST_ENA_0_bit = 1 << 8,
+ CULL_DIST_ENA_1_bit = 1 << 9,
+ CULL_DIST_ENA_2_bit = 1 << 10,
+ CULL_DIST_ENA_3_bit = 1 << 11,
+ CULL_DIST_ENA_4_bit = 1 << 12,
+ CULL_DIST_ENA_5_bit = 1 << 13,
+ CULL_DIST_ENA_6_bit = 1 << 14,
+ CULL_DIST_ENA_7_bit = 1 << 15,
+ USE_VTX_POINT_SIZE_bit = 1 << 16,
+ USE_VTX_EDGE_FLAG_bit = 1 << 17,
+ USE_VTX_RENDER_TARGET_INDX_bit = 1 << 18,
+ USE_VTX_VIEWPORT_INDX_bit = 1 << 19,
+ USE_VTX_KILL_FLAG_bit = 1 << 20,
+ VS_OUT_MISC_VEC_ENA_bit = 1 << 21,
+ VS_OUT_CCDIST0_VEC_ENA_bit = 1 << 22,
+ VS_OUT_CCDIST1_VEC_ENA_bit = 1 << 23,
+ PA_CL_NANINF_CNTL = 0x00028820,
+ VTE_XY_INF_DISCARD_bit = 1 << 0,
+ VTE_Z_INF_DISCARD_bit = 1 << 1,
+ VTE_W_INF_DISCARD_bit = 1 << 2,
+ VTE_0XNANINF_IS_0_bit = 1 << 3,
+ VTE_XY_NAN_RETAIN_bit = 1 << 4,
+ VTE_Z_NAN_RETAIN_bit = 1 << 5,
+ VTE_W_NAN_RETAIN_bit = 1 << 6,
+ VTE_W_RECIP_NAN_IS_0_bit = 1 << 7,
+ VS_XY_NAN_TO_INF_bit = 1 << 8,
+ VS_XY_INF_RETAIN_bit = 1 << 9,
+ VS_Z_NAN_TO_INF_bit = 1 << 10,
+ VS_Z_INF_RETAIN_bit = 1 << 11,
+ VS_W_NAN_TO_INF_bit = 1 << 12,
+ VS_W_INF_RETAIN_bit = 1 << 13,
+ VS_CLIP_DIST_INF_DISCARD_bit = 1 << 14,
+ VTE_NO_OUTPUT_NEG_0_bit = 1 << 20,
+ PA_SU_LINE_STIPPLE_CNTL = 0x00028824,
+ LINE_STIPPLE_RESET_mask = 0x03 << 0,
+ LINE_STIPPLE_RESET_shift = 0,
+ EXPAND_FULL_LENGTH_bit = 1 << 2,
+ FRACTIONAL_ACCUM_bit = 1 << 3,
+ DIAMOND_ADJUST_bit = 1 << 4,
+ PA_SU_LINE_STIPPLE_SCALE = 0x00028828,
+ PA_SU_PRIM_FILTER_CNTL = 0x0002882c,
+ TRIANGLE_FILTER_DISABLE_bit = 1 << 0,
+ LINE_FILTER_DISABLE_bit = 1 << 1,
+ POINT_FILTER_DISABLE_bit = 1 << 2,
+ RECTANGLE_FILTER_DISABLE_bit = 1 << 3,
+ TRIANGLE_EXPAND_ENA_bit = 1 << 4,
+ LINE_EXPAND_ENA_bit = 1 << 5,
+ POINT_EXPAND_ENA_bit = 1 << 6,
+ RECTANGLE_EXPAND_ENA_bit = 1 << 7,
+ PRIM_EXPAND_CONSTANT_mask = 0xff << 8,
+ PRIM_EXPAND_CONSTANT_shift = 8,
+ SQ_LSTMP_RING_ITEMSIZE = 0x00028830,
+ ITEMSIZE_mask = 0x7fff << 0,
+ ITEMSIZE_shift = 0,
+ SQ_HSTMP_RING_ITEMSIZE = 0x00028834,
+/* ITEMSIZE_mask = 0x7fff << 0, */
+/* ITEMSIZE_shift = 0, */
+ SQ_PGM_START_PS = 0x00028840,
+ SQ_PGM_RESOURCES_PS = 0x00028844,
+ NUM_GPRS_mask = 0xff << 0,
+ NUM_GPRS_shift = 0,
+ STACK_SIZE_mask = 0xff << 8,
+ STACK_SIZE_shift = 8,
+ DX10_CLAMP_bit = 1 << 21,
+ UNCACHED_FIRST_INST_bit = 1 << 28,
+ CLAMP_CONSTS_bit = 1 << 31,
+ SQ_PGM_RESOURCES_2_PS = 0x00028848,
+ SINGLE_ROUND_mask = 0x03 << 0,
+ SINGLE_ROUND_shift = 0,
+ SQ_ROUND_NEAREST_EVEN = 0x00,
+ SQ_ROUND_PLUS_INFINITY = 0x01,
+ SQ_ROUND_MINUS_INFINITY = 0x02,
+ SQ_ROUND_TO_ZERO = 0x03,
+ DOUBLE_ROUND_mask = 0x03 << 2,
+ DOUBLE_ROUND_shift = 2,
+/* SQ_ROUND_NEAREST_EVEN = 0x00, */
+/* SQ_ROUND_PLUS_INFINITY = 0x01, */
+/* SQ_ROUND_MINUS_INFINITY = 0x02, */
+/* SQ_ROUND_TO_ZERO = 0x03, */
+ ALLOW_SINGLE_DENORM_IN_bit = 1 << 4,
+ ALLOW_SINGLE_DENORM_OUT_bit = 1 << 5,
+ ALLOW_DOUBLE_DENORM_IN_bit = 1 << 6,
+ ALLOW_DOUBLE_DENORM_OUT_bit = 1 << 7,
+ SQ_PGM_EXPORTS_PS = 0x0002884c,
+ EXPORT_MODE_mask = 0x1f << 0,
+ EXPORT_MODE_shift = 0,
+ SQ_PGM_START_VS = 0x0002885c,
+ SQ_PGM_RESOURCES_VS = 0x00028860,
+/* NUM_GPRS_mask = 0xff << 0, */
+/* NUM_GPRS_shift = 0, */
+/* STACK_SIZE_mask = 0xff << 8, */
+/* STACK_SIZE_shift = 8, */
+/* DX10_CLAMP_bit = 1 << 21, */
+/* UNCACHED_FIRST_INST_bit = 1 << 28, */
+ SQ_PGM_RESOURCES_2_VS = 0x00028864,
+/* SINGLE_ROUND_mask = 0x03 << 0, */
+/* SINGLE_ROUND_shift = 0, */
+/* SQ_ROUND_NEAREST_EVEN = 0x00, */
+/* SQ_ROUND_PLUS_INFINITY = 0x01, */
+/* SQ_ROUND_MINUS_INFINITY = 0x02, */
+/* SQ_ROUND_TO_ZERO = 0x03, */
+/* DOUBLE_ROUND_mask = 0x03 << 2, */
+/* DOUBLE_ROUND_shift = 2, */
+/* SQ_ROUND_NEAREST_EVEN = 0x00, */
+/* SQ_ROUND_PLUS_INFINITY = 0x01, */
+/* SQ_ROUND_MINUS_INFINITY = 0x02, */
+/* SQ_ROUND_TO_ZERO = 0x03, */
+/* ALLOW_SINGLE_DENORM_IN_bit = 1 << 4, */
+/* ALLOW_SINGLE_DENORM_OUT_bit = 1 << 5, */
+/* ALLOW_DOUBLE_DENORM_IN_bit = 1 << 6, */
+/* ALLOW_DOUBLE_DENORM_OUT_bit = 1 << 7, */
+ SQ_PGM_START_GS = 0x00028874,
+ SQ_PGM_RESOURCES_GS = 0x00028878,
+/* NUM_GPRS_mask = 0xff << 0, */
+/* NUM_GPRS_shift = 0, */
+/* STACK_SIZE_mask = 0xff << 8, */
+/* STACK_SIZE_shift = 8, */
+/* DX10_CLAMP_bit = 1 << 21, */
+/* UNCACHED_FIRST_INST_bit = 1 << 28, */
+ SQ_PGM_RESOURCES_2_GS = 0x0002887c,
+/* SINGLE_ROUND_mask = 0x03 << 0, */
+/* SINGLE_ROUND_shift = 0, */
+/* SQ_ROUND_NEAREST_EVEN = 0x00, */
+/* SQ_ROUND_PLUS_INFINITY = 0x01, */
+/* SQ_ROUND_MINUS_INFINITY = 0x02, */
+/* SQ_ROUND_TO_ZERO = 0x03, */
+/* DOUBLE_ROUND_mask = 0x03 << 2, */
+/* DOUBLE_ROUND_shift = 2, */
+/* SQ_ROUND_NEAREST_EVEN = 0x00, */
+/* SQ_ROUND_PLUS_INFINITY = 0x01, */
+/* SQ_ROUND_MINUS_INFINITY = 0x02, */
+/* SQ_ROUND_TO_ZERO = 0x03, */
+/* ALLOW_SINGLE_DENORM_IN_bit = 1 << 4, */
+/* ALLOW_SINGLE_DENORM_OUT_bit = 1 << 5, */
+/* ALLOW_DOUBLE_DENORM_IN_bit = 1 << 6, */
+/* ALLOW_DOUBLE_DENORM_OUT_bit = 1 << 7, */
+ SQ_PGM_START_ES = 0x0002888c,
+ SQ_PGM_RESOURCES_ES = 0x00028890,
+/* NUM_GPRS_mask = 0xff << 0, */
+/* NUM_GPRS_shift = 0, */
+/* STACK_SIZE_mask = 0xff << 8, */
+/* STACK_SIZE_shift = 8, */
+/* DX10_CLAMP_bit = 1 << 21, */
+/* UNCACHED_FIRST_INST_bit = 1 << 28, */
+ SQ_PGM_RESOURCES_2_ES = 0x00028894,
+/* SINGLE_ROUND_mask = 0x03 << 0, */
+/* SINGLE_ROUND_shift = 0, */
+/* SQ_ROUND_NEAREST_EVEN = 0x00, */
+/* SQ_ROUND_PLUS_INFINITY = 0x01, */
+/* SQ_ROUND_MINUS_INFINITY = 0x02, */
+/* SQ_ROUND_TO_ZERO = 0x03, */
+/* DOUBLE_ROUND_mask = 0x03 << 2, */
+/* DOUBLE_ROUND_shift = 2, */
+/* SQ_ROUND_NEAREST_EVEN = 0x00, */
+/* SQ_ROUND_PLUS_INFINITY = 0x01, */
+/* SQ_ROUND_MINUS_INFINITY = 0x02, */
+/* SQ_ROUND_TO_ZERO = 0x03, */
+/* ALLOW_SINGLE_DENORM_IN_bit = 1 << 4, */
+/* ALLOW_SINGLE_DENORM_OUT_bit = 1 << 5, */
+/* ALLOW_DOUBLE_DENORM_IN_bit = 1 << 6, */
+/* ALLOW_DOUBLE_DENORM_OUT_bit = 1 << 7, */
+ SQ_PGM_START_FS = 0x000288a4,
+ SQ_PGM_RESOURCES_FS = 0x000288a8,
+/* NUM_GPRS_mask = 0xff << 0, */
+/* NUM_GPRS_shift = 0, */
+/* STACK_SIZE_mask = 0xff << 8, */
+/* STACK_SIZE_shift = 8, */
+/* DX10_CLAMP_bit = 1 << 21, */
+ SQ_PGM_START_HS = 0x000288b8,
+ SQ_PGM_RESOURCES_HS = 0x000288bc,
+/* NUM_GPRS_mask = 0xff << 0, */
+/* NUM_GPRS_shift = 0, */
+/* STACK_SIZE_mask = 0xff << 8, */
+/* STACK_SIZE_shift = 8, */
+/* DX10_CLAMP_bit = 1 << 21, */
+/* UNCACHED_FIRST_INST_bit = 1 << 28, */
+ SQ_PGM_RESOURCES_2_HS = 0x000288c0,
+/* SINGLE_ROUND_mask = 0x03 << 0, */
+/* SINGLE_ROUND_shift = 0, */
+/* SQ_ROUND_NEAREST_EVEN = 0x00, */
+/* SQ_ROUND_PLUS_INFINITY = 0x01, */
+/* SQ_ROUND_MINUS_INFINITY = 0x02, */
+/* SQ_ROUND_TO_ZERO = 0x03, */
+/* DOUBLE_ROUND_mask = 0x03 << 2, */
+/* DOUBLE_ROUND_shift = 2, */
+/* SQ_ROUND_NEAREST_EVEN = 0x00, */
+/* SQ_ROUND_PLUS_INFINITY = 0x01, */
+/* SQ_ROUND_MINUS_INFINITY = 0x02, */
+/* SQ_ROUND_TO_ZERO = 0x03, */
+/* ALLOW_SINGLE_DENORM_IN_bit = 1 << 4, */
+/* ALLOW_SINGLE_DENORM_OUT_bit = 1 << 5, */
+/* ALLOW_DOUBLE_DENORM_IN_bit = 1 << 6, */
+/* ALLOW_DOUBLE_DENORM_OUT_bit = 1 << 7, */
+ SQ_PGM_START_LS = 0x000288d0,
+ SQ_PGM_RESOURCES_LS = 0x000288d4,
+/* NUM_GPRS_mask = 0xff << 0, */
+/* NUM_GPRS_shift = 0, */
+/* STACK_SIZE_mask = 0xff << 8, */
+/* STACK_SIZE_shift = 8, */
+/* DX10_CLAMP_bit = 1 << 21, */
+/* UNCACHED_FIRST_INST_bit = 1 << 28, */
+ SQ_PGM_RESOURCES_2_LS = 0x000288d8,
+/* SINGLE_ROUND_mask = 0x03 << 0, */
+/* SINGLE_ROUND_shift = 0, */
+/* SQ_ROUND_NEAREST_EVEN = 0x00, */
+/* SQ_ROUND_PLUS_INFINITY = 0x01, */
+/* SQ_ROUND_MINUS_INFINITY = 0x02, */
+/* SQ_ROUND_TO_ZERO = 0x03, */
+/* DOUBLE_ROUND_mask = 0x03 << 2, */
+/* DOUBLE_ROUND_shift = 2, */
+/* SQ_ROUND_NEAREST_EVEN = 0x00, */
+/* SQ_ROUND_PLUS_INFINITY = 0x01, */
+/* SQ_ROUND_MINUS_INFINITY = 0x02, */
+/* SQ_ROUND_TO_ZERO = 0x03, */
+/* ALLOW_SINGLE_DENORM_IN_bit = 1 << 4, */
+/* ALLOW_SINGLE_DENORM_OUT_bit = 1 << 5, */
+/* ALLOW_DOUBLE_DENORM_IN_bit = 1 << 6, */
+/* ALLOW_DOUBLE_DENORM_OUT_bit = 1 << 7, */
+ SQ_VTX_SEMANTIC_CLEAR = 0x000288f0,
+ SQ_ESGS_RING_ITEMSIZE = 0x00028900,
+/* ITEMSIZE_mask = 0x7fff << 0, */
+/* ITEMSIZE_shift = 0, */
+ SQ_GSVS_RING_ITEMSIZE = 0x00028904,
+/* ITEMSIZE_mask = 0x7fff << 0, */
+/* ITEMSIZE_shift = 0, */
+ SQ_ESTMP_RING_ITEMSIZE = 0x00028908,
+/* ITEMSIZE_mask = 0x7fff << 0, */
+/* ITEMSIZE_shift = 0, */
+ SQ_GSTMP_RING_ITEMSIZE = 0x0002890c,
+/* ITEMSIZE_mask = 0x7fff << 0, */
+/* ITEMSIZE_shift = 0, */
+ SQ_VSTMP_RING_ITEMSIZE = 0x00028910,
+/* ITEMSIZE_mask = 0x7fff << 0, */
+/* ITEMSIZE_shift = 0, */
+ SQ_PSTMP_RING_ITEMSIZE = 0x00028914,
+/* ITEMSIZE_mask = 0x7fff << 0, */
+/* ITEMSIZE_shift = 0, */
+ SQ_GS_VERT_ITEMSIZE = 0x0002891c,
+/* ITEMSIZE_mask = 0x7fff << 0, */
+/* ITEMSIZE_shift = 0, */
+ SQ_GS_VERT_ITEMSIZE_1 = 0x00028920,
+/* ITEMSIZE_mask = 0x7fff << 0, */
+/* ITEMSIZE_shift = 0, */
+ SQ_GS_VERT_ITEMSIZE_2 = 0x00028924,
+/* ITEMSIZE_mask = 0x7fff << 0, */
+/* ITEMSIZE_shift = 0, */
+ SQ_GS_VERT_ITEMSIZE_3 = 0x00028928,
+/* ITEMSIZE_mask = 0x7fff << 0, */
+/* ITEMSIZE_shift = 0, */
+ SQ_GSVS_RING_OFFSET_1 = 0x0002892c,
+ SQ_GSVS_RING_OFFSET_1__OFFSET_mask = 0x7fff << 0,
+ SQ_GSVS_RING_OFFSET_1__OFFSET_shift = 0,
+ SQ_GSVS_RING_OFFSET_2 = 0x00028930,
+ SQ_GSVS_RING_OFFSET_2__OFFSET_mask = 0x7fff << 0,
+ SQ_GSVS_RING_OFFSET_2__OFFSET_shift = 0,
+ SQ_GSVS_RING_OFFSET_3 = 0x00028934,
+ SQ_GSVS_RING_OFFSET_3__OFFSET_mask = 0x7fff << 0,
+ SQ_GSVS_RING_OFFSET_3__OFFSET_shift = 0,
+ SQ_ALU_CONST_CACHE_PS_0 = 0x00028940,
+ SQ_ALU_CONST_CACHE_PS_0_num = 16,
+ SQ_ALU_CONST_CACHE_VS_0 = 0x00028980,
+ SQ_ALU_CONST_CACHE_VS_0_num = 16,
+ SQ_ALU_CONST_CACHE_GS_0 = 0x000289c0,
+ SQ_ALU_CONST_CACHE_GS_0_num = 16,
+ PA_SU_POINT_SIZE = 0x00028a00,
+ HEIGHT_mask = 0xffff << 0,
+ HEIGHT_shift = 0,
+ PA_SU_POINT_SIZE__WIDTH_mask = 0xffff << 16,
+ PA_SU_POINT_SIZE__WIDTH_shift = 16,
+ PA_SU_POINT_MINMAX = 0x00028a04,
+ MIN_SIZE_mask = 0xffff << 0,
+ MIN_SIZE_shift = 0,
+ PA_SU_POINT_MINMAX__MAX_SIZE_mask = 0xffff << 16,
+ PA_SU_POINT_MINMAX__MAX_SIZE_shift = 16,
+ PA_SU_LINE_CNTL = 0x00028a08,
+ PA_SU_LINE_CNTL__WIDTH_mask = 0xffff << 0,
+ PA_SU_LINE_CNTL__WIDTH_shift = 0,
+ PA_SC_LINE_STIPPLE = 0x00028a0c,
+ LINE_PATTERN_mask = 0xffff << 0,
+ LINE_PATTERN_shift = 0,
+ REPEAT_COUNT_mask = 0xff << 16,
+ REPEAT_COUNT_shift = 16,
+ PATTERN_BIT_ORDER_bit = 1 << 28,
+ AUTO_RESET_CNTL_mask = 0x03 << 29,
+ AUTO_RESET_CNTL_shift = 29,
+ VGT_OUTPUT_PATH_CNTL = 0x00028a10,
+ PATH_SELECT_mask = 0x07 << 0,
+ PATH_SELECT_shift = 0,
+ VGT_OUTPATH_VTX_REUSE = 0x00,
+ VGT_OUTPATH_TESS_EN = 0x01,
+ VGT_OUTPATH_PASSTHRU = 0x02,
+ VGT_OUTPATH_GS_BLOCK = 0x03,
+ VGT_OUTPATH_HS_BLOCK = 0x04,
+ VGT_HOS_CNTL = 0x00028a14,
+ TESS_MODE_mask = 0x03 << 0,
+ TESS_MODE_shift = 0,
+ VGT_HOS_MAX_TESS_LEVEL = 0x00028a18,
+ VGT_HOS_MIN_TESS_LEVEL = 0x00028a1c,
+ VGT_HOS_REUSE_DEPTH = 0x00028a20,
+ REUSE_DEPTH_mask = 0xff << 0,
+ REUSE_DEPTH_shift = 0,
+ VGT_GROUP_PRIM_TYPE = 0x00028a24,
+ VGT_GROUP_PRIM_TYPE__PRIM_TYPE_mask = 0x1f << 0,
+ VGT_GROUP_PRIM_TYPE__PRIM_TYPE_shift = 0,
+ VGT_GRP_3D_POINT = 0x00,
+ VGT_GRP_3D_LINE = 0x01,
+ VGT_GRP_3D_TRI = 0x02,
+ VGT_GRP_3D_RECT = 0x03,
+ VGT_GRP_3D_QUAD = 0x04,
+ VGT_GRP_2D_COPY_RECT_V0 = 0x05,
+ VGT_GRP_2D_COPY_RECT_V1 = 0x06,
+ VGT_GRP_2D_COPY_RECT_V2 = 0x07,
+ VGT_GRP_2D_COPY_RECT_V3 = 0x08,
+ VGT_GRP_2D_FILL_RECT = 0x09,
+ VGT_GRP_2D_LINE = 0x0a,
+ VGT_GRP_2D_TRI = 0x0b,
+ VGT_GRP_PRIM_INDEX_LINE = 0x0c,
+ VGT_GRP_PRIM_INDEX_TRI = 0x0d,
+ VGT_GRP_PRIM_INDEX_QUAD = 0x0e,
+ VGT_GRP_3D_LINE_ADJ = 0x0f,
+ VGT_GRP_3D_TRI_ADJ = 0x10,
+ VGT_GRP_3D_PATCH = 0x11,
+ RETAIN_ORDER_bit = 1 << 14,
+ RETAIN_QUADS_bit = 1 << 15,
+ PRIM_ORDER_mask = 0x07 << 16,
+ PRIM_ORDER_shift = 16,
+ VGT_GRP_LIST = 0x00,
+ VGT_GRP_STRIP = 0x01,
+ VGT_GRP_FAN = 0x02,
+ VGT_GRP_LOOP = 0x03,
+ VGT_GRP_POLYGON = 0x04,
+ VGT_GROUP_FIRST_DECR = 0x00028a28,
+ FIRST_DECR_mask = 0x0f << 0,
+ FIRST_DECR_shift = 0,
+ VGT_GROUP_DECR = 0x00028a2c,
+ DECR_mask = 0x0f << 0,
+ DECR_shift = 0,
+ VGT_GROUP_VECT_0_CNTL = 0x00028a30,
+ COMP_X_EN_bit = 1 << 0,
+ COMP_Y_EN_bit = 1 << 1,
+ COMP_Z_EN_bit = 1 << 2,
+ COMP_W_EN_bit = 1 << 3,
+ VGT_GROUP_VECT_0_CNTL__STRIDE_mask = 0xff << 8,
+ VGT_GROUP_VECT_0_CNTL__STRIDE_shift = 8,
+ SHIFT_mask = 0xff << 16,
+ SHIFT_shift = 16,
+ VGT_GROUP_VECT_1_CNTL = 0x00028a34,
+/* COMP_X_EN_bit = 1 << 0, */
+/* COMP_Y_EN_bit = 1 << 1, */
+/* COMP_Z_EN_bit = 1 << 2, */
+/* COMP_W_EN_bit = 1 << 3, */
+ VGT_GROUP_VECT_1_CNTL__STRIDE_mask = 0xff << 8,
+ VGT_GROUP_VECT_1_CNTL__STRIDE_shift = 8,
+/* SHIFT_mask = 0xff << 16, */
+/* SHIFT_shift = 16, */
+ VGT_GROUP_VECT_0_FMT_CNTL = 0x00028a38,
+ X_CONV_mask = 0x0f << 0,
+ X_CONV_shift = 0,
+ VGT_GRP_INDEX_16 = 0x00,
+ VGT_GRP_INDEX_32 = 0x01,
+ VGT_GRP_UINT_16 = 0x02,
+ VGT_GRP_UINT_32 = 0x03,
+ VGT_GRP_SINT_16 = 0x04,
+ VGT_GRP_SINT_32 = 0x05,
+ VGT_GRP_FLOAT_32 = 0x06,
+ VGT_GRP_AUTO_PRIM = 0x07,
+ VGT_GRP_FIX_1_23_TO_FLOAT = 0x08,
+ X_OFFSET_mask = 0x0f << 4,
+ X_OFFSET_shift = 4,
+ Y_CONV_mask = 0x0f << 8,
+ Y_CONV_shift = 8,
+/* VGT_GRP_INDEX_16 = 0x00, */
+/* VGT_GRP_INDEX_32 = 0x01, */
+/* VGT_GRP_UINT_16 = 0x02, */
+/* VGT_GRP_UINT_32 = 0x03, */
+/* VGT_GRP_SINT_16 = 0x04, */
+/* VGT_GRP_SINT_32 = 0x05, */
+/* VGT_GRP_FLOAT_32 = 0x06, */
+/* VGT_GRP_AUTO_PRIM = 0x07, */
+/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */
+ Y_OFFSET_mask = 0x0f << 12,
+ Y_OFFSET_shift = 12,
+ Z_CONV_mask = 0x0f << 16,
+ Z_CONV_shift = 16,
+/* VGT_GRP_INDEX_16 = 0x00, */
+/* VGT_GRP_INDEX_32 = 0x01, */
+/* VGT_GRP_UINT_16 = 0x02, */
+/* VGT_GRP_UINT_32 = 0x03, */
+/* VGT_GRP_SINT_16 = 0x04, */
+/* VGT_GRP_SINT_32 = 0x05, */
+/* VGT_GRP_FLOAT_32 = 0x06, */
+/* VGT_GRP_AUTO_PRIM = 0x07, */
+/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */
+ Z_OFFSET_mask = 0x0f << 20,
+ Z_OFFSET_shift = 20,
+ W_CONV_mask = 0x0f << 24,
+ W_CONV_shift = 24,
+/* VGT_GRP_INDEX_16 = 0x00, */
+/* VGT_GRP_INDEX_32 = 0x01, */
+/* VGT_GRP_UINT_16 = 0x02, */
+/* VGT_GRP_UINT_32 = 0x03, */
+/* VGT_GRP_SINT_16 = 0x04, */
+/* VGT_GRP_SINT_32 = 0x05, */
+/* VGT_GRP_FLOAT_32 = 0x06, */
+/* VGT_GRP_AUTO_PRIM = 0x07, */
+/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */
+ W_OFFSET_mask = 0x0f << 28,
+ W_OFFSET_shift = 28,
+ VGT_GROUP_VECT_1_FMT_CNTL = 0x00028a3c,
+/* X_CONV_mask = 0x0f << 0, */
+/* X_CONV_shift = 0, */
+/* VGT_GRP_INDEX_16 = 0x00, */
+/* VGT_GRP_INDEX_32 = 0x01, */
+/* VGT_GRP_UINT_16 = 0x02, */
+/* VGT_GRP_UINT_32 = 0x03, */
+/* VGT_GRP_SINT_16 = 0x04, */
+/* VGT_GRP_SINT_32 = 0x05, */
+/* VGT_GRP_FLOAT_32 = 0x06, */
+/* VGT_GRP_AUTO_PRIM = 0x07, */
+/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */
+/* X_OFFSET_mask = 0x0f << 4, */
+/* X_OFFSET_shift = 4, */
+/* Y_CONV_mask = 0x0f << 8, */
+/* Y_CONV_shift = 8, */
+/* VGT_GRP_INDEX_16 = 0x00, */
+/* VGT_GRP_INDEX_32 = 0x01, */
+/* VGT_GRP_UINT_16 = 0x02, */
+/* VGT_GRP_UINT_32 = 0x03, */
+/* VGT_GRP_SINT_16 = 0x04, */
+/* VGT_GRP_SINT_32 = 0x05, */
+/* VGT_GRP_FLOAT_32 = 0x06, */
+/* VGT_GRP_AUTO_PRIM = 0x07, */
+/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */
+/* Y_OFFSET_mask = 0x0f << 12, */
+/* Y_OFFSET_shift = 12, */
+/* Z_CONV_mask = 0x0f << 16, */
+/* Z_CONV_shift = 16, */
+/* VGT_GRP_INDEX_16 = 0x00, */
+/* VGT_GRP_INDEX_32 = 0x01, */
+/* VGT_GRP_UINT_16 = 0x02, */
+/* VGT_GRP_UINT_32 = 0x03, */
+/* VGT_GRP_SINT_16 = 0x04, */
+/* VGT_GRP_SINT_32 = 0x05, */
+/* VGT_GRP_FLOAT_32 = 0x06, */
+/* VGT_GRP_AUTO_PRIM = 0x07, */
+/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */
+/* Z_OFFSET_mask = 0x0f << 20, */
+/* Z_OFFSET_shift = 20, */
+/* W_CONV_mask = 0x0f << 24, */
+/* W_CONV_shift = 24, */
+/* VGT_GRP_INDEX_16 = 0x00, */
+/* VGT_GRP_INDEX_32 = 0x01, */
+/* VGT_GRP_UINT_16 = 0x02, */
+/* VGT_GRP_UINT_32 = 0x03, */
+/* VGT_GRP_SINT_16 = 0x04, */
+/* VGT_GRP_SINT_32 = 0x05, */
+/* VGT_GRP_FLOAT_32 = 0x06, */
+/* VGT_GRP_AUTO_PRIM = 0x07, */
+/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */
+/* W_OFFSET_mask = 0x0f << 28, */
+/* W_OFFSET_shift = 28, */
+ VGT_GS_MODE = 0x00028a40,
+ VGT_GS_MODE__MODE_mask = 0x03 << 0,
+ VGT_GS_MODE__MODE_shift = 0,
+ GS_OFF = 0x00,
+ GS_SCENARIO_A = 0x01,
+ GS_SCENARIO_B = 0x02,
+ GS_SCENARIO_G = 0x03,
+ GS_SCENARIO_C = 0x04,
+ SPRITE_EN = 0x05,
+ ES_PASSTHRU_bit = 1 << 2,
+ CUT_MODE_mask = 0x03 << 3,
+ CUT_MODE_shift = 3,
+ GS_CUT_1024 = 0x00,
+ GS_CUT_512 = 0x01,
+ GS_CUT_256 = 0x02,
+ GS_CUT_128 = 0x03,
+ MODE_HI_bit = 1 << 8,
+ PA_SC_MODE_CNTL_0 = 0x00028a48,
+ MSAA_ENABLE_bit = 1 << 0,
+ VPORT_SCISSOR_ENABLE_bit = 1 << 1,
+ LINE_STIPPLE_ENABLE_bit = 1 << 2,
+ VGT_ENHANCE = 0x00028a50,
+ VGT_GS_PER_ES = 0x00028a54,
+ GS_PER_ES_mask = 0x7ff << 0,
+ GS_PER_ES_shift = 0,
+ VGT_ES_PER_GS = 0x00028a58,
+ ES_PER_GS_mask = 0x7ff << 0,
+ ES_PER_GS_shift = 0,
+ VGT_GS_PER_VS = 0x00028a5c,
+ GS_PER_VS_mask = 0x0f << 0,
+ GS_PER_VS_shift = 0,
+ VGT_GS_OUT_PRIM_TYPE = 0x00028a6c,
+ OUTPRIM_TYPE_mask = 0x3f << 0,
+ OUTPRIM_TYPE_shift = 0,
+ POINTLIST = 0x00,
+ LINESTRIP = 0x01,
+ TRISTRIP = 0x02,
+ VGT_DMA_SIZE = 0x00028a74,
+ VGT_DMA_MAX_SIZE = 0x00028a78,
+ VGT_DMA_INDEX_TYPE = 0x00028a7c,
+/* INDEX_TYPE_mask = 0x03 << 0, */
+/* INDEX_TYPE_shift = 0, */
+ VGT_INDEX_16 = 0x00,
+ VGT_INDEX_32 = 0x01,
+ SWAP_MODE_mask = 0x03 << 2,
+ SWAP_MODE_shift = 2,
+ VGT_DMA_SWAP_NONE = 0x00,
+ VGT_DMA_SWAP_16_BIT = 0x01,
+ VGT_DMA_SWAP_32_BIT = 0x02,
+ VGT_DMA_SWAP_WORD = 0x03,
+ VGT_PRIMITIVEID_EN = 0x00028a84,
+ PRIMITIVEID_EN_bit = 1 << 0,
+ VGT_DMA_NUM_INSTANCES = 0x00028a88,
+ VGT_EVENT_INITIATOR = 0x00028a90,
+ EVENT_TYPE_mask = 0x3f << 0,
+ EVENT_TYPE_shift = 0,
+ SAMPLE_STREAMOUTSTATS1 = 0x01,
+ SAMPLE_STREAMOUTSTATS2 = 0x02,
+ SAMPLE_STREAMOUTSTATS3 = 0x03,
+ CACHE_FLUSH_TS = 0x04,
+ CONTEXT_DONE = 0x05,
+ CACHE_FLUSH = 0x06,
+ CS_PARTIAL_FLUSH = 0x07,
+ RST_PIX_CNT = 0x0d,
+ VS_PARTIAL_FLUSH = 0x0f,
+ PS_PARTIAL_FLUSH = 0x10,
+ FLUSH_HS_OUTPUT = 0x11,
+ FLUSH_LS_OUTPUT = 0x12,
+ CACHE_FLUSH_AND_INV_TS_EVENT = 0x14,
+ ZPASS_DONE = 0x15,
+ CACHE_FLUSH_AND_INV_EVENT = 0x16,
+ PERFCOUNTER_START = 0x17,
+ PERFCOUNTER_STOP = 0x18,
+ PIPELINESTAT_START = 0x19,
+ PIPELINESTAT_STOP = 0x1a,
+ PERFCOUNTER_SAMPLE = 0x1b,
+ FLUSH_ES_OUTPUT = 0x1c,
+ FLUSH_GS_OUTPUT = 0x1d,
+ SAMPLE_PIPELINESTAT = 0x1e,
+ SO_VGTSTREAMOUT_FLUSH = 0x1f,
+ SAMPLE_STREAMOUTSTATS = 0x20,
+ RESET_VTX_CNT = 0x21,
+ BLOCK_CONTEXT_DONE = 0x22,
+ CS_CONTEXT_DONE = 0x23,
+ VGT_FLUSH = 0x24,
+ SQ_NON_EVENT = 0x26,
+ SC_SEND_DB_VPZ = 0x27,
+ BOTTOM_OF_PIPE_TS = 0x28,
+ FLUSH_SX_TS = 0x29,
+ DB_CACHE_FLUSH_AND_INV = 0x2a,
+ FLUSH_AND_INV_DB_DATA_TS = 0x2b,
+ FLUSH_AND_INV_DB_META = 0x2c,
+ FLUSH_AND_INV_CB_DATA_TS = 0x2d,
+ FLUSH_AND_INV_CB_META = 0x2e,
+ CS_DONE = 0x2f,
+ PS_DONE = 0x30,
+ FLUSH_AND_INV_CB_PIXEL_DATA = 0x31,
+ ADDRESS_HI_mask = 0xff << 19,
+ ADDRESS_HI_shift = 19,
+ EXTENDED_EVENT_bit = 1 << 27,
+ VGT_MULTI_PRIM_IB_RESET_EN = 0x00028a94,
+ RESET_EN_bit = 1 << 0,
+ VGT_INSTANCE_STEP_RATE_0 = 0x00028aa0,
+ VGT_INSTANCE_STEP_RATE_1 = 0x00028aa4,
+ VGT_REUSE_OFF = 0x00028ab4,
+ REUSE_OFF_bit = 1 << 0,
+ VGT_VTX_CNT_EN = 0x00028ab8,
+ VTX_CNT_EN_bit = 1 << 0,
+ DB_HTILE_SURFACE = 0x00028abc,
+ HTILE_WIDTH_bit = 1 << 0,
+ HTILE_HEIGHT_bit = 1 << 1,
+ LINEAR_bit = 1 << 2,
+ FULL_CACHE_bit = 1 << 3,
+ HTILE_USES_PRELOAD_WIN_bit = 1 << 4,
+ PRELOAD_bit = 1 << 5,
+ PREFETCH_WIDTH_mask = 0x3f << 6,
+ PREFETCH_WIDTH_shift = 6,
+ PREFETCH_HEIGHT_mask = 0x3f << 12,
+ PREFETCH_HEIGHT_shift = 12,
+ DB_SRESULTS_COMPARE_STATE0 = 0x00028ac0,
+ COMPAREFUNC0_mask = 0x07 << 0,
+ COMPAREFUNC0_shift = 0,
+/* REF_NEVER = 0x00, */
+/* REF_LESS = 0x01, */
+/* REF_EQUAL = 0x02, */
+/* REF_LEQUAL = 0x03, */
+/* REF_GREATER = 0x04, */
+/* REF_NOTEQUAL = 0x05, */
+/* REF_GEQUAL = 0x06, */
+/* REF_ALWAYS = 0x07, */
+ COMPAREVALUE0_mask = 0xff << 4,
+ COMPAREVALUE0_shift = 4,
+ COMPAREMASK0_mask = 0xff << 12,
+ COMPAREMASK0_shift = 12,
+ ENABLE0_bit = 1 << 24,
+ DB_SRESULTS_COMPARE_STATE1 = 0x00028ac4,
+ COMPAREFUNC1_mask = 0x07 << 0,
+ COMPAREFUNC1_shift = 0,
+/* REF_NEVER = 0x00, */
+/* REF_LESS = 0x01, */
+/* REF_EQUAL = 0x02, */
+/* REF_LEQUAL = 0x03, */
+/* REF_GREATER = 0x04, */
+/* REF_NOTEQUAL = 0x05, */
+/* REF_GEQUAL = 0x06, */
+/* REF_ALWAYS = 0x07, */
+ COMPAREVALUE1_mask = 0xff << 4,
+ COMPAREVALUE1_shift = 4,
+ COMPAREMASK1_mask = 0xff << 12,
+ COMPAREMASK1_shift = 12,
+ ENABLE1_bit = 1 << 24,
+ DB_PRELOAD_CONTROL = 0x00028ac8,
+ START_X_mask = 0xff << 0,
+ START_X_shift = 0,
+ START_Y_mask = 0xff << 8,
+ START_Y_shift = 8,
+ MAX_X_mask = 0xff << 16,
+ MAX_X_shift = 16,
+ MAX_Y_mask = 0xff << 24,
+ MAX_Y_shift = 24,
+ VGT_STRMOUT_BUFFER_SIZE_0 = 0x00028ad0,
+ VGT_STRMOUT_VTX_STRIDE_0 = 0x00028ad4,
+ VGT_STRMOUT_VTX_STRIDE_0__STRIDE_mask = 0x3ff << 0,
+ VGT_STRMOUT_VTX_STRIDE_0__STRIDE_shift = 0,
+ VGT_STRMOUT_BUFFER_BASE_0 = 0x00028ad8,
+ VGT_STRMOUT_BUFFER_OFFSET_0 = 0x00028adc,
+ VGT_STRMOUT_BUFFER_SIZE_1 = 0x00028ae0,
+ VGT_STRMOUT_VTX_STRIDE_1 = 0x00028ae4,
+ VGT_STRMOUT_VTX_STRIDE_1__STRIDE_mask = 0x3ff << 0,
+ VGT_STRMOUT_VTX_STRIDE_1__STRIDE_shift = 0,
+ VGT_STRMOUT_BUFFER_BASE_1 = 0x00028ae8,
+ VGT_STRMOUT_BUFFER_OFFSET_1 = 0x00028aec,
+ VGT_STRMOUT_BUFFER_SIZE_2 = 0x00028af0,
+ VGT_STRMOUT_VTX_STRIDE_2 = 0x00028af4,
+ VGT_STRMOUT_VTX_STRIDE_2__STRIDE_mask = 0x3ff << 0,
+ VGT_STRMOUT_VTX_STRIDE_2__STRIDE_shift = 0,
+ VGT_STRMOUT_BUFFER_BASE_2 = 0x00028af8,
+ VGT_STRMOUT_BUFFER_OFFSET_2 = 0x00028afc,
+ VGT_STRMOUT_BUFFER_SIZE_3 = 0x00028b00,
+ VGT_STRMOUT_VTX_STRIDE_3 = 0x00028b04,
+ VGT_STRMOUT_VTX_STRIDE_3__STRIDE_mask = 0x3ff << 0,
+ VGT_STRMOUT_VTX_STRIDE_3__STRIDE_shift = 0,
+ VGT_STRMOUT_BUFFER_BASE_3 = 0x00028b08,
+ VGT_STRMOUT_BUFFER_OFFSET_3 = 0x00028b0c,
+ VGT_STRMOUT_BASE_OFFSET_0 = 0x00028b10,
+ VGT_STRMOUT_BASE_OFFSET_1 = 0x00028b14,
+ VGT_STRMOUT_BASE_OFFSET_2 = 0x00028b18,
+ VGT_STRMOUT_BASE_OFFSET_3 = 0x00028b1c,
+ VGT_STRMOUT_DRAW_OPAQUE_OFFSET = 0x00028b28,
+ VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE = 0x00028b2c,
+ VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE = 0x00028b30,
+ VERTEX_STRIDE_mask = 0x1ff << 0,
+ VERTEX_STRIDE_shift = 0,
+ VGT_GS_MAX_VERT_OUT = 0x00028b38,
+ MAX_VERT_OUT_mask = 0x7ff << 0,
+ MAX_VERT_OUT_shift = 0,
+ VGT_STRMOUT_BASE_OFFSET_HI_0 = 0x00028b44,
+ VGT_STRMOUT_BASE_OFFSET_HI_0__BASE_OFFSET_mask = 0x3f << 0,
+ VGT_STRMOUT_BASE_OFFSET_HI_0__BASE_OFFSET_shift = 0,
+ VGT_STRMOUT_BASE_OFFSET_HI_1 = 0x00028b48,
+ VGT_STRMOUT_BASE_OFFSET_HI_1__BASE_OFFSET_mask = 0x3f << 0,
+ VGT_STRMOUT_BASE_OFFSET_HI_1__BASE_OFFSET_shift = 0,
+ VGT_STRMOUT_BASE_OFFSET_HI_2 = 0x00028b4c,
+ VGT_STRMOUT_BASE_OFFSET_HI_2__BASE_OFFSET_mask = 0x3f << 0,
+ VGT_STRMOUT_BASE_OFFSET_HI_2__BASE_OFFSET_shift = 0,
+ VGT_STRMOUT_BASE_OFFSET_HI_3 = 0x00028b50,
+ VGT_STRMOUT_BASE_OFFSET_HI_3__BASE_OFFSET_mask = 0x3f << 0,
+ VGT_STRMOUT_BASE_OFFSET_HI_3__BASE_OFFSET_shift = 0,
+ VGT_SHADER_STAGES_EN = 0x00028b54,
+ LS_EN_mask = 0x03 << 0,
+ LS_EN_shift = 0,
+ LS_STAGE_OFF = 0x00,
+ LS_STAGE_ON = 0x01,
+ CS_STAGE_ON = 0x02,
+ HS_EN_bit = 1 << 2,
+ ES_EN_mask = 0x03 << 3,
+ ES_EN_shift = 3,
+ ES_STAGE_OFF = 0x00,
+ ES_STAGE_DS = 0x01,
+ ES_STAGE_REAL = 0x02,
+ GS_EN_bit = 1 << 5,
+ VS_EN_mask = 0x03 << 6,
+ VS_EN_shift = 6,
+ VS_STAGE_REAL = 0x00,
+ VS_STAGE_DS = 0x01,
+ VS_STAGE_COPY_SHADER = 0x02,
+ VGT_LS_HS_CONFIG = 0x00028b58,
+ NUM_PATCHES_mask = 0xff << 0,
+ NUM_PATCHES_shift = 0,
+ HS_NUM_INPUT_CP_mask = 0x3f << 8,
+ HS_NUM_INPUT_CP_shift = 8,
+ HS_NUM_OUTPUT_CP_mask = 0x3f << 14,
+ HS_NUM_OUTPUT_CP_shift = 14,
+ VGT_LS_SIZE = 0x00028b5c,
+ VGT_LS_SIZE__SIZE_mask = 0xff << 0,
+ VGT_LS_SIZE__SIZE_shift = 0,
+ PATCH_CP_SIZE_mask = 0x1fff << 8,
+ PATCH_CP_SIZE_shift = 8,
+ VGT_HS_SIZE = 0x00028b60,
+ VGT_HS_SIZE__SIZE_mask = 0xff << 0,
+ VGT_HS_SIZE__SIZE_shift = 0,
+/* PATCH_CP_SIZE_mask = 0x1fff << 8, */
+/* PATCH_CP_SIZE_shift = 8, */
+ VGT_LS_HS_ALLOC = 0x00028b64,
+ HS_TOTAL_OUTPUT_mask = 0x1fff << 0,
+ HS_TOTAL_OUTPUT_shift = 0,
+ LS_HS_TOTAL_OUTPUT_mask = 0x1fff << 13,
+ LS_HS_TOTAL_OUTPUT_shift = 13,
+ VGT_HS_PATCH_CONST = 0x00028b68,
+ VGT_HS_PATCH_CONST__SIZE_mask = 0x1fff << 0,
+ VGT_HS_PATCH_CONST__SIZE_shift = 0,
+ VGT_HS_PATCH_CONST__STRIDE_mask = 0x1fff << 13,
+ VGT_HS_PATCH_CONST__STRIDE_shift = 13,
+ DB_ALPHA_TO_MASK = 0x00028b70,
+ ALPHA_TO_MASK_ENABLE_bit = 1 << 0,
+ ALPHA_TO_MASK_OFFSET0_mask = 0x03 << 8,
+ ALPHA_TO_MASK_OFFSET0_shift = 8,
+ ALPHA_TO_MASK_OFFSET1_mask = 0x03 << 10,
+ ALPHA_TO_MASK_OFFSET1_shift = 10,
+ ALPHA_TO_MASK_OFFSET2_mask = 0x03 << 12,
+ ALPHA_TO_MASK_OFFSET2_shift = 12,
+ ALPHA_TO_MASK_OFFSET3_mask = 0x03 << 14,
+ ALPHA_TO_MASK_OFFSET3_shift = 14,
+ OFFSET_ROUND_bit = 1 << 16,
+ PA_SU_POLY_OFFSET_DB_FMT_CNTL = 0x00028b78,
+ POLY_OFFSET_NEG_NUM_DB_BITS_mask = 0xff << 0,
+ POLY_OFFSET_NEG_NUM_DB_BITS_shift = 0,
+ POLY_OFFSET_DB_IS_FLOAT_FMT_bit = 1 << 8,
+ PA_SU_POLY_OFFSET_CLAMP = 0x00028b7c,
+ PA_SU_POLY_OFFSET_FRONT_SCALE = 0x00028b80,
+ PA_SU_POLY_OFFSET_FRONT_OFFSET = 0x00028b84,
+ PA_SU_POLY_OFFSET_BACK_SCALE = 0x00028b88,
+ PA_SU_POLY_OFFSET_BACK_OFFSET = 0x00028b8c,
+ VGT_GS_INSTANCE_CNT = 0x00028b90,
+ VGT_GS_INSTANCE_CNT__ENABLE_bit = 1 << 0,
+ CNT_mask = 0x7f << 2,
+ CNT_shift = 2,
+ VGT_STRMOUT_CONFIG = 0x00028b94,
+ STREAMOUT_0_EN_bit = 1 << 0,
+ STREAMOUT_1_EN_bit = 1 << 1,
+ STREAMOUT_2_EN_bit = 1 << 2,
+ STREAMOUT_3_EN_bit = 1 << 3,
+ RAST_STREAM_mask = 0x07 << 4,
+ RAST_STREAM_shift = 4,
+ VGT_STRMOUT_BUFFER_CONFIG = 0x00028b98,
+ STREAM_0_BUFFER_EN_mask = 0x0f << 0,
+ STREAM_0_BUFFER_EN_shift = 0,
+ STREAM_1_BUFFER_EN_mask = 0x0f << 4,
+ STREAM_1_BUFFER_EN_shift = 4,
+ STREAM_2_BUFFER_EN_mask = 0x0f << 8,
+ STREAM_2_BUFFER_EN_shift = 8,
+ STREAM_3_BUFFER_EN_mask = 0x0f << 12,
+ STREAM_3_BUFFER_EN_shift = 12,
+ CB_IMMED0_BASE = 0x00028b9c,
+ CB_IMMED0_BASE_num = 12,
+ PA_SC_LINE_CNTL = 0x00028c00,
+ EXPAND_LINE_WIDTH_bit = 1 << 9,
+ LAST_PIXEL_bit = 1 << 10,
+ PERPENDICULAR_ENDCAP_ENA_bit = 1 << 11,
+ DX10_DIAMOND_TEST_ENA_bit = 1 << 12,
+ PA_SC_AA_CONFIG = 0x00028c04,
+ MSAA_NUM_SAMPLES_mask = 0x03 << 0,
+ MSAA_NUM_SAMPLES_shift = 0,
+ AA_MASK_CENTROID_DTMN_bit = 1 << 4,
+ MAX_SAMPLE_DIST_mask = 0x0f << 13,
+ MAX_SAMPLE_DIST_shift = 13,
+ PA_SU_VTX_CNTL = 0x00028c08,
+ PIX_CENTER_bit = 1 << 0,
+ PA_SU_VTX_CNTL__ROUND_MODE_mask = 0x03 << 1,
+ PA_SU_VTX_CNTL__ROUND_MODE_shift = 1,
+ X_TRUNCATE = 0x00,
+ X_ROUND = 0x01,
+ X_ROUND_TO_EVEN = 0x02,
+ X_ROUND_TO_ODD = 0x03,
+ QUANT_MODE_mask = 0x07 << 3,
+ QUANT_MODE_shift = 3,
+ X_1_16TH = 0x00,
+ X_1_8TH = 0x01,
+ X_1_4TH = 0x02,
+ X_1_2 = 0x03,
+ X_1 = 0x04,
+ X_1_256TH = 0x05,
+ X_1_1024TH = 0x06,
+ X_1_4096TH = 0x07,
+ PA_CL_GB_VERT_CLIP_ADJ = 0x00028c0c,
+ PA_CL_GB_VERT_DISC_ADJ = 0x00028c10,
+ PA_CL_GB_HORZ_CLIP_ADJ = 0x00028c14,
+ PA_CL_GB_HORZ_DISC_ADJ = 0x00028c18,
+ PA_SC_AA_SAMPLE_LOCS_0 = 0x00028c1c,
+ S0_X_mask = 0x0f << 0,
+ S0_X_shift = 0,
+ S0_Y_mask = 0x0f << 4,
+ S0_Y_shift = 4,
+ S1_X_mask = 0x0f << 8,
+ S1_X_shift = 8,
+ S1_Y_mask = 0x0f << 12,
+ S1_Y_shift = 12,
+ S2_X_mask = 0x0f << 16,
+ S2_X_shift = 16,
+ S2_Y_mask = 0x0f << 20,
+ S2_Y_shift = 20,
+ S3_X_mask = 0x0f << 24,
+ S3_X_shift = 24,
+ S3_Y_mask = 0x0f << 28,
+ S3_Y_shift = 28,
+ PA_SC_AA_SAMPLE_LOCS_1 = 0x00028c20,
+/* S0_X_mask = 0x0f << 0, */
+/* S0_X_shift = 0, */
+/* S0_Y_mask = 0x0f << 4, */
+/* S0_Y_shift = 4, */
+/* S1_X_mask = 0x0f << 8, */
+/* S1_X_shift = 8, */
+/* S1_Y_mask = 0x0f << 12, */
+/* S1_Y_shift = 12, */
+/* S2_X_mask = 0x0f << 16, */
+/* S2_X_shift = 16, */
+/* S2_Y_mask = 0x0f << 20, */
+/* S2_Y_shift = 20, */
+/* S3_X_mask = 0x0f << 24, */
+/* S3_X_shift = 24, */
+/* S3_Y_mask = 0x0f << 28, */
+/* S3_Y_shift = 28, */
+ PA_SC_AA_SAMPLE_LOCS_2 = 0x00028c24,
+/* S0_X_mask = 0x0f << 0, */
+/* S0_X_shift = 0, */
+/* S0_Y_mask = 0x0f << 4, */
+/* S0_Y_shift = 4, */
+/* S1_X_mask = 0x0f << 8, */
+/* S1_X_shift = 8, */
+/* S1_Y_mask = 0x0f << 12, */
+/* S1_Y_shift = 12, */
+/* S2_X_mask = 0x0f << 16, */
+/* S2_X_shift = 16, */
+/* S2_Y_mask = 0x0f << 20, */
+/* S2_Y_shift = 20, */
+/* S3_X_mask = 0x0f << 24, */
+/* S3_X_shift = 24, */
+/* S3_Y_mask = 0x0f << 28, */
+/* S3_Y_shift = 28, */
+ PA_SC_AA_SAMPLE_LOCS_3 = 0x00028c28,
+/* S0_X_mask = 0x0f << 0, */
+/* S0_X_shift = 0, */
+/* S0_Y_mask = 0x0f << 4, */
+/* S0_Y_shift = 4, */
+/* S1_X_mask = 0x0f << 8, */
+/* S1_X_shift = 8, */
+/* S1_Y_mask = 0x0f << 12, */
+/* S1_Y_shift = 12, */
+/* S2_X_mask = 0x0f << 16, */
+/* S2_X_shift = 16, */
+/* S2_Y_mask = 0x0f << 20, */
+/* S2_Y_shift = 20, */
+/* S3_X_mask = 0x0f << 24, */
+/* S3_X_shift = 24, */
+/* S3_Y_mask = 0x0f << 28, */
+/* S3_Y_shift = 28, */
+ PA_SC_AA_SAMPLE_LOCS_4 = 0x00028c2c,
+/* S0_X_mask = 0x0f << 0, */
+/* S0_X_shift = 0, */
+/* S0_Y_mask = 0x0f << 4, */
+/* S0_Y_shift = 4, */
+/* S1_X_mask = 0x0f << 8, */
+/* S1_X_shift = 8, */
+/* S1_Y_mask = 0x0f << 12, */
+/* S1_Y_shift = 12, */
+/* S2_X_mask = 0x0f << 16, */
+/* S2_X_shift = 16, */
+/* S2_Y_mask = 0x0f << 20, */
+/* S2_Y_shift = 20, */
+/* S3_X_mask = 0x0f << 24, */
+/* S3_X_shift = 24, */
+/* S3_Y_mask = 0x0f << 28, */
+/* S3_Y_shift = 28, */
+ PA_SC_AA_SAMPLE_LOCS_5 = 0x00028c30,
+/* S0_X_mask = 0x0f << 0, */
+/* S0_X_shift = 0, */
+/* S0_Y_mask = 0x0f << 4, */
+/* S0_Y_shift = 4, */
+/* S1_X_mask = 0x0f << 8, */
+/* S1_X_shift = 8, */
+/* S1_Y_mask = 0x0f << 12, */
+/* S1_Y_shift = 12, */
+/* S2_X_mask = 0x0f << 16, */
+/* S2_X_shift = 16, */
+/* S2_Y_mask = 0x0f << 20, */
+/* S2_Y_shift = 20, */
+/* S3_X_mask = 0x0f << 24, */
+/* S3_X_shift = 24, */
+/* S3_Y_mask = 0x0f << 28, */
+/* S3_Y_shift = 28, */
+ PA_SC_AA_SAMPLE_LOCS_6 = 0x00028c34,
+/* S0_X_mask = 0x0f << 0, */
+/* S0_X_shift = 0, */
+/* S0_Y_mask = 0x0f << 4, */
+/* S0_Y_shift = 4, */
+/* S1_X_mask = 0x0f << 8, */
+/* S1_X_shift = 8, */
+/* S1_Y_mask = 0x0f << 12, */
+/* S1_Y_shift = 12, */
+/* S2_X_mask = 0x0f << 16, */
+/* S2_X_shift = 16, */
+/* S2_Y_mask = 0x0f << 20, */
+/* S2_Y_shift = 20, */
+/* S3_X_mask = 0x0f << 24, */
+/* S3_X_shift = 24, */
+/* S3_Y_mask = 0x0f << 28, */
+/* S3_Y_shift = 28, */
+ PA_SC_AA_SAMPLE_LOCS_7 = 0x00028c38,
+/* S0_X_mask = 0x0f << 0, */
+/* S0_X_shift = 0, */
+/* S0_Y_mask = 0x0f << 4, */
+/* S0_Y_shift = 4, */
+/* S1_X_mask = 0x0f << 8, */
+/* S1_X_shift = 8, */
+/* S1_Y_mask = 0x0f << 12, */
+/* S1_Y_shift = 12, */
+/* S2_X_mask = 0x0f << 16, */
+/* S2_X_shift = 16, */
+/* S2_Y_mask = 0x0f << 20, */
+/* S2_Y_shift = 20, */
+/* S3_X_mask = 0x0f << 24, */
+/* S3_X_shift = 24, */
+/* S3_Y_mask = 0x0f << 28, */
+/* S3_Y_shift = 28, */
+ PA_SC_AA_MASK = 0x00028c3c,
+ VGT_VERTEX_REUSE_BLOCK_CNTL = 0x00028c58,
+ VTX_REUSE_DEPTH_mask = 0xff << 0,
+ VTX_REUSE_DEPTH_shift = 0,
+ VGT_OUT_DEALLOC_CNTL = 0x00028c5c,
+ DEALLOC_DIST_mask = 0x7f << 0,
+ DEALLOC_DIST_shift = 0,
+ CB_COLOR0_BASE = 0x00028c60,
+ CB_COLOR0_BASE_num = 12,
+ CB_COLOR0_BASE_offset = 51,
+ CB_COLOR0_PITCH = 0x00028c64,
+ CB_COLOR0_PITCH_num = 12,
+ CB_COLOR0_PITCH_offset = 51,
+ CB_COLOR0_PITCH__TILE_MAX_mask = 0x7ff << 0,
+ CB_COLOR0_PITCH__TILE_MAX_shift = 0,
+ CB_COLOR0_SLICE = 0x00028c68,
+ CB_COLOR0_SLICE_num = 12,
+ CB_COLOR0_SLICE_offset = 51,
+ CB_COLOR0_SLICE__TILE_MAX_mask = 0x3fffff << 0,
+ CB_COLOR0_SLICE__TILE_MAX_shift = 0,
+ CB_COLOR0_VIEW = 0x00028c6c,
+ CB_COLOR0_VIEW_num = 12,
+ CB_COLOR0_VIEW_offset = 51,
+/* SLICE_START_mask = 0x7ff << 0, */
+/* SLICE_START_shift = 0, */
+/* SLICE_MAX_mask = 0x7ff << 13, */
+/* SLICE_MAX_shift = 13, */
+ CB_COLOR0_INFO = 0x00028c70,
+ CB_COLOR0_INFO_num = 12,
+ CB_COLOR0_INFO_offset = 51,
+ ENDIAN_mask = 0x03 << 0,
+ ENDIAN_shift = 0,
+ ENDIAN_NONE = 0x00,
+ ENDIAN_8IN16 = 0x01,
+ ENDIAN_8IN32 = 0x02,
+ ENDIAN_8IN64 = 0x03,
+ CB_COLOR0_INFO__FORMAT_mask = 0x3f << 2,
+ CB_COLOR0_INFO__FORMAT_shift = 2,
+ COLOR_INVALID = 0x00,
+ COLOR_8 = 0x01,
+ COLOR_16 = 0x05,
+ COLOR_16_FLOAT = 0x06,
+ COLOR_8_8 = 0x07,
+ COLOR_5_6_5 = 0x08,
+ COLOR_1_5_5_5 = 0x0a,
+ COLOR_4_4_4_4 = 0x0b,
+ COLOR_5_5_5_1 = 0x0c,
+ COLOR_32 = 0x0d,
+ COLOR_32_FLOAT = 0x0e,
+ COLOR_16_16 = 0x0f,
+ COLOR_16_16_FLOAT = 0x10,
+ COLOR_8_24 = 0x11,
+ COLOR_24_8 = 0x13,
+ COLOR_10_11_11 = 0x15,
+ COLOR_10_11_11_FLOAT = 0x16,
+ COLOR_2_10_10_10 = 0x19,
+ COLOR_8_8_8_8 = 0x1a,
+ COLOR_10_10_10_2 = 0x1b,
+ COLOR_X24_8_32_FLOAT = 0x1c,
+ COLOR_32_32 = 0x1d,
+ COLOR_32_32_FLOAT = 0x1e,
+ COLOR_16_16_16_16 = 0x1f,
+ COLOR_16_16_16_16_FLOAT = 0x20,
+ COLOR_32_32_32_32 = 0x22,
+ COLOR_32_32_32_32_FLOAT = 0x23,
+ CB_COLOR0_INFO__ARRAY_MODE_mask = 0x0f << 8,
+ CB_COLOR0_INFO__ARRAY_MODE_shift = 8,
+/* ARRAY_LINEAR_GENERAL = 0x00, */
+/* ARRAY_LINEAR_ALIGNED = 0x01, */
+/* ARRAY_1D_TILED_THIN1 = 0x02, */
+/* ARRAY_2D_TILED_THIN1 = 0x04, */
+ NUMBER_TYPE_mask = 0x07 << 12,
+ NUMBER_TYPE_shift = 12,
+ NUMBER_UNORM = 0x00,
+ NUMBER_SNORM = 0x01,
+ NUMBER_UINT = 0x04,
+ NUMBER_SINT = 0x05,
+ NUMBER_SRGB = 0x06,
+ NUMBER_FLOAT = 0x07,
+ COMP_SWAP_mask = 0x03 << 15,
+ COMP_SWAP_shift = 15,
+ SWAP_STD = 0x00,
+ SWAP_ALT = 0x01,
+ SWAP_STD_REV = 0x02,
+ SWAP_ALT_REV = 0x03,
+ FAST_CLEAR_bit = 1 << 17,
+ COMPRESSION_bit = 1 << 18,
+ BLEND_CLAMP_bit = 1 << 19,
+ BLEND_BYPASS_bit = 1 << 20,
+ SIMPLE_FLOAT_bit = 1 << 21,
+ CB_COLOR0_INFO__ROUND_MODE_bit = 1 << 22,
+ CB_COLOR0_INFO__TILE_COMPACT_bit = 1 << 23,
+ SOURCE_FORMAT_mask = 0x03 << 24,
+ SOURCE_FORMAT_shift = 24,
+ EXPORT_4C_32BPC = 0x00,
+ EXPORT_4C_16BPC = 0x01,
+ RAT_bit = 1 << 26,
+ RESOURCE_TYPE_mask = 0x07 << 27,
+ RESOURCE_TYPE_shift = 27,
+ BUFFER = 0x00,
+ TEXTURE1D = 0x01,
+ TEXTURE1DARRAY = 0x02,
+ TEXTURE2D = 0x03,
+ TEXTURE2DARRAY = 0x04,
+ TEXTURE3D = 0x05,
+ CB_COLOR0_ATTRIB = 0x00028c74,
+ CB_COLOR0_ATTRIB_num = 12,
+ CB_COLOR0_ATTRIB_offset = 51,
+ IGNORE_SHADER_ENGINE_TILING_bit = 1 << 3,
+ CB_COLOR0_ATTRIB__NON_DISP_TILING_ORDER_bit = 1 << 4,
+ CB_COLOR0_ATTRIB__TILE_SPLIT_mask = 0x0f << 5,
+ CB_COLOR0_ATTRIB__TILE_SPLIT_shift = 5,
+/* ADDR_SURF_TILE_SPLIT_64B = 0x00, */
+/* ADDR_SURF_TILE_SPLIT_128B = 0x01, */
+/* ADDR_SURF_TILE_SPLIT_256B = 0x02, */
+/* ADDR_SURF_TILE_SPLIT_512B = 0x03, */
+/* ADDR_SURF_TILE_SPLIT_1KB = 0x04, */
+/* ADDR_SURF_TILE_SPLIT_2KB = 0x05, */
+/* ADDR_SURF_TILE_SPLIT_4KB = 0x06, */
+ CB_COLOR0_ATTRIB__NUM_BANKS_mask = 0x03 << 10,
+ CB_COLOR0_ATTRIB__NUM_BANKS_shift = 10,
+/* ADDR_SURF_2_BANK = 0x00, */
+/* ADDR_SURF_4_BANK = 0x01, */
+/* ADDR_SURF_8_BANK = 0x02, */
+/* ADDR_SURF_16_BANK = 0x03, */
+ CB_COLOR0_ATTRIB__BANK_WIDTH_mask = 0x03 << 13,
+ CB_COLOR0_ATTRIB__BANK_WIDTH_shift = 13,
+/* ADDR_SURF_BANK_WIDTH_1 = 0x00, */
+/* ADDR_SURF_BANK_WIDTH_2 = 0x01, */
+/* ADDR_SURF_BANK_WIDTH_4 = 0x02, */
+/* ADDR_SURF_BANK_WIDTH_8 = 0x03, */
+ CB_COLOR0_ATTRIB__BANK_HEIGHT_mask = 0x03 << 16,
+ CB_COLOR0_ATTRIB__BANK_HEIGHT_shift = 16,
+/* ADDR_SURF_BANK_HEIGHT_1 = 0x00, */
+/* ADDR_SURF_BANK_HEIGHT_2 = 0x01, */
+/* ADDR_SURF_BANK_HEIGHT_4 = 0x02, */
+/* ADDR_SURF_BANK_HEIGHT_8 = 0x03, */
+ CB_COLOR0_ATTRIB__MACRO_TILE_ASPECT_mask = 0x03 << 19,
+ CB_COLOR0_ATTRIB__MACRO_TILE_ASPECT_shift = 19,
+/* ADDR_SURF_MACRO_ASPECT_1 = 0x00, */
+/* ADDR_SURF_MACRO_ASPECT_2 = 0x01, */
+/* ADDR_SURF_MACRO_ASPECT_4 = 0x02, */
+/* ADDR_SURF_MACRO_ASPECT_8 = 0x03, */
+ FMASK_BANK_HEIGHT_mask = 0x03 << 22,
+ FMASK_BANK_HEIGHT_shift = 22,
+/* ADDR_SURF_BANK_HEIGHT_1 = 0x00, */
+/* ADDR_SURF_BANK_HEIGHT_2 = 0x01, */
+/* ADDR_SURF_BANK_HEIGHT_4 = 0x02, */
+/* ADDR_SURF_BANK_HEIGHT_8 = 0x03, */
+ CB_COLOR0_DIM = 0x00028c78,
+ CB_COLOR0_DIM_num = 12,
+ CB_COLOR0_DIM_offset = 51,
+ WIDTH_MAX_mask = 0xffff << 0,
+ WIDTH_MAX_shift = 0,
+ HEIGHT_MAX_mask = 0xffff << 16,
+ HEIGHT_MAX_shift = 16,
+ CB_COLOR0_CMASK = 0x00028c7c,
+ CB_COLOR0_CMASK_num = 8,
+ CB_COLOR0_CMASK_offset = 60,
+ CB_COLOR0_CMASK_SLICE = 0x00028c80,
+ CB_COLOR0_CMASK_SLICE_num = 8,
+ CB_COLOR0_CMASK_SLICE_offset = 60,
+ CB_COLOR0_CMASK_SLICE__TILE_MAX_mask = 0x3fff << 0,
+ CB_COLOR0_CMASK_SLICE__TILE_MAX_shift = 0,
+ CB_COLOR0_FMASK = 0x00028c84,
+ CB_COLOR0_FMASK_num = 8,
+ CB_COLOR0_FMASK_offset = 60,
+ CB_COLOR0_FMASK_SLICE = 0x00028c88,
+ CB_COLOR0_FMASK_SLICE_num = 8,
+ CB_COLOR0_FMASK_SLICE_offset = 60,
+ CB_COLOR0_FMASK_SLICE__TILE_MAX_mask = 0x3fffff << 0,
+ CB_COLOR0_FMASK_SLICE__TILE_MAX_shift = 0,
+ CB_COLOR0_CLEAR_WORD0 = 0x00028c8c,
+ CB_COLOR0_CLEAR_WORD0_num = 8,
+ CB_COLOR0_CLEAR_WORD0_offset = 60,
+ CB_COLOR0_CLEAR_WORD1 = 0x00028c90,
+ CB_COLOR0_CLEAR_WORD1_num = 8,
+ CB_COLOR0_CLEAR_WORD1_offset = 60,
+ CB_COLOR0_CLEAR_WORD2 = 0x00028c94,
+ CB_COLOR0_CLEAR_WORD2_num = 8,
+ CB_COLOR0_CLEAR_WORD2_offset = 60,
+ CB_COLOR0_CLEAR_WORD3 = 0x00028c98,
+ CB_COLOR0_CLEAR_WORD3_num = 8,
+ CB_COLOR0_CLEAR_WORD3_offset = 60,
+ SQ_ALU_CONST_CACHE_HS_0 = 0x00028f00,
+ SQ_ALU_CONST_CACHE_HS_0_num = 16,
+ SQ_ALU_CONST_CACHE_LS_0 = 0x00028f40,
+ SQ_ALU_CONST_CACHE_LS_0_num = 16,
+ SQ_ALU_CONST_BUFFER_SIZE_HS_0 = 0x00028f80,
+ SQ_ALU_CONST_BUFFER_SIZE_HS_0_num = 16,
+ SQ_ALU_CONST_BUFFER_SIZE_HS_0__DATA_mask = 0x1ff << 0,
+ SQ_ALU_CONST_BUFFER_SIZE_HS_0__DATA_shift = 0,
+ SQ_ALU_CONST_BUFFER_SIZE_LS_0 = 0x00028fc0,
+ SQ_ALU_CONST_BUFFER_SIZE_LS_0_num = 16,
+ SQ_ALU_CONST_BUFFER_SIZE_LS_0__DATA_mask = 0x1ff << 0,
+ SQ_ALU_CONST_BUFFER_SIZE_LS_0__DATA_shift = 0,
+ SQ_VTX_CONSTANT_WORD0_0 = 0x00030000,
+ SQ_TEX_RESOURCE_WORD0_0 = 0x00030000,
+ DIM_mask = 0x07 << 0,
+ DIM_shift = 0,
+ SQ_TEX_DIM_1D = 0x00,
+ SQ_TEX_DIM_2D = 0x01,
+ SQ_TEX_DIM_3D = 0x02,
+ SQ_TEX_DIM_CUBEMAP = 0x03,
+ SQ_TEX_DIM_1D_ARRAY = 0x04,
+ SQ_TEX_DIM_2D_ARRAY = 0x05,
+ SQ_TEX_DIM_2D_MSAA = 0x06,
+ SQ_TEX_DIM_2D_ARRAY_MSAA = 0x07,
+/* IGNORE_SHADER_ENGINE_TILING_bit = 1 << 3, */
+ SQ_TEX_RESOURCE_WORD0_0__NON_DISP_TILING_ORDER_bit= 1 << 5,
+ PITCH_mask = 0xfff << 6,
+ PITCH_shift = 6,
+ TEX_WIDTH_mask = 0x3fff << 18,
+ TEX_WIDTH_shift = 18,
+ SQ_VTX_CONSTANT_WORD1_0 = 0x00030004,
+ SQ_TEX_RESOURCE_WORD1_0 = 0x00030004,
+ TEX_HEIGHT_mask = 0x3fff << 0,
+ TEX_HEIGHT_shift = 0,
+ TEX_DEPTH_mask = 0x1fff << 14,
+ TEX_DEPTH_shift = 14,
+ SQ_TEX_RESOURCE_WORD1_0__ARRAY_MODE_mask = 0x0f << 28,
+ SQ_TEX_RESOURCE_WORD1_0__ARRAY_MODE_shift = 28,
+ SQ_VTX_CONSTANT_WORD2_0 = 0x00030008,
+ BASE_ADDRESS_HI_mask = 0xff << 0,
+ BASE_ADDRESS_HI_shift = 0,
+ SQ_VTX_CONSTANT_WORD2_0__STRIDE_mask = 0x7ff << 8,
+ SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift = 8,
+ SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit = 1 << 19,
+ SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_mask = 0x3f << 20,
+ SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift = 20,
+ SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask = 0x03 << 26,
+ SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift = 26,
+/* SQ_NUM_FORMAT_NORM = 0x00, */
+/* SQ_NUM_FORMAT_INT = 0x01, */
+/* SQ_NUM_FORMAT_SCALED = 0x02, */
+ SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit = 1 << 28,
+ SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit = 1 << 29,
+ SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_mask = 0x03 << 30,
+ SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift = 30,
+/* SQ_ENDIAN_NONE = 0x00, */
+/* SQ_ENDIAN_8IN16 = 0x01, */
+/* SQ_ENDIAN_8IN32 = 0x02, */
+ SQ_TEX_RESOURCE_WORD2_0 = 0x00030008,
+ SQ_VTX_CONSTANT_WORD3_0 = 0x0003000c,
+ SQ_VTX_CONSTANT_WORD3_0__UNCACHED_bit = 1 << 2,
+ SQ_VTX_CONSTANT_WORD3_0__DST_SEL_X_mask = 0x07 << 3,
+ SQ_VTX_CONSTANT_WORD3_0__DST_SEL_X_shift = 3,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+ SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Y_mask = 0x07 << 6,
+ SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Y_shift = 6,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+ SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Z_mask = 0x07 << 9,
+ SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Z_shift = 9,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+ SQ_VTX_CONSTANT_WORD3_0__DST_SEL_W_mask = 0x07 << 12,
+ SQ_VTX_CONSTANT_WORD3_0__DST_SEL_W_shift = 12,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+ SQ_TEX_RESOURCE_WORD3_0 = 0x0003000c,
+ SQ_TEX_RESOURCE_WORD4_0 = 0x00030010,
+ FORMAT_COMP_X_mask = 0x03 << 0,
+ FORMAT_COMP_X_shift = 0,
+ SQ_FORMAT_COMP_UNSIGNED = 0x00,
+ SQ_FORMAT_COMP_SIGNED = 0x01,
+ SQ_FORMAT_COMP_UNSIGNED_BIASED = 0x02,
+ FORMAT_COMP_Y_mask = 0x03 << 2,
+ FORMAT_COMP_Y_shift = 2,
+/* SQ_FORMAT_COMP_UNSIGNED = 0x00, */
+/* SQ_FORMAT_COMP_SIGNED = 0x01, */
+/* SQ_FORMAT_COMP_UNSIGNED_BIASED = 0x02, */
+ FORMAT_COMP_Z_mask = 0x03 << 4,
+ FORMAT_COMP_Z_shift = 4,
+/* SQ_FORMAT_COMP_UNSIGNED = 0x00, */
+/* SQ_FORMAT_COMP_SIGNED = 0x01, */
+/* SQ_FORMAT_COMP_UNSIGNED_BIASED = 0x02, */
+ FORMAT_COMP_W_mask = 0x03 << 6,
+ FORMAT_COMP_W_shift = 6,
+/* SQ_FORMAT_COMP_UNSIGNED = 0x00, */
+/* SQ_FORMAT_COMP_SIGNED = 0x01, */
+/* SQ_FORMAT_COMP_UNSIGNED_BIASED = 0x02, */
+ SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_mask = 0x03 << 8,
+ SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift = 8,
+/* SQ_NUM_FORMAT_NORM = 0x00, */
+/* SQ_NUM_FORMAT_INT = 0x01, */
+/* SQ_NUM_FORMAT_SCALED = 0x02, */
+ SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit = 1 << 10,
+ SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit = 1 << 11,
+ SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_mask = 0x03 << 12,
+ SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift = 12,
+/* SQ_ENDIAN_NONE = 0x00, */
+/* SQ_ENDIAN_8IN16 = 0x01, */
+/* SQ_ENDIAN_8IN32 = 0x02, */
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask = 0x07 << 16,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift = 16,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask = 0x07 << 19,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift = 19,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask = 0x07 << 22,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift = 22,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask = 0x07 << 25,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift = 25,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+ BASE_LEVEL_mask = 0x0f << 28,
+ BASE_LEVEL_shift = 28,
+ SQ_VTX_CONSTANT_WORD4_0 = 0x00030010,
+ SQ_TEX_RESOURCE_WORD5_0 = 0x00030014,
+ LAST_LEVEL_mask = 0x0f << 0,
+ LAST_LEVEL_shift = 0,
+ BASE_ARRAY_mask = 0x1fff << 4,
+ BASE_ARRAY_shift = 4,
+ LAST_ARRAY_mask = 0x1fff << 17,
+ LAST_ARRAY_shift = 17,
+ SQ_TEX_RESOURCE_WORD6_0 = 0x00030018,
+ PERF_MODULATION_mask = 0x07 << 3,
+ PERF_MODULATION_shift = 3,
+ INTERLACED_bit = 1 << 6,
+ SQ_TEX_RESOURCE_WORD6_0__MIN_LOD_mask = 0xfff << 8,
+ SQ_TEX_RESOURCE_WORD6_0__MIN_LOD_shift = 8,
+ SQ_TEX_RESOURCE_WORD6_0__TILE_SPLIT_mask = 0x07 << 29,
+ SQ_TEX_RESOURCE_WORD6_0__TILE_SPLIT_shift = 29,
+ SQ_ADDR_SURF_TILE_SPLIT_64B = 0x00,
+ SQ_ADDR_SURF_TILE_SPLIT_128B = 0x01,
+ SQ_ADDR_SURF_TILE_SPLIT_256B = 0x02,
+ SQ_ADDR_SURF_TILE_SPLIT_512B = 0x03,
+ SQ_ADDR_SURF_TILE_SPLIT_1KB = 0x04,
+ SQ_ADDR_SURF_TILE_SPLIT_2KB = 0x05,
+ SQ_ADDR_SURF_TILE_SPLIT_4KB = 0x06,
+ SQ_VTX_CONSTANT_WORD7_0 = 0x0003001c,
+ SQ_VTX_CONSTANT_WORD7_0__TYPE_mask = 0x03 << 30,
+ SQ_VTX_CONSTANT_WORD7_0__TYPE_shift = 30,
+ SQ_TEX_VTX_INVALID_TEXTURE = 0x00,
+ SQ_TEX_VTX_INVALID_BUFFER = 0x01,
+ SQ_TEX_VTX_VALID_TEXTURE = 0x02,
+ SQ_TEX_VTX_VALID_BUFFER = 0x03,
+ SQ_TEX_RESOURCE_WORD7_0 = 0x0003001c,
+ SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask = 0x3f << 0,
+ SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift = 0,
+ SQ_TEX_RESOURCE_WORD7_0__MACRO_TILE_ASPECT_mask = 0x03 << 6,
+ SQ_TEX_RESOURCE_WORD7_0__MACRO_TILE_ASPECT_shift = 6,
+ SQ_ADDR_SURF_MACRO_ASPECT_1 = 0x00,
+ SQ_ADDR_SURF_MACRO_ASPECT_2 = 0x01,
+ SQ_ADDR_SURF_MACRO_ASPECT_4 = 0x02,
+ SQ_ADDR_SURF_MACRO_ASPECT_8 = 0x03,
+ SQ_TEX_RESOURCE_WORD7_0__BANK_WIDTH_mask = 0x03 << 8,
+ SQ_TEX_RESOURCE_WORD7_0__BANK_WIDTH_shift = 8,
+ SQ_ADDR_SURF_BANK_WH_1 = 0x00,
+ SQ_ADDR_SURF_BANK_WH_2 = 0x01,
+ SQ_ADDR_SURF_BANK_WH_4 = 0x02,
+ SQ_ADDR_SURF_BANK_WH_8 = 0x03,
+ SQ_TEX_RESOURCE_WORD7_0__BANK_HEIGHT_mask = 0x03 << 10,
+ SQ_TEX_RESOURCE_WORD7_0__BANK_HEIGHT_shift = 10,
+/* SQ_ADDR_SURF_BANK_WH_1 = 0x00, */
+/* SQ_ADDR_SURF_BANK_WH_2 = 0x01, */
+/* SQ_ADDR_SURF_BANK_WH_4 = 0x02, */
+/* SQ_ADDR_SURF_BANK_WH_8 = 0x03, */
+ DEPTH_SAMPLE_ORDER_bit = 1 << 15,
+ SQ_TEX_RESOURCE_WORD7_0__NUM_BANKS_mask = 0x03 << 16,
+ SQ_TEX_RESOURCE_WORD7_0__NUM_BANKS_shift = 16,
+ SQ_ADDR_SURF_2_BANK = 0x00,
+ SQ_ADDR_SURF_4_BANK = 0x01,
+ SQ_ADDR_SURF_8_BANK = 0x02,
+ SQ_ADDR_SURF_16_BANK = 0x03,
+ SQ_TEX_RESOURCE_WORD7_0__TYPE_mask = 0x03 << 30,
+ SQ_TEX_RESOURCE_WORD7_0__TYPE_shift = 30,
+/* SQ_TEX_VTX_INVALID_TEXTURE = 0x00, */
+/* SQ_TEX_VTX_INVALID_BUFFER = 0x01, */
+/* SQ_TEX_VTX_VALID_TEXTURE = 0x02, */
+/* SQ_TEX_VTX_VALID_BUFFER = 0x03, */
+ SQ_LOOP_CONST_DX10_0 = 0x0003a200,
+ SQ_LOOP_CONST_0 = 0x0003a200,
+ SQ_LOOP_CONST_0__COUNT_mask = 0xfff << 0,
+ SQ_LOOP_CONST_0__COUNT_shift = 0,
+ INIT_mask = 0xfff << 12,
+ INIT_shift = 12,
+ INC_mask = 0xff << 24,
+ INC_shift = 24,
+ SQ_JUMPTABLE_CONST_0 = 0x0003a200,
+ CONST_A_mask = 0xff << 0,
+ CONST_A_shift = 0,
+ CONST_B_mask = 0xff << 8,
+ CONST_B_shift = 8,
+ CONST_C_mask = 0xff << 16,
+ CONST_C_shift = 16,
+ CONST_D_mask = 0xff << 24,
+ CONST_D_shift = 24,
+ SQ_BOOL_CONST_0 = 0x0003a500,
+ SQ_BOOL_CONST_0_num = 6,
+ SQ_TEX_SAMPLER_WORD0_0 = 0x0003c000,
+ SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_mask = 0x07 << 0,
+ SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift = 0,
+ SQ_TEX_WRAP = 0x00,
+ SQ_TEX_MIRROR = 0x01,
+ SQ_TEX_CLAMP_LAST_TEXEL = 0x02,
+ SQ_TEX_MIRROR_ONCE_LAST_TEXEL = 0x03,
+ SQ_TEX_CLAMP_HALF_BORDER = 0x04,
+ SQ_TEX_MIRROR_ONCE_HALF_BORDER = 0x05,
+ SQ_TEX_CLAMP_BORDER = 0x06,
+ SQ_TEX_MIRROR_ONCE_BORDER = 0x07,
+ CLAMP_Y_mask = 0x07 << 3,
+ CLAMP_Y_shift = 3,
+/* SQ_TEX_WRAP = 0x00, */
+/* SQ_TEX_MIRROR = 0x01, */
+/* SQ_TEX_CLAMP_LAST_TEXEL = 0x02, */
+/* SQ_TEX_MIRROR_ONCE_LAST_TEXEL = 0x03, */
+/* SQ_TEX_CLAMP_HALF_BORDER = 0x04, */
+/* SQ_TEX_MIRROR_ONCE_HALF_BORDER = 0x05, */
+/* SQ_TEX_CLAMP_BORDER = 0x06, */
+/* SQ_TEX_MIRROR_ONCE_BORDER = 0x07, */
+ CLAMP_Z_mask = 0x07 << 6,
+ CLAMP_Z_shift = 6,
+/* SQ_TEX_WRAP = 0x00, */
+/* SQ_TEX_MIRROR = 0x01, */
+/* SQ_TEX_CLAMP_LAST_TEXEL = 0x02, */
+/* SQ_TEX_MIRROR_ONCE_LAST_TEXEL = 0x03, */
+/* SQ_TEX_CLAMP_HALF_BORDER = 0x04, */
+/* SQ_TEX_MIRROR_ONCE_HALF_BORDER = 0x05, */
+/* SQ_TEX_CLAMP_BORDER = 0x06, */
+/* SQ_TEX_MIRROR_ONCE_BORDER = 0x07, */
+ XY_MAG_FILTER_mask = 0x03 << 9,
+ XY_MAG_FILTER_shift = 9,
+ SQ_TEX_XY_FILTER_POINT = 0x00,
+ SQ_TEX_XY_FILTER_BILINEAR = 0x01,
+ XY_MIN_FILTER_mask = 0x03 << 11,
+ XY_MIN_FILTER_shift = 11,
+/* SQ_TEX_XY_FILTER_POINT = 0x00, */
+/* SQ_TEX_XY_FILTER_BILINEAR = 0x01, */
+ Z_FILTER_mask = 0x03 << 13,
+ Z_FILTER_shift = 13,
+ SQ_TEX_Z_FILTER_NONE = 0x00,
+ SQ_TEX_Z_FILTER_POINT = 0x01,
+ SQ_TEX_Z_FILTER_LINEAR = 0x02,
+ MIP_FILTER_mask = 0x03 << 15,
+ MIP_FILTER_shift = 15,
+/* SQ_TEX_Z_FILTER_NONE = 0x00, */
+/* SQ_TEX_Z_FILTER_POINT = 0x01, */
+/* SQ_TEX_Z_FILTER_LINEAR = 0x02, */
+ BORDER_COLOR_TYPE_mask = 0x03 << 20,
+ BORDER_COLOR_TYPE_shift = 20,
+ SQ_TEX_BORDER_COLOR_TRANS_BLACK = 0x00,
+ SQ_TEX_BORDER_COLOR_OPAQUE_BLACK = 0x01,
+ SQ_TEX_BORDER_COLOR_OPAQUE_WHITE = 0x02,
+ SQ_TEX_BORDER_COLOR_REGISTER = 0x03,
+ DEPTH_COMPARE_FUNCTION_mask = 0x07 << 22,
+ DEPTH_COMPARE_FUNCTION_shift = 22,
+ SQ_TEX_DEPTH_COMPARE_NEVER = 0x00,
+ SQ_TEX_DEPTH_COMPARE_LESS = 0x01,
+ SQ_TEX_DEPTH_COMPARE_EQUAL = 0x02,
+ SQ_TEX_DEPTH_COMPARE_LESSEQUAL = 0x03,
+ SQ_TEX_DEPTH_COMPARE_GREATER = 0x04,
+ SQ_TEX_DEPTH_COMPARE_NOTEQUAL = 0x05,
+ SQ_TEX_DEPTH_COMPARE_GREATEREQUAL = 0x06,
+ SQ_TEX_DEPTH_COMPARE_ALWAYS = 0x07,
+ CHROMA_KEY_mask = 0x03 << 25,
+ CHROMA_KEY_shift = 25,
+ SQ_TEX_CHROMA_KEY_DISABLED = 0x00,
+ SQ_TEX_CHROMA_KEY_KILL = 0x01,
+ SQ_TEX_CHROMA_KEY_BLEND = 0x02,
+ SQ_TEX_SAMPLER_WORD1_0 = 0x0003c004,
+ SQ_TEX_SAMPLER_WORD1_0__MIN_LOD_mask = 0xfff << 0,
+ SQ_TEX_SAMPLER_WORD1_0__MIN_LOD_shift = 0,
+ MAX_LOD_mask = 0xfff << 12,
+ MAX_LOD_shift = 12,
+ PERF_MIP_mask = 0x0f << 24,
+ PERF_MIP_shift = 24,
+ PERF_Z_mask = 0x0f << 28,
+ PERF_Z_shift = 28,
+ SQ_TEX_SAMPLER_WORD2_0 = 0x0003c008,
+ SQ_TEX_SAMPLER_WORD2_0__LOD_BIAS_mask = 0x3fff << 0,
+ SQ_TEX_SAMPLER_WORD2_0__LOD_BIAS_shift = 0,
+ LOD_BIAS_SEC_mask = 0x3f << 14,
+ LOD_BIAS_SEC_shift = 14,
+ MC_COORD_TRUNCATE_bit = 1 << 20,
+ SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit = 1 << 21,
+ TRUNCATE_COORD_bit = 1 << 28,
+ SQ_TEX_SAMPLER_WORD2_0__DISABLE_CUBE_WRAP_bit = 1 << 29,
+ SQ_TEX_SAMPLER_WORD2_0__TYPE_bit = 1 << 31,
+ SQ_VTX_BASE_VTX_LOC = 0x0003cff0,
+ SQ_VTX_START_INST_LOC = 0x0003cff4,
+ SQ_TEX_SAMPLER_CLEAR = 0x0003ff00,
+ SQ_TEX_RESOURCE_CLEAR = 0x0003ff04,
+ SQ_LOOP_BOOL_CLEAR = 0x0003ff08,
+
+} ;
+
+#endif /* _EVERGREEN_REG_AUTO_H */
+
diff --git a/evergreen_shader.c b/evergreen_shader.c
new file mode 100644
index 0000000..8703424
--- /dev/null
+++ b/evergreen_shader.c
@@ -0,0 +1,3155 @@
+/*
+ * Copyright 2010 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Author: Alex Deucher <alexander.deucher@amd.com>
+ *
+ */
+
+#include "evergreen_shader.h"
+#include "evergreen_reg.h"
+
+/* solid vs: emits the "solid" vertex shader into shader[] and returns the number of dwords written (12) */
+int evergreen_solid_vs(RADEONChipFamily ChipSet, uint32_t* shader)
+{
+ int i = 0; /* next free dword in shader[]; ChipSet is not referenced in this function */
+
+ /* 0: CF VC clause, ADDR(4) = slot 4 below (slots count dword pairs), one fetch instruction */
+ shader[i++] = CF_DWORD0(ADDR(4),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(1),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_VC),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ /* 1: export GPR 1 (the destination of the fetch at slot 4) as position CF_POS0 */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
+ TYPE(SQ_EXPORT_POS),
+ RW_GPR(1),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ BURST_COUNT(1),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ MARK(0),
+ BARRIER(1));
+ /* 2 - always export a param whether it's used or not; source is GPR 0 and END_OF_PROGRAM is set here */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
+ TYPE(SQ_EXPORT_PARAM),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ BURST_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(1),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ MARK(0),
+ BARRIER(0));
+ /* 3 - padding: two zero dwords so that the fetch below lands at slot 4, the ADDR of CF 0 */
+ shader[i++] = 0x00000000;
+ shader[i++] = 0x00000000;
+ /* 4/5: vertex fetch from buffer 0, offset 0, FMT_32_32_FLOAT into GPR 1; z/w use SQ_SEL_0/SQ_SEL_1 */
+ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ MEGA_FETCH_COUNT(8));
+ shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
+ DST_REL(0),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_0),
+ DST_SEL_W(SQ_SEL_1),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_32_32_FLOAT),
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+ shader[i++] = VTX_DWORD2(OFFSET(0),
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+ ENDIAN_SWAP(SQ_ENDIAN_8IN32), /* selected only when building for a big-endian host */
+#else
+ ENDIAN_SWAP(SQ_ENDIAN_NONE),
+#endif
+ CONST_BUF_NO_STRIDE(0),
+ MEGA_FETCH(1),
+ ALT_CONST(0),
+ BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = VTX_DWORD_PAD; /* fourth dword written for the fetch instruction */
+
+ return i;
+}
+
+/* solid ps: emits the "solid" pixel shader into shader[] and returns the number of dwords written (12) */
+int evergreen_solid_ps(RADEONChipFamily ChipSet, uint32_t* shader)
+{
+ int i = 0; /* next free dword in shader[]; ChipSet is not referenced in this function */
+
+ /* 0: CF ALU clause, ADDR(2) = slot 2 below, four instructions, KCACHE_MODE0 is SQ_CF_KCACHE_LOCK_1 */
+ shader[i++] = CF_ALU_DWORD0(ADDR(2),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ I_COUNT(4),
+ ALT_CONST(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ /* 1: export GPR 0 (written by the four MOVs below) to CF_PIXEL_MRT0; END_OF_PROGRAM is set here */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+ TYPE(SQ_EXPORT_PIXEL),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(1));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ BURST_COUNT(1),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(1),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ MARK(0),
+ BARRIER(1));
+
+ /* 2: MOV with SRC0 = kcache0 entry 0, element X; destination GPR 0 element X; CLAMP(1) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0), /* NOTE(review): MOV presumably ignores SRC1 - confirm */
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(1));
+ /* 3: same MOV for element Y (kcache0 entry 0, element Y -> GPR 0 element Y) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(1));
+ /* 4: same MOV for element Z (kcache0 entry 0, element Z -> GPR 0 element Z) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(1));
+ /* 5: same MOV for element W; this fourth and final ALU instruction is the only one with LAST(1) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(1));
+
+ return i;
+}
+
+/* copy vs: emits the "copy" vertex shader into shader[] and returns the number of dwords written (16) */
+int evergreen_copy_vs(RADEONChipFamily ChipSet, uint32_t* shader)
+{
+ int i = 0; /* next free dword in shader[]; ChipSet is not referenced in this function */
+
+ /* 0: CF VC clause, ADDR(4) = slot 4 below (slots count dword pairs), two fetch instructions */
+ shader[i++] = CF_DWORD0(ADDR(4),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(2),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_VC),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ /* 1: export GPR 1 (destination of the first fetch, slots 4/5) as position CF_POS0 */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
+ TYPE(SQ_EXPORT_POS),
+ RW_GPR(1),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ BURST_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ MARK(0),
+ BARRIER(1));
+ /* 2: export GPR 0 (destination of the second fetch, slots 6/7) as param 0; END_OF_PROGRAM is set here */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
+ TYPE(SQ_EXPORT_PARAM),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ BURST_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(1),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ MARK(0),
+ BARRIER(0));
+ /* 3: padding, two zero dwords so that the first fetch lands at slot 4, the ADDR of CF 0 */
+ shader[i++] = 0x00000000;
+ shader[i++] = 0x00000000;
+ /* 4/5: vertex fetch from buffer 0, OFFSET(0), FMT_32_32_FLOAT into GPR 1; MEGA_FETCH(1), count 16 */
+ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ MEGA_FETCH_COUNT(16));
+ shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
+ DST_REL(0),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_0),
+ DST_SEL_W(SQ_SEL_1),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_32_32_FLOAT),
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+ shader[i++] = VTX_DWORD2(OFFSET(0),
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+ ENDIAN_SWAP(SQ_ENDIAN_8IN32), /* selected only when building for a big-endian host */
+#else
+ ENDIAN_SWAP(SQ_ENDIAN_NONE),
+#endif
+ CONST_BUF_NO_STRIDE(0),
+ MEGA_FETCH(1),
+ ALT_CONST(0),
+ BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = VTX_DWORD_PAD; /* fourth dword written for the fetch instruction */
+ /* 6/7: vertex fetch from buffer 0, OFFSET(8), FMT_32_32_FLOAT into GPR 0; MEGA_FETCH(0) */
+ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ MEGA_FETCH_COUNT(8));
+ shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
+ DST_REL(0),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_0),
+ DST_SEL_W(SQ_SEL_1),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_32_32_FLOAT),
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+ shader[i++] = VTX_DWORD2(OFFSET(8),
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+ ENDIAN_SWAP(SQ_ENDIAN_8IN32), /* selected only when building for a big-endian host */
+#else
+ ENDIAN_SWAP(SQ_ENDIAN_NONE),
+#endif
+ CONST_BUF_NO_STRIDE(0),
+ MEGA_FETCH(0),
+ ALT_CONST(0),
+ BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = VTX_DWORD_PAD; /* fourth dword written for the fetch instruction */
+
+ return i;
+}
+
+/* copy ps: emits the "copy" pixel shader into shader[] and returns the number of dwords written (20) */
+int evergreen_copy_ps(RADEONChipFamily ChipSet, uint32_t* shader)
+{
+ int i = 0; /* next free dword in shader[]; ChipSet is not referenced in this function */
+
+ /* CF INST 0: ALU clause, ADDR(3) = slot 3 below, four instructions, both KCACHE modes SQ_CF_KCACHE_NOP */
+ shader[i++] = CF_ALU_DWORD0(ADDR(3),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_NOP));
+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ I_COUNT(4),
+ ALT_CONST(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ /* CF INST 1: TC clause, ADDR(8) = slot 8 below, one instruction */
+ shader[i++] = CF_DWORD0(ADDR(8),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(1),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_TC),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ /* CF INST 2: export GPR 0 (destination of the TEX sample) to CF_PIXEL_MRT0; END_OF_PROGRAM is set here */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+ TYPE(SQ_EXPORT_PIXEL),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(1));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ BURST_COUNT(1),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(1),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ MARK(0),
+ BARRIER(1));
+
+ /* 3 interpolate tex coords: INTERP_XY, SRC0 = GPR 0 element Y, SRC1 = param 0, destination GPR 0 element X */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+ /* 4: INTERP_XY, SRC0 = GPR 0 element X, SRC1 = param 0, destination GPR 0 element Y */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+ /* 5: same sources as slot 3, destination element Z with WRITE_MASK(0) (presumably result discarded - confirm) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+ /* 6: same sources as slot 4, destination element W with WRITE_MASK(0); only this instruction has LAST(1) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+
+ /* 7: padding, two zero dwords so that the TEX instruction lands at slot 8, the ADDR of CF INST 1 */
+ shader[i++] = 0x00000000;
+ shader[i++] = 0x00000000;
+
+ /* 8/9 TEX INST 0: SAMPLE of resource 0 with sampler 0, coords from GPR 0 (TEX_UNNORMALIZED), result to GPR 0 */
+ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+ INST_MOD(0),
+ FETCH_WHOLE_QUAD(0),
+ RESOURCE_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ ALT_CONST(0),
+ RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
+ SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = TEX_DWORD1(DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(SQ_SEL_X), /* R */
+ DST_SEL_Y(SQ_SEL_Y), /* G */
+ DST_SEL_Z(SQ_SEL_Z), /* B */
+ DST_SEL_W(SQ_SEL_W), /* A */
+ LOD_BIAS(0),
+ COORD_TYPE_X(TEX_UNNORMALIZED),
+ COORD_TYPE_Y(TEX_UNNORMALIZED),
+ COORD_TYPE_Z(TEX_UNNORMALIZED),
+ COORD_TYPE_W(TEX_UNNORMALIZED));
+ shader[i++] = TEX_DWORD2(OFFSET_X(0),
+ OFFSET_Y(0),
+ OFFSET_Z(0),
+ SAMPLER_ID(0),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1));
+ shader[i++] = TEX_DWORD_PAD; /* fourth dword written for the TEX instruction */
+
+ return i;
+}
+
+int evergreen_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader)
+{
+ int i = 0; /* next free dword in shader[]; ChipSet is not referenced in this function */
+ /* xv vs: emits the "xv" vertex shader into shader[] and returns the number of dwords written (20) */
+ /* 0: CF VC clause, ADDR(6) = slot 6 below (slots count dword pairs), two fetch instructions */
+ shader[i++] = CF_DWORD0(ADDR(6),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(2),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_VC),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 1 - ALU clause, ADDR(4) = slot 4 below, two instructions, KCACHE_MODE0 is SQ_CF_KCACHE_LOCK_1 */
+ shader[i++] = CF_ALU_DWORD0(ADDR(4),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ I_COUNT(2),
+ ALT_CONST(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 2: export GPR 1 (destination of the first fetch, slots 6/7) as position CF_POS0 */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
+ TYPE(SQ_EXPORT_POS),
+ RW_GPR(1),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(3));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ BURST_COUNT(1),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ MARK(0),
+ BARRIER(1));
+ /* 3: export GPR 0 (second fetch, then scaled by the two MULs) as param 0; END_OF_PROGRAM is set here */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
+ TYPE(SQ_EXPORT_PARAM),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(3));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ BURST_COUNT(1),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(1),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ MARK(0),
+ BARRIER(0));
+
+
+ /* 4 texX / w: MUL of GPR 0 element X by kcache0 entry 0 element X (presumably the constant holds 1/w - confirm) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+
+ /* 5 texY / h: MUL of GPR 0 element Y by kcache0 entry 0 element Y (presumably 1/h - confirm); has LAST(1) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+
+ /* 6/7: vertex fetch from buffer 0, OFFSET(0), FMT_32_32_FLOAT into GPR 1; MEGA_FETCH(1), count 16 */
+ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ MEGA_FETCH_COUNT(16));
+ shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_0),
+ DST_SEL_W(SQ_SEL_1),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_32_32_FLOAT),
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+ shader[i++] = VTX_DWORD2(OFFSET(0),
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+ ENDIAN_SWAP(SQ_ENDIAN_8IN32), /* selected only when building for a big-endian host */
+#else
+ ENDIAN_SWAP(SQ_ENDIAN_NONE),
+#endif
+ CONST_BUF_NO_STRIDE(0),
+ MEGA_FETCH(1),
+ ALT_CONST(0),
+ BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = VTX_DWORD_PAD; /* fourth dword written for the fetch instruction */
+ /* 8/9: vertex fetch from buffer 0, OFFSET(8), FMT_32_32_FLOAT into GPR 0; MEGA_FETCH(0) */
+ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ MEGA_FETCH_COUNT(8));
+ shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_0),
+ DST_SEL_W(SQ_SEL_1),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_32_32_FLOAT),
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+ shader[i++] = VTX_DWORD2(OFFSET(8),
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+ ENDIAN_SWAP(SQ_ENDIAN_8IN32), /* selected only when building for a big-endian host */
+#else
+ ENDIAN_SWAP(SQ_ENDIAN_NONE),
+#endif
+ CONST_BUF_NO_STRIDE(0),
+ MEGA_FETCH(0),
+ ALT_CONST(0),
+ BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = VTX_DWORD_PAD; /* fourth dword written for the fetch instruction */
+
+ return i;
+}
+
+int evergreen_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
+{
+ int i = 0;
+
+ /* 0 */
+ shader[i++] = CF_ALU_DWORD0(ADDR(5),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_NOP));
+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ I_COUNT(4),
+ ALT_CONST(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ /* 1 */
+ shader[i++] = CF_DWORD0(ADDR(21),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_BOOL),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_CALL),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(0));
+ /* 2 */
+ shader[i++] = CF_DWORD0(ADDR(30),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_NOT_BOOL),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_CALL),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(0));
+ /* 3 */
+ shader[i++] = CF_ALU_DWORD0(ADDR(9),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ I_COUNT(12),
+ ALT_CONST(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ /* 4 */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+ TYPE(SQ_EXPORT_PIXEL),
+ RW_GPR(2),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(3));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ BURST_COUNT(1),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(1),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ MARK(0),
+ BARRIER(1));
+ /* 5 interpolate tex coords */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+ /* 6 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+ /* 7 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+ /* 8 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+
+ /* 9,10,11,12 */
+ /* r2.x = MAD(c0.w, r1.x, c0.x) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_X),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+ /* r2.y = MAD(c0.w, r1.x, c0.y) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Y),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+ /* r2.z = MAD(c0.w, r1.x, c0.z) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Z),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+ /* r2.w = MAD(0, 0, 1) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_X),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+
+ /* 13,14,15,16 */
+ /* r2.x = MAD(c1.x, r1.y, pv.x) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_X),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+ /* r2.y = MAD(c1.y, r1.y, pv.y) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Y),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+ /* r2.z = MAD(c1.z, r1.y, pv.z) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Z),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+ /* r2.w = MAD(0, 0, 1) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_W),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+ /* 17,18,19,20 */
+ /* r2.x = MAD(c2.x, r1.z, pv.x) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_X),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(1));
+ /* r2.y = MAD(c2.y, r1.z, pv.y) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Y),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(1));
+ /* r2.z = MAD(c2.z, r1.z, pv.z) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Z),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(1));
+ /* r2.w = MAD(0, 0, 1) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_X),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(1));
+
+ /* 21 */
+ shader[i++] = CF_DWORD0(ADDR(24),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(3),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_TC),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ /* 22 */
+ shader[i++] = CF_DWORD0(ADDR(0),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_RETURN),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ /* 23 */
+ shader[i++] = 0x00000000;
+ shader[i++] = 0x00000000;
+ /* 24/25 */
+ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+ INST_MOD(0),
+ FETCH_WHOLE_QUAD(0),
+ RESOURCE_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ ALT_CONST(0),
+ RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
+ SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = TEX_DWORD1(DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_MASK),
+ DST_SEL_Z(SQ_SEL_MASK),
+ DST_SEL_W(SQ_SEL_1),
+ LOD_BIAS(0),
+ COORD_TYPE_X(TEX_NORMALIZED),
+ COORD_TYPE_Y(TEX_NORMALIZED),
+ COORD_TYPE_Z(TEX_NORMALIZED),
+ COORD_TYPE_W(TEX_NORMALIZED));
+ shader[i++] = TEX_DWORD2(OFFSET_X(0),
+ OFFSET_Y(0),
+ OFFSET_Z(0),
+ SAMPLER_ID(0),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1));
+ shader[i++] = TEX_DWORD_PAD;
+ /* 26/27 */
+ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+ INST_MOD(0),
+ FETCH_WHOLE_QUAD(0),
+ RESOURCE_ID(1),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ ALT_CONST(0),
+ RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
+ SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = TEX_DWORD1(DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(SQ_SEL_MASK),
+ DST_SEL_Y(SQ_SEL_MASK),
+ DST_SEL_Z(SQ_SEL_X),
+ DST_SEL_W(SQ_SEL_MASK),
+ LOD_BIAS(0),
+ COORD_TYPE_X(TEX_NORMALIZED),
+ COORD_TYPE_Y(TEX_NORMALIZED),
+ COORD_TYPE_Z(TEX_NORMALIZED),
+ COORD_TYPE_W(TEX_NORMALIZED));
+ shader[i++] = TEX_DWORD2(OFFSET_X(0),
+ OFFSET_Y(0),
+ OFFSET_Z(0),
+ SAMPLER_ID(1),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1));
+ shader[i++] = TEX_DWORD_PAD;
+ /* 28/29 */
+ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+ INST_MOD(0),
+ FETCH_WHOLE_QUAD(0),
+ RESOURCE_ID(2),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ ALT_CONST(0),
+ RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
+ SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = TEX_DWORD1(DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(SQ_SEL_MASK),
+ DST_SEL_Y(SQ_SEL_X),
+ DST_SEL_Z(SQ_SEL_MASK),
+ DST_SEL_W(SQ_SEL_MASK),
+ LOD_BIAS(0),
+ COORD_TYPE_X(TEX_NORMALIZED),
+ COORD_TYPE_Y(TEX_NORMALIZED),
+ COORD_TYPE_Z(TEX_NORMALIZED),
+ COORD_TYPE_W(TEX_NORMALIZED));
+ shader[i++] = TEX_DWORD2(OFFSET_X(0),
+ OFFSET_Y(0),
+ OFFSET_Z(0),
+ SAMPLER_ID(2),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1));
+ shader[i++] = TEX_DWORD_PAD;
+ /* 30 */
+ shader[i++] = CF_DWORD0(ADDR(32),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(2),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_TC),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ /* 31 */
+ shader[i++] = CF_DWORD0(ADDR(0),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_RETURN),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ /* 32/33 */
+ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+ INST_MOD(0),
+ FETCH_WHOLE_QUAD(0),
+ RESOURCE_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ ALT_CONST(0),
+ RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
+ SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = TEX_DWORD1(DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_MASK),
+ DST_SEL_Z(SQ_SEL_MASK),
+ DST_SEL_W(SQ_SEL_1),
+ LOD_BIAS(0),
+ COORD_TYPE_X(TEX_NORMALIZED),
+ COORD_TYPE_Y(TEX_NORMALIZED),
+ COORD_TYPE_Z(TEX_NORMALIZED),
+ COORD_TYPE_W(TEX_NORMALIZED));
+ shader[i++] = TEX_DWORD2(OFFSET_X(0),
+ OFFSET_Y(0),
+ OFFSET_Z(0),
+ SAMPLER_ID(0),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1));
+ shader[i++] = TEX_DWORD_PAD;
+ /* 34/35 */
+ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+ INST_MOD(0),
+ FETCH_WHOLE_QUAD(0),
+ RESOURCE_ID(1),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ ALT_CONST(0),
+ RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
+ SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = TEX_DWORD1(DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(SQ_SEL_MASK),
+ DST_SEL_Y(SQ_SEL_X),
+ DST_SEL_Z(SQ_SEL_Y),
+ DST_SEL_W(SQ_SEL_MASK),
+ LOD_BIAS(0),
+ COORD_TYPE_X(TEX_NORMALIZED),
+ COORD_TYPE_Y(TEX_NORMALIZED),
+ COORD_TYPE_Z(TEX_NORMALIZED),
+ COORD_TYPE_W(TEX_NORMALIZED));
+ shader[i++] = TEX_DWORD2(OFFSET_X(0),
+ OFFSET_Y(0),
+ OFFSET_Z(0),
+ SAMPLER_ID(1),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1));
+ shader[i++] = TEX_DWORD_PAD;
+
+ return i;
+}
+
+/* comp vs ---------------------------------------
+ * Writes the composite vertex shader into shader[] as a sequence of 32-bit
+ * words and returns the number of words written (108: every store below is
+ * unconditional).
+ *
+ * shader  - destination buffer; needs room for at least 108 uint32_t, since
+ *           nothing in this function checks bounds.
+ * ChipSet - not referenced by this function.
+ *
+ * The number that starts each comment is the instruction's slot: slot N is
+ * written at shader[2 * N], and the ADDR() operands of the CF words use the
+ * same slot numbers.  Slots 0-13 are control flow, 14-43 are ALU
+ * instructions and 44-53 are vertex fetches (two slots each).  CF 0 and CF 1
+ * call one of two subroutines, "mask" (CF 3-8) or "non-mask" (CF 9-13),
+ * selected by COND(BOOL) / COND(NOT_BOOL) with CF_CONST(0).
+ */
+int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
+{
+ int i = 0;
+
+ /* 0 - CALL the mask sub at CF 3 when COND is BOOL (CF_CONST 0) */
+ shader[i++] = CF_DWORD0(ADDR(3),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_BOOL),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_CALL),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(0));
+ /* 1 - CALL the non-mask sub at CF 9 when COND is NOT_BOOL (CF_CONST 0) */
+ shader[i++] = CF_DWORD0(ADDR(9),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_NOT_BOOL),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_CALL),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(0));
+ /* 2 - NOP carrying END_OF_PROGRAM(1); the only CF word here with that bit set */
+ shader[i++] = CF_DWORD0(ADDR(0),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(1),
+ CF_INST(SQ_CF_INST_NOP),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ /* 3 - mask sub: VC clause, I_COUNT(3) vertex fetches starting at slot 44 */
+ shader[i++] = CF_DWORD0(ADDR(44),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(3),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_VC),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 4 - ALU clause: I_COUNT(20) starting at slot 14 (slots 14-33); KCACHE_MODE0 is LOCK_1, bank 0, addr 0 */
+ shader[i++] = CF_ALU_DWORD0(ADDR(14),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ I_COUNT(20),
+ ALT_CONST(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 5 - dst: export GPR2 as position (CF_POS0) with swizzle x, y, 0, 1 */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
+ TYPE(SQ_EXPORT_POS),
+ RW_GPR(2),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1),
+ BURST_COUNT(1),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ MARK(0),
+ BARRIER(1));
+ /* 6 - src: export GPR1 as parameter 0 (plain EXPORT; the mask export below is the EXPORT_DONE) */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
+ TYPE(SQ_EXPORT_PARAM),
+ RW_GPR(1),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1),
+ BURST_COUNT(1),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_EXPORT),
+ MARK(0),
+ BARRIER(0));
+ /* 7 - mask: export GPR0 as parameter 1 (EXPORT_DONE). NOTE(review): this one passes WHOLE_QUAD_MODE(0) where the sibling exports pass MARK(0) - confirm both fill the same macro slot */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1),
+ TYPE(SQ_EXPORT_PARAM),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1),
+ BURST_COUNT(1),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(0));
+ /* 8 - RETURN: end of the mask sub */
+ shader[i++] = CF_DWORD0(ADDR(0),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_RETURN),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ /* 9 - non-mask sub: VC clause, I_COUNT(2) vertex fetches starting at slot 50 */
+ shader[i++] = CF_DWORD0(ADDR(50),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(2),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_VC),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 10 - ALU clause: I_COUNT(10) starting at slot 34 (slots 34-43); KCACHE_MODE0 is LOCK_1, bank 0, addr 0 */
+ shader[i++] = CF_ALU_DWORD0(ADDR(34),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ I_COUNT(10),
+ ALT_CONST(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 11 - dst: export GPR1 as position (CF_POS0) with swizzle x, y, 0, 1 */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
+ TYPE(SQ_EXPORT_POS),
+ RW_GPR(1),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1),
+ BURST_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ MARK(0),
+ BARRIER(1));
+ /* 12 - src: export GPR0 as parameter 0 (EXPORT_DONE; the only parameter on this path) */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
+ TYPE(SQ_EXPORT_PARAM),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1),
+ BURST_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ MARK(0),
+ BARRIER(0));
+ /* 13 - RETURN: end of the non-mask sub */
+ shader[i++] = CF_DWORD0(ADDR(0),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_RETURN),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 14 srcX.x DOT4 - mask: GPR1 against KCACHE0+0, x lane; the only lane of this group with WRITE_MASK(1) -> GPR3.x */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(3),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+
+ /* 15 srcX.y DOT4 - mask: y lane, WRITE_MASK(0) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(3),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+
+ /* 16 srcX.z DOT4 - mask: z lane, WRITE_MASK(0) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(3),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+
+ /* 17 srcX.w DOT4 - mask: w lane, WRITE_MASK(0); LAST(1) ends this four-lane group */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(3),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+
+ /* 18 srcY.x DOT4 - mask: GPR1 against KCACHE0+1, x lane, WRITE_MASK(0) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(3),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+
+ /* 19 srcY.y DOT4 - mask: y lane; the only lane of this group with WRITE_MASK(1) -> GPR3.y */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(3),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+
+ /* 20 srcY.z DOT4 - mask: z lane, WRITE_MASK(0) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(3),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+
+ /* 21 srcY.w DOT4 - mask: w lane, WRITE_MASK(0); LAST(1) ends this four-lane group */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(3),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+
+ /* 22 maskX.x DOT4 - mask: GPR0 against KCACHE0+2, x lane; the only lane of this group with WRITE_MASK(1) -> GPR4.x */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(4),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+
+ /* 23 maskX.y DOT4 - mask: y lane, WRITE_MASK(0) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(4),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+
+ /* 24 maskX.z DOT4 - mask: z lane, WRITE_MASK(0) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(4),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+
+ /* 25 maskX.w DOT4 - mask: w lane, WRITE_MASK(0); LAST(1) ends this four-lane group */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(4),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+
+ /* 26 maskY.x DOT4 - mask: GPR0 against KCACHE0+3, x lane, WRITE_MASK(0) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(4),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+
+ /* 27 maskY.y DOT4 - mask: y lane; the only lane of this group with WRITE_MASK(1) -> GPR4.y */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(4),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+
+ /* 28 maskY.z DOT4 - mask: z lane, WRITE_MASK(0) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(4),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+
+ /* 29 maskY.w DOT4 - mask: w lane, WRITE_MASK(0); LAST(1) ends this four-lane group */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(4),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+
+ /* 30 srcX / w: MUL GPR3.x by (KCACHE0+0).w -> GPR1.x; the op is a multiply, so .w presumably holds 1/width - confirm against the constant upload */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+
+ /* 31 srcY / h: MUL GPR3.y by (KCACHE0+1).w -> GPR1.y */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+
+ /* 32 maskX / w: MUL GPR4.x by (KCACHE0+2).w -> GPR0.x */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+
+ /* 33 maskY / h: MUL GPR4.y by (KCACHE0+3).w -> GPR0.y; last ALU slot of the mask clause */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+
+ /* 34 srcX.x DOT4 - non-mask: GPR0 against KCACHE0+0, x lane; the only lane of this group with WRITE_MASK(1) -> GPR2.x */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+
+ /* 35 srcX.y DOT4 - non-mask: y lane, WRITE_MASK(0) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+
+ /* 36 srcX.z DOT4 - non-mask: z lane, WRITE_MASK(0) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+
+ /* 37 srcX.w DOT4 - non-mask: w lane, WRITE_MASK(0); LAST(1) ends this four-lane group */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+
+ /* 38 srcY.x DOT4 - non-mask: GPR0 against KCACHE0+1, x lane, WRITE_MASK(0) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+
+ /* 39 srcY.y DOT4 - non-mask: y lane; the only lane of this group with WRITE_MASK(1) -> GPR2.y */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+
+ /* 40 srcY.z DOT4 - non-mask: z lane, WRITE_MASK(0) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+
+ /* 41 srcY.w DOT4 - non-mask: w lane, WRITE_MASK(0); LAST(1) ends this four-lane group */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+
+ /* 42 srcX / w: MUL GPR2.x by (KCACHE0+0).w -> GPR0.x; the op is a multiply, so .w presumably holds 1/width - confirm against the constant upload */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+
+ /* 43 srcY / h: MUL GPR2.y by (KCACHE0+1).w -> GPR0.y; last ALU slot of the non-mask clause */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+
+ /* mask vfetch - 44/45 - dst: FMT_32_32_FLOAT at OFFSET(0) -> GPR2 as (x, y, 0, 1); MEGA_FETCH(1) with MEGA_FETCH_COUNT(24) */
+ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ MEGA_FETCH_COUNT(24));
+ shader[i++] = VTX_DWORD1_GPR(DST_GPR(2),
+ DST_REL(0),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_0),
+ DST_SEL_W(SQ_SEL_1),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_32_32_FLOAT),
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+ shader[i++] = VTX_DWORD2(OFFSET(0),
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+ ENDIAN_SWAP(SQ_ENDIAN_8IN32),
+#else
+ ENDIAN_SWAP(SQ_ENDIAN_NONE),
+#endif
+ CONST_BUF_NO_STRIDE(0),
+ MEGA_FETCH(1),
+ ALT_CONST(0),
+ BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = VTX_DWORD_PAD;
+ /* 46/47 - src: FMT_32_32_FLOAT at OFFSET(8) -> GPR1 as (x, y, 1, 0); MEGA_FETCH(0) */
+ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ MEGA_FETCH_COUNT(8));
+ shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
+ DST_REL(0),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_1),
+ DST_SEL_W(SQ_SEL_0),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_32_32_FLOAT),
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+ shader[i++] = VTX_DWORD2(OFFSET(8),
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+ ENDIAN_SWAP(SQ_ENDIAN_8IN32),
+#else
+ ENDIAN_SWAP(SQ_ENDIAN_NONE),
+#endif
+ CONST_BUF_NO_STRIDE(0),
+ MEGA_FETCH(0),
+ ALT_CONST(0),
+ BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = VTX_DWORD_PAD;
+ /* 48/49 - mask: FMT_32_32_FLOAT at OFFSET(16) -> GPR0 as (x, y, 1, 0); note this overwrites GPR0, which all three fetches name as SRC_GPR */
+ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ MEGA_FETCH_COUNT(8));
+ shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
+ DST_REL(0),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_1),
+ DST_SEL_W(SQ_SEL_0),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_32_32_FLOAT),
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+ shader[i++] = VTX_DWORD2(OFFSET(16),
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+ ENDIAN_SWAP(SQ_ENDIAN_8IN32),
+#else
+ ENDIAN_SWAP(SQ_ENDIAN_NONE),
+#endif
+ CONST_BUF_NO_STRIDE(0),
+ MEGA_FETCH(0),
+ ALT_CONST(0),
+ BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = VTX_DWORD_PAD;
+
+ /* no mask vfetch - 50/51 - dst: FMT_32_32_FLOAT at OFFSET(0) -> GPR1 as (x, y, 0, 1); MEGA_FETCH(1) with MEGA_FETCH_COUNT(16) */
+ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ MEGA_FETCH_COUNT(16));
+ shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
+ DST_REL(0),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_0),
+ DST_SEL_W(SQ_SEL_1),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_32_32_FLOAT),
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+ shader[i++] = VTX_DWORD2(OFFSET(0),
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+ ENDIAN_SWAP(SQ_ENDIAN_8IN32),
+#else
+ ENDIAN_SWAP(SQ_ENDIAN_NONE),
+#endif
+ CONST_BUF_NO_STRIDE(0),
+ MEGA_FETCH(1),
+ ALT_CONST(0),
+ BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = VTX_DWORD_PAD;
+ /* 52/53 - src: FMT_32_32_FLOAT at OFFSET(8) -> GPR0 as (x, y, 1, 0); note this overwrites GPR0, which both fetches name as SRC_GPR */
+ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ MEGA_FETCH_COUNT(8));
+ shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
+ DST_REL(0),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_1),
+ DST_SEL_W(SQ_SEL_0),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_32_32_FLOAT),
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+ shader[i++] = VTX_DWORD2(OFFSET(8),
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+ ENDIAN_SWAP(SQ_ENDIAN_8IN32),
+#else
+ ENDIAN_SWAP(SQ_ENDIAN_NONE),
+#endif
+ CONST_BUF_NO_STRIDE(0),
+ MEGA_FETCH(0),
+ ALT_CONST(0),
+ BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = VTX_DWORD_PAD;
+
+ return i;
+}
+
+/* comp ps --------------------------------------- */
+int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
+{
+ int i = 0;
+
+ /* 0 */
+ shader[i++] = CF_DWORD0(ADDR(3),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_BOOL),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_CALL),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(0));
+ /* 1 */
+ shader[i++] = CF_DWORD0(ADDR(8),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_NOT_BOOL),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_CALL),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(0));
+ /* 2 */
+ shader[i++] = CF_DWORD0(ADDR(0),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(1),
+ CF_INST(SQ_CF_INST_NOP),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 3 - mask sub */
+ shader[i++] = CF_ALU_DWORD0(ADDR(12),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_NOP));
+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ I_COUNT(8),
+ ALT_CONST(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 4 */
+ shader[i++] = CF_DWORD0(ADDR(28),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(2),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_TC),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 5 */
+ shader[i++] = CF_ALU_DWORD0(ADDR(20),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ I_COUNT(4),
+ ALT_CONST(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 6 */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+ TYPE(SQ_EXPORT_PIXEL),
+ RW_GPR(2),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(1));
+
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ BURST_COUNT(1),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ MARK(0),
+ BARRIER(1));
+ /* 7 */
+ shader[i++] = CF_DWORD0(ADDR(0),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_RETURN),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 8 - non-mask sub */
+ shader[i++] = CF_ALU_DWORD0(ADDR(24),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_NOP));
+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ I_COUNT(4),
+ ALT_CONST(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ /* 9 */
+ shader[i++] = CF_DWORD0(ADDR(32),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(1),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_TC),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 10 */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+ TYPE(SQ_EXPORT_PIXEL),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(1));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ BURST_COUNT(1),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ MARK(0),
+ BARRIER(1));
+
+ /* 11 */
+ shader[i++] = CF_DWORD0(ADDR(0),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_RETURN),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 12 interpolate src tex coords - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+ /* 13 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+ /* 14 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+ /* 15 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+
+ /* 16 interpolate mask tex coords */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+ /* 17 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+ /* 18 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+ /* 19 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+
+ /* 20 - alu 0 */
+ /* MUL gpr[2].x gpr[0].x gpr[1].x */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(1));
+ /* 21 - alu 1 */
+ /* MUL gpr[2].y gpr[0].y gpr[1].y */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(1));
+ /* 22 - alu 2 */
+ /* MUL gpr[2].z gpr[0].z gpr[1].z */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(1));
+ /* 23 - alu 3 */
+ /* MUL gpr[2].w gpr[0].w gpr[1].w */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(1));
+
+ /* 24 - interpolate tex coords - non-mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+ /* 25 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+ /* 26 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+ /* 27 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+
+ /* 28/29 - src - mask */
+ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+ INST_MOD(0),
+ FETCH_WHOLE_QUAD(0),
+ RESOURCE_ID(0),
+ SRC_GPR(1),
+ SRC_REL(ABSOLUTE),
+ ALT_CONST(0),
+ RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
+ SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = TEX_DWORD1(DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_Z),
+ DST_SEL_W(SQ_SEL_W),
+ LOD_BIAS(0),
+ COORD_TYPE_X(TEX_NORMALIZED),
+ COORD_TYPE_Y(TEX_NORMALIZED),
+ COORD_TYPE_Z(TEX_NORMALIZED),
+ COORD_TYPE_W(TEX_NORMALIZED));
+ shader[i++] = TEX_DWORD2(OFFSET_X(0),
+ OFFSET_Y(0),
+ OFFSET_Z(0),
+ SAMPLER_ID(0),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1));
+ shader[i++] = TEX_DWORD_PAD;
+ /* 30/31 - mask */
+ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+ INST_MOD(0),
+ FETCH_WHOLE_QUAD(0),
+ RESOURCE_ID(1),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ ALT_CONST(0),
+ RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
+ SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = TEX_DWORD1(DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_Z),
+ DST_SEL_W(SQ_SEL_W),
+ LOD_BIAS(0),
+ COORD_TYPE_X(TEX_NORMALIZED),
+ COORD_TYPE_Y(TEX_NORMALIZED),
+ COORD_TYPE_Z(TEX_NORMALIZED),
+ COORD_TYPE_W(TEX_NORMALIZED));
+ shader[i++] = TEX_DWORD2(OFFSET_X(0),
+ OFFSET_Y(0),
+ OFFSET_Z(0),
+ SAMPLER_ID(1),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1));
+ shader[i++] = TEX_DWORD_PAD;
+
+ /* 32/33 - src - non-mask */
+ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+ INST_MOD(0),
+ FETCH_WHOLE_QUAD(0),
+ RESOURCE_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ ALT_CONST(0),
+ RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
+ SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = TEX_DWORD1(DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_Z),
+ DST_SEL_W(SQ_SEL_W),
+ LOD_BIAS(0),
+ COORD_TYPE_X(TEX_NORMALIZED),
+ COORD_TYPE_Y(TEX_NORMALIZED),
+ COORD_TYPE_Z(TEX_NORMALIZED),
+ COORD_TYPE_W(TEX_NORMALIZED));
+ shader[i++] = TEX_DWORD2(OFFSET_X(0),
+ OFFSET_Y(0),
+ OFFSET_Z(0),
+ SAMPLER_ID(0),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1));
+ shader[i++] = TEX_DWORD_PAD;
+
+ return i;
+}
+
diff --git a/evergreen_shader.h b/evergreen_shader.h
new file mode 100644
index 0000000..c0a0601
--- /dev/null
+++ b/evergreen_shader.h
@@ -0,0 +1,292 @@
+/*
+ * Evergreen shaders
+ *
+ * Copyright (C) 2010 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Shader macros
+ */
+
+#ifndef __SHADER_H__
+#define __SHADER_H__
+
+#include "radeondemo.h"
+
+/* Order of instructions: All CF, All ALU, All Tex/Vtx fetches */
+
+
+// CF insts
+// addr
+#define ADDR(x) (x)
+// jumptable
+#define JUMPTABLE_SEL(x) (x)
+// pc
+#define POP_COUNT(x) (x)
+// const
+#define CF_CONST(x) (x)
+// cond
+#define COND(x) (x) // SQ_COND_*
+// count
+#define I_COUNT(x) ((x) ? ((x) - 1) : 0)
+// vpm
+#define VALID_PIXEL_MODE(x) (x)
+// eop
+#define END_OF_PROGRAM(x) (x)
+// cf inst
+#define CF_INST(x) (x) // SQ_CF_INST_*
+// wqm
+#define WHOLE_QUAD_MODE(x) (x)
+// barrier
+#define BARRIER(x) (x)
+//kb0
+#define KCACHE_BANK0(x) (x)
+//kb1
+#define KCACHE_BANK1(x) (x)
+// km0/1
+#define KCACHE_MODE0(x) (x)
+#define KCACHE_MODE1(x) (x) // SQ_CF_KCACHE_*
+//
+#define KCACHE_ADDR0(x) (x)
+#define KCACHE_ADDR1(x) (x)
+
+#define ALT_CONST(x) (x)
+
+#define ARRAY_BASE(x) (x)
+// export pixel
+#define CF_PIXEL_MRT0 0
+#define CF_PIXEL_MRT1 1
+#define CF_PIXEL_MRT2 2
+#define CF_PIXEL_MRT3 3
+#define CF_PIXEL_MRT4 4
+#define CF_PIXEL_MRT5 5
+#define CF_PIXEL_MRT6 6
+#define CF_PIXEL_MRT7 7
+// computed Z
+#define CF_COMPUTED_Z 61
+// export pos
+#define CF_POS0 60
+#define CF_POS1 61
+#define CF_POS2 62
+#define CF_POS3 63
+// export param
+// 0...31
+#define TYPE(x) (x) // SQ_EXPORT_*
+#define RW_GPR(x) (x)
+#define RW_REL(x) (x)
+#define ABSOLUTE 0
+#define RELATIVE 1
+#define INDEX_GPR(x) (x)
+#define ELEM_SIZE(x) ((x) ? ((x) - 1) : 0) // encoded as size - 1; 0 stays 0
+#define BURST_COUNT(x) ((x) ? ((x) - 1) : 0) // encoded as count - 1; 0 stays 0
+#define MARK(x) (x)
+
+// swiz
+#define SRC_SEL_X(x) (x) // SQ_SEL_* each
+#define SRC_SEL_Y(x) (x)
+#define SRC_SEL_Z(x) (x)
+#define SRC_SEL_W(x) (x)
+// CF dword packers: every field is widened to uint32_t before shifting so that reaching bit 31 (e.g. BARRIER(1)) is well-defined
+#define CF_DWORD0(addr, jmptbl) cpu_to_le32(((uint32_t)(addr) | ((uint32_t)(jmptbl) << 24)))
+#define CF_DWORD1(pc, cf_const, cond, count, vpm, eop, cf_inst, wqm, b) \
+    cpu_to_le32((((uint32_t)(pc) << 0) | ((uint32_t)(cf_const) << 3) | ((uint32_t)(cond) << 8) | ((uint32_t)(count) << 10) | \
+                 ((uint32_t)(vpm) << 20) | ((uint32_t)(eop) << 21) | ((uint32_t)(cf_inst) << 22) | ((uint32_t)(wqm) << 30) | ((uint32_t)(b) << 31)))
+
+#define CF_ALU_DWORD0(addr, kb0, kb1, km0) cpu_to_le32((((uint32_t)(addr) << 0) | ((uint32_t)(kb0) << 22) | ((uint32_t)(kb1) << 26) | ((uint32_t)(km0) << 30)))
+#define CF_ALU_DWORD1(km1, kcache_addr0, kcache_addr1, count, alt_const, cf_inst, wqm, b) \
+    cpu_to_le32((((uint32_t)(km1) << 0) | ((uint32_t)(kcache_addr0) << 2) | ((uint32_t)(kcache_addr1) << 10) | \
+                 ((uint32_t)(count) << 18) | ((uint32_t)(alt_const) << 25) | ((uint32_t)(cf_inst) << 26) | ((uint32_t)(wqm) << 30) | ((uint32_t)(b) << 31)))
+
+#define CF_ALLOC_IMP_EXP_DWORD0(array_base, type, rw_gpr, rr, index_gpr, es) \
+    cpu_to_le32((((uint32_t)(array_base) << 0) | ((uint32_t)(type) << 13) | ((uint32_t)(rw_gpr) << 15) | ((uint32_t)(rr) << 22) | \
+                 ((uint32_t)(index_gpr) << 23) | ((uint32_t)(es) << 30)))
+#define CF_ALLOC_IMP_EXP_DWORD1_SWIZ(sel_x, sel_y, sel_z, sel_w, bc, vpm, eop, cf_inst, m, b) \
+    cpu_to_le32((((uint32_t)(sel_x) << 0) | ((uint32_t)(sel_y) << 3) | ((uint32_t)(sel_z) << 6) | ((uint32_t)(sel_w) << 9) | \
+                 ((uint32_t)(bc) << 16) | ((uint32_t)(vpm) << 20) | ((uint32_t)(eop) << 21) | ((uint32_t)(cf_inst) << 22) | \
+                 ((uint32_t)(m) << 30) | ((uint32_t)(b) << 31)))
+
+// ALU clause insts
+#define SRC0_SEL(x) (x)
+#define SRC1_SEL(x) (x)
+#define SRC2_SEL(x) (x)
+// src[0-2]_sel
+// 0-127 GPR
+// 128-159 kcache constants bank 0
+// 160-191 kcache constants bank 1
+// 192-255 inline const values
+// 256-287 kcache constants bank 2
+// 288-319 kcache constants bank 3
+// 219-255 special SQ_ALU_SRC_* (0, 1, etc.)
+// 448-    src param space (starts at ALU_SRC_PARAM_BASE)
+#define ALU_SRC_GPR_BASE 0
+#define ALU_SRC_KCACHE0_BASE 128
+#define ALU_SRC_KCACHE1_BASE 160
+#define ALU_SRC_INLINE_K_BASE 192
+#define ALU_SRC_KCACHE2_BASE 256
+#define ALU_SRC_KCACHE3_BASE 288
+#define ALU_SRC_PARAM_BASE 448
+
+#define SRC0_REL(x) (x)
+#define SRC1_REL(x) (x)
+#define SRC2_REL(x) (x)
+// elem
+#define SRC0_ELEM(x) (x)
+#define SRC1_ELEM(x) (x)
+#define SRC2_ELEM(x) (x)
+#define ELEM_X 0
+#define ELEM_Y 1
+#define ELEM_Z 2
+#define ELEM_W 3
+// neg
+#define SRC0_NEG(x) (x)
+#define SRC1_NEG(x) (x)
+#define SRC2_NEG(x) (x)
+// im
+#define INDEX_MODE(x) (x) // SQ_INDEX_*
+// ps
+#define PRED_SEL(x) (x) // SQ_PRED_SEL_*
+// last
+#define LAST(x) (x)
+// abs
+#define SRC0_ABS(x) (x)
+#define SRC1_ABS(x) (x)
+// uem
+#define UPDATE_EXECUTE_MASK(x) (x)
+// up
+#define UPDATE_PRED(x) (x)
+// wm
+#define WRITE_MASK(x) (x)
+// omod
+#define OMOD(x) (x) // SQ_ALU_OMOD_*
+// alu inst
+#define ALU_INST(x) (x) // SQ_ALU_INST_*
+//bs
+#define BANK_SWIZZLE(x) (x) // SQ_ALU_VEC_*
+#define DST_GPR(x) (x)
+#define DST_REL(x) (x)
+#define DST_ELEM(x) (x)
+#define CLAMP(x) (x)
+// ALU dword packers: every field is widened to uint32_t before shifting so that LAST(1)/CLAMP(1) at bit 31 are well-defined
+#define ALU_DWORD0(src0_sel, s0r, s0e, s0n, src1_sel, s1r, s1e, s1n, im, ps, last) \
+    cpu_to_le32((((uint32_t)(src0_sel) << 0) | ((uint32_t)(s0r) << 9) | ((uint32_t)(s0e) << 10) | ((uint32_t)(s0n) << 12) | \
+                 ((uint32_t)(src1_sel) << 13) | ((uint32_t)(s1r) << 22) | ((uint32_t)(s1e) << 23) | ((uint32_t)(s1n) << 25) | \
+                 ((uint32_t)(im) << 26) | ((uint32_t)(ps) << 29) | ((uint32_t)(last) << 31)))
+
+#define ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) \
+    cpu_to_le32((((uint32_t)(s0a) << 0) | ((uint32_t)(s1a) << 1) | ((uint32_t)(uem) << 2) | ((uint32_t)(up) << 3) | ((uint32_t)(wm) << 4) | \
+                 ((uint32_t)(omod) << 5) | ((uint32_t)(alu_inst) << 7) | ((uint32_t)(bs) << 18) | ((uint32_t)(dst_gpr) << 21) | \
+                 ((uint32_t)(dr) << 28) | ((uint32_t)(de) << 29) | ((uint32_t)(clamp) << 31)))
+
+#define ALU_DWORD1_OP3(src2_sel, s2r, s2e, s2n, alu_inst, bs, dst_gpr, dr, de, clamp) \
+    cpu_to_le32((((uint32_t)(src2_sel) << 0) | ((uint32_t)(s2r) << 9) | ((uint32_t)(s2e) << 10) | ((uint32_t)(s2n) << 12) | \
+                 ((uint32_t)(alu_inst) << 13) | ((uint32_t)(bs) << 18) | ((uint32_t)(dst_gpr) << 21) | ((uint32_t)(dr) << 28) | \
+                 ((uint32_t)(de) << 29) | ((uint32_t)(clamp) << 31)))
+
+// VTX clause insts
+// vxt insts
+#define VTX_INST(x) (x) // SQ_VTX_INST_*
+
+// fetch type
+#define FETCH_TYPE(x) (x) // SQ_VTX_FETCH_*
+
+#define FETCH_WHOLE_QUAD(x) (x)
+#define BUFFER_ID(x) (x)
+#define SRC_GPR(x) (x)
+#define SRC_REL(x) (x)
+#define MEGA_FETCH_COUNT(x) ((x) ? ((x) - 1) : 0)
+
+#define DST_SEL_X(x) (x)
+#define DST_SEL_Y(x) (x)
+#define DST_SEL_Z(x) (x)
+#define DST_SEL_W(x) (x)
+#define USE_CONST_FIELDS(x) (x)
+#define DATA_FORMAT(x) (x)
+// num format
+#define NUM_FORMAT_ALL(x) (x) // SQ_NUM_FORMAT_*
+// format comp
+#define FORMAT_COMP_ALL(x) (x) // SQ_FORMAT_COMP_*
+// sma
+#define SRF_MODE_ALL(x) (x)
+#define SRF_MODE_ZERO_CLAMP_MINUS_ONE 0
+#define SRF_MODE_NO_ZERO 1
+#define OFFSET(x) (x)
+// endian swap
+#define ENDIAN_SWAP(x) (x) // SQ_ENDIAN_*
+#define CONST_BUF_NO_STRIDE(x) (x)
+// mf
+#define MEGA_FETCH(x) (x)
+#define BUFFER_INDEX_MODE(x) (x)
+// VTX dword packers: every field is widened to uint32_t before shifting so that values reaching bit 31 are well-defined
+#define VTX_DWORD0(vtx_inst, ft, fwq, buffer_id, src_gpr, sr, ssx, mfc) \
+    cpu_to_le32((((uint32_t)(vtx_inst) << 0) | ((uint32_t)(ft) << 5) | ((uint32_t)(fwq) << 7) | ((uint32_t)(buffer_id) << 8) | \
+                 ((uint32_t)(src_gpr) << 16) | ((uint32_t)(sr) << 23) | ((uint32_t)(ssx) << 24) | ((uint32_t)(mfc) << 26)))
+#define VTX_DWORD1_GPR(dst_gpr, dr, dsx, dsy, dsz, dsw, ucf, data_format, nfa, fca, sma) \
+    cpu_to_le32((((uint32_t)(dst_gpr) << 0) | ((uint32_t)(dr) << 7) | ((uint32_t)(dsx) << 9) | ((uint32_t)(dsy) << 12) | ((uint32_t)(dsz) << 15) | ((uint32_t)(dsw) << 18) | \
+                 ((uint32_t)(ucf) << 21) | ((uint32_t)(data_format) << 22) | ((uint32_t)(nfa) << 28) | ((uint32_t)(fca) << 30) | ((uint32_t)(sma) << 31)))
+#define VTX_DWORD2(offset, es, cbns, mf, alt_const, bim) \
+    cpu_to_le32((((uint32_t)(offset) << 0) | ((uint32_t)(es) << 16) | ((uint32_t)(cbns) << 18) | ((uint32_t)(mf) << 19) | ((uint32_t)(alt_const) << 20) | ((uint32_t)(bim) << 21)))
+#define VTX_DWORD_PAD cpu_to_le32(0x00000000)
+
+// TEX clause insts
+// tex insts
+#define TEX_INST(x) (x) // SQ_TEX_INST_*
+#define INST_MOD(x) (x)
+#define FETCH_WHOLE_QUAD(x) (x)
+#define RESOURCE_ID(x) (x)
+#define RESOURCE_INDEX_MODE(x) (x)
+#define SAMPLER_INDEX_MODE(x) (x)
+
+#define LOD_BIAS(x) (x)
+//ct
+#define COORD_TYPE_X(x) (x)
+#define COORD_TYPE_Y(x) (x)
+#define COORD_TYPE_Z(x) (x)
+#define COORD_TYPE_W(x) (x)
+#define TEX_UNNORMALIZED 0
+#define TEX_NORMALIZED 1
+#define OFFSET_X(x) (((int)(x) * 2) & 0x1f) /* 4:1-bits 2's-complement fixed-point: [-8.0..7.5] */
+#define OFFSET_Y(x) (((int)(x) * 2) & 0x1f)
+#define OFFSET_Z(x) (((int)(x) * 2) & 0x1f)
+#define SAMPLER_ID(x) (x)
+// TEX dword packers: every field is widened to uint32_t before shifting so that values reaching bit 31 (e.g. COORD_TYPE_W) are well-defined
+#define TEX_DWORD0(tex_inst, im, fwq, resource_id, src_gpr, sr, ac, rim, sim) \
+    cpu_to_le32((((uint32_t)(tex_inst) << 0) | ((uint32_t)(im) << 5) | ((uint32_t)(fwq) << 7) | ((uint32_t)(resource_id) << 8) | \
+                 ((uint32_t)(src_gpr) << 16) | ((uint32_t)(sr) << 23) | ((uint32_t)(ac) << 24) | ((uint32_t)(rim) << 25) | ((uint32_t)(sim) << 27)))
+#define TEX_DWORD1(dst_gpr, dr, dsx, dsy, dsz, dsw, lod_bias, ctx, cty, ctz, ctw) \
+    cpu_to_le32((((uint32_t)(dst_gpr) << 0) | ((uint32_t)(dr) << 7) | ((uint32_t)(dsx) << 9) | ((uint32_t)(dsy) << 12) | ((uint32_t)(dsz) << 15) | ((uint32_t)(dsw) << 18) | \
+                 ((uint32_t)(lod_bias) << 21) | ((uint32_t)(ctx) << 28) | ((uint32_t)(cty) << 29) | ((uint32_t)(ctz) << 30) | ((uint32_t)(ctw) << 31)))
+#define TEX_DWORD2(offset_x, offset_y, offset_z, sampler_id, ssx, ssy, ssz, ssw) \
+    cpu_to_le32((((uint32_t)(offset_x) << 0) | ((uint32_t)(offset_y) << 5) | ((uint32_t)(offset_z) << 10) | ((uint32_t)(sampler_id) << 15) | \
+                 ((uint32_t)(ssx) << 20) | ((uint32_t)(ssy) << 23) | ((uint32_t)(ssz) << 26) | ((uint32_t)(ssw) << 29)))
+#define TEX_DWORD_PAD cpu_to_le32(0x00000000)
+
+extern int evergreen_solid_vs(RADEONChipFamily ChipSet, uint32_t* vs);
+extern int evergreen_solid_ps(RADEONChipFamily ChipSet, uint32_t* ps);
+
+extern int evergreen_copy_vs(RADEONChipFamily ChipSet, uint32_t* vs);
+extern int evergreen_copy_ps(RADEONChipFamily ChipSet, uint32_t* ps);
+
+extern int evergreen_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader);
+extern int evergreen_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader);
+
+extern int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* vs);
+extern int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* ps);
+
+#endif
diff --git a/evergreen_state.h b/evergreen_state.h
new file mode 100644
index 0000000..1acb484
--- /dev/null
+++ b/evergreen_state.h
@@ -0,0 +1,329 @@
+/*
+ * Copyright 2010 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Alex Deucher <alexander.deucher@amd.com>
+ *
+ */
+
+#ifndef __EVERGREEN_STATE_H__
+#define __EVERGREEN_STATE_H__
+
+typedef int bool_t;
+
+#define CLEAR(x) memset (&x, 0, sizeof(x)) /* zero-fill object x in place; pass the object itself, not its address */
+
+/* Sequencer / thread handling */
+typedef struct {
+ int ps_prio;
+ int vs_prio;
+ int gs_prio;
+ int es_prio;
+ int hs_prio;
+ int ls_prio;
+ int cs_prio;
+ int num_ps_gprs;
+ int num_vs_gprs;
+ int num_gs_gprs;
+ int num_es_gprs;
+ int num_hs_gprs;
+ int num_ls_gprs;
+ int num_cs_gprs;
+ int num_temp_gprs;
+ int num_ps_threads;
+ int num_vs_threads;
+ int num_gs_threads;
+ int num_es_threads;
+ int num_hs_threads;
+ int num_ls_threads;
+ int num_ps_stack_entries;
+ int num_vs_stack_entries;
+ int num_gs_stack_entries;
+ int num_es_stack_entries;
+ int num_hs_stack_entries;
+ int num_ls_stack_entries;
+} sq_config_t;
+
+/* Color buffer / render target */
+typedef struct {
+ int id;
+ int w;
+ int h;
+ uint64_t base;
+ int format;
+ int endian;
+ int array_mode; // tiling
+ int non_disp_tiling;
+ int number_type;
+ int read_size;
+ int comp_swap;
+ int tile_mode;
+ int blend_clamp;
+ int clear_color;
+ int blend_bypass;
+ int simple_float;
+ int round_mode;
+ int tile_compact;
+ int source_format;
+ int resource_type;
+ int fast_clear;
+ int compression;
+ int rat;
+ /* 2D related CB state */
+ uint32_t pmask;
+ int rop;
+ int blend_enable;
+ uint32_t blendcntl;
+ struct radeon_bo *bo;
+} cb_config_t;
+
+/* Shader */
+typedef struct {
+ uint64_t shader_addr;
+ uint32_t shader_size;
+ int num_gprs;
+ int stack_size;
+ int dx10_clamp;
+ int clamp_consts;
+ int export_mode;
+ int uncached_first_inst;
+ int single_round;
+ int double_round;
+ int allow_sdi;
+ int allow_sd0;
+ int allow_ddi;
+ int allow_ddo;
+ struct radeon_bo *bo;
+} shader_config_t;
+
+/* Shader consts */
+typedef struct {
+ int type;
+ int size_bytes;
+ uint64_t const_addr;
+ struct radeon_bo *bo;
+} const_config_t;
+
+/* Vertex buffer / vtx resource */
+typedef struct {
+ int id;
+ uint64_t vb_addr;
+ uint32_t vtx_num_entries;
+ uint32_t vtx_size_dw;
+ int clamp_x;
+ int format;
+ int num_format_all;
+ int format_comp_all;
+ int srf_mode_all;
+ int endian;
+ int mem_req_size;
+ int dst_sel_x;
+ int dst_sel_y;
+ int dst_sel_z;
+ int dst_sel_w;
+ int uncached;
+ struct radeon_bo *bo;
+} vtx_resource_t;
+
+/* Texture resource */
+typedef struct {
+ int id;
+ int w;
+ int h;
+ int pitch;
+ int depth;
+ int dim;
+ int array_mode;
+ int tile_type;
+ int format;
+ uint64_t base;
+ uint64_t mip_base;
+ uint32_t size;
+ int format_comp_x;
+ int format_comp_y;
+ int format_comp_z;
+ int format_comp_w;
+ int num_format_all;
+ int srf_mode_all;
+ int force_degamma;
+ int endian;
+ int dst_sel_x;
+ int dst_sel_y;
+ int dst_sel_z;
+ int dst_sel_w;
+ int base_level;
+ int last_level;
+ int base_array;
+ int last_array;
+ int perf_modulation;
+ int interlaced;
+ int min_lod;
+ struct radeon_bo *bo;
+ struct radeon_bo *mip_bo;
+} tex_resource_t;
+
+/* Texture sampler */
+typedef struct {
+ int id;
+ /* Clamping */
+ int clamp_x, clamp_y, clamp_z;
+ int border_color;
+ /* Filtering */
+ int xy_mag_filter, xy_min_filter;
+ int z_filter;
+ int mip_filter;
+ bool_t high_precision_filter; /* ? */
+ int perf_mip; /* ? 0-7 */
+ int perf_z; /* ? 3 */
+ /* LoD selection */
+ int min_lod, max_lod; /* 0-0x3ff */
+ int lod_bias; /* 0-0xfff (signed?) */
+ int lod_bias2; /* ? 0-0xfff (signed?) */
+ bool_t lod_uses_minor_axis; /* ? */
+ /* Other stuff */
+ bool_t point_sampling_clamp; /* ? */
+ bool_t tex_array_override; /* ? */
+ bool_t mc_coord_truncate; /* ? */
+ bool_t force_degamma; /* ? */
+ bool_t fetch_4; /* ? */
+ bool_t sample_is_pcf; /* ? */
+ bool_t type; /* ? */
+ int depth_compare; /* only depth textures? */
+ int chroma_key;
+ int truncate_coord;
+ bool_t disable_cube_wrap;
+} tex_sampler_t;
+
+/* Draw command */
+typedef struct {
+ uint32_t prim_type;
+ uint32_t vgt_draw_initiator;
+ uint32_t index_type;
+ uint32_t num_instances;
+ uint32_t num_indices;
+} draw_config_t;
+
+#define BEGIN_BATCH(n) \
+do { \
+ radeon_ddx_cs_start(radeon, (n), __FILE__, __func__, __LINE__); \
+} while(0)
+#define END_BATCH() \
+do { \
+ radeon_cs_end(radeon->cs, __FILE__, __func__, __LINE__); \
+} while(0)
+#define RELOC_BATCH(bo, rd, wd) \
+do { \
+ int _ret; \
+ _ret = radeon_cs_write_reloc(radeon->cs, (bo), (rd), (wd), 0); \
+ if (_ret) ErrorF("reloc emit failure %d (%s %d)\n", _ret, __func__, __LINE__); \
+} while(0)
+#define E32(dword) \
+do { \
+ radeon_cs_write_dword(radeon->cs, (dword)); \
+} while (0)
+/* Emit the raw 32-bit pattern of float val via E32; the temporary has a macro-private name so EFLOAT(a) works */
+#define EFLOAT(val) \
+do { \
+    union { float f; uint32_t d; } _efloat_tmp; \
+    _efloat_tmp.f = (val); \
+    E32(_efloat_tmp.d); \
+} while (0)
+
+#define PACK3(cmd, num) \
+do { \
+ E32(RADEON_CP_PACKET3 | ((cmd) << 8) | ((((num) - 1) & 0x3fff) << 16)); \
+} while (0)
+
+/* Emit the packet header for writing num registers starting at reg; note that reg is evaluated several times */
+/* If reg falls in one of the SET_* ranges the matching PACKET3 + offset is emitted, otherwise a PACKET0 */
+#define PACK0(reg, num) \
+do { \
+    if ((reg) >= SET_CONFIG_REG_offset && (reg) < SET_CONFIG_REG_end) { \
+        PACK3(IT_SET_CONFIG_REG, (num) + 1); \
+        E32(((reg) - SET_CONFIG_REG_offset) >> 2); \
+    } else if ((reg) >= SET_CONTEXT_REG_offset && (reg) < SET_CONTEXT_REG_end) { \
+        PACK3(IT_SET_CONTEXT_REG, (num) + 1); \
+        E32(((reg) - SET_CONTEXT_REG_offset) >> 2); \
+    } else if ((reg) >= SET_RESOURCE_offset && (reg) < SET_RESOURCE_end) { \
+        PACK3(IT_SET_RESOURCE, (num) + 1); \
+        E32(((reg) - SET_RESOURCE_offset) >> 2); \
+    } else if ((reg) >= SET_SAMPLER_offset && (reg) < SET_SAMPLER_end) { \
+        PACK3(IT_SET_SAMPLER, (num) + 1); \
+        E32(((reg) - SET_SAMPLER_offset) >> 2); \
+    } else if ((reg) >= SET_CTL_CONST_offset && (reg) < SET_CTL_CONST_end) { \
+        PACK3(IT_SET_CTL_CONST, (num) + 1); \
+        E32(((reg) - SET_CTL_CONST_offset) >> 2); \
+    } else if ((reg) >= SET_LOOP_CONST_offset && (reg) < SET_LOOP_CONST_end) { \
+        PACK3(IT_SET_LOOP_CONST, (num) + 1); \
+        E32(((reg) - SET_LOOP_CONST_offset) >> 2); \
+    } else if ((reg) >= SET_BOOL_CONST_offset && (reg) < SET_BOOL_CONST_end) { \
+        PACK3(IT_SET_BOOL_CONST, (num) + 1); \
+        E32(((reg) - SET_BOOL_CONST_offset) >> 2); \
+    } else { \
+        E32(CP_PACKET0 ((reg), (num) - 1)); \
+    } \
+} while (0)
+
+/* write a single register */
+#define EREG(reg, val) \
+do { \
+ PACK0((reg), 1); \
+ E32((val)); \
+} while (0)
+
+void
+evergreen_start_3d(struct radeon *radeon);
+void
+evergreen_set_render_target(struct radeon *radeon, cb_config_t *cb_conf, uint32_t domain);
+void
+evergreen_set_spi(struct radeon *radeon, int vs_export_count, int num_interp);
+void
+evergreen_fs_setup(struct radeon *radeon, shader_config_t *fs_conf, uint32_t domain);
+void
+evergreen_vs_setup(struct radeon *radeon, shader_config_t *vs_conf, uint32_t domain);
+void
+evergreen_ps_setup(struct radeon *radeon, shader_config_t *ps_conf, uint32_t domain);
+void
+evergreen_set_alu_consts(struct radeon *radeon, const_config_t *const_conf, uint32_t domain);
+void
+evergreen_set_bool_consts(struct radeon *radeon, int offset, uint32_t val);
+void
+evergreen_set_tex_resource(struct radeon *radeon, tex_resource_t *tex_res, uint32_t domain);
+void
+evergreen_set_tex_sampler(struct radeon *radeon, tex_sampler_t *s);
+void
+evergreen_set_screen_scissor(struct radeon *radeon, int x1, int y1, int x2, int y2);
+void
+evergreen_set_vport_scissor(struct radeon *radeon, int id, int x1, int y1, int x2, int y2);
+void
+evergreen_set_generic_scissor(struct radeon *radeon, int x1, int y1, int x2, int y2);
+void
+evergreen_set_window_scissor(struct radeon *radeon, int x1, int y1, int x2, int y2);
+void
+evergreen_set_clip_rect(struct radeon *radeon, int id, int x1, int y1, int x2, int y2);
+void
+evergreen_set_default_state(struct radeon *radeon);
+void
+evergreen_draw_auto(struct radeon *radeon, draw_config_t *draw_conf);
+
+void evergreen_finish_op(struct radeon *radeon, int vtx_size);
+
+#endif
diff --git a/radeon_vbo.c b/radeon_vbo.c
new file mode 100644
index 0000000..f8e14c4
--- /dev/null
+++ b/radeon_vbo.c
@@ -0,0 +1,204 @@
+/*
+ * Copyright © 2009 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Dave Airlie <airlied@redhat.com>
+ *
+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include "radeondemo.h"
+#include "radeon_bo.h"
+#include "radeon_cs.h"
+#define VBO_SIZE (16*1024)
+
+/* KMS vertex buffer support - for R600 only but could be used on previous gpus */
+
+static struct radeon_bo *radeon_vbo_get_bo(struct radeon *radeon);
+
+/*
+ * Detach the buffer object backing @vbo, if there is one, and rewind the
+ * write offset to the start. The mapping is dropped before the reference.
+ * @radeon is not used by this function.
+ */
+void radeon_vbo_put(struct radeon *radeon, struct radeon_vbo_object *vbo)
+{
+    struct radeon_bo *backing = vbo->vb_bo;
+
+    vbo->vb_offset = 0;
+    if (!backing)
+        return;
+
+    radeon_bo_unmap(backing);
+    radeon_bo_unref(backing);
+    vbo->vb_bo = NULL;
+    vbo->vb_total = 0;
+}
+
+/*
+ * Attach a buffer object from the DMA pool to @vbo and map it for writing.
+ *
+ * On success the object holds its own reference on the buffer, vb_total is
+ * VBO_SIZE and the offsets are rewound. A mapping failure is fatal, as before.
+ * If no buffer object could be obtained, vb_bo stays NULL and vb_total is 0,
+ * so the object no longer claims space that does not exist.
+ */
+void radeon_vbo_get(struct radeon *radeon, struct radeon_vbo_object *vbo)
+{
+    int ret;
+
+    vbo->vb_offset = 0;
+    vbo->vb_start_op = vbo->vb_offset;
+
+    vbo->vb_bo = radeon_vbo_get_bo(radeon);
+    if (!vbo->vb_bo) {
+        /* nothing to write into: do not advertise VBO_SIZE bytes */
+        ErrorF("Failed to get a vb\n");
+        vbo->vb_total = 0;
+        return;
+    }
+
+    radeon_bo_ref(vbo->vb_bo);
+    ret = radeon_bo_map(vbo->vb_bo, 1);
+    if (ret) {
+        ErrorF("Failed to map vb %d\n", ret);
+        exit(-1);
+    }
+
+    vbo->vb_total = VBO_SIZE;
+}
+
+/*
+ * Candidate for moving into libdrm so it can be shared with the radeon 3D
+ * driver.
+ *
+ * Returns non-zero unless radeon_bo_is_busy() answers -EBUSY for @bo; any
+ * other result, including other error codes, counts as idle.
+ */
+static int radeon_bo_is_idle(struct radeon_bo *bo)
+{
+    uint32_t busy_domain;
+
+    /* the domain written back by the query is not needed here */
+    return radeon_bo_is_busy(bo, &busy_domain) != -EBUSY;
+}
+
+/*
+ * Turn the three DMA buffer pools (free, wait, reserved) into empty lists by
+ * pointing each sentinel at itself.
+ */
+void radeon_vbo_init_lists(struct radeon *radeon)
+{
+    make_empty_list(&radeon->accel_state.bo_free);
+    make_empty_list(&radeon->accel_state.bo_wait);
+    make_empty_list(&radeon->accel_state.bo_reserved);
+}
+
+/* Unlink every node on @list, drop its buffer-object reference and free the node. */
+static void radeon_vbo_release_list(struct radeon_dma_bo *list)
+{
+    struct radeon_dma_bo *entry, *following;
+
+    foreach_s(entry, following, list) {
+        remove_from_list(entry);
+        radeon_bo_unref(entry->bo);
+        free(entry);
+    }
+}
+
+/*
+ * Empty the free, wait and reserved pools, in that order, releasing one
+ * buffer-object reference per node.
+ */
+void radeon_vbo_free_lists(struct radeon *radeon)
+{
+    struct radeon_accel_state *state = &radeon->accel_state;
+
+    radeon_vbo_release_list(&state->bo_free);
+    radeon_vbo_release_list(&state->bo_wait);
+    radeon_vbo_release_list(&state->bo_reserved);
+}
+
+/*
+ * Age the DMA buffer pools; called from radeon_cs_flush_indirect() after the
+ * command stream has been emitted.
+ *
+ * Three passes, in this order:
+ *  1. wait list: entries whose deadline is reached are dropped; idle,
+ *     unmapped ones move to the tail of the free list with a new deadline;
+ *  2. reserved list: every entry moves to the wait list;
+ *  3. free list: entries whose deadline is reached are released.
+ */
+void radeon_vbo_flush_bos(struct radeon *radeon)
+{
+ struct radeon_accel_state *accel_state = &radeon->accel_state;
+ struct radeon_dma_bo *dma_bo, *temp;
+ /* the bo_free sentinel's expire_counter doubles as the clock: advance it
+    by one, then derive the deadline handed to entries touched below */
+ const int expire_at = ++accel_state->bo_free.expire_counter + DMA_BO_FREE_TIME;
+ const int time = accel_state->bo_free.expire_counter;
+
+ foreach_s(dma_bo, temp, &accel_state->bo_wait) {
+ /* still waiting when its deadline arrives: give up on this buffer */
+ if (dma_bo->expire_counter == time) {
+ ErrorF("leaking dma buffer\n");
+ /* keep unreferencing until radeon_bo_unref() returns NULL */
+ while ((dma_bo->bo = radeon_bo_unref(dma_bo->bo))) {}
+ remove_from_list(dma_bo);
+ free(dma_bo);
+ continue;
+ }
+
+ if (!radeon_bo_is_idle(dma_bo->bo))
+ continue;
+
+ /* a non-NULL ptr means the buffer is still mapped; leave it waiting */
+ if (dma_bo->bo->ptr) {
+ ErrorF("bo with pointer on wait list!\n");
+ continue;
+ }
+
+ remove_from_list(dma_bo);
+ dma_bo->expire_counter = expire_at;
+ insert_at_tail(&accel_state->bo_free, dma_bo);
+ }
+
+ /* move reserved to wait list */
+ foreach_s(dma_bo, temp, &accel_state->bo_reserved) {
+ remove_from_list(dma_bo);
+ dma_bo->expire_counter = expire_at;
+ insert_at_tail(&accel_state->bo_wait, dma_bo);
+ }
+
+ /* free bos that have been unused */
+ foreach_s(dma_bo, temp, &accel_state->bo_free) {
+ /* stop at the first entry whose deadline is not the current tick */
+ if (dma_bo->expire_counter != time)
+ break;
+ /* always keep one hanging around at end */
+ /* NOTE(review): at_end() compares against the sentinel itself, which
+    foreach_s never yields as dma_bo, so this branch looks unreachable -
+    confirm whether the last real element was meant */
+ if (at_end(&accel_state->bo_free, dma_bo)) {
+ dma_bo->expire_counter = time + DMA_BO_FREE_TIME;
+ break;
+ }
+
+ remove_from_list(dma_bo);
+ radeon_bo_unref(dma_bo->bo);
+ free(dma_bo);
+ }
+}
+
+/*
+ * Hand out a VBO_SIZE buffer object for vertex data.
+ *
+ * The last entry of the free pool is reused when there is one; otherwise a
+ * new GTT buffer is opened. Either way the entry is placed at the head of the
+ * reserved pool and its buffer is run through a command-stream space check.
+ *
+ * Returns the buffer object, or NULL when the list node or the buffer cannot
+ * be allocated. The node is freed again if opening the buffer fails.
+ */
+static struct radeon_bo *radeon_vbo_get_bo(struct radeon *radeon)
+{
+    struct radeon_accel_state *accel_state = &radeon->accel_state;
+    struct radeon_dma_bo *dma_bo;
+    struct radeon_bo *bo;
+
+    if (is_empty_list(&accel_state->bo_free)) {
+        dma_bo = calloc(1, sizeof(*dma_bo));
+        if (!dma_bo)
+            return NULL;
+
+        dma_bo->bo = radeon_bo_open(radeon->bufmgr, 0, VBO_SIZE,
+                                    0, RADEON_GEM_DOMAIN_GTT, 0);
+        if (!dma_bo->bo) {
+            ErrorF("failure to allocate DMA BO\n");
+            /* the node is on no list yet, so nobody else would free it */
+            free(dma_bo);
+            return NULL;
+        }
+    } else {
+        dma_bo = last_elem(&accel_state->bo_free);
+        remove_from_list(dma_bo);
+    }
+
+    /* the entry just inserted is the head of the reserved pool */
+    insert_at_head(&accel_state->bo_reserved, dma_bo);
+    bo = dma_bo->bo;
+
+    /* need a space check */
+    if (radeon_cs_space_check_with_bo(radeon->cs,
+                                      bo,
+                                      RADEON_GEM_DOMAIN_GTT, 0))
+        ErrorF("failed to revalidate\n");
+
+    return bo;
+}
+
diff --git a/radeon_vbo.h b/radeon_vbo.h
new file mode 100644
index 0000000..701825c
--- /dev/null
+++ b/radeon_vbo.h
@@ -0,0 +1,45 @@
+
+#ifndef RADEON_VBO_H
+#define RADEON_VBO_H
+
+/*
+ * radeon_vb_no_space() is provided by radeondemo.c; the remaining functions
+ * are implemented in radeon_vbo.c.
+ */
+extern void radeon_vb_no_space(struct radeon *radeon, struct radeon_vbo_object *vbo, int vert_size);
+extern void radeon_vbo_init_lists(struct radeon *radeon);
+extern void radeon_vbo_free_lists(struct radeon *radeon);
+extern void radeon_vbo_flush_bos(struct radeon *radeon);
+extern void radeon_vbo_get(struct radeon *radeon, struct radeon_vbo_object *vbo);
+extern void radeon_vbo_put(struct radeon *radeon, struct radeon_vbo_object *vbo);
+
+/*
+ * Make sure @vbo can take one more operation of verts_per_op vertices, each
+ * @vert_size bytes; when the capacity would be exceeded, radeon_vb_no_space()
+ * is called to deal with it.
+ */
+static inline void radeon_vbo_check(struct radeon *radeon,
+                                    struct radeon_vbo_object *vbo,
+                                    int vert_size)
+{
+    if (vbo->vb_total < (vbo->vb_offset + (vbo->verts_per_op * vert_size)))
+        radeon_vb_no_space(radeon, vbo, vert_size);
+}
+
+/*
+ * Reserve room for one operation and return the address, inside the mapped
+ * buffer, where its vertices should be written. The space check may swap in
+ * a new buffer first. @vert_size is remembered for radeon_vbo_commit().
+ */
+static inline void *
+radeon_vbo_space(struct radeon *radeon,
+                 struct radeon_vbo_object *vbo,
+                 int vert_size)
+{
+    /* out of room in the current vertex buffer means getting a new one */
+    radeon_vbo_check(radeon, vbo, vert_size);
+
+    vbo->vb_op_vert_size = vert_size;
+    return (char *)vbo->vb_bo->ptr + vbo->vb_offset;
+}
+
+/*
+ * Advance the write offset past the operation just filled in: verts_per_op
+ * vertices of the size recorded by radeon_vbo_space(). @radeon is unused.
+ */
+static inline void radeon_vbo_commit(struct radeon *radeon,
+                                     struct radeon_vbo_object *vbo)
+{
+    unsigned consumed = vbo->verts_per_op * vbo->vb_op_vert_size;
+
+    vbo->vb_offset += consumed;
+}
+
+#endif
diff --git a/radeondemo.c b/radeondemo.c
new file mode 100644
index 0000000..7c41490
--- /dev/null
+++ b/radeondemo.c
@@ -0,0 +1,251 @@
+#include <sys/types.h>
+#include <sys/mman.h>
+
+#include <errno.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "xf86drm.h"
+#include "radeon_drm.h"
+
+#include "radeondemo.h"
+
+#include "radeon_bo_gem.h"
+#include "radeon_cs_gem.h"
+
+/* The single, statically allocated (hence zero-initialised) driver context,
+   and the pointer through which main() hands it around. */
+struct radeon _radeon_ctx;
+struct radeon *radeon = &_radeon_ctx;
+
+/*
+ * Allocate a 256x256, 32 bits-per-pixel surface in VRAM and queue a solid
+ * fill with 0xaa55aa55 over all of it.
+ *
+ * Returns 0 once the fill has been queued, -1 if the surface cannot be
+ * allocated.
+ *
+ * NOTE(review): the surface reference is kept on purpose; whether the
+ * accelerator state still points at test1.bo after this returns is not
+ * visible from this file.
+ */
+int run_test(struct radeon *radeon)
+{
+    /* the initializer zeroes every member not named, including offset */
+    struct r600_accel_object test1 = {
+        .pitch = 256,
+        .width = 256,
+        .height = 256,
+        .bpp = 32,
+        .domain = RADEON_GEM_DOMAIN_VRAM,
+        .tiling_flags = 0,
+    };
+    int size;
+
+    /* pitch equals width here, so it counts pixels; bpp is in bits while the
+       buffer size is in bytes */
+    size = test1.pitch * test1.height * (test1.bpp / 8);
+    test1.bo = radeon_bo_open(radeon->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+    if (!test1.bo)
+        return -1;
+
+    do_solid_fill_prepare(radeon, &test1, 0xaa55aa55);
+    evergreen_solid(radeon, 0, 0, test1.width, test1.height);
+    evergreen_finish_op(radeon, 8);
+
+    return 0;
+}
+
+/*
+ * Set up the libdrm-radeon objects and accelerator state for @radeon on the
+ * already opened DRM file descriptor @fd.
+ *
+ * Queries the GEM memory sizes, creates the buffer manager, the command
+ * stream manager and a 16384-dword command stream, installs the memory
+ * limits and the flush callback, initialises the VBO pools and loads the
+ * evergreen shaders.
+ *
+ * Returns 0 on success, -1 on failure. On failure everything created so far
+ * is destroyed again and the corresponding pointers are cleared.
+ */
+int radeon_init(struct radeon *radeon, int fd)
+{
+    struct drm_radeon_gem_info mminfo;
+
+    radeon->fd = fd;
+
+    /* zero the request so nothing uninitialised is handed to the ioctl */
+    memset(&mminfo, 0, sizeof(mminfo));
+    if (!drmCommandWriteRead(fd, DRM_RADEON_GEM_INFO, &mminfo, sizeof(mminfo))) {
+        radeon->vram_size = mminfo.vram_visible;
+        radeon->gart_size = mminfo.gart_size;
+        printf(
+            "mem size init: gart size :%llx vram size: s:%llx visible:%llx\n",
+            (unsigned long long)mminfo.gart_size,
+            (unsigned long long)mminfo.vram_size,
+            (unsigned long long)mminfo.vram_visible);
+    } else {
+        /* carry on as before, but with well-defined limits */
+        fprintf(stderr, "GEM info query failed, memory limits set to 0\n");
+        radeon->vram_size = 0;
+        radeon->gart_size = 0;
+    }
+
+    radeon->ChipFamily = CHIP_FAMILY_PALM;
+    radeon->bufmgr = radeon_bo_manager_gem_ctor(fd);
+    if (!radeon->bufmgr)
+        return -1;
+
+    radeon->csm = radeon_cs_manager_gem_ctor(fd);
+    if (!radeon->csm)
+        goto err_bufmgr;
+
+    radeon->cs = radeon_cs_create(radeon->csm, 16384);
+    if (!radeon->cs)
+        goto err_csm;
+
+    radeon_cs_set_limit(radeon->cs, RADEON_GEM_DOMAIN_VRAM, radeon->vram_size);
+    radeon_cs_set_limit(radeon->cs, RADEON_GEM_DOMAIN_GTT, radeon->gart_size);
+    radeon_cs_space_set_flush(radeon->cs, (void(*)(void *))radeon_cs_flush_indirect, radeon);
+
+    radeon_vbo_init_lists(radeon);
+
+    /* NOTE(review): both calls are declared to return bool; the results are
+       left unchecked because their definitions are not visible here */
+    if (radeon->ChipFamily == CHIP_FAMILY_PALM) {
+        EVERGREENAllocShaders(radeon);
+        EVERGREENLoadShaders(radeon);
+    }
+
+    radeon->accel_state.XInited3D = false;
+    radeon->accel_state.src_obj[0].bo = NULL;
+    radeon->accel_state.src_obj[1].bo = NULL;
+    radeon->accel_state.dst_obj.bo = NULL;
+    radeon->accel_state.vbo.vb_start_op = -1;
+    radeon->accel_state.cbuf.vb_start_op = -1;
+    radeon->accel_state.finish_op = evergreen_finish_op;
+    radeon->accel_state.vbo.verts_per_op = 3;
+    radeon->accel_state.cbuf.verts_per_op = 1;
+    return 0;
+
+err_csm:
+    radeon_cs_manager_gem_dtor(radeon->csm);
+    radeon->csm = NULL;
+err_bufmgr:
+    radeon_bo_manager_gem_dtor(radeon->bufmgr);
+    radeon->bufmgr = NULL;
+    return -1;
+}
+
+/*
+ * Tear down what radeon_init() created.
+ *
+ * Any vertex or constant buffer still attached is unmapped and released,
+ * then the pooled DMA buffers, then the command stream, and finally the two
+ * managers. All buffer references are dropped while the buffer manager still
+ * exists.
+ *
+ * NOTE(review): accel_state.shaders_bo is not released here; it is set up
+ * outside this file.
+ */
+void radeon_fini(struct radeon *radeon)
+{
+    struct radeon_accel_state *accel_state = &radeon->accel_state;
+
+    if (accel_state->vbo.vb_bo)
+        radeon_vbo_put(radeon, &accel_state->vbo);
+    if (accel_state->cbuf.vb_bo)
+        radeon_vbo_put(radeon, &accel_state->cbuf);
+    radeon_vbo_free_lists(radeon);
+
+    radeon_cs_destroy(radeon->cs);
+    radeon_bo_manager_gem_dtor(radeon->bufmgr);
+    radeon_cs_manager_gem_dtor(radeon->csm);
+}
+
+
+/*
+ * Open the DRM device at the hard-coded PCI bus id, initialise the radeon
+ * context, run the fill test once and shut everything down again.
+ *
+ * Exits with status -1 when the device cannot be opened or initialised;
+ * returns 0 otherwise. The result of run_test() is not inspected.
+ */
+int main(int argc, char **argv)
+{
+    const char *pciids = "pci:0000:00:01.0";
+    int drmFD;
+    int ret;
+
+    (void)argc;
+    (void)argv;
+
+    drmFD = drmOpen(NULL, pciids);
+    if (drmFD < 0) {
+        drmError(drmFD, __func__);
+        fprintf(stderr, "Check that BusId is correct. You can find the correct BusId in /var/log/Xorg.0.log\n");
+        fprintf(stderr, "You can also try setting the environment variable LIBGL_DEBUG to \"verbose\" to see what libdrm is trying to do.\n");
+        exit(-1);
+    }
+
+    ret = radeon_init(radeon, drmFD);
+    if (ret < 0) {
+        fprintf(stderr, "unable to init radeon\n");
+        /* do not leave the device open on the way out */
+        drmClose(drmFD);
+        exit(-1);
+    }
+
+    run_test(radeon);
+
+    radeon_fini(radeon);
+    drmClose(drmFD);
+    return 0;
+}
+
+/*
+ * printf-style diagnostic sink used throughout the demo. The message is
+ * formatted to stderr so that failures reported by the VBO and
+ * command-stream code are actually visible.
+ */
+void ErrorF(const char *f, ...)
+{
+    va_list args;
+
+    va_start(args, f);
+    vfprintf(stderr, f, args);
+    va_end(args);
+}
+
+/*
+ * Submit the pending command stream and recycle the buffers it used.
+ *
+ * Does nothing when the stream is empty (cdw == 0). Otherwise the attached
+ * vbo and cbuf objects are released, the stream is emitted and erased, the
+ * DMA pools are aged, and XInited3D is cleared.
+ * Also installed as the space-check flush callback by radeon_init().
+ */
+void radeon_cs_flush_indirect(struct radeon *radeon)
+{
+ struct radeon_accel_state *accel_state = &radeon->accel_state;
+ int ret;
+
+ if (!radeon->cs->cdw)
+ return;
+
+ /* release the current VBO so we don't block on mapping it later */
+ if (accel_state->vbo.vb_offset && accel_state->vbo.vb_bo) {
+ radeon_vbo_put(radeon, &accel_state->vbo);
+ accel_state->vbo.vb_start_op = -1;
+ }
+
+ /* same for the cbuf object; unlike the vbo it is released even when its
+    offset is still 0 */
+ if (accel_state->cbuf.vb_bo) {
+ radeon_vbo_put(radeon, &accel_state->cbuf);
+ accel_state->cbuf.vb_start_op = -1;
+ }
+ radeon_cs_emit(radeon->cs);
+ radeon_cs_erase(radeon->cs);
+
+ radeon_vbo_flush_bos(radeon);
+
+ /* NOTE(review): radeon_vbo_put() sets vb_bo to NULL, so this may be
+    handed a NULL buffer - confirm the space check accepts that */
+ ret = radeon_cs_space_check_with_bo(radeon->cs,
+ accel_state->vbo.vb_bo,
+ RADEON_GEM_DOMAIN_GTT, 0);
+ if (ret)
+ ErrorF("space check failed in flush\n");
+
+ accel_state->XInited3D = false;
+
+}
+
+/*
+ * Begin a packet of @n dwords on the command stream, forwarding the caller's
+ * @file, @func and @line to radeon_cs_begin().
+ *
+ * No flush is performed here, even when fewer than @n dwords remain between
+ * cdw and ndw: the flush call for that case is disabled.
+ */
+void radeon_ddx_cs_start(struct radeon *radeon,
+                         int n, const char *file,
+                         const char *func, int line)
+{
+    radeon_cs_begin(radeon->cs, n, file, func, line);
+}
+
+/*
+ * Called when @vbo cannot hold the next operation: finish whatever was
+ * started in the current buffer, give that buffer back and attach a new one.
+ */
+void radeon_vb_no_space(struct radeon *radeon,
+                        struct radeon_vbo_object *vbo,
+                        int vert_size)
+{
+    if (vbo->vb_bo != NULL) {
+        /* the offset moved since the operation started: emit it first and
+           remember where the stream stood afterwards */
+        if (vbo->vb_start_op != vbo->vb_offset) {
+            radeon->accel_state.finish_op(radeon, vert_size);
+            radeon->accel_state.ib_reset_op = radeon->cs->cdw;
+        }
+
+        radeon_vbo_put(radeon, vbo);
+    }
+
+    radeon_vbo_get(radeon, vbo);
+}
+
+/*
+ * Throw away the operation currently being built.
+ *
+ * With a recorded reset point (ib_reset_op != 0) the stream is rewound to
+ * it. Otherwise both VBO offsets are rewound and the whole stream is either
+ * flushed (when CS_FULL) or erased, after which the attached buffers are
+ * run through the space check again. XInited3D is cleared on every path;
+ * on the CS_FULL path that is done by radeon_cs_flush_indirect().
+ */
+void radeon_ib_discard(struct radeon *radeon)
+{
+ struct radeon_accel_state *accel_state = &radeon->accel_state;
+ int ret;
+
+ if (accel_state->ib_reset_op) {
+ /* if we have data just reset the CS and ignore the operation */
+ radeon->cs->cdw = accel_state->ib_reset_op;
+ accel_state->ib_reset_op = 0;
+ goto out;
+ }
+
+ accel_state->vbo.vb_offset = 0;
+ accel_state->vbo.vb_start_op = -1;
+ accel_state->cbuf.vb_offset = 0;
+ accel_state->cbuf.vb_start_op = -1;
+
+ if (CS_FULL(radeon->cs)) {
+ radeon_cs_flush_indirect(radeon);
+ return;
+ }
+ radeon_cs_erase(radeon->cs);
+ /* NOTE(review): vbo.vb_bo is passed without the NULL check that guards
+    cbuf.vb_bo below - confirm that is intended */
+ ret = radeon_cs_space_check_with_bo(radeon->cs,
+ accel_state->vbo.vb_bo,
+ RADEON_GEM_DOMAIN_GTT, 0);
+ if (ret)
+ ErrorF("space check failed in flush\n");
+
+ if (accel_state->cbuf.vb_bo) {
+ ret = radeon_cs_space_check_with_bo(radeon->cs,
+ accel_state->cbuf.vb_bo,
+ RADEON_GEM_DOMAIN_GTT, 0);
+ if (ret)
+ ErrorF("space check failed in flush\n");
+ }
+
+out:
+ accel_state->XInited3D = false;
+}
+
+/*
+ * Mark the start of a new operation: flush first if the command stream is
+ * nearly full, then record the current stream position and both VBO offsets
+ * so the operation can later be finished or discarded. Always returns 0.
+ */
+int radeon_cp_start(struct radeon *radeon)
+{
+    struct radeon_vbo_object *vbo = &radeon->accel_state.vbo;
+    struct radeon_vbo_object *cbuf = &radeon->accel_state.cbuf;
+
+    if (CS_FULL(radeon->cs))
+        radeon_cs_flush_indirect(radeon);
+
+    radeon->accel_state.ib_reset_op = radeon->cs->cdw;
+    vbo->vb_start_op = vbo->vb_offset;
+    cbuf->vb_start_op = cbuf->vb_offset;
+    return 0;
+}
diff --git a/radeondemo.h b/radeondemo.h
new file mode 100644
index 0000000..b23c58e
--- /dev/null
+++ b/radeondemo.h
@@ -0,0 +1,202 @@
+#ifndef RADEONDEMO_H
+#define RADEONDEMO_H
+
+#include <stdint.h>
+#include <sys/types.h>
+/* Byte-order helpers: all four expand to their argument unchanged, so no
+   swapping is ever performed (which matches little-endian hosts only). */
+#define le32_to_cpu(x) (x)
+#define le16_to_cpu(x) (x)
+#define cpu_to_le32(x) (x)
+#define cpu_to_le16(x) (x)
+
+#include <stdbool.h>
+
+#include "simple_list.h"
+
+#include "radeon_bo.h"
+#include "radeon_cs.h"
+/* Bookkeeping for one streaming buffer (accel_state.vbo and accel_state.cbuf). */
+struct radeon_vbo_object {
+ /* backing buffer; set by radeon_vbo_get(), cleared by radeon_vbo_put() */
+ struct radeon_bo *vb_bo;
+ /* vertices reserved and committed per operation (see radeon_vbo_check/commit) */
+ unsigned verts_per_op;
+ /* byte offset into vb_bo->ptr where the next operation is written */
+ int vb_offset;
+ /* vb_offset as recorded when the current operation started; set to -1
+    while no operation is open */
+ uint32_t vb_start_op;
+ /* vert_size passed to the most recent radeon_vbo_space() */
+ uint32_t vb_op_vert_size;
+ /* not used by radeon_vbo.c, radeon_vbo.h or radeondemo.c */
+ uint32_t vb_size;
+ /* capacity in bytes: VBO_SIZE after radeon_vbo_get(), 0 after radeon_vbo_put() */
+ int vb_total;
+ /* not used by radeon_vbo.c, radeon_vbo.h or radeondemo.c */
+ void *vb_ptr;
+};
+
+/* Number of radeon_vbo_flush_bos() calls added to the current tick to form
+   an entry's deadline. */
+#define DMA_BO_FREE_TIME 1000
+
+/*
+ * Node of the free/wait/reserved buffer pools. next/prev come first so the
+ * simple_list.h macros can link nodes; the pool heads in radeon_accel_state
+ * are themselves nodes of this type, used as sentinels.
+ */
+struct radeon_dma_bo {
+ struct radeon_dma_bo *next, *prev;
+ /* the pooled buffer object */
+ struct radeon_bo *bo;
+ /* deadline tick; on the bo_free sentinel this is the running clock */
+ int expire_counter;
+};
+
+
+/*
+ * Description of a surface handed to the accelerator (source or destination).
+ * NOTE(review): run_test() sets pitch equal to width, which suggests pitch
+ * counts pixels rather than bytes, and bpp counts bits - confirm against the
+ * evergreen code.
+ */
+struct r600_accel_object {
+ uint32_t pitch;
+ uint32_t width;
+ uint32_t height;
+ /* not assigned by run_test() */
+ uint32_t offset;
+ int bpp;
+ /* run_test() stores RADEON_GEM_DOMAIN_VRAM here */
+ uint32_t domain;
+ /* buffer object holding the surface */
+ struct radeon_bo *bo;
+ uint32_t tiling_flags;
+};
+
+struct radeon;
+
+/* Per-context acceleration state, embedded in struct radeon. */
+struct radeon_accel_state {
+ bool XInited3D; /* X itself has the 3D context */
+ /* streaming buffers; radeon_init() sets verts_per_op to 3 for vbo and 1 for cbuf */
+ struct radeon_vbo_object vbo;
+ struct radeon_vbo_object cbuf;
+ /* command-stream position (cdw) to rewind to in radeon_ib_discard(); 0 = none */
+ uint32_t ib_reset_op;
+ uint32_t src_size[2];
+ uint32_t dst_size;
+ struct r600_accel_object src_obj[2];
+ struct r600_accel_object dst_obj;
+
+ /* sentinels of the three DMA buffer pools (see radeon_vbo.c) */
+ struct radeon_dma_bo bo_free;
+ struct radeon_dma_bo bo_wait;
+ struct radeon_dma_bo bo_reserved;
+
+ /* invoked by radeon_vb_no_space(); radeon_init() installs evergreen_finish_op */
+ void (*finish_op)(struct radeon *, int);
+
+ /* shader storage; filled in outside the files shown here */
+ struct radeon_bo *shaders_bo;
+ uint32_t solid_vs_offset;
+ uint32_t solid_ps_offset;
+ uint32_t copy_vs_offset;
+ uint32_t copy_ps_offset;
+};
+/*
+ * Chip families; the enumerators carry no explicit values, so they number
+ * upwards from 0 in the order listed. radeon_init() currently hard-codes
+ * CHIP_FAMILY_PALM.
+ */
+typedef enum {
+ CHIP_FAMILY_UNKNOW,
+ CHIP_FAMILY_LEGACY,
+ CHIP_FAMILY_RADEON,
+ CHIP_FAMILY_RV100,
+ CHIP_FAMILY_RS100, /* U1 (IGP320M) or A3 (IGP320)*/
+ CHIP_FAMILY_RV200,
+ CHIP_FAMILY_RS200, /* U2 (IGP330M/340M/350M) or A4 (IGP330/340/345/350), RS250 (IGP 7000) */
+ CHIP_FAMILY_R200,
+ CHIP_FAMILY_RV250,
+ CHIP_FAMILY_RS300, /* RS300/RS350 */
+ CHIP_FAMILY_RV280,
+ CHIP_FAMILY_R300,
+ CHIP_FAMILY_R350,
+ CHIP_FAMILY_RV350,
+ CHIP_FAMILY_RV380, /* RV370/RV380/M22/M24 */
+ CHIP_FAMILY_R420, /* R420/R423/M18 */
+ CHIP_FAMILY_RV410, /* RV410, M26 */
+ CHIP_FAMILY_RS400, /* xpress 200, 200m (RS400) Intel */
+ CHIP_FAMILY_RS480, /* xpress 200, 200m (RS410/480/482/485) AMD */
+ CHIP_FAMILY_RV515, /* rv515 */
+ CHIP_FAMILY_R520, /* r520 */
+ CHIP_FAMILY_RV530, /* rv530 */
+ CHIP_FAMILY_R580, /* r580 */
+ CHIP_FAMILY_RV560, /* rv560 */
+ CHIP_FAMILY_RV570, /* rv570 */
+ CHIP_FAMILY_RS600,
+ CHIP_FAMILY_RS690,
+ CHIP_FAMILY_RS740,
+ CHIP_FAMILY_R600, /* r600 */
+ CHIP_FAMILY_RV610,
+ CHIP_FAMILY_RV630,
+ CHIP_FAMILY_RV670,
+ CHIP_FAMILY_RV620,
+ CHIP_FAMILY_RV635,
+ CHIP_FAMILY_RS780,
+ CHIP_FAMILY_RS880,
+ CHIP_FAMILY_RV770, /* r700 */
+ CHIP_FAMILY_RV730,
+ CHIP_FAMILY_RV710,
+ CHIP_FAMILY_RV740,
+ CHIP_FAMILY_CEDAR, /* evergreen */
+ CHIP_FAMILY_REDWOOD,
+ CHIP_FAMILY_JUNIPER,
+ CHIP_FAMILY_CYPRESS,
+ CHIP_FAMILY_HEMLOCK,
+ CHIP_FAMILY_PALM,
+ CHIP_FAMILY_BARTS,
+ CHIP_FAMILY_TURKS,
+ CHIP_FAMILY_CAICOS,
+ CHIP_FAMILY_CAYMAN,
+ CHIP_FAMILY_LAST
+} RADEONChipFamily;
+
+/* Top-level context: DRM handle, libdrm-radeon managers and accelerator state. */
+struct radeon {
+ /* DRM file descriptor passed to radeon_init() */
+ int fd;
+ struct radeon_bo *vb;
+ struct radeon_accel_state accel_state;
+ RADEONChipFamily ChipFamily;
+
+ /* created in radeon_init(), destroyed in radeon_fini() */
+ struct radeon_cs_manager *csm;
+ struct radeon_bo_manager *bufmgr;
+ struct radeon_cs *cs;
+
+ /* from DRM_RADEON_GEM_INFO: vram_visible and gart_size respectively;
+    used as command-stream domain limits */
+ uint64_t vram_size;
+ uint64_t gart_size;
+};
+
+
+
+/*
+ * Raster-operation codes; each value occupies bits 16-23 of the word.
+ * RADEON_ROP3_D appears twice below with the same value, which is a benign
+ * redefinition.
+ */
+# define RADEON_ROP3_ZERO 0x00000000
+# define RADEON_ROP3_DSa 0x00880000
+# define RADEON_ROP3_SDna 0x00440000
+# define RADEON_ROP3_S 0x00cc0000
+# define RADEON_ROP3_DSna 0x00220000
+# define RADEON_ROP3_D 0x00aa0000
+# define RADEON_ROP3_DSx 0x00660000
+# define RADEON_ROP3_DSo 0x00ee0000
+# define RADEON_ROP3_DSon 0x00110000
+# define RADEON_ROP3_DSxn 0x00990000
+# define RADEON_ROP3_Dn 0x00550000
+# define RADEON_ROP3_SDno 0x00dd0000
+# define RADEON_ROP3_Sn 0x00330000
+# define RADEON_ROP3_DSno 0x00bb0000
+# define RADEON_ROP3_DSan 0x00770000
+# define RADEON_ROP3_ONE 0x00ff0000
+# define RADEON_ROP3_DPa 0x00a00000
+# define RADEON_ROP3_PDna 0x00500000
+# define RADEON_ROP3_P 0x00f00000
+# define RADEON_ROP3_DPna 0x000a0000
+# define RADEON_ROP3_D 0x00aa0000
+# define RADEON_ROP3_DPx 0x005a0000
+# define RADEON_ROP3_DPo 0x00fa0000
+# define RADEON_ROP3_DPon 0x00050000
+# define RADEON_ROP3_PDxn 0x00a50000
+# define RADEON_ROP3_PDno 0x00f50000
+# define RADEON_ROP3_Pn 0x000f0000
+# define RADEON_ROP3_DPno 0x00af0000
+# define RADEON_ROP3_DPan 0x005f0000
+
+
+/*
+ * Command-processor packet type tags, held in the top two bits of a header
+ * dword. Types 1 and 2 are defined here as well because CP_PACKET1() and
+ * CP_PACKET2() expand to them; the guards keep an existing definition from
+ * another header in charge.
+ */
+#ifndef RADEON_CP_PACKET0
+#define RADEON_CP_PACKET0 0x00000000
+#endif
+#ifndef RADEON_CP_PACKET1
+#define RADEON_CP_PACKET1 0x40000000
+#endif
+#ifndef RADEON_CP_PACKET2
+#define RADEON_CP_PACKET2 0x80000000
+#endif
+#ifndef RADEON_CP_PACKET3
+#define RADEON_CP_PACKET3 0xC0000000
+#endif
+
+/* Type-0 header: register byte offset `reg` as a dword index, `n` in bits 16 and up. */
+#define CP_PACKET0(reg, n) \
+    (RADEON_CP_PACKET0 | ((n) << 16) | ((reg) >> 2))
+/* Type-1 header: two register dword indices, `reg1` placed from bit 11. */
+#define CP_PACKET1(reg0, reg1) \
+    (RADEON_CP_PACKET1 | (((reg1) >> 2) << 11) | ((reg0) >> 2))
+/* Type-2 header: the bare type tag. */
+#define CP_PACKET2() \
+    (RADEON_CP_PACKET2)
+/* Type-3 header: opcode bits `pkt` used as given, `n` in bits 16 and up. */
+#define CP_PACKET3(pkt, n) \
+    (RADEON_CP_PACKET3 | (pkt) | ((n) << 16))
+
+/* printf-style diagnostic hook; defined in radeondemo.c */
+extern void ErrorF(const char *f, ...);
+
+/* Begin a packet of n dwords; file/func/line are passed on to radeon_cs_begin(). */
+void radeon_ddx_cs_start(struct radeon *radeon,
+ int n, const char *file,
+ const char *func, int line);
+/* Round x up to a multiple of bytes; the mask arithmetic requires bytes to be
+   a power of two. */
+#define RADEON_ALIGN(x,bytes) (((x) + ((bytes) - 1)) & ~((bytes) - 1))
+/* True once more than 15 * 1024 dwords are queued; radeon_init() creates the
+   stream with 16384 dwords. */
+#define CS_FULL(cs) ((cs)->cdw > 15 * 1024)
+
+#include "radeon_vbo.h"
+
+
+/* Defined in radeondemo.c. */
+void radeon_cs_flush_indirect(struct radeon *radeon);
+int radeon_cp_start(struct radeon *radeon);
+/* Shader setup, called by radeon_init(); the definitions are not part of
+   radeondemo.c. */
+bool EVERGREENAllocShaders(struct radeon *radeon);
+bool EVERGREENLoadShaders(struct radeon *radeon);
+
+/* Installed as accel_state.finish_op by radeon_init(). */
+void evergreen_finish_op(struct radeon *radeon, int vtx_size);
+#endif
diff --git a/simple_list.h b/simple_list.h
new file mode 100644
index 0000000..ff7f888
--- /dev/null
+++ b/simple_list.h
@@ -0,0 +1,202 @@
+/**
+ * \file simple_list.h
+ * Simple macros for type-safe, intrusive lists.
+ *
+ * Intended to work with a list sentinal which is created as an empty
+ * list. Insert & delete are O(1).
+ *
+ * \author
+ * (C) 1997, Keith Whitwell
+ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version: 3.5
+ *
+ * Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef _SIMPLE_LIST_H
+#define _SIMPLE_LIST_H
+
+/* Minimal list node. The macros below only need `next` and `prev` members,
+   so any struct providing both (such as struct radeon_dma_bo) can be linked. */
+struct simple_node {
+ struct simple_node *next;
+ struct simple_node *prev;
+};
+
+/**
+ * Remove an element from list.
+ *
+ * The neighbours are linked to each other around \p elem; the element's own
+ * next/prev pointers are left unchanged. \p elem is expanded four times, so
+ * it must be free of side effects.
+ *
+ * \param elem element to remove.
+ */
+#define remove_from_list(elem) \
+do { \
+ (elem)->next->prev = (elem)->prev; \
+ (elem)->prev->next = (elem)->next; \
+} while (0)
+
+/**
+ * Insert an element to the list head.
+ *
+ * Both arguments are expanded several times and must therefore be free of
+ * side effects; every use is parenthesised so that any pointer expression
+ * can be passed.
+ *
+ * \param list list (sentinel element).
+ * \param elem element to insert.
+ */
+#define insert_at_head(list, elem) \
+do { \
+    (elem)->prev = (list); \
+    (elem)->next = (list)->next; \
+    (list)->next->prev = (elem); \
+    (list)->next = (elem); \
+} while(0)
+
+/**
+ * Insert an element to the list tail.
+ *
+ * Both arguments are expanded several times and must therefore be free of
+ * side effects; every use is parenthesised so that any pointer expression
+ * can be passed.
+ *
+ * \param list list (sentinel element).
+ * \param elem element to insert.
+ */
+#define insert_at_tail(list, elem) \
+do { \
+    (elem)->next = (list); \
+    (elem)->prev = (list)->prev; \
+    (list)->prev->next = (elem); \
+    (list)->prev = (elem); \
+} while(0)
+
+/**
+ * Move an element to the list head.
+ *
+ * Expands to remove_from_list() followed by insert_at_head(), so both
+ * arguments are evaluated several times.
+ *
+ * \param list list.
+ * \param elem element to move.
+ */
+#define move_to_head(list, elem) \
+do { \
+ remove_from_list(elem); \
+ insert_at_head(list, elem); \
+} while (0)
+
+/**
+ * Move an element to the list tail.
+ *
+ * Expands to remove_from_list() followed by insert_at_tail(), so both
+ * arguments are evaluated several times.
+ *
+ * \param list list.
+ * \param elem element to move.
+ */
+#define move_to_tail(list, elem) \
+do { \
+ remove_from_list(elem); \
+ insert_at_tail(list, elem); \
+} while (0)
+
+/**
+ * Make a list empty.
+ *
+ * Points the sentinel's next and prev at the sentinel itself. Elements that
+ * were linked before are not touched or freed.
+ *
+ * \param sentinal list (sentinel element).
+ */
+#define make_empty_list(sentinal) \
+do { \
+    (sentinal)->next = (sentinal); \
+    (sentinal)->prev = (sentinal); \
+} while (0)
+
+/**
+ * Get list first element.
+ *
+ * On an empty list this is the sentinel itself.
+ *
+ * \param list list.
+ *
+ * \return pointer to first element.
+ */
+#define first_elem(list) ((list)->next)
+
+/**
+ * Get list last element.
+ *
+ * On an empty list this is the sentinel itself.
+ *
+ * \param list list.
+ *
+ * \return pointer to last element.
+ */
+#define last_elem(list) ((list)->prev)
+
+/**
+ * Get next element.
+ *
+ * \param elem element.
+ *
+ * \return pointer to next element.
+ */
+#define next_elem(elem) ((elem)->next)
+
+/**
+ * Get previous element.
+ *
+ * \param elem element.
+ *
+ * \return pointer to previous element.
+ */
+#define prev_elem(elem) ((elem)->prev)
+
+/**
+ * Test whether element is at end of the list.
+ *
+ * The comparison is against the sentinel itself: the result is non-zero once
+ * a traversal has stepped past the last element and reached \p list again,
+ * not when \p elem is the last real element.
+ *
+ * \param list list.
+ * \param elem element.
+ *
+ * \return non-zero if element is at end of list, or zero otherwise.
+ */
+#define at_end(list, elem) ((elem) == (list))
+
+/**
+ * Test if a list is empty.
+ *
+ * \param list list.
+ *
+ * \return non-zero if list empty, or zero otherwise.
+ */
+#define is_empty_list(list) ((list)->next == (list))
+
+/**
+ * Walk through the elements of a list.
+ *
+ * The current element must not be unlinked inside the loop body; use
+ * #foreach_s for that. Every use of the arguments is parenthesised so that
+ * any pointer expression can be passed as \p list.
+ *
+ * \param ptr pointer to the current element.
+ * \param list list.
+ *
+ * \note It should be followed by a { } block or a single statement, as in a \c
+ * for loop.
+ */
+#define foreach(ptr, list) \
+    for ((ptr) = (list)->next; (ptr) != (list); (ptr) = (ptr)->next)
+
+/**
+ * Walk through the elements of a list.
+ *
+ * Same as #foreach but lets you unlink the current value during a list
+ * traversal. Useful for freeing a list, element by element. The successor is
+ * saved in \p t before the body runs. Every use of the arguments is
+ * parenthesised so that any pointer expression can be passed as \p list.
+ *
+ * \param ptr pointer to the current element.
+ * \param t temporary pointer.
+ * \param list list.
+ *
+ * \note It should be followed by a { } block or a single statement, as in a \c
+ * for loop.
+ */
+#define foreach_s(ptr, t, list) \
+    for ((ptr) = (list)->next, (t) = (ptr)->next; \
+         (list) != (ptr); \
+         (ptr) = (t), (t) = (t)->next)
+
+#endif