summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatthias Hopf <mhopf@suse.de>2008-12-29 21:07:55 +0100
committerMatthias Hopf <mhopf@suse.de>2008-12-29 21:07:55 +0100
commitb1f4330a57eb45bd1f9659c9a2b170ef18a20233 (patch)
tree1880d4b0458a550ccfb50fb8e5f645561211fc52
Initial public commit.
-rw-r--r--.gitignore13
-rw-r--r--Makefile72
-rw-r--r--README117
-rwxr-xr-xcalc.sh32
-rw-r--r--convert_shader.c1369
-rw-r--r--r600_basic.c82
-rw-r--r--r600_broken.c509
-rw-r--r--r600_demo.c725
-rw-r--r--r600_emit.h217
-rw-r--r--r600_exa.c943
-rw-r--r--r600_hwapi.h131
-rw-r--r--r600_init.c1024
-rw-r--r--r600_init.h71
-rw-r--r--r600_lib.c683
-rw-r--r--r600_lib.h140
-rw-r--r--r600_pm4.c5699
-rw-r--r--r600_reg.h126
-rw-r--r--r600_reg_auto_r6xx.h3087
-rw-r--r--r600_reg_r6xx.h492
-rw-r--r--r600_reg_r7xx.h149
-rw-r--r--r600_shader.h347
-rw-r--r--r600_state.h194
-rw-r--r--r600_texture.c407
-rw-r--r--r600_triangles.c1348
-rw-r--r--radeon_drm.h755
25 files changed, 18732 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..7313675
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,13 @@
+r600_demo
+convert_shader
+*.o
+*.lo
+*.la
+.deps
+.libs
+*~
+*.bak
+*.swp
+R6xx_3D.txt
+R6xx_3D.pdf
+TAGS
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..edacfd4
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,72 @@
+# Version information passed to the compiler as string macros.  The
+# back-quoted git command is left for the shell to expand at compile time.
+VERSION_FLAGS=-DPACKAGE=\"r600_demo\" -DVERSION=\"`git rev-parse --short HEAD`\"
+
+INCLUDES=-I/usr/include/drm
+LIBS=-ldrm
+# Project defaults first; a CFLAGS value inherited from the environment is
+# appended after them.
+CFLAGS:=-Wall -O0 -g $(INCLUDES) $(VERSION_FLAGS) $(CFLAGS)
+CC=gcc
+CFILES=r600_demo.c r600_lib.c r600_basic.c r600_init.c r600_triangles.c r600_texture.c r600_pm4.c r600_exa.c
+
+# None of these targets creates a file of the same name.
+.PHONY: all clean depend tags dump fulldump screenshot tarball
+
+all: r600_demo convert_shader
+
+convert_shader: convert_shader.c
+	$(CC) $< -o $@
+
+# $(LIBS) is also a prerequisite: make resolves -lNAME prerequisites through
+# its library search.
+r600_demo: $(CFILES:.c=.o) $(LIBS)
+	$(CC) $(CFILES:.c=.o) $(LIBS) -lm -o $@
+
+r600_reg.h: r600_reg_auto_r6xx.h
+
+# Remove objects, editor backups and the two programs built by "all".
+clean:
+	rm -f *.o *~ *.bak r600_demo convert_shader
+
+depend:
+	makedepend -Y *.[ch]
+
+tags:
+	etags --lang=c --regex='/[ \t]*\([a-z0-9_]+\)[\t ]*=.*/\1/i' *.[ch]
+
+# Dump the known register ranges; the sed call keeps only lines containing " := ".
+dump:
+	./r600_demo r 0-159c 15b4-15ec 2100-3f28 3f34-3ffc 8000-c14c >register.dump
+	./r600_demo "" 28000-28e7c 30000-31ffc 38000-3effc >>register.dump
+	sed -i -e '/ := /p;d' register.dump
+
+fulldump:
+	# Just spare UCODE upload, ranges known to lock something up, and
+	# large ranges known to be unused.
+	# Microcode: 3f2c-3f30, c154-c160
+	# Locking up bus: f840-fffc (full range not tested)
+	# Unused: f800-f840 10000-27ffc
+	./r600_demo r 0-3f28 3f34-c150 c164-f83c 28000-3fffc >register.dump
+	sed -i -e '/ := /p;d' register.dump
+
+screenshot:
+	import -window root -crop 260x260+0+0 png:screenshot.png
+
+# Uses "git <command>" rather than the dashed "git-<command>" form, which
+# current git installations no longer put on PATH.
+tarball:
+	tar -cvzf ../r600_demo_git-`git rev-parse --short HEAD`_`date +%Y-%m-%d`.tgz -C .. `git ls-tree -r --name-only HEAD | sed 's|^|r600_demo/|'`
+
+# DO NOT DELETE
+
+r600_basic.o: r600_reg.h r600_reg_auto_r6xx.h r600_reg_r6xx.h r600_reg_r7xx.h
+r600_basic.o: r600_hwapi.h r600_emit.h r600_lib.h
+r600_broken.o: r600_reg.h r600_reg_auto_r6xx.h r600_reg_r6xx.h
+r600_broken.o: r600_reg_r7xx.h r600_lib.h r600_shader.h radeon_drm.h
+r600_demo.o: radeon_drm.h r600_reg.h r600_reg_auto_r6xx.h r600_reg_r6xx.h
+r600_demo.o: r600_reg_r7xx.h r600_lib.h r600_hwapi.h
+r600_exa.o: r600_reg.h r600_reg_auto_r6xx.h r600_reg_r6xx.h r600_reg_r7xx.h
+r600_exa.o: r600_hwapi.h r600_emit.h r600_lib.h r600_state.h r600_init.h
+r600_exa.o: r600_shader.h
+r600_init.o: r600_reg.h r600_reg_auto_r6xx.h r600_reg_r6xx.h r600_reg_r7xx.h
+r600_init.o: r600_hwapi.h r600_emit.h r600_lib.h r600_state.h
+r600_lib.o: r600_reg.h r600_reg_auto_r6xx.h r600_reg_r6xx.h r600_reg_r7xx.h
+r600_lib.o: r600_hwapi.h r600_emit.h r600_lib.h r600_shader.h radeon_drm.h
+r600_pm4.o: r600_reg.h r600_reg_auto_r6xx.h r600_reg_r6xx.h r600_reg_r7xx.h
+r600_pm4.o: r600_hwapi.h r600_emit.h r600_lib.h r600_state.h r600_init.h
+r600_pm4.o: r600_shader.h
+r600_reg.o: r600_reg_auto_r6xx.h r600_reg_r6xx.h r600_reg_r7xx.h
+r600_texture.o: r600_reg.h r600_reg_auto_r6xx.h r600_reg_r6xx.h
+r600_texture.o: r600_reg_r7xx.h r600_hwapi.h r600_emit.h r600_lib.h
+r600_texture.o: r600_state.h r600_init.h r600_shader.h
+r600_triangles.o: r600_reg.h r600_reg_auto_r6xx.h r600_reg_r6xx.h
+r600_triangles.o: r600_reg_r7xx.h r600_hwapi.h r600_emit.h r600_lib.h
+r600_triangles.o: r600_state.h r600_init.h r600_shader.h
diff --git a/README b/README
new file mode 100644
index 0000000..b93803d
--- /dev/null
+++ b/README
@@ -0,0 +1,117 @@
+This source code may damage your hardware.
+It is *UNTESTED* and *BROKEN* !
+Or, at least, some of it is.
+
+
+
+R600_DEMO
+
+ DRM/DRI exercise utility
+
+ This is a bringup utility for learning how to program r6xx cards. It
+ has no value for you if you are not a hardware driver hacker. Except
+ for that it (hopefully) will draw a single triangle for you :-P
+
+ Authors:
+ Matthias Hopf <mhopf@suse.de>
+ Alexander Deucher <Alexander.Deucher@amd.com>
+
+ The original skeleton for r600_demo was r300_demo, but there is almost
+ no code (literally) left from those days.
+
+ The r600_demo code is provided without any warranty and in
+ understanding that direct access to the hardware can lead to permanent
+ damage to the hardware (or even damage to the user - say from locked up
+ laser displays).
+ The software is meant for experimentation and thus one should not
+ expect it to be checked for compatibility or suitability for any
+ purpose.
+
+
+STATE
+
+ - DRM state dump ok
+ DRM setup works - DRM shows some issues, still, GART
+ texture space has a wrong GPU address assigned, etc.
+ - GPU state dump ok
+ - CP setup + test '.' ok
+ - CPU based clear screen 'c' ok
+ - More extensive CP tests 'p' ok
+ - PM4 dump tests 'w' 'W' 'b' ok
+ - Triangle tests 't' 'T' ok
+ - Texturing tests 'q' ok
+ - GPU reset 'r' ok
+ - EXA tests 'e' 'E' ok
+ - Register dump <range> ok
+ - Register setting <reg>=<val> ok
+ - Various test 'x' unknown
+
+ Tested on R600, RV610, RV620, RV630, RV635, RV670, RV710, RV730, RV770
+
+
+SETUP:
+
+ Requirements:
+
+ - r600_demo from git://anongit.freedesktop.org/git/mesa/r600_demo
+
+ - An r6xx or r7xx card, testing happens mostly on
+ RV610, RV670, and RV770. R600 is probably the least supported.
+
+ - Install + configure radeonhd:
+ Get the latest driver (1.2.4 is good enough). Explicitly enable
+ DRI and disable 2D acceleration:
+
+ Option "DRI"
+ Option "AccelMethod" "none"
+
+ - Install DRM:
+ Get and install the latest DRM.
+
+
+RUNNING:
+
+ - Run the Xserver. No additional clients needed (r600_demo is not an X
+ client, so no server reset occurs).
+ - Call r600_demo on the same machine as root(!).
+
+ ./r600_demo
+
+ This will print out the usage, including all options.
+
+ ./r600_demo ""
+
+ This will print out the state of DRM and the graphics card
+ (busy flags + ring and buffer states). Add '-v' to get a list of all
+ mapped memory regions and a more verbose state output.
+
+ ./r600_demo pc
+
+ Will test the CP, and clear the screen.
+
+ ./r600_demo t
+
+ Draws two Gouraud shaded triangles.
+
+ ./r600_demo r
+
+ This resets the engine. "./r600_demo rct" is a typical call, because
+ the 't' and 'q' tests do not invalidate the shaders correctly yet.
+
+ ./r600_demo rcqE
+
+ Draws a textured quad, and then invokes the EXA copy test, which
+ copies the top left corner 3 times.
+
+ ./r600_demo "" 30000-3001c
+
+ Register dump: prints out the first two ALU constants
+ (SQ_ALU_CONSTANT0_0 - SQ_ALU_CONSTANT3_1). Any number of single
+ registers and ranges allowed.
+ You can lock up the whole machine when accessing non-existent
+ register space, so beware.
+
+ make dump
+
+ Create a register dump of known registers.
+
diff --git a/calc.sh b/calc.sh
new file mode 100755
index 0000000..b6f8a01
--- /dev/null
+++ b/calc.sh
@@ -0,0 +1,32 @@
+#!/bin/sh
+tmp=/tmp/test_$$
+
+cat >$tmp.c << EOS
+#include <stdio.h>
+#include "r600_reg.h"
+int main (int argc, char *argv[]) {
+long val =
+EOS
+
+if [ "x$1" = x ] ; then
+ cat >>$tmp.c
+else
+ echo "$@" >>$tmp.c
+fi
+if [ "x`sed -e '$!d;s/.*\(.\)$/\1/' $tmp.c`" != "x;" ] ; then
+ echo ";" >>$tmp.c
+fi
+
+cat >>$tmp.c <<EOE
+printf (" = %ld = 0x%lx\n", val, val);
+return 0;
+}
+EOE
+
+if gcc -I. -o $tmp.out $tmp.c ; then
+ $tmp.out
+else
+ cat $tmp.c
+fi
+
+rm -f $tmp.c $tmp.out
diff --git a/convert_shader.c b/convert_shader.c
new file mode 100644
index 0000000..d9da913
--- /dev/null
+++ b/convert_shader.c
@@ -0,0 +1,1369 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdint.h>
+
+const char *rel[2] = { /* names printed for the 1-bit xxx_REL fields; index is the decoded bit */
+ "ABSOLUTE",
+ "RELATIVE"
+};
+
+const char *im[5] = { /* INDEX_MODE names. NOTE: alu_dword0() extracts the field with a 3-bit mask (0..7) but only 5 names exist */
+ "SQ_INDEX_AR_X",
+ "SQ_INDEX_AR_Y",
+ "SQ_INDEX_AR_Z",
+ "SQ_INDEX_AR_W",
+ "SQ_INDEX_LOOP",
+};
+
+const char *ps[4] = { /* PRED_SEL names, indexed by the 2-bit field decoded in alu_dword0() */
+ "SQ_PRED_SEL_OFF",
+ "Reserved",
+ "SQ_PRED_SEL_ZERO",
+ "SQ_PRED_SEL_ONE"
+};
+
+const char *elem[4] = { /* channel names for the 2-bit SRCn_ELEM and DST_ELEM fields */
+ "ELEM_X",
+ "ELEM_Y",
+ "ELEM_Z",
+ "ELEM_W",
+};
+
+const char *omod[4] = { /* output-modifier names for the 2-bit OMOD field printed by alu_dword1_op2() */
+ "SQ_ALU_OMOD_OFF",
+ "SQ_ALU_OMOD_M2",
+ "SQ_ALU_OMOD_M4",
+ "SQ_ALU_OMOD_D2",
+};
+
+const char *bs[6] = { /* BANK_SWIZZLE names. NOTE: the ALU decoders extract the field with a 3-bit mask (0..7) but only 6 names exist */
+ "SQ_ALU_VEC_012",
+ "SQ_ALU_VEC_021",
+ "SQ_ALU_VEC_120",
+ "SQ_ALU_VEC_102",
+ "SQ_ALU_VEC_201",
+ "SQ_ALU_VEC_210",
+};
+
+const char *op2[122] = { /* ALU_INST names for two-operand ALU instructions; index is the opcode (see the per-entry numbers) */
+ "SQ_OP2_INST_ADD", //0
+ "SQ_OP2_INST_MUL", //1
+ "SQ_OP2_INST_MUL_IEEE", //2
+ "SQ_OP2_INST_MAX", //3
+ "SQ_OP2_INST_MIN", //4
+ "SQ_OP2_INST_MAX_DX10", //5
+ "SQ_OP2_INST_MIN_DX10", //6
+ "reserved", //7
+ "SQ_OP2_INST_SETE", //8
+ "SQ_OP2_INST_SETGT", //9
+ "SQ_OP2_INST_SETGE", //10
+ "SQ_OP2_INST_SETNE", //11
+ "SQ_OP2_INST_SETE_DX10", //12
+ "SQ_OP2_INST_SETGT_DX10", //13
+ "SQ_OP2_INST_SETGE_DX10", //14
+ "SQ_OP2_INST_SETNE_DX10", //15
+ "SQ_OP2_INST_FRACT", //16
+ "SQ_OP2_INST_TRUNC", //17
+ "SQ_OP2_INST_CEIL", //18
+ "SQ_OP2_INST_RNDNE", //19
+ "SQ_OP2_INST_FLOOR", //20
+ "SQ_OP2_INST_MOVA", //21
+ "SQ_OP2_INST_MOVA_FLOOR", //22
+ "reserved", //23
+ "SQ_OP2_INST_MOVA_INT", //24
+ "SQ_OP2_INST_MOV", //25
+ "SQ_OP2_INST_NOP", //26
+ "reserved", //27
+ "reserved", //28
+ "reserved", //29
+ "SQ_OP2_INST_PRED_SETGT_UINT", //30
+ "SQ_OP2_INST_PRED_SETGE_UINT", //31
+ "SQ_OP2_INST_PRED_SETE", //32
+ "SQ_OP2_INST_PRED_SETGT", //33
+ "SQ_OP2_INST_PRED_SETGE", //34
+ "SQ_OP2_INST_PRED_SETNE", //35
+ "SQ_OP2_INST_PRED_SET_INV", //36
+ "SQ_OP2_INST_PRED_SET_POP", //37
+ "SQ_OP2_INST_PRED_SET_CLR", //38
+ "SQ_OP2_INST_PRED_SET_RESTORE", //39
+ "SQ_OP2_INST_PRED_SETE_PUSH", //40
+ "SQ_OP2_INST_PRED_SETGT_PUSH", //41
+ "SQ_OP2_INST_PRED_SETGE_PUSH", //42
+ "SQ_OP2_INST_PRED_SETNE_PUSH", //43
+ "SQ_OP2_INST_KILLE", //44
+ "SQ_OP2_INST_KILLGT", //45
+ "SQ_OP2_INST_KILLGE", //46
+ "SQ_OP2_INST_KILLNE", //47
+ "SQ_OP2_INST_AND_INT", //48
+ "SQ_OP2_INST_OR_INT", //49
+ "SQ_OP2_INST_XOR_INT", //50
+ "SQ_OP2_INST_NOT_INT", //51
+ "SQ_OP2_INST_ADD_INT", //52
+ "SQ_OP2_INST_SUB_INT", //53
+ "SQ_OP2_INST_MAX_INT", //54
+ "SQ_OP2_INST_MIN_INT", //55
+ "SQ_OP2_INST_MAX_UINT", //56
+ "SQ_OP2_INST_MIN_UINT", //57
+ "SQ_OP2_INST_SETE_INT", //58
+ "SQ_OP2_INST_SETGT_INT", //59
+ "SQ_OP2_INST_SETGE_INT", //60
+ "SQ_OP2_INST_SETNE_INT", //61
+ "SQ_OP2_INST_SETGT_UINT", //62
+ "SQ_OP2_INST_SETGE_UINT", //63
+ "SQ_OP2_INST_KILLGT_UINT", //64
+ "SQ_OP2_INST_KILLGE_UINT", //65
+ "SQ_OP2_INST_PRED_SETE_INT", //66
+ "SQ_OP2_INST_PRED_SETGT_INT", //67
+ "SQ_OP2_INST_PRED_SETGE_INT", //68
+ "SQ_OP2_INST_PRED_SETNE_INT", //69
+ "SQ_OP2_INST_PRED_SETLT_INT", //70
+ "SQ_OP2_INST_PRED_SETLE_INT", //71
+ "SQ_OP2_INST_KILLGE_INT", //72
+ "SQ_OP2_INST_KILLNE_INT", //73
+ "SQ_OP2_INST_PRED_SETE_PUSH_INT", //74
+ "SQ_OP2_INST_PRED_SETGT_PUSH_INT", //75
+ "SQ_OP2_INST_PRED_SETGE_PUSH_INT", //76
+ "SQ_OP2_INST_PRED_SETNE_PUSH_INT", //77
+ "SQ_OP2_INST_PRED_SETLT_PUSH_INT", //78
+ "SQ_OP2_INST_PRED_SETLE_PUSH_INT", //79
+ "SQ_OP2_INST_DOT4", //80
+ "SQ_OP2_INST_DOT4_IEEE", //81
+ "SQ_OP2_INST_CUBE", //82
+ "SQ_OP2_INST_MAX4", //83
+ "invalid", //84
+ "invalid", //85
+ "invalid", //86
+ "invalid", //87
+ "invalid", //88
+ "invalid", //89
+ "invalid", //90
+ "invalid", //91
+ "invalid", //92
+ "invalid", //93
+ "invalid", //94
+ "invalid", //95
+ "SQ_OP2_INST_MOVA_GPR_INT", //96
+ "SQ_OP2_INST_EXP_IEEE", //97
+ "SQ_OP2_INST_LOG_CLAMPED", //98
+ "SQ_OP2_INST_LOG_IEEE", //99
+ "SQ_OP2_INST_RECIP_CLAMPED", //100
+ "SQ_OP2_INST_RECIP_FF", //101
+ "SQ_OP2_INST_RECIP_IEEE", //102
+ "SQ_OP2_INST_RECIPSQRT_CLAMPED", //103
+ "SQ_OP2_INST_RECIPSQRT_FF", //104
+ "SQ_OP2_INST_RECIPSQRT_IEEE", //105
+ "SQ_OP2_INST_SQRT_IEEE", //106
+ "SQ_OP2_INST_FLT_TO_INT", //107
+ "SQ_OP2_INST_INT_TO_FLT", //108
+ "SQ_OP2_INST_UINT_TO_FLT", //109
+ "SQ_OP2_INST_SIN", //110
+ "SQ_OP2_INST_COS", //111
+ "SQ_OP2_INST_ASHR_INT", //112
+ "SQ_OP2_INST_LSHR_INT", //113
+ "SQ_OP2_INST_LSHL_INT", //114
+ "SQ_OP2_INST_MULLO_INT", //115
+ "SQ_OP2_INST_MULHI_INT", //116
+ "SQ_OP2_INST_MULLO_UINT", //117
+ "SQ_OP2_INST_MULHI_UINT", //118
+ "SQ_OP2_INST_RECIP_INT", //119
+ "SQ_OP2_INST_RECIP_UINT", //120
+ "SQ_OP2_INST_FLT_TO_UINT", //121
+}; /* NOTE: alu_dword1_op2() extracts the opcode with a 10- or 11-bit mask, which can exceed 121 */
+
+const char *op3[32] = { /* ALU_INST names for three-operand ALU instructions, indexed by the 5-bit opcode decoded in alu_dword1_op3() */
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "reserved",
+ "reserved",
+ "reserved",
+ "reserved",
+ "SQ_OP3_INST_MUL_LIT",
+ "SQ_OP3_INST_MUL_LIT_M2",
+ "SQ_OP3_INST_MUL_LIT_M4",
+ "SQ_OP3_INST_MUL_LIT_D2",
+ "SQ_OP3_INST_MULADD",
+ "SQ_OP3_INST_MULADD_M2",
+ "SQ_OP3_INST_MULADD_M4",
+ "SQ_OP3_INST_MULADD_D2",
+ "SQ_OP3_INST_MULADD_IEEE",
+ "SQ_OP3_INST_MULADD_IEEE_M2",
+ "SQ_OP3_INST_MULADD_IEEE_M4",
+ "SQ_OP3_INST_MULADD_IEEE_D2",
+ "SQ_OP3_INST_CNDE",
+ "SQ_OP3_INST_CNDGT",
+ "SQ_OP3_INST_CNDGE",
+ "reserved",
+ "SQ_OP3_INST_CNDE_INT",
+ "SQ_OP3_INST_CNDGT_INT",
+ "SQ_OP3_INST_CNDGE_INT",
+ "reserved",
+};
+
+const char *cond[4] = { /* COND names, indexed by the 2-bit condition field decoded in cf_dword1() */
+ "SQ_CF_COND_ACTIVE",
+ "SQ_CF_COND_FALSE",
+ "SQ_CF_COND_BOOL",
+ "SQ_CF_COND_NOT_BOOL",
+};
+
+const char *cf_op[25] = { /* CF_INST names. NOTE: cf_dword1() extracts the field with a 7-bit mask (0..127) but only 25 names exist */
+ "SQ_CF_INST_NOP",
+ "SQ_CF_INST_TEX",
+ "SQ_CF_INST_VTX",
+ "SQ_CF_INST_VTX_TC",
+ "SQ_CF_INST_LOOP_START",
+ "SQ_CF_INST_LOOP_END",
+ "SQ_CF_INST_START_DX10",
+ "SQ_CF_INST_START_NO_AL",
+ "SQ_CF_INST_LOOP_CONTINUE",
+ "SQ_CF_INST_LOOP_BREAK",
+ "SQ_CF_INST_JUMP",
+ "SQ_CF_INST_PUSH",
+ "SQ_CF_INST_PUSH_ELSE",
+ "SQ_CF_INST_ELSE",
+ "SQ_CF_INST_POP",
+ "SQ_CF_INST_POP_JUMP",
+ "SQ_CF_INST_POP_PUSH",
+ "SQ_CF_INST_POP_PUSH_ELSE",
+ "SQ_CF_INST_CALL",
+ "SQ_CF_CALL_FS",
+ "SQ_CF_INST_RETURN",
+ "SQ_CF_INST_EMIT_VERTEX",
+ "SQ_CF_INST_EMIT_CUT_VERTEX",
+ "SQ_CF_INST_CUT_VERTEX",
+ "SQ_CF_INST_KILL",
+};
+
+
+const char *cf_alu_op[16] = { /* CF ALU-clause instruction names; presumably indexed by a 4-bit CF_INST field -- TODO confirm against the code that reads this table */
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "SQ_CF_INST_ALU",
+ "SQ_CF_INST_ALU_PUSH_BEFORE",
+ "SQ_CF_INST_ALU_POP_AFTER",
+ "SQ_CF_INST_ALU_POP2_AFTER",
+ "reserved",
+ "SQ_CF_INST_ALU_CONTINUE",
+ "SQ_CF_INST_ALU_BREAK",
+ "SQ_CF_INST_ALU_ELSE_AFTER",
+};
+
+const char *cf_exp_op[41] = {
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "SQ_CF_INST_MEM_STREAM0",
+ "SQ_CF_INST_MEM_STREAM1",
+ "SQ_CF_INST_MEM_STREAM2",
+ "SQ_CF_INST_MEM_STREAM3",
+ "SQ_CF_INST_MEM_SCRATCH",
+ "SQ_CF_INST_MEM_REDUCTION",
+ "SQ_CF_INST_MEM_RING",
+ "SQ_CF_INST_EXPORT",
+ "SQ_CF_INST_EXPORT_DONE",
+};
+
+const char *km[4] = { /* KCACHE_MODE names, indexed by the 2-bit field decoded in cf_alu_dword0() */
+ "SQ_CF_KCACHE_NOP",
+ "SQ_CF_KCACHE_LOCK_1",
+ "SQ_CF_KCACHE_LOCK_2",
+ "SQ_CF_KCACHE_LOCK_LOOP_INDEX",
+};
+
+const char *sel[8] = { /* swizzle-select names for the DST_SEL_x and SRC_SEL_x fields printed by the tex and vtx decoders */
+ "SQ_SEL_X",
+ "SQ_SEL_Y",
+ "SQ_SEL_Z",
+ "SQ_SEL_W",
+ "SQ_SEL_0",
+ "SQ_SEL_1",
+ "reserved",
+ "SQ_SEL_MASK",
+};
+
+const char *cf_exp_type[8] = { /* export TYPE names. NOTE(review): 8 slots are declared but only 4 are initialised, so entries 4..7 are NULL pointers */
+ "SQ_EXPORT_PIXEL",
+ "SQ_EXPORT_POS",
+ "SQ_EXPORT_PARAM",
+ "unused",
+};
+
+const char *cf_mem_type[8] = { /* memory export/import TYPE names. NOTE(review): 8 slots are declared but only 4 are initialised, so entries 4..7 are NULL pointers */
+ "SQ_EXPORT_WRITE",
+ "SQ_EXPORT_WRITE_IND",
+ "SQ_IMPORT_READ",
+ "SQ_IMPORT_READ_IND",
+};
+
+const char *ab_pixel[62] = {
+ "CF_PIXEL_MRT0",
+ "CF_PIXEL_MRT1",
+ "CF_PIXEL_MRT2",
+ "CF_PIXEL_MRT3",
+ "CF_PIXEL_MRT4",
+ "CF_PIXEL_MRT5",
+ "CF_PIXEL_MRT6",
+ "CF_PIXEL_MRT7",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "CF_PIXEL_MRT0_FOG",
+ "CF_PIXEL_MRT1_FOG",
+ "CF_PIXEL_MRT2_FOG",
+ "CF_PIXEL_MRT3_FOG",
+ "CF_PIXEL_MRT4_FOG",
+ "CF_PIXEL_MRT5_FOG",
+ "CF_PIXEL_MRT6_FOG",
+ "CF_PIXEL_MRT7_FOG",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "CF_PIXEL_Z",
+};
+
+const char *ab_pos[64] = {
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "invalid",
+ "CF_POS0",
+ "CF_POS1",
+ "CF_POS2",
+ "CF_POS3",
+};
+
+const char *tex_op[32] = { /* TEX_INST names, indexed by the 5-bit instruction field decoded in tex_dword0() */
+ "SQ_TEX_INST_VTX_FETCH",
+ "SQ_TEX_INST_VTX_SEMANTIC",
+ "reserved",
+ "SQ_TEX_INST_LD",
+ "SQ_TEX_INST_GET_TEXTURE_RESINFO",
+ "SQ_TEX_INST_GET_NUMBER_OF_SAMPLES",
+ "SQ_TEX_INST_GET_LOD",
+ "SQ_TEX_INST_GET_GRADIENTS_H",
+ "SQ_TEX_INST_GET_GRADIENTS_V",
+ "SQ_TEX_INST_GET_LERP",
+ "reserved",
+ "SQ_TEX_INST_SET_GRADIENTS_H",
+ "SQ_TEX_INST_SET_GRADIENTS_V",
+ "SQ_TEX_INST_PASS",
+ "Z set index for array of cubemaps",
+ "reserved",
+ "SQ_TEX_INST_SAMPLE",
+ "SQ_TEX_INST_SAMPLE_L",
+ "SQ_TEX_INST_SAMPLE_LB",
+ "SQ_TEX_INST_SAMPLE_LZ",
+ "SQ_TEX_INST_SAMPLE_G",
+ "SQ_TEX_INST_SAMPLE_G_L",
+ "SQ_TEX_INST_SAMPLE_G_LB",
+ "SQ_TEX_INST_SAMPLE_G_LZ",
+ "SQ_TEX_INST_SAMPLE_C",
+ "SQ_TEX_INST_SAMPLE_C_L",
+ "SQ_TEX_INST_SAMPLE_C_LB",
+ "SQ_TEX_INST_SAMPLE_C_LZ",
+ "SQ_TEX_INST_SAMPLE_C_G",
+ "SQ_TEX_INST_SAMPLE_C_G_L",
+ "SQ_TEX_INST_SAMPLE_C_G_LB",
+ "SQ_TEX_INST_SAMPLE_C_G_LZ",
+};
+
+const char *coord_type[2] = { /* names for the 1-bit COORD_TYPE_x fields decoded in tex_dword1() */
+ "TEX_UNNORMALIZED",
+ "TEX_NORMALIZED",
+};
+
+const char *vtx_op[2] = { /* VTX_INST names. NOTE: vtx_dword0() extracts the field with a 5-bit mask (0..31) but only 2 names exist */
+ "SQ_VTX_INST_FETCH",
+ "SQ_VTX_INST_SEMANTIC",
+};
+
+const char *fetch_type[3] = { /* FETCH_TYPE names. NOTE: vtx_dword0() extracts the field with a 2-bit mask (0..3) but only 3 names exist */
+ "SQ_VTX_FETCH_VERTEX_DATA",
+ "SQ_VTX_FETCH_INSTANCE_DATA",
+ "SQ_VTX_FETCH_NO_INDEX_OFFSET",
+};
+
+const char *num_format_all[3] = { /* NUM_FORMAT_ALL names. NOTE: the vtx_dword1 decoders extract the field with a 2-bit mask (0..3) but only 3 names exist */
+ "SQ_NUM_FORMAT_NORM",
+ "SQ_NUM_FORMAT_INT",
+ "SQ_NUM_FORMAT_SCALED",
+};
+
+const char *format_comp_all[2] = { /* names for the 1-bit FORMAT_COMP_ALL field decoded in the vtx_dword1 decoders */
+ "SQ_FORMAT_COMP_UNSIGNED",
+ "SQ_FORMAT_COMP_SIGNED",
+};
+
+const char *srf_mode_all[2] = { /* names for the 1-bit SRF_MODE_ALL field decoded in the vtx_dword1 decoders */
+ "SRF_MODE_ZERO_CLAMP_MINUS_ONE",
+ "SRF_MODE_NO_ZERO",
+};
+
+const char *endian_swap[3] = { /* ENDIAN_SWAP names. NOTE: vtx_dword2() extracts the field with a 2-bit mask (0..3) but only 3 names exist */
+ "SQ_ENDIAN_NONE",
+ "SQ_ENDIAN_8IN16",
+ "SQ_ENDIAN_8IN32",
+};
+
+/*
+ * Decode the second dword of a three-operand (OP3) ALU instruction and print
+ * it to stdout as an ALU_DWORD1_OP3(...) macro invocation.
+ *
+ * dword1:  raw instruction word.
+ * is_r700: non-zero prints the special SRC2_SEL values (248..255) with an
+ *          "SQ_" prefix, zero prints them without it.
+ *
+ * BANK_SWIZZLE is extracted with a 3-bit mask but bs[] names only six
+ * encodings, so the lookup is range-checked and anything beyond the table is
+ * printed as "invalid" instead of reading past the end of the array.
+ */
+void alu_dword1_op3(uint32_t dword1, int is_r700)
+{
+    /* Name suffixes of the special source selectors, indexed by sel - 248. */
+    static const char *const special_src[8] = {
+        "0", "1", "I_INT", "M_1_INT", "0_5", "LITERAL", "PV", "PS",
+    };
+    const int bs_count = (int)(sizeof(bs) / sizeof(bs[0]));
+    int src2_sel, src2_rel;
+    int src2_elem, src2_neg;
+    int alu_inst;
+    int bank_swizzle, dst_gpr, dst_rel;
+    int dst_elem, clamp;
+
+    src2_sel = (dword1 >> 0) & 0x1ff;
+    src2_rel = (dword1 >> 9) & 0x1;
+    src2_elem = (dword1 >> 10) & 0x3;
+    src2_neg = (dword1 >> 12) & 0x1;
+
+    printf("ALU_DWORD1_OP3(");
+    if (src2_sel >= 248 && src2_sel <= 255)
+        printf("SRC2_SEL(%sALU_SRC_%s),\n",
+               is_r700 ? "SQ_" : "", special_src[src2_sel - 248]);
+    else
+        printf("SRC2_SEL(%d),\n", src2_sel);
+    printf("\tSRC2_REL(%s),\n", rel[src2_rel]);
+    printf("\tSRC2_ELEM(%s),\n", elem[src2_elem]);
+    printf("\tSRC2_NEG(%d),\n", src2_neg);
+
+    alu_inst = (dword1 >> 13) & 0x1f;
+    printf("\tALU_INST(%s),\n", op3[alu_inst]);
+    bank_swizzle = (dword1 >> 18) & 0x7;
+    printf("\tBANK_SWIZZLE(%s),\n",
+           bank_swizzle < bs_count ? bs[bank_swizzle] : "invalid");
+    dst_gpr = (dword1 >> 21) & 0x7f;
+    printf("\tDST_GPR(%d),\n", dst_gpr);
+    dst_rel = (dword1 >> 28) & 0x1;
+    printf("\tDST_REL(%s),\n", rel[dst_rel]);
+    dst_elem = (dword1 >> 29) & 0x3;
+    printf("\tDST_ELEM(%s),\n", elem[dst_elem]);
+    clamp = (dword1 >> 31) & 0x1;
+    printf("\tCLAMP(%d)", clamp);
+    printf("),\n");
+}
+
+/*
+ * Decode the second dword of a two-operand (OP2) ALU instruction and print
+ * it to stdout as an ALU_DWORD1_OP2(adapt->chipset, ...) macro invocation.
+ *
+ * dword1:  raw instruction word.
+ * is_r700: selects the bit layout.  With is_r700 set, FOG_MERGE is printed
+ *          as 0, OMOD starts at bit 5 and ALU_INST is 11 bits from bit 7;
+ *          otherwise FOG_MERGE is bit 5, OMOD starts at bit 6 and ALU_INST
+ *          is 10 bits from bit 8.
+ *
+ * ALU_INST and BANK_SWIZZLE can decode to values beyond the op2[] and bs[]
+ * tables; such values are printed as "invalid" instead of indexing past the
+ * end of the arrays.
+ */
+void alu_dword1_op2(uint32_t dword1, int is_r700)
+{
+    const int op2_count = (int)(sizeof(op2) / sizeof(op2[0]));
+    const int bs_count = (int)(sizeof(bs) / sizeof(bs[0]));
+    int src0_abs, src1_abs;
+    int update_execute_mask, update_pred;
+    int write_mask, fog_merge;
+    int outmod, alu_inst;
+    int bank_swizzle, dst_gpr, dst_rel;
+    int dst_elem, clamp;
+
+    printf("ALU_DWORD1_OP2(");
+    printf("adapt->chipset,\n");
+    src0_abs = (dword1 >> 0) & 0x1;
+    printf("\tSRC0_ABS(%d),\n", src0_abs);
+    src1_abs = (dword1 >> 1) & 0x1;
+    printf("\tSRC1_ABS(%d),\n", src1_abs);
+    update_execute_mask = (dword1 >> 2) & 0x1;
+    printf("\tUPDATE_EXECUTE_MASK(%d),\n", update_execute_mask);
+    update_pred = (dword1 >> 3) & 0x1;
+    printf("\tUPDATE_PRED(%d),\n", update_pred);
+    write_mask = (dword1 >> 4) & 0x1;
+    printf("\tWRITE_MASK(%d),\n", write_mask);
+
+    /* The two layouts differ only in where these three fields live. */
+    if (is_r700) {
+        fog_merge = 0;
+        outmod = (dword1 >> 5) & 0x3;
+        alu_inst = (dword1 >> 7) & 0x7ff;
+    } else {
+        fog_merge = (dword1 >> 5) & 0x1;
+        outmod = (dword1 >> 6) & 0x3;
+        alu_inst = (dword1 >> 8) & 0x3ff;
+    }
+    printf("\tFOG_MERGE(%d),\n", fog_merge);
+    printf("\tOMOD(%s),\n", omod[outmod]);
+    printf("\tALU_INST(%s),\n",
+           alu_inst < op2_count ? op2[alu_inst] : "invalid");
+
+    bank_swizzle = (dword1 >> 18) & 0x7;
+    printf("\tBANK_SWIZZLE(%s),\n",
+           bank_swizzle < bs_count ? bs[bank_swizzle] : "invalid");
+    dst_gpr = (dword1 >> 21) & 0x7f;
+    printf("\tDST_GPR(%d),\n", dst_gpr);
+    dst_rel = (dword1 >> 28) & 0x1;
+    printf("\tDST_REL(%s),\n", rel[dst_rel]);
+    dst_elem = (dword1 >> 29) & 0x3;
+    printf("\tDST_ELEM(%s),\n", elem[dst_elem]);
+    clamp = (dword1 >> 31) & 0x1;
+    printf("\tCLAMP(%d)", clamp);
+    printf("),\n");
+}
+
+/*
+ * Decode the first dword of an ALU instruction and print it to stdout as an
+ * ALU_DWORD0(...) macro invocation.
+ *
+ * dword0:  raw instruction word.
+ * is_r700: accepted for symmetry with the other decoders; the names printed
+ *          here were already identical for both chip families.
+ *
+ * Fixes over the previous version:
+ *  - SRC1_SEL value 252 was printed under the name SRC0_SEL;
+ *  - INDEX_MODE is extracted with a 3-bit mask but im[] names only five
+ *    encodings, so larger values are now printed as "invalid" instead of
+ *    indexing past the end of the array.
+ */
+void alu_dword0(uint32_t dword0, int is_r700)
+{
+    /* Name suffixes of the special source selectors, indexed by sel - 248. */
+    static const char *const special_src[8] = {
+        "0", "1", "I_INT", "M_1_INT", "0_5", "LITERAL", "PV", "PS",
+    };
+    const int im_count = (int)(sizeof(im) / sizeof(im[0]));
+    int src0_sel, src1_sel;
+    int src0_rel, src1_rel;
+    int src0_elem, src1_elem;
+    int src0_neg, src1_neg;
+    int index_mode, pred_sel, last;
+
+    (void)is_r700;
+
+    src0_sel = (dword0 >> 0) & 0x1ff;
+    src1_sel = (dword0 >> 13) & 0x1ff;
+
+    src0_rel = (dword0 >> 9) & 0x1;
+    src1_rel = (dword0 >> 22) & 0x1;
+
+    src0_elem = (dword0 >> 10) & 0x3;
+    src1_elem = (dword0 >> 23) & 0x3;
+
+    src0_neg = (dword0 >> 12) & 0x1;
+    src1_neg = (dword0 >> 25) & 0x1;
+
+    index_mode = (dword0 >> 26) & 0x7;
+    pred_sel = (dword0 >> 29) & 0x3;
+    last = (dword0 >> 31) & 0x1;
+
+    printf("ALU_DWORD0(");
+    if (src0_sel >= 248 && src0_sel <= 255)
+        printf("SRC0_SEL(SQ_ALU_SRC_%s),\n", special_src[src0_sel - 248]);
+    else
+        printf("SRC0_SEL(%d),\n", src0_sel);
+    printf("\tSRC0_REL(%s),\n", rel[src0_rel]);
+    printf("\tSRC0_ELEM(%s),\n", elem[src0_elem]);
+    printf("\tSRC0_NEG(%d),\n", src0_neg);
+
+    if (src1_sel >= 248 && src1_sel <= 255)
+        printf("\tSRC1_SEL(SQ_ALU_SRC_%s),\n", special_src[src1_sel - 248]);
+    else
+        printf("\tSRC1_SEL(%d),\n", src1_sel);
+    printf("\tSRC1_REL(%s),\n", rel[src1_rel]);
+    printf("\tSRC1_ELEM(%s),\n", elem[src1_elem]);
+    printf("\tSRC1_NEG(%d),\n", src1_neg);
+
+    printf("\tINDEX_MODE(%s),\n",
+           index_mode < im_count ? im[index_mode] : "invalid");
+    printf("\tPRED_SEL(%s),\n", ps[pred_sel]);
+    printf("\tLAST(%d)", last);
+    printf("),\n");
+}
+
+/*
+ * Print the first dword of a texture-fetch instruction to stdout as a
+ * TEX_DWORD0(...) macro invocation.  R7xx_ALT_CONST is read from bit 24 only
+ * when is_r700 is set and is printed as 0 otherwise.
+ */
+void tex_dword0(uint32_t dword0, int is_r700)
+{
+    /* Pull every field out of the word first, then emit them in order. */
+    const int tex_inst = (dword0 >> 0) & 0x1f;
+    const int bc_frac_mode = (dword0 >> 5) & 0x1;
+    const int whole_quad = (dword0 >> 7) & 0x1;
+    const int res_id = (dword0 >> 8) & 0xff;
+    const int gpr = (dword0 >> 16) & 0x7f;
+    const int gpr_rel = (dword0 >> 23) & 0x1;
+    const int alt_const = is_r700 ? (int)((dword0 >> 24) & 0x1) : 0;
+
+    printf("TEX_DWORD0(");
+    printf("TEX_INST(%s),\n", tex_op[tex_inst]);
+    printf("\tBC_FRAC_MODE(%d),\n", bc_frac_mode);
+    printf("\tFETCH_WHOLE_QUAD(%d),\n", whole_quad);
+    printf("\tRESOURCE_ID(%d),\n", res_id);
+    printf("\tSRC_GPR(%d),\n", gpr);
+    printf("\tSRC_REL(%s),\n", rel[gpr_rel]);
+    printf("\tR7xx_ALT_CONST(%d)", alt_const);
+    printf("),\n");
+}
+
+/*
+ * Print the second dword of a texture-fetch instruction to stdout as a
+ * TEX_DWORD1(...) macro invocation.  is_r700 is not consulted.
+ */
+void tex_dword1(uint32_t dword1, int is_r700)
+{
+    /* Pull every field out of the word first, then emit them in order. */
+    const int gpr = (dword1 >> 0) & 0x7f;
+    const int gpr_rel = (dword1 >> 7) & 0x1;
+    const int sel_x = (dword1 >> 9) & 0x7;
+    const int sel_y = (dword1 >> 12) & 0x7;
+    const int sel_z = (dword1 >> 15) & 0x7;
+    const int sel_w = (dword1 >> 18) & 0x7;
+    const int bias = (dword1 >> 21) & 0x7f;
+    const int type_x = (dword1 >> 28) & 0x1;
+    const int type_y = (dword1 >> 29) & 0x1;
+    const int type_z = (dword1 >> 30) & 0x1;
+    const int type_w = (dword1 >> 31) & 0x1;
+
+    printf("TEX_DWORD1(");
+    printf("DST_GPR(%d),\n", gpr);
+    printf("\tDST_REL(%s),\n", rel[gpr_rel]);
+    printf("\tDST_SEL_X(%s),\n", sel[sel_x]);
+    printf("\tDST_SEL_Y(%s),\n", sel[sel_y]);
+    printf("\tDST_SEL_Z(%s),\n", sel[sel_z]);
+    printf("\tDST_SEL_W(%s),\n", sel[sel_w]);
+    printf("\tLOD_BIAS(%d),\n", bias);
+    printf("\tCOORD_TYPE_X(%s),\n", coord_type[type_x]);
+    printf("\tCOORD_TYPE_Y(%s),\n", coord_type[type_y]);
+    printf("\tCOORD_TYPE_Z(%s),\n", coord_type[type_z]);
+    printf("\tCOORD_TYPE_W(%s)", coord_type[type_w]);
+    printf("),\n");
+}
+
+/*
+ * Print the third dword of a texture-fetch instruction to stdout as a
+ * TEX_DWORD2(...) macro invocation.  is_r700 is not consulted.
+ */
+void tex_dword2(uint32_t dword2, int is_r700)
+{
+    /* Pull every field out of the word first, then emit them in order. */
+    const int off_x = (dword2 >> 0) & 0x1f;
+    const int off_y = (dword2 >> 5) & 0x1f;
+    const int off_z = (dword2 >> 10) & 0x1f;
+    const int sampler = (dword2 >> 15) & 0x1f;
+    const int sel_x = (dword2 >> 20) & 0x7;
+    const int sel_y = (dword2 >> 23) & 0x7;
+    const int sel_z = (dword2 >> 26) & 0x7;
+    const int sel_w = (dword2 >> 29) & 0x7;
+
+    printf("TEX_DWORD2(");
+    printf("OFFSET_X(%d),\n", off_x);
+    printf("\tOFFSET_Y(%d),\n", off_y);
+    printf("\tOFFSET_Z(%d),\n", off_z);
+    printf("\tSAMPLER_ID(%d),\n", sampler);
+    printf("\tSRC_SEL_X(%s),\n", sel[sel_x]);
+    printf("\tSRC_SEL_Y(%s),\n", sel[sel_y]);
+    printf("\tSRC_SEL_Z(%s),\n", sel[sel_z]);
+    printf("\tSRC_SEL_W(%s)", sel[sel_w]);
+    printf("),\n");
+}
+
+/* Print the padding entry that follows a texture-fetch instruction. */
+void tex_dword_pad()
+{
+    fputs("TEX_DWORD_PAD,\n", stdout);
+}
+
+/*
+ * Decode the first dword of a vertex-fetch instruction and print it to
+ * stdout as a VTX_DWORD0(...) macro invocation.  is_r700 is not consulted.
+ *
+ * VTX_INST (5 bits) and FETCH_TYPE (2 bits) can decode to values beyond the
+ * vtx_op[] and fetch_type[] tables; such values are printed as "invalid"
+ * instead of indexing past the end of the arrays.
+ */
+void vtx_dword0(uint32_t dword0, int is_r700)
+{
+    const int vtx_op_count = (int)(sizeof(vtx_op) / sizeof(vtx_op[0]));
+    const int fetch_type_count =
+        (int)(sizeof(fetch_type) / sizeof(fetch_type[0]));
+    int inst, ft, fwq;
+    int buffer_id, src_gpr;
+    int sr, ssx, mfc;
+
+    printf("VTX_DWORD0(");
+    inst = (dword0 >> 0) & 0x1f;
+    printf("VTX_INST(%s),\n", inst < vtx_op_count ? vtx_op[inst] : "invalid");
+    ft = (dword0 >> 5) & 0x3;
+    printf("\tFETCH_TYPE(%s),\n",
+           ft < fetch_type_count ? fetch_type[ft] : "invalid");
+    fwq = (dword0 >> 7) & 0x1;
+    printf("\tFETCH_WHOLE_QUAD(%d),\n", fwq);
+    buffer_id = (dword0 >> 8) & 0xff;
+    printf("\tBUFFER_ID(%d),\n", buffer_id);
+    src_gpr = (dword0 >> 16) & 0x7f;
+    printf("\tSRC_GPR(%d),\n", src_gpr);
+    sr = (dword0 >> 23) & 0x1;
+    printf("\tSRC_REL(%s),\n", rel[sr]);
+    ssx = (dword0 >> 24) & 0x3;
+    printf("\tSRC_SEL_X(%s),\n", sel[ssx]);
+    /* The printed count is the raw 6-bit field value plus one. */
+    mfc = (dword0 >> 26) & 0x3f;
+    mfc++;
+    printf("\tMEGA_FETCH_COUNT(%d)", mfc);
+    printf("),\n");
+}
+
+/*
+ * Decode the second dword of a vertex-fetch instruction in its SEMANTIC_ID
+ * form and print it to stdout as a VTX_DWORD1_SEM(...) macro invocation.
+ * is_r700 is not consulted.
+ *
+ * NUM_FORMAT_ALL is a 2-bit field but num_format_all[] names only three
+ * encodings; the fourth is printed as "invalid" instead of indexing past the
+ * end of the array.
+ */
+void vtx_dword1_sem(uint32_t dword1, int is_r700)
+{
+    const int nfa_count =
+        (int)(sizeof(num_format_all) / sizeof(num_format_all[0]));
+    int semantic_id;
+    int dsx, dsy, dsz, dsw;
+    int ucf, df, nfa, fca, sma;
+
+    printf("VTX_DWORD1_SEM(");
+    semantic_id = (dword1 >> 0) & 0xff;
+    printf("SEMANTIC_ID(%d),\n", semantic_id);
+    dsx = (dword1 >> 9) & 0x7;
+    printf("\tDST_SEL_X(%s),\n", sel[dsx]);
+    dsy = (dword1 >> 12) & 0x7;
+    printf("\tDST_SEL_Y(%s),\n", sel[dsy]);
+    dsz = (dword1 >> 15) & 0x7;
+    printf("\tDST_SEL_Z(%s),\n", sel[dsz]);
+    dsw = (dword1 >> 18) & 0x7;
+    printf("\tDST_SEL_W(%s),\n", sel[dsw]);
+    ucf = (dword1 >> 21) & 0x1;
+    printf("\tUSE_CONST_FIELDS(%d),\n", ucf);
+    df = (dword1 >> 22) & 0x3f;
+    printf("\tDATA_FORMAT(%d),\n", df);
+    nfa = (dword1 >> 28) & 0x3;
+    printf("\tNUM_FORMAT_ALL(%s),\n",
+           nfa < nfa_count ? num_format_all[nfa] : "invalid");
+    fca = (dword1 >> 30) & 0x1;
+    printf("\tFORMAT_COMP_ALL(%s),\n", format_comp_all[fca]);
+    sma = (dword1 >> 31) & 0x1;
+    printf("\tSRF_MODE_ALL(%s)", srf_mode_all[sma]);
+    printf("),\n");
+}
+
+/*
+ * Decode the second dword of a vertex-fetch instruction in its DST_GPR form
+ * and print it to stdout as a VTX_DWORD1_GPR(...) macro invocation.
+ * is_r700 is not consulted.
+ *
+ * NUM_FORMAT_ALL is a 2-bit field but num_format_all[] names only three
+ * encodings; the fourth is printed as "invalid" instead of indexing past the
+ * end of the array.
+ */
+void vtx_dword1_gpr(uint32_t dword1, int is_r700)
+{
+    const int nfa_count =
+        (int)(sizeof(num_format_all) / sizeof(num_format_all[0]));
+    int dst_gpr, dr;
+    int dsx, dsy, dsz, dsw;
+    int ucf, df, nfa, fca, sma;
+
+    printf("VTX_DWORD1_GPR(");
+    dst_gpr = (dword1 >> 0) & 0x7f;
+    printf("DST_GPR(%d),\n", dst_gpr);
+    dr = (dword1 >> 7) & 0x1;
+    printf("\tDST_REL(%s),\n", rel[dr]);
+    dsx = (dword1 >> 9) & 0x7;
+    printf("\tDST_SEL_X(%s),\n", sel[dsx]);
+    dsy = (dword1 >> 12) & 0x7;
+    printf("\tDST_SEL_Y(%s),\n", sel[dsy]);
+    dsz = (dword1 >> 15) & 0x7;
+    printf("\tDST_SEL_Z(%s),\n", sel[dsz]);
+    dsw = (dword1 >> 18) & 0x7;
+    printf("\tDST_SEL_W(%s),\n", sel[dsw]);
+    ucf = (dword1 >> 21) & 0x1;
+    printf("\tUSE_CONST_FIELDS(%d),\n", ucf);
+    df = (dword1 >> 22) & 0x3f;
+    printf("\tDATA_FORMAT(%d),\n", df);
+    nfa = (dword1 >> 28) & 0x3;
+    printf("\tNUM_FORMAT_ALL(%s),\n",
+           nfa < nfa_count ? num_format_all[nfa] : "invalid");
+    fca = (dword1 >> 30) & 0x1;
+    printf("\tFORMAT_COMP_ALL(%s),\n", format_comp_all[fca]);
+    sma = (dword1 >> 31) & 0x1;
+    printf("\tSRF_MODE_ALL(%s)", srf_mode_all[sma]);
+    printf("),\n");
+}
+
+/*
+ * Decode the third dword of a vertex-fetch instruction and print it to
+ * stdout as a VTX_DWORD2(...) macro invocation.  is_r700 is not consulted.
+ *
+ * Fixes over the previous version:
+ *  - the last argument (MEGA_FETCH) was followed by a ',' before the closing
+ *    parenthesis, unlike every other emitter in this file, which left an
+ *    empty trailing macro argument in the generated text;
+ *  - ENDIAN_SWAP is a 2-bit field but endian_swap[] names only three
+ *    encodings; the fourth is now printed as "invalid" instead of indexing
+ *    past the end of the array.
+ */
+void vtx_dword2(uint32_t dword2, int is_r700)
+{
+    const int es_count = (int)(sizeof(endian_swap) / sizeof(endian_swap[0]));
+    int offset, es;
+    int cbns, mf;
+
+    printf("VTX_DWORD2(");
+    offset = (dword2 >> 0) & 0xffff;
+    printf("OFFSET(%d),\n", offset);
+    es = (dword2 >> 16) & 0x3;
+    printf("\tENDIAN_SWAP(%s),\n", es < es_count ? endian_swap[es] : "invalid");
+    cbns = (dword2 >> 18) & 0x1;
+    printf("\tCONST_BUF_NO_STRIDE(%d),\n", cbns);
+    mf = (dword2 >> 19) & 0x1;
+    printf("\tMEGA_FETCH(%d)", mf);
+    printf("),\n");
+}
+
+/* Emit the padding word that closes every 4-dword fetch instruction. */
+void vtx_dword_pad()
+{
+    fputs("VTX_DWORD_PAD,\n", stdout);
+}
+
+/*
+ * Print the first dword of a plain control-flow instruction as a
+ * CF_DWORD0(ADDR(...)) macro invocation. The whole word is the address.
+ *
+ * dword0:  raw instruction word
+ * is_r700: accepted for signature parity with the other decoders; unused.
+ */
+void cf_dword0(uint32_t dword0, int is_r700)
+{
+    printf("CF_DWORD0(");
+    /* dword0 is unsigned: print it as such so large values stay positive */
+    printf("ADDR(%u)", (unsigned int) dword0);
+    printf("),\n");
+}
+
+/*
+ * Print the second dword of a plain control-flow instruction as a
+ * CF_DWORD1(...) macro invocation.
+ *
+ * The COUNT field is stored minus one in the word; the printed value is the
+ * stored value plus one. is_r700 is not consulted here.
+ */
+void cf_dword1(uint32_t dword1, int is_r700)
+{
+    const int pops       = (dword1 >> 0) & 0x7;
+    const int const_idx  = (dword1 >> 3) & 0x1f;
+    const int cond_idx   = (dword1 >> 8) & 0x3;
+    const int n_insts    = ((dword1 >> 10) & 0x7) + 1;
+    const int calls      = (dword1 >> 13) & 0x3f;
+    const int end_of_pgm = (dword1 >> 21) & 0x1;
+    const int valid_pix  = (dword1 >> 22) & 0x1;
+    const int opcode     = (dword1 >> 23) & 0x7f;
+    const int whole_quad = (dword1 >> 30) & 0x1;
+    const int has_barrier = (dword1 >> 31) & 0x1;
+
+    printf("CF_DWORD1(");
+    printf("POP_COUNT(%d),\n", pops);
+    printf("\tCF_CONST(%d),\n", const_idx);
+    printf("\tCOND(%s),\n", cond[cond_idx]);
+    printf("\tCOUNT(%d),\n", n_insts);
+    printf("\tCALL_COUNT(%d),\n", calls);
+    printf("\tEND_OF_PROGRAM(%d),\n", end_of_pgm);
+    printf("\tVALID_PIXEL_MODE(%d),\n", valid_pix);
+    printf("\tCF_INST(%s),\n", cf_op[opcode]);
+    printf("\tWHOLE_QUAD_MODE(%d),\n", whole_quad);
+    printf("\tBARRIER(%d)", has_barrier);
+    printf("),\n");
+}
+
+/*
+ * Print the first dword of an ALU control-flow instruction as a
+ * CF_ALU_DWORD0(...) macro invocation: a 22-bit clause address followed by
+ * the two kcache bank selectors and the first kcache mode.
+ * is_r700 is not consulted here.
+ */
+void cf_alu_dword0(uint32_t dword0, int is_r700)
+{
+    printf("CF_ALU_DWORD0(");
+    printf("ADDR(%d),\n", dword0 & 0x3fffff);
+
+    const int bank0 = (dword0 >> 22) & 0xf;
+    printf("\tKCACHE_BANK0(%d),\n", bank0);
+
+    const int bank1 = (dword0 >> 26) & 0xf;
+    printf("\tKCACHE_BANK1(%d),\n", bank1);
+
+    const int mode0 = (dword0 >> 30) & 0x3;
+    printf("\tKCACHE_MODE0(%s)", km[mode0]);
+
+    printf("),\n");
+}
+
+/*
+ * Print the second dword of an ALU control-flow instruction as a
+ * CF_ALU_DWORD1(...) macro invocation.
+ *
+ * The COUNT field is stored minus one in the word; the printed value is the
+ * stored value plus one. is_r700 is not consulted here.
+ */
+void cf_alu_dword1(uint32_t dword1, int is_r700)
+{
+    const int mode1      = (dword1 >> 0) & 0x3;
+    const int addr0      = (dword1 >> 2) & 0xff;
+    const int addr1      = (dword1 >> 10) & 0xff;
+    const int n_slots    = ((dword1 >> 18) & 0x7f) + 1;
+    const int waterfall  = (dword1 >> 25) & 0x1;
+    const int opcode     = (dword1 >> 26) & 0xf;
+    const int whole_quad = (dword1 >> 30) & 0x1;
+    const int has_barrier = (dword1 >> 31) & 0x1;
+
+    printf("CF_ALU_DWORD1(");
+    printf("KCACHE_MODE1(%s),\n", km[mode1]);
+    printf("\tKCACHE_ADDR0(%d),\n", addr0);
+    printf("\tKCACHE_ADDR1(%d),\n", addr1);
+    printf("\tCOUNT(%d),\n", n_slots);
+    printf("\tUSES_WATERFALL(%d),\n", waterfall);
+    printf("\tCF_INST(%s),\n", cf_alu_op[opcode]);
+    printf("\tWHOLE_QUAD_MODE(%d),\n", whole_quad);
+    printf("\tBARRIER(%d)", has_barrier);
+    printf("),\n");
+}
+
+/*
+ * Print the first dword of an alloc/import/export control-flow instruction
+ * as a CF_ALLOC_IMP_EXP_DWORD0(...) macro invocation.
+ *
+ * is_mem selects how ARRAY_BASE and TYPE are rendered:
+ *   - non-zero: ARRAY_BASE is numeric, TYPE comes from cf_mem_type[]
+ *   - zero:     TYPE comes from cf_exp_type[]; ARRAY_BASE is symbolic for
+ *               types 0 (ab_pixel[]) and 1 (ab_pos[]), numeric otherwise
+ * is_r700 is not consulted here.
+ */
+void cf_alloc_imp_exp_dword0(uint32_t dword0, int is_mem, int is_r700)
+{
+    const int base     = (dword0 >> 0) & 0x1fff;
+    const int kind     = (dword0 >> 13) & 0x3;
+    const int gpr      = (dword0 >> 15) & 0x7f;
+    const int gpr_rel  = (dword0 >> 22) & 0x1;
+    const int idx_gpr  = (dword0 >> 23) & 0x7f;
+    const int elem_sz  = (dword0 >> 30) & 0x3;
+
+    printf("CF_ALLOC_IMP_EXP_DWORD0(");
+    if (!is_mem) {
+        switch (kind) {
+        case 0:
+            printf("ARRAY_BASE(%s),\n", ab_pixel[base]);
+            break;
+        case 1:
+            printf("ARRAY_BASE(%s),\n", ab_pos[base]);
+            break;
+        default:
+            printf("ARRAY_BASE(%d),\n", base);
+            break;
+        }
+        printf("\tTYPE(%s),\n", cf_exp_type[kind]);
+    } else {
+        printf("ARRAY_BASE(%d),\n", base);
+        printf("\tTYPE(%s),\n", cf_mem_type[kind]);
+    }
+    printf("\tRW_GPR(%d),\n", gpr);
+    printf("\tRW_REL(%s),\n", rel[gpr_rel]);
+    printf("\tINDEX_GPR(%d),\n", idx_gpr);
+    printf("\tELEM_SIZE(%d)", elem_sz);
+    printf("),\n");
+}
+
+/*
+ * Print the second dword of a memory ("buffer" form) alloc/import/export
+ * instruction as a CF_ALLOC_IMP_EXP_DWORD1_BUF(...) macro invocation.
+ *
+ * When is_r700 is set, bit 16 is not decoded and R6xx_ELEM_LOOP(0) is
+ * printed unconditionally.
+ */
+void cf_alloc_imp_exp_dword1_buf(uint32_t dword1, int is_r700)
+{
+    const int size       = (dword1 >> 0) & 0xfff;
+    const int mask       = (dword1 >> 12) & 0xf;
+    const int burst      = (dword1 >> 17) & 0xf;
+    const int end_of_pgm = (dword1 >> 21) & 0x1;
+    const int valid_pix  = (dword1 >> 22) & 0x1;
+    const int opcode     = (dword1 >> 23) & 0x7f;
+    const int whole_quad = (dword1 >> 30) & 0x1;
+    const int has_barrier = (dword1 >> 31) & 0x1;
+
+    printf("CF_ALLOC_IMP_EXP_DWORD1_BUF(");
+    printf("ARRAY_SIZE(%d),\n", size);
+    printf("\tCOMP_MASK(%d),\n", mask);
+    if (!is_r700) {
+        const int loop = (dword1 >> 16) & 0x1;
+        printf("\tR6xx_ELEM_LOOP(%d),\n", loop);
+    } else {
+        printf("\tR6xx_ELEM_LOOP(0),\n");
+    }
+    printf("\tBURST_COUNT(%d),\n", burst);
+    printf("\tEND_OF_PROGRAM(%d),\n", end_of_pgm);
+    printf("\tVALID_PIXEL_MODE(%d),\n", valid_pix);
+    printf("\tCF_INST(%s),\n", cf_exp_op[opcode]);
+    printf("\tWHOLE_QUAD_MODE(%d),\n", whole_quad);
+    printf("\tBARRIER(%d)", has_barrier);
+    printf("),\n");
+}
+
+/*
+ * Print the second dword of an export ("swizzle" form) alloc/import/export
+ * instruction as a CF_ALLOC_IMP_EXP_DWORD1_SWIZ(...) macro invocation.
+ *
+ * NOTE(review): unlike the _BUF decoder this one prints ELEM_LOOP (not
+ * R6xx_ELEM_LOOP) and decodes bit 16 regardless of is_r700 - confirm
+ * against the macro definitions whether that difference is intended.
+ */
+void cf_alloc_imp_exp_dword1_swiz(uint32_t dword1, int is_r700)
+{
+    const int swz_x      = (dword1 >> 0) & 0x7;
+    const int swz_y      = (dword1 >> 3) & 0x7;
+    const int swz_z      = (dword1 >> 6) & 0x7;
+    const int swz_w      = (dword1 >> 9) & 0x7;
+    const int loop       = (dword1 >> 16) & 0x1;
+    const int burst      = (dword1 >> 17) & 0xf;
+    const int end_of_pgm = (dword1 >> 21) & 0x1;
+    const int valid_pix  = (dword1 >> 22) & 0x1;
+    const int opcode     = (dword1 >> 23) & 0x7f;
+    const int whole_quad = (dword1 >> 30) & 0x1;
+    const int has_barrier = (dword1 >> 31) & 0x1;
+
+    printf("CF_ALLOC_IMP_EXP_DWORD1_SWIZ(");
+    printf("SRC_SEL_X(%s),\n", sel[swz_x]);
+    printf("\tSRC_SEL_Y(%s),\n", sel[swz_y]);
+    printf("\tSRC_SEL_Z(%s),\n", sel[swz_z]);
+    printf("\tSRC_SEL_W(%s),\n", sel[swz_w]);
+    printf("\tELEM_LOOP(%d),\n", loop);
+    printf("\tBURST_COUNT(%d),\n", burst);
+    printf("\tEND_OF_PROGRAM(%d),\n", end_of_pgm);
+    printf("\tVALID_PIXEL_MODE(%d),\n", valid_pix);
+    printf("\tCF_INST(%s),\n", cf_exp_op[opcode]);
+    printf("\tWHOLE_QUAD_MODE(%d),\n", whole_quad);
+    printf("\tBARRIER(%d)", has_barrier);
+    printf("),\n");
+}
+
+/*
+ * Return the number of 64-bit literal slots required so far, given one more
+ * source operand. Only a source that actually selects the literal (253)
+ * contributes: channels X/Y live in the first slot, Z/W in the second.
+ */
+static int alu_literal_slots(int src, int elem, int current)
+{
+    int needed;
+
+    if (src != 253)
+        return current;
+    needed = (elem > 1) ? 2 : 1;
+    return (needed > current) ? needed : current;
+}
+
+/*
+ * Decode and print one ALU clause.
+ *
+ * inst:    start of the shader program (array of dwords)
+ * offset:  dword index of the first ALU slot of the clause
+ * count:   number of 64-bit slots in the clause (literal slots included)
+ * is_r700: forwarded to the per-dword decoders
+ *
+ * Each slot is either an ALU instruction (printed through alu_dword0() and
+ * alu_dword1_op2()/alu_dword1_op3()) or a pair of literal constants, which
+ * is printed as raw hex.
+ *
+ * Literal handling:
+ *  - only sources that select the literal (253) are considered; the channel
+ *    of an unrelated source must not inflate the literal count;
+ *  - literals follow the *last* instruction of an instruction group (bit 31
+ *    of dword0), so the requirement is accumulated over the group and only
+ *    applied once the LAST bit is seen.
+ */
+void do_alu_clause(uint32_t *inst, int offset, int count, int is_r700)
+{
+    uint32_t dword0, dword1;
+    int op3;
+    int src0, src1, src2;
+    int elem0, elem1, elem2;
+    int group_slots = 0;        /* literal slots needed by the current group */
+    int inline_const_count = 0; /* literal slots still to be dumped */
+
+    while (count > 0) {
+        dword0 = inst[offset];
+        dword1 = inst[offset + 1];
+        if (inline_const_count) {
+            printf("0x%08x,\n",dword0);
+            printf("0x%08x,\n",dword1);
+            inline_const_count--;
+        } else {
+            src0 = (dword0 >> 0) & 0x1ff;
+            src1 = (dword0 >> 13) & 0x1ff;
+            elem0 = (dword0 >> 10) & 0x3;
+            elem1 = (dword0 >> 23) & 0x3;
+            alu_dword0(dword0, is_r700);
+            // if high 3 bits of inst are 000b, OP2, else OP3
+            op3 = (dword1 >> 15) & 0x7;
+            if (op3) {
+                alu_dword1_op3(dword1, is_r700);
+                src2 = (dword1 >> 0) & 0x1ff;
+                elem2 = (dword1 >> 10) & 0x3;
+            } else {
+                alu_dword1_op2(dword1, is_r700);
+                src2 = 0;
+                elem2 = 0;
+            }
+
+            // inline constants: accumulate per instruction group
+            group_slots = alu_literal_slots(src0, elem0, group_slots);
+            group_slots = alu_literal_slots(src1, elem1, group_slots);
+            group_slots = alu_literal_slots(src2, elem2, group_slots);
+
+            // LAST bit closes the group; its literals come next
+            if ((dword0 >> 31) & 0x1) {
+                inline_const_count = group_slots;
+                group_slots = 0;
+            }
+        }
+        count--;
+        offset += 2;
+    }
+}
+
+/*
+ * Decode and print a texture clause: count instructions of four dwords
+ * each, starting at dword index offset. Three dwords are decoded; the fourth
+ * is emitted as padding without being read.
+ */
+void do_tex_clause(uint32_t *inst, int offset, int count, int is_r700)
+{
+    const uint32_t *slot = inst + offset;
+
+    for (; count; count--, slot += 4) {
+        const uint32_t w0 = slot[0];
+        const uint32_t w1 = slot[1];
+        const uint32_t w2 = slot[2];
+
+        tex_dword0(w0, is_r700);
+        tex_dword1(w1, is_r700);
+        tex_dword2(w2, is_r700);
+        tex_dword_pad();
+    }
+}
+
+/*
+ * Decode and print a vertex-fetch clause: count instructions of four dwords
+ * each, starting at dword index offset.
+ *
+ * The low five bits of the first dword pick the layout of the second dword:
+ * value 1 selects the semantic form, anything else the GPR form.
+ */
+void do_vtx_clause(uint32_t *inst, int offset, int count, int is_r700)
+{
+    const uint32_t *slot = inst + offset;
+
+    for (; count; count--, slot += 4) {
+        const uint32_t w0 = slot[0];
+        const uint32_t w1 = slot[1];
+        const uint32_t w2 = slot[2];
+        const int opcode = (w0 >> 0) & 0x1f;
+
+        vtx_dword0(w0, is_r700);
+        if (opcode != 1)
+            vtx_dword1_gpr(w1, is_r700);
+        else
+            vtx_dword1_sem(w1, is_r700);
+        vtx_dword2(w2, is_r700);
+        vtx_dword_pad();
+    }
+}
+
+/*
+ * Decode and print one control-flow instruction (two dwords at
+ * inst[offset]) together with the clause it points to, if any.
+ *
+ * inst:    start of the shader program (array of dwords)
+ * offset:  dword index of the control-flow instruction
+ * is_r700: non-zero to decode R7xx-only fields
+ *
+ * Returns the END_OF_PROGRAM bit for export/memory and plain CF
+ * instructions; always 0 for ALU clause instructions.
+ */
+static int do_cf_inst(uint32_t *inst, uint32_t offset, int is_r700)
+{
+    uint32_t dword0, dword1;
+    int encoding, addr, count, op;
+    int eop = 0;
+
+    dword0 = inst[offset];
+    dword1 = inst[offset + 1];
+
+    // if high 2 bits of inst are 10b or 11b, ALU,
+    // else if 01b, imp/exp
+    // else if 00b, cf
+    encoding = (dword1 >> 28) & 0x3;
+    if (encoding >= 2) {
+        cf_alu_dword0(dword0, is_r700);
+        cf_alu_dword1(dword1, is_r700);
+        // clause address is in 64-bit slots, inst[] is indexed in dwords
+        addr = (dword0 >> 0) & 0x3fffff;
+        addr *= 2;
+        count = (dword1 >> 18) & 0x7f;
+        count++;
+        do_alu_clause(inst, addr, count, is_r700);
+    } else if (encoding == 1) {
+        op = (dword1 >> 23) & 0x7f;
+        if ((op == 39) || (op == 40)) {
+            // export inst
+            cf_alloc_imp_exp_dword0(dword0, 0, is_r700);
+            cf_alloc_imp_exp_dword1_swiz(dword1, is_r700);
+        } else {
+            // mem inst
+            cf_alloc_imp_exp_dword0(dword0, 1, is_r700);
+            cf_alloc_imp_exp_dword1_buf(dword1, is_r700);
+        }
+        eop = (dword1 >> 21) & 0x1;
+    } else {
+        op = (dword1 >> 23) & 0x7f;
+        addr = dword0;
+        addr *= 2;
+        count = (dword1 >> 10) & 0x7;
+        // R7xx: bit 19 (COUNT_3) is bit 3 of the count, i.e. worth 8
+        if (is_r700)
+            count |= ((dword1 >> 19) & 0x1) << 3;
+        count++;
+        cf_dword0(dword0, is_r700);
+        cf_dword1(dword1, is_r700);
+        if (op == 1)
+            do_tex_clause(inst, addr, count, is_r700);
+        else if ((op == 2) || (op == 3))
+            do_vtx_clause(inst, addr, count, is_r700);
+        eop = (dword1 >> 21) & 0x1;
+    }
+    return eop;
+}
+
+/*
+ * Disassemble the shader program embedded in sp[] to stdout.
+ *
+ * Control-flow instructions are decoded from the start of the array, two
+ * dwords at a time, until one reports END_OF_PROGRAM or the array is
+ * exhausted. Always returns 0.
+ */
+int main () {
+    uint32_t sp[] = {
+        0x00000014,
+        0x00800400,
+        0x00000003,
+        0xA03C0000,
+        0xC0018000,
+        0x94200688,
+        0x10200001,
+        0x004C2810,
+        0x10A00401,
+        0x204C2800,
+        0x11200801,
+        0x404C2800,
+        0x91A00C01,
+        0x60442800,
+        0x10202001,
+        0x004C2800,
+        0x10A02401,
+        0x204C2810,
+        0x11202801,
+        0x404C2800,
+        0x91A02C01,
+        0x60442800,
+        0x10204001,
+        0x004C2800,
+        0x10A04401,
+        0x204C2800,
+        0x11204801,
+        0x404C2810,
+        0x91A04C01,
+        0x60442800,
+        0x10000002,
+        0x00740C90,
+        0x10000402,
+        0x20740C90,
+        0x10000802,
+        0x40740C90,
+        0x90000C02,
+        0x60600C90,
+        0x00000000,
+        0x00000000,
+        0x00000010,
+        0xF00FF001,
+        0x68800000,
+        0xDEADDEAD,
+        0x00000110,
+        0xF01D1E01,
+        0x68808000,
+        0xDEADDEAD,
+    };
+    int is_r700 = 1; //1
+    uint32_t offset = 0;
+    int eop = 0;
+
+    /*
+     * offset counts dwords, so the bound must be the number of dwords in
+     * sp[] (not the number of 64-bit slots); "+ 1" keeps the second dword
+     * of the instruction inside the array.
+     */
+    while (offset + 1 < (sizeof(sp) / sizeof(sp[0]))) {
+        eop = do_cf_inst(sp, offset, is_r700);
+
+        if (eop)
+            break;
+        offset += 2;
+    }
+
+    return 0;
+}
diff --git a/r600_basic.c b/r600_basic.c
new file mode 100644
index 0000000..29f3f3d
--- /dev/null
+++ b/r600_basic.c
@@ -0,0 +1,82 @@
+/*
+ * r600_demo
+ *
+ * Copyright (C) 2008-2009 Matthias Hopf
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Basic tests
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include "r600_reg.h"
+#include "r600_emit.h"
+#include "r600_lib.h"
+
+
+/*
+ * Test for working CP.
+ *
+ * Seeds SCRATCH_REG7 with a known value through a direct register write,
+ * then overwrites it through a PACKET0 command and waits for the new value
+ * to show up. adapt is only handed on to wait_reg().
+ */
+void test_cp (adapter_t *adapt)
+{
+ printf ("\n* CP test: PACK0 scratch reg\n\n");
+
+ /* Test for CP working by simple register writes (CP_PACKET0) */
+ /* Step 1: direct write, so the later change is observable */
+ reg_write32 (SCRATCH_REG7, 0xdeadbeef);
+ wait_reg (adapt, SCRATCH_REG7, 0xdeadbeef, "PACK0: init");
+
+ /* Step 2: same register, new value, this time via a queued packet */
+ pack0 (SCRATCH_REG7, 1);
+ e32 (0xcafebabe);
+ flush_cmds ();
+ wait_reg (adapt, SCRATCH_REG7, 0xcafebabe, "CP: scratch");
+}
+
+
+/*
+ * Test CP + DMA.
+ *
+ * Same scheme as the plain CP test, but two PACKET0 writes are submitted
+ * and checked back to back (0xcafebabe, then 0xfeedface).
+ * NOTE(review): nothing DMA-specific is visible in this function -
+ * presumably the DMA path is exercised inside flush_cmds(); confirm.
+ */
+void test_packets (adapter_t *adapt)
+{
+ printf ("\n* PACK0 scratch reg test\n\n");
+
+ /* Test for CP working by simple register writes (CP_PACKET0) */
+ reg_write32 (SCRATCH_REG7, 0xdeadbeef);
+ wait_reg (adapt, SCRATCH_REG7, 0xdeadbeef, "PACK0: init");
+
+ /* First queued write */
+ pack0 (SCRATCH_REG7, 1);
+ e32 (0xcafebabe);
+ flush_cmds ();
+ wait_reg (adapt, SCRATCH_REG7, 0xcafebabe, "PACK0: scratch");
+ /* Second queued write, to show that consecutive submissions work */
+ pack0 (SCRATCH_REG7, 1);
+ e32 (0xfeedface);
+ flush_cmds ();
+ wait_reg (adapt, SCRATCH_REG7, 0xfeedface, "PACK0: scratch2");
+
+}
+
+/*
+ * Fill the display buffer with the constant 0x00008000 using plain CPU
+ * writes. The number of 32-bit words written is
+ * display_pitch * display_height.
+ */
+void simple_clear (adapter_t* adapt)
+{
+    uint32_t *pixel = adapt->display;
+    uint32_t remaining = adapt->display_pitch * adapt->display_height;
+
+    while (remaining--)
+        *pixel++ = 0x00008000;
+}
+
diff --git a/r600_broken.c b/r600_broken.c
new file mode 100644
index 0000000..6e9933e
--- /dev/null
+++ b/r600_broken.c
@@ -0,0 +1,509 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <math.h>
+#include <assert.h>
+#include "r600_reg.h"
+#include "r600_emit.h"
+#include "r600_lib.h"
+#include "r600_shader.h"
+
+
+/*
+ * Old tests - dysfunctional
+ * to be removed after analysis
+ */
+
+
+/*
+ * Two alternative pairs of trivial_vs / trivial_ps. The "#if 0" branch
+ * (position only, constant color) is compiled out; the "#else" branch
+ * (position + per-vertex color) is the one in use.
+ */
+#if 0
+
+/* Trivial vertex shader: fetch indexed vertex, put it straight to SX (position buffer)
+ * Data layout float x,y,z,w
+ */
+static uint64_t trivial_vs[] = {
+ CF_WORD (CF_INST_VTX, 2 /* vtx */, 1, 0),
+ CF_EXPORT_WORD (CF_INST_EXPORT_DONE, CF_EXPORT_POS, 1, 1, 60, CF_FLAG_BARRIER | CF_FLAG_END_OF_PROGRAM),
+ /* vtx: */
+ VTX_2WORDS_MEGA (0, 0, 0, 1, FORMAT_32_32_32_32_FLOAT, NUM_FORMAT_NORM, 16, 0)
+} ;
+/* Trivial pixel shader: load constant color from cfile(0), put straight to FB */
+static uint64_t trivial_ps[] = {
+ CF_ALU_WORD (CF_INST_ALU, 2 /* alu */, 4, 0),
+ CF_EXPORT_WORD (CF_INST_EXPORT_DONE, CF_EXPORT_PIXEL, 0, 1, 0, CF_FLAG_BARRIER | CF_FLAG_END_OF_PROGRAM),
+ /* alu: */
+ ALU_OP2_WORD (ALU_OP2_MOV, ALU_GPR(0)|ALU_DEST_X, ALU_CFILE(0)|ALU_SRC_X, 0, ALU_VEC_012, 0),
+ ALU_OP2_WORD (ALU_OP2_MOV, ALU_GPR(0)|ALU_DEST_Y, ALU_CFILE(0)|ALU_SRC_Y, 0, ALU_VEC_012, 0),
+ ALU_OP2_WORD (ALU_OP2_MOV, ALU_GPR(0)|ALU_DEST_Z, ALU_CFILE(0)|ALU_SRC_Z, 0, ALU_VEC_012, 0),
+ ALU_OP2_WORD (ALU_OP2_MOV, ALU_GPR(0)|ALU_DEST_W, ALU_CFILE(0)|ALU_SRC_W, 0, ALU_VEC_012, ALU_FLAG_LAST)
+} ;
+
+#else
+
+/* Trivial vertex shader: fetch indexed vertex + color, put it straight to SX (position buffer)
+ * Data layout float x,y,z,w,r,g,b,a
+ */
+static uint64_t trivial_vs[] = {
+ CF_WORD (CF_INST_VTX, 2 /* vtx */, 2, 0),
+ CF_EXPORT_WORD (CF_INST_EXPORT_DONE, CF_EXPORT_POS, 1, 1, 60, CF_FLAG_BARRIER),
+ CF_EXPORT_WORD (CF_INST_EXPORT_DONE, CF_EXPORT_PARAM, 2, 1, 0, CF_FLAG_BARRIER | CF_FLAG_END_OF_PROGRAM),
+ /* vtx: */
+ VTX_2WORDS_MEGA (0, 0, 0, 1, FORMAT_32_32_32_32_FLOAT, NUM_FORMAT_NORM_SIGNED, 32, 0),
+ VTX_2WORDS (0, 16, 0, 2, FORMAT_32_32_32_32_FLOAT, NUM_FORMAT_NORM_SIGNED, 16, 0),
+} ;
+/* Trivial pixel shader: put param(0) straight to FB */
+static uint64_t trivial_ps[] = {
+ CF_EXPORT_WORD (CF_INST_EXPORT_DONE, CF_EXPORT_PIXEL, 0, 1, 0, CF_FLAG_END_OF_PROGRAM),
+} ;
+
+#endif
+
+
+/*
+ * Emit the baseline 3D register state, one hardware block after the other
+ * (VGT, PA, CB, DB, SQ, SPI, SX, misc).
+ *
+ * adapt supplies the chipset id (selects R6xx-only paths), the color and
+ * depth buffer GPU addresses / pitches / heights, and the display GPU
+ * address used as memory export base.
+ *
+ * The only value read back from the hardware is CC_GC_SHADER_PIPE_CONFIG,
+ * from which the number of active QD pipes and SIMDs is derived.
+ *
+ * NOTE(review): ereg/pack0/pack3/e32/efloat are defined elsewhere;
+ * presumably they append to a command stream that still has to be flushed
+ * by the caller - confirm. Many values carry "?" remarks from the original
+ * author and were not verified.
+ */
+void init_3d(adapter_t *adapt)
+{
+ int i, num_qds, num_simds;
+ uint32_t reg;
+ printf ("\n* 3D Init\n\n");
+
+ /*
+ * Mostly according to 15.1.4 Vertex Input Control, some regs according to TCore
+ */
+
+ if (adapt->chipset <= CHIPSET_RV670) {
+ /* R6xx only (if at all - verify) */
+ /* Get into 3D mode, so that 2D packets trigger reload of buffers */
+ pack3 (IT_START_3D_CMDBUF, 1);
+ e32 (0);
+ }
+#if 0
+ pack3 (IT_ME_INITIALIZE, 6);
+ e32 (0x01);
+ e32 (0x00); /* as in TCore; consider 0x03 instead */
+ e32 (7); /* consider 1 (0 currently in DRM) */
+ e32 (0x00010000); /* 0 currently in DRM */
+ e32 (0);
+ e32 (0);
+ e32 (0x80000000);
+ e32 (0x80000000);
+ e32 (0x80000000);
+ e32 (0x80000000);
+ e32 (0x80000000);
+ e32 (0x80000000);
+#endif
+
+ /* get number of available pipes, needed for a few register inits */
+ reg = reg_read32 (CC_GC_SHADER_PIPE_CONFIG);
+ /* the register flags *inactive* units, hence maximum minus popcount */
+ num_qds = R6XX_MAX_QD_PIPES - count_bits (reg & INACTIVE_QD_PIPES_mask);
+ num_simds = R6XX_MAX_SIMDS - count_bits (reg & INACTIVE_SIMDS_mask);
+ printf ("Number of QD pipes: %d Number of SIMDs: %d\n", num_qds, num_simds);
+
+ /*
+ * VGT (Vertex Grouper Tessellator)
+ */
+ ereg (VGT_VTX_VECT_EJECT_REG, PRIM_COUNT_mask); /* Max number of prims allowed per vertex vector */
+ ereg (VGT_DMA_NUM_INSTANCES, 1);
+ ereg (VGT_PRIMITIVEID_EN, 0); /* Primitive ID generation off */
+ ereg (VGT_MULTI_PRIM_IB_RESET_EN, 0); /* Prim Reset Index off */
+ ereg (VGT_VTX_CNT_EN, 0); /* Auto Index Generation off*/
+ ereg (VGT_GS_MODE, 0); /* Geometry Shader off */
+ ereg (VGT_REUSE_OFF, REUSE_OFF_bit); /* ? */
+ ereg (VGT_MAX_VTX_INDX, 0xffffffff); /* vertex index max clamp - interesting for security */
+ ereg (VGT_MIN_VTX_INDX, 0);
+ ereg (VGT_INDX_OFFSET, 0);
+ /* up to RV670 these two scale with the QD pipe count, later chips use fixed values */
+ if (adapt->chipset <= CHIPSET_RV670) {
+ ereg (VGT_OUT_DEALLOC_CNTL, num_qds*4);
+ ereg (VGT_VERTEX_REUSE_BLOCK_CNTL, num_qds*4 - 2);
+ } else {
+ ereg (VGT_OUT_DEALLOC_CNTL, 16);
+ ereg (VGT_VERTEX_REUSE_BLOCK_CNTL, 14);
+ }
+// ereg (VGT_GROUP_VECT_0_FMT_CNTL, 7); /* Not needed for MODE 0 primitives */
+ ereg (VGT_INDEX_TYPE, 1); /* 32 bit per index if not autoinc */
+
+
+ /*
+ * PA (Primitive Assembler)
+ */
+ pack0 (PA_SC_SCREEN_SCISSOR_TL, 2); /* Screen scissor */
+ e32 (0 | (0 << PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift));
+ e32 (1600 | (1200 << PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift));
+ pack0 (PA_SC_WINDOW_OFFSET, 3); /* Window scissor */
+ e32 (0 | (0 << WINDOW_Y_OFFSET_shift)); /* Window offset */
+ e32 (0 | (0 << PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift));
+ e32 (640 | (480 << PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift));
+ ereg (PA_CL_CLIP_CNTL, 0); /* ZCLIP_NEAR_DISABLE_bit | ZCLIP_FAR_DISABLE_bit; DX_LINEAR_ATTR_CLIP_ENA? */
+ ereg (PA_SU_SC_MODE_CNTL, /* Culling + Poly mode (0:tri 1:dual?) + etc */
+ (0 << POLY_MODE_shift) |
+ (2 << POLYMODE_FRONT_PTYPE_shift) | (2 << POLYMODE_BACK_PTYPE_shift) |
+ VTX_WINDOW_OFFSET_ENABLE_bit | PROVOKING_VTX_LAST_bit); /* TCore: | PERSP_CORR_DIS_bit */
+ ereg (PA_SC_MODE_CNTL, 0);
+ ereg (PA_SC_CLIPRECT_RULE, CLIP_RULE_mask); /* Aux Cliprects (always pass, so ignore setup) */
+// PA_SC_CLIPRECT_[0-3]_TL/BR
+ pack0 (PA_SC_GENERIC_SCISSOR_TL, 2);
+ e32 (0);
+ e32 (0xffffffff);
+ pack0 (PA_SC_VPORT_SCISSOR_0_TL, 2);
+ e32 (0);
+ e32 (0xffffffff);
+ pack0 (PA_SC_VPORT_ZMIN_0, 2);
+ efloat (0.0);
+ efloat (1.0);
+ pack0 (PA_SU_POINT_SIZE, 2);
+ e32 (0x0008 | (0x0008 << PA_SU_POINT_SIZE__WIDTH_shift)); /* Default: half pixel radius */
+ e32 (0 | (0xffff << MAX_SIZE_shift)); /* Min 0, max 4k radius */
+ ereg (PA_SC_AA_CONFIG, 0);
+ ereg (PA_SC_MPASS_PS_CNTL, 0);
+ ereg (PA_SC_AA_MASK, 0xffffffff);
+ pack0 (PA_CL_GB_VERT_CLIP_ADJ, 4);
+ efloat (1.0); /* "should be set to 1.0 for no guard band"? */
+ efloat (1.0);
+ efloat (1.0);
+ efloat (1.0);
+ ereg (PA_SU_VTX_CNTL, 1 | /* Pixel Center 0: @0.0 1: @0.5 */
+ (0 << PA_SU_VTX_CNTL__ROUND_MODE_shift) | /* Float->Fixed 0: trunc 1: Round 2: Round to even */
+ (0 << QUANT_MODE_shift)); /* 0: 1/16 1: 1/8 2: 1/4 3: 1/2 4: 1 5: 1/256 */
+ pack0 (PA_CL_POINT_X_RAD, 4);
+ e32 (0);
+ e32 (0);
+ e32 (0); /* PA_CL_POINT_SIZE */
+ e32 (0); /* PA_CL_POINT_CULL_RAD */
+ ereg (PA_CL_VTE_CNTL,
+ VPORT_X_SCALE_ENA_bit | VPORT_X_OFFSET_ENA_bit |
+ VPORT_Y_SCALE_ENA_bit | VPORT_Y_OFFSET_ENA_bit |
+ VPORT_Z_SCALE_ENA_bit | VPORT_Z_OFFSET_ENA_bit |
+ VTX_XY_FMT_bit | VTX_Z_FMT_bit); /* perfcounter */
+ ereg (PA_SC_LINE_CNTL, LAST_PIXEL_bit); /* verify, also EXPAND_LINE_WIDTH_bit (AA lines) */
+ pack0 (PA_CL_VPORT_XSCALE_0, 6); /* Viewport trafo */
+ efloat (1.0); /* XScale */
+ efloat (0.0); /* XOffset */
+ efloat (1.0); /* YScale */
+ efloat (0.0); /* YOffset */
+ efloat (1.0); /* ZScale */
+ efloat (0.0); /* ZOffset */
+ ereg (PA_CL_VS_OUT_CNTL, 0); /* vtx_point_size, user clip distance, vtx_kill_flag */
+ pack0 (PA_SU_LINE_CNTL, 2);
+ e32 (0x0008); /* half pixel width */
+ e32 (LINE_PATTERN_mask); /* no stipple */
+
+
+ /*
+ * CB (Color Block)
+ */
+ ereg (CB_COLOR0_BASE, adapt->color_gpu >> 8);
+ ereg (CB_COLOR0_SIZE, ((adapt->color_pitch >> 3) - 1) |
+ ((adapt->color_pitch*adapt->color_height/64-1) << SLICE_TILE_MAX_shift));
+ ereg (CB_COLOR0_VIEW, 0 | SLICE_MAX_mask);
+ ereg (CB_COLOR0_INFO, /* clear_color, blend_clamp, blend_bypass */
+ (COLOR_8_8_8_8 << CB_COLOR0_INFO__FORMAT_shift) |
+ (ARRAY_LINEAR_ALIGNED << CB_COLOR0_INFO__ARRAY_MODE_shift) |
+ (NUMBER_SRGB /* ? */ << NUMBER_TYPE_shift) |
+ CB_COLOR0_INFO__READ_SIZE_bit | SIMPLE_FLOAT_bit);
+ ereg (CB_COLOR_CONTROL, DITHER_ENABLE_bit |
+ (0x01 << TARGET_BLEND_ENABLE_shift) |
+ (0xcc << ROP3_shift)); /* special modes, per-mrt-blend */
+ ereg (CB_CLRCMP_CONTROL, 0);
+ ereg (CB_SHADER_MASK, 0x0f);
+ ereg (CB_TARGET_MASK, 0x0f);
+ ereg (CB_BLEND_CONTROL, BLEND_ONE); /* dest := src */
+ pack0 (CB_CLEAR_RED, 4); /* ? */
+ efloat (1.0);
+ efloat (0.0);
+ efloat (0.0);
+ efloat (1.0);
+
+
+ /*
+ * DB (Depth Block)
+ */
+ ereg (DB_DEPTH_BASE, adapt->depth_gpu >> 8);
+ ereg (DB_DEPTH_SIZE, ((adapt->depth_pitch >> 3) - 1) |
+ ((adapt->depth_pitch*adapt->depth_height/64-1) << SLICE_TILE_MAX_shift));
+ ereg (DB_DEPTH_VIEW, 0 | SLICE_MAX_mask); /* ??? "Maximum allowed Z slice index" */
+ ereg (DB_DEPTH_INFO, DEPTH_8_24 | (ARRAY_1D_TILED_THIN1 << DB_DEPTH_INFO__ARRAY_MODE_shift)); /* hiz */
+// ereg (DB_HTILE_DATA_BASE, htilebase >> 8);
+ ereg (DB_PREFETCH_LIMIT, (adapt->depth_height >> 3)-1);
+//DB_PRELOAD_CONTROL
+ ereg (DB_RENDER_CONTROL, 0); /* hiz */
+ ereg (DB_RENDER_OVERRIDE, 0); /* hiz, his */
+//DB_HTILE_SURFACE
+ ereg (DB_SHADER_CONTROL,
+ (Z_ORDER__EARLY_Z_THEN_LATE_Z << Z_ORDER_shift));
+// | KILL_ENABLE_bit | DUAL_EXPORT_ENABLE_bit);
+ ereg (DB_STENCILREFMASK, 0xff << STENCILWRITEMASK_shift);
+ ereg (DB_STENCILREFMASK_BF, 0xff << STENCILWRITEMASK_shift);
+ pack0 (DB_DEPTH_CLEAR, 1);
+ efloat (1.0);
+ ereg (DB_DEPTH_CONTROL, Z_WRITE_ENABLE_bit); /* stencil, ztest */
+ ereg (DB_STENCIL_CLEAR, 0);
+// ereg (DB_ALPHA_TO_MASK, 0); /* ? */
+
+
+ /*
+ * SQ (Sequencer / Shader)
+ */
+ ereg (SQ_CONFIG, VC_ENABLE_bit | DX9_CONSTS_bit | /* DX9 for constant file, =0 for kcache */
+// ALU_INST_PREFER_VECTOR_bit |
+ /* Priority of VS vs. PS has to be tested - see also SPI_CONFIG_CNTL */
+// (3 << PS_PRIO_shift) | (2 << VS_PRIO_shift) |
+ (2 << PS_PRIO_shift) | (3 << VS_PRIO_shift) |
+ (1 << GS_PRIO_shift) | (0 << ES_PRIO_shift));
+ ereg (SQ_GPR_RESOURCE_MGMT_1, 191 | (63 << NUM_VS_GPRS_shift) |
+ (4 << NUM_CLAUSE_TEMP_GPRS_shift) |
+ (0 << CLAUSE_SEQ_PRIO_shift)); /* fixed number of total regs? */
+ ereg (SQ_THREAD_RESOURCE_MGMT, 95 | (31 << NUM_VS_THREADS_shift));
+ ereg (SQ_STACK_RESOURCE_MGMT_1, 2047 | (2047 << NUM_VS_STACK_ENTRIES_shift));
+ ereg (SQ_VTX_BASE_VTX_LOC, 0); /* ? */
+ ereg (SQ_VTX_START_INST_LOC, 0); /* ? */
+
+ ereg (SQ_PGM_EXPORTS_PS, 3); /* Export 1 color + z */ /* ? */
+ ereg (SQ_VTX_SEMANTIC_CLEAR, 0xffffffff);
+ ereg (SQ_PGM_RESOURCES_FS, 0);
+// ereg (SQ_ESGS_RING_ITEMSIZE, 0);
+// ereg (SQ_GSVS_RING_ITEMSIZE, 0);
+// ereg (SQ_ESTMP_RING_ITEMSIZE, 0);
+// ereg (SQ_GSTMP_RING_ITEMSIZE, 0);
+ ereg (SQ_VSTMP_RING_ITEMSIZE, 0); /* ? */
+ ereg (SQ_PSTMP_RING_ITEMSIZE, 0);
+ ereg (SQ_FBUF_RING_ITEMSIZE, 0);
+ ereg (SQ_REDUC_RING_ITEMSIZE, 0);
+// ereg (SQ_GS_VERT_ITEMSIZE, 0);
+
+
+ /*
+ * SPI (Shader Processor Interpolator)
+ */
+ ereg (SPI_CONFIG_CNTL, 0); /* Prio VS, GS, ES, PS */
+ ereg (SPI_CONFIG_CNTL_1, 0);
+ ereg (SPI_INTERP_CONTROL_0, FLAT_SHADE_ENA_bit); /* doc: turn off? point sprite params? */
+ pack0 (SPI_FOG_CNTL, 3);
+ e32 (0);
+ e32 (0);
+ e32 (0);
+ ereg (SPI_INPUT_Z, 0); /* ? */
+ pack0 (SPI_VS_OUT_ID_0, SPI_VS_OUT_ID_0_num);
+ /* each word packs four consecutive byte ids: 0..3, 4..7, 8..11, ... */
+ for (i = 0; i < SPI_VS_OUT_ID_0_num; i++) /* VS output mapping */
+ e32 (0x03020100 + i*0x04040404);
+ pack0 (SPI_PS_INPUT_CNTL_0, SPI_PS_INPUT_CNTL_0_num);
+ for (i = 0; i < SPI_PS_INPUT_CNTL_0_num; i++) /* interpolation settings */
+ e32 ((i /*<< SEMANTIC_shift*/) | (3 << DEFAULT_VAL_shift) | SEL_CENTROID_bit); /* SEL_CENTROID unknown? */
+ ereg (SPI_VS_OUT_CONFIG, 0 << VS_EXPORT_COUNT_shift); /* number of exported vectors -1 */
+ ereg (SPI_PS_IN_CONTROL_0, 1); /* number of parameters to interpolate, generate */
+ ereg (SPI_PS_IN_CONTROL_1, 0);
+
+ /*
+ * SX (Shader Export)
+ */
+ ereg (SX_EXPORT_BUFFER_SIZES, 0x1f | /* ? */ /* err in doc: default 0x3 ?!? */
+ (0x1f << POSITION_BUFFER_SIZE_shift) | (0x1f << SMX_BUFFER_SIZE_shift));
+// ereg (SX_MEMORY_EXPORT_BASE, (vtx_gpu + 512) >> 8); /* ? */
+ ereg (SX_MEMORY_EXPORT_BASE, (adapt->display_gpu) >> 8); /* ? */
+ ereg (SX_MEMORY_EXPORT_SIZE, 1024);
+ ereg (SX_ALPHA_TEST_CONTROL, ALPHA_TEST_BYPASS_bit); /* diff bypass vs. !enable ? */
+ ereg (SX_MISC, 0); /* ? */
+
+ /*
+ * Misc
+ */
+ ereg (GC_USER_SHADER_PIPE_CONFIG, 0); /* ? */
+}
+
+
+/*
+ * Scan the control-flow part of a shader program and report where its
+ * sections start, then hex-dump the whole program.
+ *
+ * what:  label used in the printed header ("Vertex", "Pixel", ...)
+ * p:     program as an array of 64-bit instruction slots
+ * plen:  number of slots in p
+ * cf:    out - number of control-flow slots (index of the first slot
+ *        carrying CF_FLAG_END_OF_PROGRAM, plus one)
+ * alu:   out - lowest clause address referenced by any scanned slot
+ *        (plen if none)
+ * fetch: out - lowest address referenced by a TEX/VTX/VTX_TC slot
+ *        (plen if none)
+ *
+ * Asserts that an END_OF_PROGRAM slot exists within the first plen slots.
+ */
+static void analyze_program (char *what, uint64_t *p, int plen, int *cf, int *alu, int *fetch)
+{
+    int i;
+    uint32_t *p32 = (uint32_t *) p;
+
+    *alu = *fetch = plen;
+
+    /* searches for end of cf by first CF_FLAG_END_OF_PROGRAM - jumps are evil (read: won't work) */
+    for (i = 0; i < plen; i++) {
+        /* 7-bit instruction field: bits 23..29 of the upper dword */
+        uint32_t inst = (p[i] & 0x3f80000000000000ULL) >> (23+32), t;
+        if (inst & 0x40) {
+            t = p[i] & 0xffffffff;
+            if (t < *alu)
+                *alu = t;
+        }
+        if (inst == CF_INST_TEX || inst == CF_INST_VTX || inst == CF_INST_VTX_TC) {
+            t = p[i] & 0x003fffff;
+            if (t < *fetch)
+                *fetch = t;
+            if (t < *alu)
+                *alu = t;
+        }
+        if (p[i] & CF_FLAG_END_OF_PROGRAM)
+            break;
+    }
+    /*
+     * The loop only stops early on an END_OF_PROGRAM slot, so i < plen is
+     * exactly "found one". Testing p[i] here instead would read one slot
+     * past the end of the program when nothing was found.
+     */
+    assert (i < plen);
+
+    *cf = i+1;
+    printf (" %s shader (len main cf %d, alu at %d, fetches at %d, len %d:\n",
+            what, *cf, *alu, *fetch, plen);
+    /* dump four slots (eight dwords) per line */
+    for (i = 0; i < plen; i++, p32 += 2)
+        printf (" %08x %08x%s", p32[0], p32[1], (i & 3) == 3 ? "\n":"");
+    if ((i & 3) != 0)
+        printf ("\n");
+}
+
+
+/*
+ * Upload a vertex and a pixel shader and point the SQ program registers at
+ * them.
+ *
+ * adapt:        only consulted for the chipset (UNCACHED_FIRST_INST on R600)
+ * vs, vslen:    vertex shader and its length in bytes
+ * vsgpr:        value placed in the low bits of SQ_PGM_RESOURCES_VS
+ * ps, pslen:    pixel shader and its length in bytes
+ * psgpr:        value placed in the low bits of SQ_PGM_RESOURCES_PS
+ *
+ * The vertex shader is copied to the start of the buffer `vtx`, the pixel
+ * shader 4096 bytes into it; `vtx` / `vtx_gpu` are defined elsewhere.
+ * NOTE(review): the END_FETCH registers receive the length in bytes while
+ * END_CF / END_ALU receive slot indices from analyze_program() - the
+ * original comments say these may be ignored by the hardware; confirm.
+ */
+void init_programs (adapter_t *adapt, uint64_t *vs, int vslen, int vsgpr, uint64_t *ps, int pslen, int psgpr)
+{
+ int cf, alu, fetch;
+
+ memcpy ((char *)vtx, vs, vslen);
+ flush_cache ((char *)vtx, (char *)vtx + vslen);
+ /* analyze_program() works on 64-bit slots, hence bytes / 8 */
+ analyze_program ("Vertex", vs, vslen/8, &cf, &alu, &fetch);
+
+ ereg (SQ_PGM_START_VS, vtx_gpu >> 8); /* Base address vertex shader */
+ ereg (SQ_PGM_CF_OFFSET_VS, 0); /* Base offset CF (control flow) */
+ ereg (SQ_PGM_END_CF_VS, alu); /* End offset CF (ignored by HW?) */
+ ereg (SQ_PGM_END_ALU_VS, fetch); /* End offset ALU (ignored by HW?) */
+ ereg (SQ_PGM_END_FETCH_VS, vslen); /* End offset Fetches (ignored by HW?) */
+ ereg (SQ_PGM_RESOURCES_VS, (vsgpr /* << NUM_GS_GPRS_shift */) | (0 << STACK_SIZE_shift) |
+ PRIME_CACHE_ON_DRAW_bit | FETCH_CACHE_LINES_mask |
+ PRIME_CACHE_ENABLE_bit | PRIME_CACHE_ON_CONST_bit);
+
+ /* pixel shader lives 4096 bytes after the vertex shader in the same buffer */
+ memcpy ((char *)vtx + 4096, ps, pslen);
+ flush_cache ((char *)vtx + 4096, (char *)vtx + 4096 + pslen);
+ analyze_program ("Pixel", ps, pslen/8, &cf, &alu, &fetch);
+
+ ereg (SQ_PGM_START_PS, (vtx_gpu + 4096) >> 8); /* All the same for pixel shader */
+ ereg (SQ_PGM_CF_OFFSET_PS, 0);
+ ereg (SQ_PGM_END_CF_PS, alu);
+ ereg (SQ_PGM_END_ALU_PS, fetch);
+ ereg (SQ_PGM_END_FETCH_PS, pslen);
+ ereg (SQ_PGM_RESOURCES_PS, (psgpr /* << NUM_GS_GPRS_shift */) | (0 << STACK_SIZE_shift) |
+ PRIME_CACHE_ON_DRAW_bit | FETCH_CACHE_LINES_mask |
+ PRIME_CACHE_ENABLE_bit | PRIME_CACHE_ON_CONST_bit |
+ (adapt->chipset == CHIPSET_R600 ? UNCACHED_FIRST_INST_bit : 0));
+
+ printf ("\n");
+}
+
+
+
+/*
+ * Program the color and depth blocks for a buffer clear and then restore
+ * the regular render state. The clear itself (drawing a quad) is still a
+ * TODO, so at present this only toggles state.
+ *
+ * adapt:  unused
+ * colbuf: non-zero to set up clearing of the color buffer; otherwise color
+ *         writes are masked off while the clear state is active
+ * zbuf:   non-zero to set up clearing of depth/stencil; otherwise depth
+ *         control is zeroed while the clear state is active
+ */
+void clear_buffers (adapter_t* adapt, int colbuf, int zbuf)
+{
+    /* Choose random of 8 half saturated colors (blue may be saturated) */
+    uint32_t color = random ();
+    color = ((color & 8) << (31-3)) | ((color & 4) << (23-2)) | ((color & 2) << (15-1)) | ((color & 1) << 7) | 0x0000003f;
+    color = 0x400000ff;
+    /* not consumed yet - reserved for the clear quad below */
+    (void) color;
+
+    /* Enable clearing of buffers selected, disable writing to others */
+    if (colbuf) {
+        ereg (CB_COLOR0_INFO,
+              (COLOR_8_8_8_8 << CB_COLOR0_INFO__FORMAT_shift) |
+              (ARRAY_LINEAR_ALIGNED << CB_COLOR0_INFO__ARRAY_MODE_shift) |
+              (NUMBER_SRGB << NUMBER_TYPE_shift) |
+              CB_COLOR0_INFO__READ_SIZE_bit | CLEAR_COLOR_bit | SIMPLE_FLOAT_bit); /* clear_color: 0x3f800000 ?!? */
+        ereg (CB_COLOR_CONTROL, DITHER_ENABLE_bit |
+              (0x03 << SPECIAL_OP_shift) |
+              (0x01 << TARGET_BLEND_ENABLE_shift) |
+              (0xcc << ROP3_shift));
+        /* State, shouldn't be changed after clear */
+        pack0 (CB_CLEAR_RED, 4);
+        efloat (1.0); /* ? No freaking clue, set to red to see it happen */
+        efloat (0.0);
+        efloat (0.0);
+        efloat (1.0);
+    } else {
+        ereg (CB_TARGET_MASK, 0);
+    }
+    if (zbuf) {
+        ereg (DB_RENDER_CONTROL, DEPTH_CLEAR_ENABLE_bit | STENCIL_CLEAR_ENABLE_bit);
+//      ereg (DB_DEPTH_CONTROL, STENCIL_ENABLE_bit | Z_ENABLE_bit | Z_WRITE_ENABLE_bit | (1 << STENCILFAIL_shift));
+        ereg (DB_DEPTH_CONTROL, Z_WRITE_ENABLE_bit | (1 << STENCILFAIL_shift));
+        /* State, shouldn't be changed after clear */
+        /*
+         * The pack0 header announces one payload dword, so the value has to
+         * follow immediately; the stencil clear register is written
+         * afterwards instead of in between header and payload.
+         */
+        pack0 (DB_DEPTH_CLEAR, 1);
+        efloat (1.0);
+        ereg (DB_STENCIL_CLEAR, 0 | (((0-7) & 0xff) << MIN_shift));
+    } else {
+        ereg (DB_DEPTH_CONTROL, 0);
+    }
+
+    /* TODO: draw quad for clearing */
+
+    /* Cleanup: reset to sane values */
+    ereg (CB_COLOR0_INFO, /* clear_color, blend_clamp, blend_bypass */
+          (COLOR_8_8_8_8 << CB_COLOR0_INFO__FORMAT_shift) |
+          (ARRAY_LINEAR_ALIGNED << CB_COLOR0_INFO__ARRAY_MODE_shift) |
+          (NUMBER_SRGB /* ? */ << NUMBER_TYPE_shift) |
+          CB_COLOR0_INFO__READ_SIZE_bit | SIMPLE_FLOAT_bit); /* clear_color: 0x3f800000 ?!? */
+    ereg (CB_COLOR_CONTROL, DITHER_ENABLE_bit |
+          (0x01 << TARGET_BLEND_ENABLE_shift) |
+          (0xcc << ROP3_shift)); /* special modes, per-mrt-blend */
+    ereg (DB_RENDER_CONTROL, 0);
+    ereg (DB_DEPTH_CONTROL, Z_WRITE_ENABLE_bit);
+    ereg (CB_TARGET_MASK, 0x0f);
+}
+
+
+/*
+ * Draw one triangle with the trivial shaders.
+ *
+ * Uploads trivial_vs / trivial_ps, sets one ALU constant, copies the vertex
+ * table 8192 bytes into the buffer `vtx`, points every vertex resource at
+ * it, and issues an auto-indexed draw of 3 vertices.
+ * `vtx` / `vtx_gpu` are defined elsewhere.
+ */
+void test_triangles(adapter_t *adapt)
+{
+ int i;
+ /* 12 vertices of 8 floats each: position row followed by color row */
+ static float vertices[] = { /* format x,y,z,w,r,g,b,a */
+ 0, 0, 0, 1,
+ 1.0, 1.0, 0.0, 1.0,
+ 200, 0, 0, 1,
+ 1.0, 0.0, 1.0, 1.0,
+ 0, 200, 0, 1, /* eol */
+ 0.0, 1.0, 1.0, 1.0,
+ 100, 100, 0, 1,
+ 1.0, 0.0, 0.0, 1.0,
+ 110, 100, 0, 1,
+ 1.0, 0.0, 0.0, 1.0,
+ 100, 110, 0, 1,
+ 1.0, 0.0, 0.0, 1.0,
+ 200, 200, 0, 1,
+ 1.0, 0.0, 0.0, 1.0,
+ 210, 200, 0, 1,
+ 1.0, 0.0, 0.0, 1.0,
+ 200, 210, 0, 1,
+ 1.0, 0.0, 0.0, 1.0,
+ 100, 200, 0, 1,
+ 1.0, 0.0, 0.0, 1.0,
+ 110, 200, 0, 1,
+ 1.0, 0.0, 0.0, 1.0,
+ 100, 210, 0, 1,
+ 1.0, 0.0, 0.0, 1.0
+ } ;
+
+ printf("\nTriangles:\n\n");
+
+ /* Shader upload */
+ init_programs (adapt, trivial_vs, sizeof (trivial_vs), 2, trivial_ps, sizeof (trivial_ps), 1);
+
+ /* Shader constants (cfile) */
+ pack0 (SQ_ALU_CONSTANT0_0, 4); /* consts 0-255.xyzw for PS, 256-511.xyzw for VS */
+ efloat (1.0); /* red ARGB 1/1/0/0 */
+ efloat (1.0);
+ efloat (0.0);
+ efloat (0.0);
+
+ /* Vertex buffer upload */
+ /* 3*8*4 * 4 bytes = 384 bytes = the complete 12-vertex table */
+ memcpy (((char *)vtx) + 8192, vertices, 3*8*4 * 4); /* overcommitting, in case too much is drawn */
+ flush_cache ((char *)vtx + 8192, (char *)vtx + 8192 + 3*8*4 *4);
+ flush_gpu_input_cache (); /* needed? */
+
+ /* Vertex buffer setup */
+// pack0 (SQ_VTX_CONSTANT_WORD0_0 + 160*7*4, 7);
+ /* FIXME: set all resources until everything works */
+ /* one resource = 7 dwords, so step 7*4 bytes of register space at a time */
+ for (i = SQ_VTX_CONSTANT_WORD0_0; i < 0x3B640; i+=7*4) {
+ pack0 (i, 7);
+ e32 ((vtx_gpu + 8192) & 0xffffffff); /* WORD0 */
+ e32 (3 *8*4); /* WORD1: bytes ? */
+ e32 ((((vtx_gpu + 8192) >> 32) & BASE_ADDRESS_HI_mask) |
+ ((8*4) << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift) |
+ (FORMAT_32_32_32_32_FLOAT << SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift) |
+ (2 << SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift) |
+ SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit); /* WORD2 */
+ e32 (1 << MEM_REQUEST_SIZE_shift); /* WORD3 */
+ e32 (0); /* WORD4 */
+ e32 (0); /* WORD5 */
+ e32 (3 << SQ_VTX_CONSTANT_WORD6_0__TYPE_shift); /* WORD6 */
+ }
+
+ ereg (VGT_PRIMITIVE_TYPE, 4); /* draw (unconnected) triangles */
+ pack3 (IT_DRAW_INDEX_AUTO, 2);
+ e32 (3); /* see VGT_NUM_INDICES */
+ e32 ((2 /*<< SOURCE_SELECT_shift*/)); /* see VGT_DRAW_INITIATOR */
+}
diff --git a/r600_demo.c b/r600_demo.c
new file mode 100644
index 0000000..e035781
--- /dev/null
+++ b/r600_demo.c
@@ -0,0 +1,725 @@
+/*
+ * r600_demo
+ *
+ * Copyright (C) 2008-2009 Matthias Hopf
+ * Copyright (C) 2008-2009 Alexander Deucher
+ *
+ * Based on r300_demo,
+ * Copyright (C) various authors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * r600_demo
+ * Bringup tool for R6xx, R7xx
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <math.h>
+#include <time.h>
+#include <getopt.h>
+
+#include "xf86drm.h"
+#include "radeon_drm.h"
+#include "r600_reg.h"
+#include "r600_lib.h"
+#include "r600_hwapi.h"
+
+
+/* Options, set from the command line by getopt() in main() */
+int verbose = 0; /* -v: incremented per occurrence (-n increments it as well) */
+int use_ring_directly = 0; /* -r */
+int vertex_format = 0; /* -f, accepted range 0..4 */
+int vertex_load_as_int = 0; /* -i, accepted range 0..2 */
+int vertex_unsigned = 0; /* -u */
+int vertex_alu_scale_override = 0; /* -S, accepted range 0..2 */
+int do_not_flush = 0; /* -n */
+
+/* DRM context & file descriptor & dma structure */
+int drmFD=-1; /* opened with drmOpen() in main() */
+drm_context_t context; /* created with drmCreateContext() in main() */
+drmDMAReq dma; /* filled in and submitted by GetBufs() */
+
+drmBufMapPtr BufMapPtr=NULL; /* result of drmMapBufs() in GetBufs() */
+
+/* CPU mappings established by GetMaps() via drmMap(); NULL if no such map was found */
+volatile void *registers=NULL;
+void *framebuffer=NULL;
+
+/* Read from the D1GRPH_* registers in read_registers() */
+int display_width, display_height;
+uint32_t display_gpu; // Should be 64bit, but is currently 32bit address in R6xx, R7xx chips
+
+/* DMA buffers */
+/* GetBufs() requests DMA_COUNT buffers with request_size DMA_SIZE;
+ * the enum values index indices[] and sizes[]. */
+#define DMA_COUNT 3
+#define DMA_SIZE 64000
+enum { INDIRECT = 0, VERTICES, TEXTURE } ;
+
+int indices[DMA_COUNT]; /* dma.request_list */
+int sizes[DMA_COUNT]; /* dma.request_sizes */
+
+int indirect_start=0, indirect_end=0, indirect_idx=-1, indirect_size=0, ring_size=0;
+int vtx_idx=-1, tex_idx=-1;
+uint32_t *indirect, *vtx, *tex; /* CPU addresses of the buffers, set in GetBufs() */
+void *gart, *gartbuf, *garttex, *ring;
+uint64_t gart_gpu, gartbuf_gpu, garttex_gpu, vtx_gpu, tex_gpu, ring_gpu; /* matching GPU addresses */
+uint32_t garttex_handle; /* RADEON_PARAM_GART_TEX_HANDLE, queried in GetMaps() */
+
+adapter_t adapter;
+
+
+/* This sets some specifics card pipe functionalities according
+ to card model. */
+
+
+/*
+ * Stub: reports on stderr that it must never be reached and terminates the
+ * process with exit(-1).
+ * NOTE(review): presumably only present to satisfy a symbol expected by the
+ * DRM library -- confirm.
+ */
+void xf86InstallSIGIOHandler(void)
+{
+    fputs("I should not be called !!!\n", stderr);
+    exit(-1);
+}
+
+
+/*
+ * Stub: reports on stderr that it must never be reached and terminates the
+ * process with exit(-1).
+ * NOTE(review): presumably only present to satisfy a symbol expected by the
+ * DRM library -- confirm.
+ */
+void xf86RemoveSIGIOHandler(void)
+{
+    fputs("I should not be called !!!\n", stderr);
+    exit(-1);
+}
+
+
+/*
+ * Print the DRM driver name, version and description, the AGP vendor and
+ * device IDs and the bus ID of the device behind the global drmFD to stderr.
+ *
+ * The version record and the bus ID string are allocated by libdrm; both are
+ * released again before returning, and a failed query is reported instead
+ * of being dereferenced.
+ */
+void print_versions(void)
+{
+    drmVersionPtr v;
+    char *busid;
+
+    v = drmGetVersion(drmFD);
+    if (v != NULL) {
+        fprintf(stderr,"driver \"%s\" version %d.%d.%d\n",v->name,v->version_major,v->version_minor,v->version_patchlevel);
+        fprintf(stderr,"driver description \"%s\"\n",v->desc);
+        drmFreeVersion(v);
+    } else {
+        fprintf(stderr,"driver version unknown (drmGetVersion failed)\n");
+    }
+    fprintf(stderr,"AGPVendorID: %04x AGPDeviceId: %04x\n",
+            drmAgpVendorId(drmFD),
+            drmAgpDeviceId(drmFD));
+    busid = drmGetBusid(drmFD);
+    /* Passing NULL to %s is undefined; print a placeholder instead */
+    fprintf(stderr,"BusID: \"%s\"\n", busid ? busid : "(null)");
+    if (busid != NULL)
+        drmFreeBusid(busid);
+}
+
+
+/*
+ * Query the GART layout from the radeon DRM and map the DRM regions.
+ *
+ * Results are stored in globals:
+ *   gart_gpu, gartbuf_gpu     - RADEON_PARAM_GART_BASE / _GART_BUFFER_OFFSET
+ *   garttex_handle            - RADEON_PARAM_GART_TEX_HANDLE
+ *   framebuffer, registers    - CPU mappings of the FB and register maps
+ *   garttex, garttex_gpu      - mapping/offset of the map whose handle equals
+ *                               garttex_handle (NULL/0 if mapping failed)
+ *   ring_size, ring           - ring size derived from CP_RB_CNTL and the
+ *                               scatter/gather map of matching size
+ *
+ * Errors of individual DRM calls are reported through drmError() and do not
+ * abort. With verbose >= 2 a table of all maps is printed to stderr.
+ *
+ * NOTE(review): reg_read32() is called here before read_registers() has
+ * verified that a register map exists -- confirm this is safe when no
+ * DRM_REGISTERS map was found.
+ */
+void GetMaps(void)
+{
+    int i, r;
+    drm_handle_t offset;
+    drmSize size;
+    drmMapType type;
+    drmMapFlags flags;
+    drm_handle_t handle;
+    int mtrr;
+    char *typename;
+    drm_radeon_getparam_t gp;
+    uint32_t gpu;
+
+    gp.param = RADEON_PARAM_GART_BASE;
+    gp.value = (int *) &gpu;
+    if ( (r = drmCommandWriteRead (drmFD, DRM_RADEON_GETPARAM, &gp, sizeof(gp))) < 0)
+        drmError(r, "drm: RADEON_PARAM_GART_BASE");
+    gart_gpu = gpu;
+    if (verbose >= 2)
+        fprintf (stderr, "gart_base: 0x" PRINTF_UINT64_HEX "\n", gart_gpu);
+    gart = NULL; // unknown how to get this - except for by calculation with gartbuf
+
+    gp.param = RADEON_PARAM_GART_BUFFER_OFFSET;
+    gp.value = (int *) &gpu;
+    if ( (r = drmCommandWriteRead (drmFD, DRM_RADEON_GETPARAM, &gp, sizeof(gp))) < 0)
+        drmError(r, "drm: RADEON_PARAM_GART_BUFFER_OFFSET");
+    gartbuf_gpu = gpu;
+    if (verbose >= 2)
+        fprintf (stderr, "gart_buffer: 0x" PRINTF_UINT64_HEX "\n", gartbuf_gpu);
+
+    gp.param = RADEON_PARAM_GART_TEX_HANDLE;
+    gp.value = (int *) &garttex_handle;
+    if ( (r = drmCommandWriteRead (drmFD, DRM_RADEON_GETPARAM, &gp, sizeof(gp))) < 0)
+        drmError(r, "drm: RADEON_PARAM_GART_TEX_HANDLE");
+    if (verbose >= 2)
+        fprintf (stderr, "gart_tex_handle: 0x%08x (ret %d)\n", garttex_handle, r);
+
+// agp_physical=drmAgpBase(drmFD);
+// fprintf(stderr,"AGP base: 0x%08x\n", (unsigned int)agp_physical);
+
+    if (verbose >= 2)
+        fprintf(stderr,"Map Offset Size Type Handle Mtrr\n");
+
+    /* First pass: walk all DRM maps, map FB/registers/texture heap, print table */
+    for(i=0; !drmGetMap(drmFD, i, &offset, &size, &type, &flags, &handle, &mtrr); i++){
+        switch(type){
+        case DRM_FRAME_BUFFER:
+            typename = "FB";
+            if((r=drmMap(drmFD, offset, size, &framebuffer))<0)
+                drmError(r, "drm: DRM_FRAME_BUFFER");
+            break;
+        case DRM_REGISTERS:
+            typename = "REG";
+            if((r=drmMap(drmFD, offset, size, (drmAddressPtr)&registers))<0)
+                drmError(r, "drm: DRM_REGISTERS");
+            break;
+        case DRM_SHM:
+            typename = "SHM";
+            break;
+        case DRM_AGP:
+            typename = "AGP";
+#if 0
+            if(offset==agp_physical){
+                if((r=drmMap(drmFD, offset, size, (drmAddressPtr)&agp_space))<0)
+                    drmError(r, "drm: DRM_AGP");
+            }
+#endif
+            break;
+        case DRM_SCATTER_GATHER:
+            typename = "SG";
+            break;
+        default:
+            typename = "???";
+            break;
+        }
+        if (verbose >= 2)
+            fprintf(stderr,"%2d 0x%08x %8d KB %3.3s 0x%08x ",
+                    i, offset, size/1024, typename, handle);
+        if(mtrr < 0 ) {
+            if (verbose >= 2)
+                fprintf(stderr," none");
+        } else {
+            if (verbose >= 2)
+                fprintf(stderr,"%4d", mtrr);
+        }
+        if (handle == garttex_handle) {
+            if (verbose >= 2)
+                fprintf (stderr, " texture heap");
+            if( (r=drmMap (drmFD, offset, size, (drmAddressPtr)&garttex))<0) {
+                drmError(r, "drm: texture heap");
+                garttex_gpu = 0;
+                garttex = NULL;
+            } else {
+                /* Record the offset only for a successful mapping, so that a
+                 * failure leaves garttex_gpu at 0 alongside garttex == NULL */
+                garttex_gpu = offset;
+            }
+        }
+        if (verbose >= 2)
+            fprintf (stderr, "\n");
+    }
+#if 0
+/* On PPC ask the graphics card where it see AGP */
+#if BYTE_ORDER == BIG_ENDIAN
+    agp_physical=(((get_int(RADEON_MC_AGP_LOCATION))& 0x0ffffU) << 16);
+#endif
+#endif
+
+    /* Second pass: find the ring buffer by its size alone */
+    ring_size = 8 << (reg_read32 (CP_RB_CNTL) & RB_BUFSZ_mask);
+    for(i=0; !drmGetMap(drmFD, i, &offset, &size, &type, &flags, &handle, &mtrr); i++) {
+        /* !!! This is extremely fragile !!! */
+        if (type == DRM_SCATTER_GATHER && size == ring_size + 4096)
+            if((r=drmMap(drmFD, offset, size, &ring))<0) {
+                drmError(r, "drm: ring");
+                ring = NULL;
+            }
+    }
+}
+
+
+/*
+ * Request DMA_COUNT DMA buffers from the DRM, map all DMA buffers and record
+ * CPU/GPU addresses of the indirect, vertex and texture buffers in globals.
+ *
+ * Sets: indices[], sizes[], indirect_idx/indirect_size (or a calloc'ed
+ * indirect buffer when built with USE_RING_DIRECTLY), vtx_idx, tex_idx,
+ * BufMapPtr, gartbuf, indirect, indirect_start, indirect_end, vtx, vtx_gpu,
+ * tex, tex_gpu, ring_gpu.
+ *
+ * Terminates the process if the DMA request fails, stays busy for the whole
+ * retry budget, if the buffers cannot be mapped, or if direct ring access
+ * was requested but no ring mapping exists.
+ */
+void GetBufs(void)
+{
+    int i,r = 0;
+    uint32_t offset, addedoffset = 0;
+
+    dma.context = context;
+    dma.send_count = 0;
+    dma.request_count = DMA_COUNT;
+    dma.request_size = DMA_SIZE;
+    dma.request_list = indices;
+    dma.request_sizes = sizes;
+    dma.flags = DRM_DMA_WAIT;
+    for (i=0; i<2000000; i++) {
+        drmGetLock(drmFD, context, DRM_LOCK_READY);
+        r = drmDMA(drmFD, &dma);
+        drmUnlock(drmFD,context);
+        if (!r)
+            break;
+        /* drmDMA() reports failures as negative errno values; the positive
+         * form is still accepted for backward compatibility */
+        if (r != -EBUSY && r != EBUSY) {
+            drmError(r, __func__);
+            exit(-1);
+        }
+        fprintf(stderr,"drmDMA()=EBUSY, trying again\n");
+    }
+    if (r) {
+        /* Retry budget exhausted: no buffers were granted, do not go on */
+        drmError(r, __func__);
+        fprintf(stderr, "drmDMA() stayed busy, aborting\n");
+        exit(-1);
+    }
+    if (verbose >= 2) {
+        fprintf(stderr, "Buffers:\n");
+        for(i=0;i<dma.granted_count;i++){
+            fprintf(stderr, "%3d: index = %d, size = %d\n",
+                    i, dma.request_list[i], dma.request_sizes[i]);
+        }
+    }
+
+#ifdef USE_RING_DIRECTLY
+    indirect_size = ring_size;
+    indirect = calloc (indirect_size, 1);
+#else
+    indirect_idx=indices[INDIRECT];
+    indirect_size=sizes[INDIRECT];
+#endif
+    vtx_idx=indices[VERTICES];
+    tex_idx=indices[TEXTURE];
+
+    if (verbose >= 2)
+        fprintf (stderr, "Mapping buffers:\n");
+    BufMapPtr = drmMapBufs(drmFD);
+    if(BufMapPtr == NULL){
+        /* drmMapBufs() has no error return; r is 0 here, so report errno */
+        drmError (-errno, __func__);
+        fprintf (stderr, "Could not map buffers, aborting\n");
+        exit (-1);
+    }
+    gartbuf = BufMapPtr->list[0].address;
+
+    if (verbose >= 2)
+        fprintf (stderr, " idx size\tused\taddress\n");
+    /* addedoffset accumulates the sizes of all preceding buffers; the GPU
+     * address of a buffer is computed as gartbuf_gpu + addedoffset */
+    for (i = 0; i < BufMapPtr->count; i++) {
+        if (verbose >= 2)
+            fprintf (stderr, " %-3d %d\t%d\t%p",
+                     BufMapPtr->list[i].idx,
+                     BufMapPtr->list[i].total,
+                     BufMapPtr->list[i].used,
+                     BufMapPtr->list[i].address);
+        if (BufMapPtr->list[i].idx == indirect_idx) {
+            if (verbose >= 2)
+                fprintf (stderr, "\tINDIRECT");
+            indirect = BufMapPtr->list[i].address;
+            indirect_start = BufMapPtr->list[i].used;
+            indirect_end =indirect_start;
+        }
+        if (BufMapPtr->list[i].idx == vtx_idx) {
+            vtx = BufMapPtr->list[i].address;
+            memset (vtx, 0, 32768);
+            vtx_gpu = gartbuf_gpu + addedoffset;
+            if (verbose >= 2)
+                fprintf (stderr, "\tVERTICES cpu %p gpu 0x" PRINTF_UINT64_HEX, vtx, vtx_gpu);
+        }
+        if (BufMapPtr->list[i].idx == tex_idx) {
+            tex = BufMapPtr->list[i].address;
+            memset (tex, 0, 32768);
+            tex_gpu = gartbuf_gpu + addedoffset;
+            if (verbose >= 2)
+                fprintf (stderr, "\tTEXTURE cpu %p gpu 0x" PRINTF_UINT64_HEX, tex, tex_gpu);
+        }
+        if (verbose >= 2)
+            fprintf (stderr,"\n");
+        addedoffset += BufMapPtr->list[i].total;
+    }
+
+    ring_gpu = reg_read32 (CP_RB_BASE) << 8;
+    if (verbose >= 2)
+        fprintf (stderr, "command buffer: cpu %p size %x\nring buffer: cpu %p gpu, 0x"
+                 PRINTF_UINT64_HEX "\n",
+                 indirect, indirect_size, ring, ring_gpu);
+    if (! ring && use_ring_directly) {
+        fprintf (stderr, "Cannot map ring\n");
+        exit (1);
+    }
+    if (verbose >= 2)
+        fprintf (stderr, "indirect buffer: cpu %p size %x\n", indirect, indirect_size);
+
+    /* Only consumed by the disabled DRM_RADEON_ALLOC experiment below */
+    drm_radeon_mem_alloc_t alloc;
+    alloc.region = RADEON_MEM_REGION_GART;
+    alloc.alignment = 4096;
+    alloc.size = 4096;
+    alloc.region_offset = (int *) &offset;
+
+#if 0
+    /* Apparently, all card internal offsets except gart_gpu (BASE) and
+     * gartbuf_gpu (BUFFER_OFFSET) are wrong...
+     * So use command buffers for all data as well. */
+    if ( (r = drmCommandWriteRead (drmFD, DRM_RADEON_ALLOC, &alloc, sizeof(alloc))) < 0)
+        drmError(r, __func__);
+    vtx_gpu = garttex_gpu + offset;
+// vtx_gpu = gart_gpu + offset;
+    vtx = garttex + offset;
+    fprintf (stderr, "alloced: offset 0x%08x, cpu %p, gpu 0x%08llx\n", offset, vtx, vtx_gpu);
+#endif
+}
+
+
+/*
+ * Identify the adapter and read the basic display setup from its registers.
+ *
+ * Exits if no register map is available or if the vendor ID is not 0x1002.
+ * Fills adapter.chipID, adapter.chipset and adapter.framebuffer_gpu as well
+ * as the globals display_width, display_height and display_gpu, and prints
+ * the vendor/device ID and the detected chipset to stderr.
+ */
+void read_registers(void)
+{
+ uint32_t vendor;
+
+ if(registers==NULL){
+ fprintf(stderr,"No register map found\n");
+ exit(-1);
+ }
+
+ /* VENDOR_DEVICE_ID: device ID in the upper, vendor ID in the lower 16 bits */
+ vendor = reg_read32 (VENDOR_DEVICE_ID);
+ adapter.chipID = vendor >> 16;
+ vendor &= 0xffff;
+ fprintf (stderr,"VendorID: %04x DeviceID: %04x\n", vendor, adapter.chipID);
+ if (vendor != 0x1002) {
+ fprintf (stderr,"VendorID is not 0x1002, aborting\n");
+ exit(-1);
+ }
+
+ /* Map the PCI device ID onto a chipset family */
+ switch (adapter.chipID) {
+ case 0x9400: case 0x9401: case 0x9402: case 0x9403:
+ case 0x9405: case 0x940A: case 0x940B: case 0x940F:
+ adapter.chipset = CHIPSET_R600;
+ break;
+ case 0x94C0: case 0x94C1: case 0x94C3: case 0x94C4:
+ case 0x94C5: case 0x94C6: case 0x94C7: case 0x94CC:
+ adapter.chipset = CHIPSET_RV610;
+ break;
+ case 0x9500: case 0x9501: case 0x9505: case 0x9507:
+ case 0x9511: case 0x9515: case 0x9517: case 0x9519:
+ adapter.chipset = CHIPSET_RV670;
+ break;
+ case 0x9580: case 0x9586: case 0x9587: case 0x9588:
+ case 0x9589: case 0x958A: case 0x958C: case 0x958D:
+ case 0x958E:
+ adapter.chipset = CHIPSET_RV630;
+ break;
+ case 0x9590: case 0x9596: case 0x9597: case 0x9598:
+ case 0x9599:
+ adapter.chipset = CHIPSET_RV635;
+ break;
+ case 0x95C0: case 0x95C5: case 0x95C7: case 0x95CC:
+ case 0x95CD: case 0x95CE: case 0x95CF:
+ adapter.chipset = CHIPSET_RV620;
+ break;
+ case 0x9440: case 0x9442: case 0x9444: case 0x9446:
+ case 0x944E: case 0x9456:
+ adapter.chipset = CHIPSET_RV770;
+ break;
+ case 0x9540: case 0x9541: case 0x9592: case 0x954E:
+ case 0x954F: case 0x9552: case 0x9553:
+ adapter.chipset = CHIPSET_RV710;
+ break;
+ case 0x9487: case 0x948F: case 0x9490: case 0x9498:
+ case 0x9480: case 0x9488: case 0x949c: case 0x949E:
+ case 0x949F:
+ adapter.chipset = CHIPSET_RV730;
+ break;
+ /* The IDs listed below deliberately share the default branch and end up
+ * as CHIPSET_NONE, just like any unlisted ID */
+ case 0x94C8: /* RHD_M74 */ case 0x94C9: /* RHD_M72 */
+ case 0x94CB: /* RHD_M72 */ case 0x9504: /* RHD_M88 */
+ case 0x9506: /* RHD_M88 */ case 0x9508: /* RHD_M88 */
+ case 0x9509: /* RHD_M88 */ case 0x9581: /* RHD_M76 */
+ case 0x9583: /* RHD_M76 */ case 0x958B: /* RHD_M76 */
+ case 0x958F: /* RHD_M76 */ case 0x950F: /* RHD_R680 */
+ default:
+ adapter.chipset = CHIPSET_NONE;
+ }
+
+ /* Report the detected chipset family */
+ switch (adapter.chipset) {
+ case CHIPSET_R600:
+ fprintf (stderr, "Chipset: R600\n\n");
+ break;
+ case CHIPSET_RV610:
+ fprintf (stderr, "Chipset: RV610\n\n");
+ break;
+ case CHIPSET_RV620:
+ fprintf (stderr, "Chipset: RV620\n\n");
+ break;
+ case CHIPSET_RV630:
+ fprintf (stderr, "Chipset: RV630\n\n");
+ break;
+ case CHIPSET_RV635:
+ fprintf (stderr, "Chipset: RV635\n\n");
+ break;
+ case CHIPSET_RV670:
+ fprintf (stderr, "Chipset: RV670\n\n");
+ break;
+ case CHIPSET_RV770:
+ fprintf (stderr, "Chipset: RV770\n\n");
+ break;
+ case CHIPSET_RV710:
+ fprintf (stderr, "Chipset: RV710\n\n");
+ break;
+ case CHIPSET_RV730:
+ fprintf (stderr, "Chipset: RV730\n\n");
+ break;
+ default:
+ fprintf (stderr, "Chipset: untested, #%d\n\n", adapter.chipset);
+ }
+
+ /* NOTE(review): display_width is taken from the pitch register and
+ * display_height from Y_END -- confirm these equal the visible size */
+ display_width = reg_read32 (D1GRPH_PITCH);
+ display_height = reg_read32 (D1GRPH_Y_END);
+ display_gpu = reg_read32 (D1GRPH_PRIMARY_SURFACE_ADDRESS);
+ if (verbose >= 2) {
+ fprintf(stderr,"display_width=%d\n", display_width);
+ fprintf(stderr,"display gpu: 0x%08x\n", display_gpu);
+ }
+
+ /* Framebuffer GPU address: low 16 bits of MC_VM_FB_LOCATION shifted left
+ * by 24; chipsets ordered before CHIPSET_RV770 use the R6XX register */
+ if (adapter.chipset < CHIPSET_RV770)
+ adapter.framebuffer_gpu = ((uint64_t) (reg_read32 (R6XX_MC_VM_FB_LOCATION) & 0xffff)) << 24;
+ else
+ adapter.framebuffer_gpu = ((uint64_t) (reg_read32 (R7XX_MC_VM_FB_LOCATION) & 0xffff)) << 24;
+}
+
+/*
+ * Print the command line help to stdout and terminate with exit status 1.
+ * argv[0] is inserted as the program name; this function does not return.
+ */
+void usage (char *argv[]) {
+ printf ("\nUsage: %s [opts] <test(s)> [<reg>=<val>] [<reg>-[-<reg>]] [...]\n"
+ "Options:\n"
+ "-v\tVerbose (multiple to increase)\n"
+ "-r\tDirect ring programming\n"
+ "-f[0-4]\tVertex format 0: float\n"
+ "\t 1: int32 (FMT_32_32_FLOAT) 2: int16 (FMT_16_16_FLOAT)\n"
+ "\t 3: int32 (FMT_32_32) 4: int16 (FMT_16_16)\n"
+ "-i[0-2]\tLoad vertices 0: _SCALED by fetch engine\n"
+ "\t 1: _INT (& scale by ALU if vertex format != float)\n"
+ "\t 2: _NORM (& scale by ALU if vertex format != float)\n"
+ "-u\tVertex format unsigned\n"
+ "-S[0-2]\tVertex alu scale override 0: default 1: alu scale 2: no alu scale\n"
+ "-n\tDon't flush command buffer on tests, just print (implies -v)\n"
+ "\n"
+ "Test is composed of:\n"
+ "\n"
+ ".\tAdditional CP test\n"
+ "p\tExtensive CP tests\n"
+ "\n"
+ "r\t'r'eset GPU + CP\n"
+ "-\tSleep for 100 millisecond\n"
+ "c\tCPU based clear screen\n"
+ "\n"
+ "t\tedited tri test 2d (minimal)\n"
+ "T\tedited tri test 3d (clipping, modelview)\n"
+ "q\ttextured quad test (scaled)\n"
+ "\n"
+ "w\twin tri test\n"
+ "W\twin clear test\n"
+ "b\twin blit test\n"
+ "\n"
+ "x\ttemporary test (various)\n"
+ "\n"
+ "e\tEXA solid test\n"
+ "E\tEXA copy test\n"
+ "\n"
+ "[reg]s are dumped (also ranges) or written to, register addresses in hex\n"
+ "\n",
+ argv[0]);
+ exit (1);
+}
+
+/*
+ * Program entry point.
+ *
+ * 1. Parses the options (see usage()); at least one non-option argument,
+ *    the test string, is required.
+ * 2. Opens the radeon DRM device, creates a context, and sets up maps and
+ *    DMA buffers (GetMaps(), GetBufs()) and the adapter description.
+ * 3. Runs one test per character of the test string, flushing the command
+ *    buffer after each one and re-running the CP test unless the test
+ *    string is exactly "r" or -n was given.
+ * 4. Treats every further argument as "<reg>=<val>" (write, then dump) or
+ *    "<reg>[-<reg>]" (dump a register or a range), all values in hex.
+ *
+ * Returns 0 on success; setup failures terminate through exit().
+ */
+int main(int argc, char *argv[])
+{
+    int i, r;
+
+    fprintf (stderr, "\n*** %s, version %s\n\n", argv[0], VERSION);
+    while ((i = getopt(argc, argv, "vrf:i:uS:n")) != -1) {
+        switch (i) {
+        case 'v':
+            verbose++;
+            break;
+        case 'r':
+            use_ring_directly = 1;
+            break;
+        case 'f':
+            vertex_format = atoi (optarg);
+            if (vertex_format < 0 || vertex_format > 4)
+                usage(argv);
+            break;
+        case 'i':
+            vertex_load_as_int = atoi (optarg);
+            if (vertex_load_as_int < 0 || vertex_load_as_int > 2)
+                usage(argv);
+            break;
+        case 'u':
+            vertex_unsigned = 1;
+            break;
+        case 'S':
+            vertex_alu_scale_override = atoi (optarg);
+            if (vertex_alu_scale_override < 0 || vertex_alu_scale_override > 2)
+                usage(argv);
+            break;
+        case 'n':
+            verbose++;
+            do_not_flush = 1;
+            break;
+        default:
+            usage (argv);
+        }
+    }
+
+    if (optind >= argc)
+        usage (argv);
+
+    drmFD=drmOpen("radeon", NULL);
+    if(drmFD<0){
+        drmError(drmFD, __func__ );
+        fprintf(stderr, "Check that BusId is correct. You can find the correct BusId in /var/log/Xorg.0.log\n");
+        fprintf(stderr, "You can also try setting the environment variable LIBGL_DEBUG to \"verbose\" to see what libdrm is trying to do.\n");
+        exit(-1);
+    }
+    if (verbose >= 1)
+        print_versions();
+    if((r=drmCreateContext(drmFD, &context))){
+        drmError(r, __func__);
+        fprintf(stderr, "Could not create context, aborting\n");
+        exit(-1);
+    }
+    if (verbose >= 2)
+        fprintf(stderr,"Context %d\n",context);
+
+    GetMaps();
+    GetBufs();
+
+    /* setup */
+
+    srandom (time (NULL));
+    read_registers();
+    reg_write32 (SCRATCH_REG7, 0);
+
+    adapter.framebuffer = framebuffer;
+    if (verbose >= 2)
+        fprintf(stderr, "framebuffer cpu %p, gpu 0x" PRINTF_UINT64_HEX "\n",
+                framebuffer, adapter.framebuffer_gpu);
+    adapter.display_gpu = display_gpu;
+    /* CPU address of the scanout buffer: framebuffer mapping plus the offset
+     * of the display surface inside the framebuffer aperture */
+    adapter.display = framebuffer + display_gpu - adapter.framebuffer_gpu;
+    adapter.display_pitch = display_width;
+    adapter.display_width = display_width;
+    adapter.display_height = display_height;
+
+    /* Render straight into the displayed surface */
+    adapter.color_gpu = display_gpu;
+    adapter.color_pitch = display_width;
+    adapter.color_height = 480;
+
+    /* Depth buffer is placed 500 rows (at 4 bytes per pixel) behind the start */
+    adapter.depth_gpu = display_gpu + display_width*4*500;
+    adapter.depth_pitch = display_width;
+    adapter.depth_height = 480;
+
+    if (verbose >= 1) {
+        fprintf (stderr, "\nfb: gpu 0x" PRINTF_UINT64_HEX ", cpu %p\n",
+                 adapter.framebuffer_gpu, adapter.framebuffer);
+        fprintf (stderr, "display: gpu 0x" PRINTF_UINT64_HEX ", cpu %p (%dx%d) @%d\n",
+                 adapter.display_gpu, adapter.display, adapter.display_width, adapter.display_height, adapter.display_pitch);
+        fprintf (stderr, "color RT: gpu 0x" PRINTF_UINT64_HEX " (%dx%d)\n",
+                 adapter.color_gpu, adapter.color_pitch, adapter.color_height);
+        fprintf (stderr, "depth: gpu 0x" PRINTF_UINT64_HEX " (%dx%d)\n",
+                 adapter.depth_gpu, adapter.depth_pitch, adapter.depth_height);
+        fprintf (stderr, "gart: gpu 0x" PRINTF_UINT64_HEX ", cpu %p\n",
+                 gart_gpu, gart);
+        fprintf (stderr, "gart bufs:gpu 0x" PRINTF_UINT64_HEX ", cpu %p\n",
+                 gartbuf_gpu, gartbuf);
+        fprintf (stderr, "buf vtx: gpu 0x" PRINTF_UINT64_HEX ", cpu %p\n",
+                 vtx_gpu, vtx);
+        fprintf (stderr, "buf tex: gpu 0x" PRINTF_UINT64_HEX ", cpu %p\n",
+                 tex_gpu, tex);
+        fprintf (stderr, "ring: gpu 0x" PRINTF_UINT64_HEX ", cpu %p\n",
+                 ring_gpu, ring);
+    }
+    if (verbose >= 1)
+        fprintf (stderr, "garttex: gpu 0x" PRINTF_UINT64_HEX ", cpu %p\n", garttex_gpu, garttex);
+
+    if (verbose >= 1)
+        show_state (&adapter);
+
+    if (verbose >= 1)
+        fprintf(stderr,"\nTests:\n\n");
+    system("sync");
+
+    /* play around with indirect buffer here */
+
+    switch (argv[optind][0]) {
+    case 'r':
+    case '\0':
+        break;
+    default:
+        /* CP test only if not resetting or dumping */
+        if (! do_not_flush) {
+            test_cp (&adapter);
+            flush_cmds ();
+        }
+    }
+
+    /* One test per character of the test string */
+    for (i=0; argv[optind][i]; i++) {
+        switch (argv[optind][i]) {
+        case '.':
+            test_cp (&adapter);
+            break;
+        case 'p':
+            test_packets (&adapter);
+            break;
+        case 'r':
+            soft_reset (&adapter);
+            break;
+        case '-':
+            usleep (100000);
+            break;
+        case 'c':
+            simple_clear (&adapter);
+            break;
+        case 't':
+            tri_test_2d (&adapter);
+            break;
+        case 'T':
+            tri_test_3d (&adapter);
+            break;
+        case 'q':
+            quad_test_tex_scaled (&adapter);
+            break;
+        case 'w':
+            pm4play_tri_test (&adapter);
+            break;
+        case 'W':
+            pm4play_clear_test (&adapter);
+            break;
+        case 'b':
+            pm4play_blit_test (&adapter);
+            break;
+        case 'x':
+            tmp_test (&adapter);
+            break;
+        case 'e':
+            test_solid (&adapter);
+            break;
+        case 'E':
+            test_copy (&adapter);
+            break;
+        default:
+            fprintf (stderr, "***** Don't know '%c' test\n\n", argv[optind][i]);
+            exit (1);
+        }
+        flush_cmds ();
+        if (strcmp (argv[optind], "r") != 0 && ! do_not_flush) {
+            /* Verify that CP is not locked up */
+            test_cp (&adapter);
+            flush_cmds ();
+        }
+    }
+
+    if (argv[optind][0] != '\0')
+        usleep (100000);
+
+    /* If registers are to be dumped or written to, do it */
+    if (argv[optind+1]) {
+        printf ("\n");
+        for (i = optind+1; argv[i]; i++) {
+            unsigned int start, end=0, val;
+            if (sscanf (argv[i], "%x=%x", &start, &val) == 2)
+                write_register (&adapter, start, val); /* reread after writing. */
+            /* sscanf() returns EOF (-1) on e.g. an empty argument; only a
+             * real conversion (>= 1) guarantees that start has been set */
+            if (sscanf (argv[i], "%x-%x", &start, &end) >= 1) {
+                /* end stays 0 for a single register: exactly one dump */
+                do {
+                    dump_register (&adapter, start);
+                    start += 4;
+                } while (start <= end);
+            }
+        }
+    }
+
+    show_state (&adapter);
+
+    /* free stuff */
+
+    drmFreeBufs(drmFD,dma.granted_count,indices);
+
+    drmClose(drmFD);
+    return 0;
+}
diff --git a/r600_emit.h b/r600_emit.h
new file mode 100644
index 0000000..14d6d33
--- /dev/null
+++ b/r600_emit.h
@@ -0,0 +1,217 @@
+/*
+ * RadeonHD R6xx, R7xx DRI driver
+ *
+ * Copyright (C) 2008-2009 Matthias Hopf
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef _R600_EMIT_H
+#define _R600_EMIT_H
+
+/* This contains various usability inline functions + macros.
+ * All based on E32(), CMD_BUFFER_PREAMBLE(), CMD_BUFFER_DEBUG_AVAIL(),
+ * which have to be defined externally: */
+
+#include "r600_hwapi.h"
+
+
+/* R6xx knows 2 ways to set registers: Packet0, and a set of Packet3 commands,
+ * that set certain parts of the register file.
+ * Set this to 1 if you want to use the various Packet3 commands. */
+//#define USE_SET_CPCMDS 1
+
+
+/* Build a packet header dword: the packet type flag, the payload dword
+ * count n stored as (n-1) starting at bit 16, and in the low bits either
+ * the register address divided by 4 (PACKET0) or the opcode shifted left
+ * by 8 (PACKET3). */
+#define CP_PACKET0(reg, n) (RADEON_CP_PACKET0 | (((n)-1)<<16) | ((reg)>>2))
+#define CP_PACKET3(cmd, n) (RADEON_CP_PACKET3 | (((n)-1)<<16) | ((cmd)<<8))
+
+
+/*
+ * Emit macros
+ */
+
+/* Emit uint32_t */
+/* #define E32(x) already defined outside this headerfile */
+
+
+/* Emit float value */
+/* The float is stored into a union and its bit pattern is emitted unchanged
+ * as one uint32_t through E32(); no numeric conversion takes place. */
+#define EFLOAT(x) \
+ do { \
+ union { float f; uint32_t d; } _u; \
+ _u.f=(x); \
+ E32(_u.d); \
+ } while (0)
+
+
+/* Emit list-of-register writes */
+/* Emits only the header for writing num consecutive registers starting at
+ * reg; the caller emits the num value dwords afterwards.  With
+ * USE_SET_CPCMDS the work is delegated to the pack0() function.
+ * The arguments are parenthesized, so expressions may be passed safely. */
+#ifdef USE_SET_CPCMDS
+# define PACK0(reg, num) pack0 ((reg), (num))
+#else
+# define PACK0(reg, num) \
+    do { \
+        CMD_BUFFER_DEBUG_AVAIL ((num)+1); \
+        DEBUG (if ((num) > 0x4000) { \
+            fprintf (stderr, "Cannot emit packet0 0x%02x with %d dwords\n", (reg), (num)); \
+            exit (1); } \
+        ); \
+        E32 (CP_PACKET0 ((reg), (num))); \
+    } while (0)
+#endif
+
+
+/* Emit Packet3 commands */
+/* Emits only the header of a Packet3 with opcode cmd and num payload dwords;
+ * the caller emits the payload afterwards.
+ * The macro expands to a single statement WITHOUT a trailing semicolon, so
+ * "if (x) PACK3 (...); else ..." works; callers supply the semicolon. */
+#define PACK3(cmd, num) \
+    do { \
+        CMD_BUFFER_DEBUG_AVAIL ((num)+1); \
+        DEBUG (if ((num) > 0x4000) { \
+            fprintf (stderr, "Cannot emit packet3 0x%02x with %d dwords\n", (cmd), (num)); \
+            exit (1); } \
+        ); \
+        E32 (CP_PACKET3 ((cmd), (num))); \
+    } while (0)
+
+
+/* write a single register, uint32_t */
+/* Emits two dwords: the PACK0 header for one register, then val. */
+#define EREG(reg, val) \
+ do { \
+ CMD_BUFFER_DEBUG_AVAIL (2); \
+ PACK0 (reg, 1); \
+ E32 (val); \
+ } while (0)
+
+/* write a single register, float */
+/* Emits two dwords: the PACK0 header for one register, then the bit pattern
+ * of val as produced by EFLOAT(). */
+#define EREGFLOAT(reg, val) \
+ do { \
+ CMD_BUFFER_DEBUG_AVAIL (2); \
+ PACK0 (reg, 1); \
+ EFLOAT (val); \
+ } while (0)
+
+
+/*
+ * Support macros for emitting standard command sequences
+ */
+
+/* Write WAIT_3D_IDLE_bit to the WAIT_UNTIL register.
+ * ELEN_WAIT_3D_IDLE is the number of dwords this emits (header + value).
+ * NOTE(review): the expansion already ends in ';', so using the macro as the
+ * body of an unbraced if/else will not parse as intended. */
+#define EMIT_WAIT_3D_IDLE() \
+ EREG (WAIT_UNTIL, WAIT_3D_IDLE_bit);
+#define ELEN_WAIT_3D_IDLE 2
+
+/* Emit an IT_EVENT_WRITE packet carrying CACHE_FLUSH_AND_INV_EVENT, then
+ * write WAIT_3D_IDLECLEAN_bit to the WAIT_UNTIL register.
+ * ELEN_WAIT_3D_IDLE_CLEAN is the number of dwords this emits
+ * (packet header + event + register header + value).
+ * NOTE(review): expands to three separate statements that are not wrapped in
+ * do { } while (0); only the first one would be guarded by an unbraced if. */
+#define EMIT_WAIT_3D_IDLE_CLEAN() \
+ PACK3 (IT_EVENT_WRITE, 1); /* flush caches, no timestamp */ \
+ E32 (CACHE_FLUSH_AND_INV_EVENT); \
+ EREG (WAIT_UNTIL, WAIT_3D_IDLECLEAN_bit);
+#define ELEN_WAIT_3D_IDLE_CLEAN 4
+
+
+/*
+ * Function versions (old style, to be nuked)
+ */
+
+/* Function form of E32(): emit one dword into the command buffer.
+ * Runs CMD_BUFFER_NOCHECK_PREAMBLE (defined outside this header) first. */
+static inline void e32 (uint32_t dword)
+{
+    CMD_BUFFER_NOCHECK_PREAMBLE;
+
+    E32 (dword);
+}
+
+/* Function form of EFLOAT(): emit the bit pattern of a float as one dword.
+ * The value is reinterpreted through a union, not converted numerically. */
+static inline void efloat (float f)
+{
+    union {
+        float as_float;
+        uint32_t as_dword;
+    } bits;
+    CMD_BUFFER_NOCHECK_PREAMBLE;
+
+    bits.as_float = f;
+    E32 (bits.as_dword);
+}
+
+/* Function form of PACK3(): emit the header of a Packet3 with opcode cmd and
+ * num payload dwords; the caller emits the payload afterwards.
+ * Terminates the program if num exceeds 0x4000 dwords. */
+static void inline pack3 (int cmd, unsigned num)
+{
+    CMD_BUFFER_PREAMBLE (num+1);
+    if (num > 0x4000) {
+        /* num is unsigned, so it has to be printed with %u */
+        fprintf (stderr, "Cannot emit packet3 0x%02x with %u dwords\n", cmd, num);
+        exit (1);
+    }
+    E32 (CP_PACKET3 (cmd, num));
+}
+
+/* write num registers, start at reg */
+/* If register falls in a special area, special commands are issued */
+/* Only the header is emitted; the caller emits the num value dwords.
+ * With USE_SET_CPCMDS a register inside one of the SET_* ranges is written
+ * with the matching IT_SET_* Packet3, whose first payload dword is the
+ * register's dword index relative to the start of that range; all other
+ * registers, and every register without USE_SET_CPCMDS, use a Packet0. */
+static inline void pack0 (uint32_t reg, int num)
+{
+#ifdef USE_SET_CPCMDS
+    CMD_BUFFER_PREAMBLE (num+2);
+    DEBUG (if(num > 0x3fff) {
+        fprintf (stderr, "Cannot emit packet0 emulation 0x%04x with %d dwords\n", reg, num);
+        exit (1); } );
+    if (reg >= SET_CONFIG_REG_offset && reg < SET_CONFIG_REG_end) {
+        PACK3 (IT_SET_CONFIG_REG, num+1);
+        E32 ((reg-SET_CONFIG_REG_offset) >> 2);
+    } else if (reg >= SET_CONTEXT_REG_offset && reg < SET_CONTEXT_REG_end) {
+        PACK3 (IT_SET_CONTEXT_REG, num+1);
+        /* same base as in the range test above, instead of a literal 0x28000 */
+        E32 ((reg-SET_CONTEXT_REG_offset) >> 2);
+    } else if (reg >= SET_ALU_CONST_offset && reg < SET_ALU_CONST_end) {
+        PACK3 (IT_SET_ALU_CONST, num+1);
+        E32 ((reg-SET_ALU_CONST_offset) >> 2);
+    } else if (reg >= SET_RESOURCE_offset && reg < SET_RESOURCE_end) {
+        PACK3 (IT_SET_RESOURCE, num+1);
+        E32 ((reg-SET_RESOURCE_offset) >> 2);
+    } else if (reg >= SET_SAMPLER_offset && reg < SET_SAMPLER_end) {
+        PACK3 (IT_SET_SAMPLER, num+1);
+        E32 ((reg-SET_SAMPLER_offset) >> 2);
+    } else if (reg >= SET_CTL_CONST_offset && reg < SET_CTL_CONST_end) {
+        PACK3 (IT_SET_CTL_CONST, num+1);
+        E32 ((reg-SET_CTL_CONST_offset) >> 2);
+    } else if (reg >= SET_LOOP_CONST_offset && reg < SET_LOOP_CONST_end) {
+        PACK3 (IT_SET_LOOP_CONST, num+1);
+        E32 ((reg-SET_LOOP_CONST_offset) >> 2);
+    } else if (reg >= SET_BOOL_CONST_offset && reg < SET_BOOL_CONST_end) {
+        PACK3 (IT_SET_BOOL_CONST, num+1);
+        E32 ((reg-SET_BOOL_CONST_offset) >> 2);
+    } else {
+        E32 (CP_PACKET0 (reg, num));
+    }
+#else
+    CMD_BUFFER_PREAMBLE (num+1);
+    DEBUG (if(num > 0x4000) {
+        fprintf (stderr, "Cannot emit packet0 0x%04x with %d dwords\n", reg, num);
+        exit (1); } );
+    E32 (CP_PACKET0 (reg, num));
+#endif
+}
+
+/* Function form of EREG(): write val to the single register reg by emitting
+ * a one-register pack0() header followed by the value dword. */
+static inline void ereg (uint32_t reg, uint32_t val)
+{
+    pack0 (reg, 1);     /* header for exactly one register */
+    e32 (val);          /* the register value */
+}
+
+/* Touch the memory range [start, end) with volatile reads, one read every
+ * 16 uint32_t (64 bytes with 4-byte uint32_t).
+ * Intent per the original author: read back each cache line from main
+ * memory to ensure write combined data is flushed; a cache line is at least
+ * 64 bytes for all modern cpus. */
+static inline void flush_cache (void *start, void *end)
+{
+    volatile uint32_t *line = start;
+
+    while ((void *) line < end) {
+        (void) *line;   /* the read itself is the purpose, value is unused */
+        line += 16;
+    }
+}
+
+/* Run flush_cache() over the indirect buffer, from its start up to
+ * indirect_end dwords behind it. */
+static inline void flush_cache_indirect (void)
+{
+    uint32_t *first = (uint32_t *) indirect;
+
+    flush_cache (indirect, first + indirect_end);
+}
+
+#endif /* _R600_EMIT_H */
diff --git a/r600_exa.c b/r600_exa.c
new file mode 100644
index 0000000..16bf714
--- /dev/null
+++ b/r600_exa.c
@@ -0,0 +1,943 @@
+/*
+ * EXA acceleration functions
+ *
+ * Copyright (C) 2008-2009 Alexander Deucher
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include "r600_reg.h"
+#include "r600_emit.h"
+#include "r600_lib.h"
+#include "r600_state.h"
+#include "r600_init.h"
+#include "r600_shader.h"
+
+/*
+ * One vertex of a solid fill as stored in the vertex buffer.
+ * R600DoneSolid uploads these with a 12 byte stride; the vertex shader
+ * built in R600PrepareSolid fetches x/y from offset 0 as a signed
+ * 32_32 integer pair and color from offset 8 as 8_8_8_8.
+ */
+struct r6xx_solid_vertex {
+ int32_t x;
+ int32_t y;
+ uint32_t color;
+};
+
+/*
+ * One vertex of a copy as stored in the vertex buffer.
+ * R600Copy fills x/y with destination and s/t with source coordinates.
+ * R600DoneCopy uploads these with a 16 byte stride; the vertex shader
+ * built in R600PrepareCopy fetches x/y from offset 0 and s/t from
+ * offset 8, both as signed 32_32 integer pairs.
+ */
+struct r6xx_copy_vertex {
+ int32_t x;
+ int32_t y;
+ int32_t s;
+ int32_t t;
+};
+
+/*
+ * Build the vertex and pixel shader for solid fills, upload them and
+ * emit the render state that R600Solid / R600DoneSolid rely on.
+ *
+ * adapt: adapter whose color buffer is used as render target
+ * alu, pm, fg: currently not referenced in the body (see the "fix me"
+ * notes on planemask and rops below)
+ */
+static void
+R600PrepareSolid(adapter_t *adapt, int alu, uint32_t pm, uint32_t fg)
+{
+ cb_config_t cb_conf;
+ shader_config_t vs_conf, ps_conf;
+ uint64_t vs_addr, ps_addr;
+ int i = 0;
+ uint32_t vs[20]; /* 4 CF + 2 ALU instructions (2 dwords each), 2 VTX fetches (4 dwords each) */
+ uint32_t ps[2]; /* a single export instruction */
+ uint32_t blendcntl;
+
+ //0: CF VTX clause covering the two fetches at 6/7 and 8/9
+ vs[i++] = CF_DWORD0(ADDR(6));
+ vs[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ COUNT(2),
+ CALL_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_VTX),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ //1: CF ALU clause covering the two ALU instructions at 4 and 5
+ vs[i++] = CF_ALU_DWORD0(ADDR(4),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(0));
+ vs[i++] = CF_ALU_DWORD1(KCACHE_MODE1(0),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ COUNT(2),
+ USES_WATERFALL(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ //2: export GPR1 as position
+ vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
+ TYPE(SQ_EXPORT_POS),
+ RW_GPR(1),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0));
+ vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ R6xx_ELEM_LOOP(0),
+ BURST_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ //3: export GPR2 as parameter 0, end of program
+ vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
+ TYPE(SQ_EXPORT_PARAM),
+ RW_GPR(2),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0));
+ vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ R6xx_ELEM_LOOP(0),
+ BURST_COUNT(0),
+ END_OF_PROGRAM(1),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(0));
+ //4: GPR1.x = INT_TO_FLT (GPR1.x)
+ vs[i++] = ALU_DWORD0(SRC0_SEL(1), /* ALU inst 0 */
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ vs[i++] = ALU_DWORD1_OP2(adapt->chipset,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INT_TO_FLT),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+ //5: GPR1.y = INT_TO_FLT (GPR1.y)
+ vs[i++] = ALU_DWORD0(SRC0_SEL(1), /* ALU inst 1 */
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ vs[i++] = ALU_DWORD1_OP2(adapt->chipset,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INT_TO_FLT),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+ //6/7: fetch x/y (offset 0, signed 32_32 integers) into GPR1
+ vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ MEGA_FETCH_COUNT(12));
+ vs[i++] = VTX_DWORD1_GPR(DST_GPR(1),
+ DST_REL(0),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_0),
+ DST_SEL_W(SQ_SEL_1),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_32_32), //xxx
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_INT), //xxx
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+ vs[i++] = VTX_DWORD2(OFFSET(0),
+ ENDIAN_SWAP(ENDIAN_NONE),
+ CONST_BUF_NO_STRIDE(0),
+ MEGA_FETCH(1));
+ vs[i++] = VTX_DWORD_PAD;
+ //8/9: fetch color (offset 8, unsigned normalized 8_8_8_8) into GPR2
+ vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ MEGA_FETCH_COUNT(4));
+ vs[i++] = VTX_DWORD1_GPR(DST_GPR(2),
+ DST_REL(0),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_Z),
+ DST_SEL_W(SQ_SEL_W),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_8_8_8_8),
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM),
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_UNSIGNED),
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+ vs[i++] = VTX_DWORD2(OFFSET(8),
+ ENDIAN_SWAP(ENDIAN_NONE),
+ CONST_BUF_NO_STRIDE(0),
+ MEGA_FETCH(0));
+ vs[i++] = VTX_DWORD_PAD;
+
+ /* Pixel shader: export GPR0 to render target 0, end of program */
+ i = 0;
+ ps[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+ TYPE(SQ_EXPORT_PIXEL),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(1));
+ ps[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ R6xx_ELEM_LOOP(0),
+ BURST_COUNT(1),
+ END_OF_PROGRAM(1),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ CLEAR (cb_conf);
+ CLEAR (vs_conf);
+ CLEAR (ps_conf);
+
+ if (verbose) {
+ dump_shader (adapt, vs, sizeof(vs), "vertex");
+ dump_shader (adapt, ps, sizeof(ps), "pixel");
+ printf ("\n");
+ }
+
+ /* Init */
+ start_3d(adapt);
+
+ cp_set_surface_sync();
+
+ set_default_state(adapt);
+
+ /* Scissor / viewport */
+ ereg (PA_CL_VTE_CNTL, VTX_XY_FMT_bit);
+ ereg (PA_CL_CLIP_CNTL, CLIP_DISABLE_bit);
+
+ // fixme use gart buffer directly
+ vs_addr = upload (adapt, vs, sizeof(vs), 0);
+ ps_addr = upload (adapt, ps, sizeof(ps), 4096);
+
+ /* Shader */
+ vs_conf.shader_addr = vs_addr;
+ vs_conf.num_gprs = 4;
+ vs_conf.stack_size = 1;
+ vs_setup (adapt, &vs_conf);
+
+ ps_conf.shader_addr = ps_addr;
+ ps_conf.num_gprs = 1;
+ ps_conf.stack_size = 0;
+ ps_conf.uncached_first_inst = 1;
+ ps_conf.clamp_consts = 1;
+ ps_conf.export_mode = 2;
+ ps_setup (adapt, &ps_conf);
+
+ /* Render setup */
+ // XXX fix me planemask
+ ereg (CB_SHADER_MASK, (0x0f << OUTPUT0_ENABLE_shift));
+ ereg (R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit));
+ /* Only the "Over" assignment below is active; the commented-out
+ * alternatives are the blend factors for the other operators. */
+ /* Clear */
+ //blendcntl = (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift);
+ /* Src */
+ //blendcntl = (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift);
+ /* Dst */
+ //blendcntl = (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift);
+ /* Over */
+ blendcntl = (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift);
+ /* OverReverse */
+ //blendcntl = (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift);
+ /* In */
+ //blendcntl = (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift);
+ /* InReverse */
+ //blendcntl = (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift);
+ /* Out */
+ //blendcntl = (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift);
+ /* OutReverse */
+ //blendcntl = (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift);
+ /* Atop */
+ //blendcntl = (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift);
+ /* AtopReverse */
+ //blendcntl = (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift);
+ /* Xor */
+ //blendcntl = (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift);
+ /* Add */
+ //blendcntl = (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift);
+
+ // XXX fix me rops
+ /* CHIPSET_R600 takes the blend factors in CB_BLEND_CONTROL; all other
+ * chipsets get PER_MRT_BLEND_bit and CB_BLEND0_CONTROL instead */
+ if (adapt->chipset == CHIPSET_R600) {
+ ereg (CB_COLOR_CONTROL, ((0xcc << ROP3_shift) | /* copy */
+ (1 << TARGET_BLEND_ENABLE_shift)));
+ ereg (CB_BLEND_CONTROL, blendcntl);
+ } else {
+ ereg (CB_COLOR_CONTROL, ((0xcc << ROP3_shift) | /* copy */
+ (1 << TARGET_BLEND_ENABLE_shift) |
+ PER_MRT_BLEND_bit));
+ ereg (CB_BLEND0_CONTROL, blendcntl);
+ }
+
+ /* Render target 0 is the adapter's color buffer */
+ cb_conf.id = 0;
+ cb_conf.w = adapt->color_pitch;
+ cb_conf.h = adapt->color_height;
+ cb_conf.base = adapt->color_gpu;
+ // XXX fix me depth 16
+ cb_conf.format = COLOR_8_8_8_8;
+ cb_conf.comp_swap = 0;
+ cb_conf.source_format = 1;
+ cb_conf.blend_clamp = 1;
+ set_render_target(adapt, &cb_conf);
+
+ ereg (PA_SU_SC_MODE_CNTL, (FACE_bit |
+ (POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift) |
+ (POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift)));
+ ereg (DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */
+ DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */
+
+ /* Interpolator setup */
+ /* Enabling flat shading needs both FLAT_SHADE_bit in SPI_PS_INPUT_CNTL_x
+ * *and* FLAT_SHADE_ENA_bit in SPI_INTERP_CONTROL_0 */
+ ereg (SPI_PS_IN_CONTROL_0, (((2 - 1) << NUM_INTERP_shift)));
+ ereg (SPI_PS_IN_CONTROL_1, 0);
+ ereg (SPI_PS_INPUT_CNTL_0 + (0 <<2), ((0 << SEMANTIC_shift) |
+ (0x03 << DEFAULT_VAL_shift) |
+ FLAT_SHADE_bit |
+ SEL_CENTROID_bit));
+ ereg (SPI_INTERP_CONTROL_0, /* FLAT_SHADE_ENA_bit | */ 0);
+
+}
+
+/*
+ * Queue one solid rectangle.  Three vertices, (x1,y1), (x1,y2) and
+ * (x2,y2), each carrying fg as color, are appended to vb starting at
+ * *index, and *index is advanced by three.
+ */
+static void
+R600Solid(adapter_t *adapt, int x1, int y1, int x2, int y2, uint32_t fg,
+          struct r6xx_solid_vertex *vb, int *index)
+{
+    struct r6xx_solid_vertex *out = &vb[*index];
+
+    out[0].x = x1;
+    out[0].y = y1;
+    out[0].color = fg;
+
+    out[1].x = x1;
+    out[1].y = y2;
+    out[1].color = fg;
+
+    out[2].x = x2;
+    out[2].y = y2;
+    out[2].color = fg;
+
+    *index += 3;
+}
+
+/*
+ * Upload the queued solid-fill vertices and draw them as a rectangle
+ * list, then wait for the 3D engine.  Returns without emitting anything
+ * when *index is 0.
+ */
+static void
+R600DoneSolid(adapter_t *adapt, struct r6xx_solid_vertex *vb, int *index)
+{
+    const int vertex_bytes = 12;   /* x, y, color: one dword each */
+    draw_config_t draw_conf;
+    vtx_resource_t vtx_res;
+    uint64_t vb_addr;
+
+    CLEAR (draw_conf);
+    CLEAR (vtx_res);
+
+    /* nothing queued, nothing to draw */
+    if (*index == 0)
+        return;
+
+    // fixme use gart buffer directly
+    vb_addr = upload (adapt, vb, (*index) * vertex_bytes, 8192);
+
+    /* Vertex buffer setup */
+    vtx_res.vb_addr = vb_addr;
+    vtx_res.id = SQ_VTX_RESOURCE_vs;
+    vtx_res.mem_req_size = 1;
+    vtx_res.vtx_size_dw = vertex_bytes / 4;
+    vtx_res.vtx_num_entries = (*index) * vertex_bytes / 4;
+    set_vtx_resource (adapt, &vtx_res);
+
+    /* Draw: auto-generated indices, one per uploaded vertex */
+    draw_conf.num_indices = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw;
+    draw_conf.num_instances = 1;
+    draw_conf.index_type = DI_INDEX_SIZE_16_BIT;
+    draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX;
+    draw_conf.prim_type = DI_PT_RECTLIST;
+
+    ereg (VGT_INSTANCE_STEP_RATE_0, 0); /* ? */
+    ereg (VGT_INSTANCE_STEP_RATE_1, 0);
+
+    ereg (VGT_MAX_VTX_INDX, draw_conf.num_indices);
+    ereg (VGT_MIN_VTX_INDX, 0);
+    ereg (VGT_INDX_OFFSET, 0);
+
+    draw_auto(adapt, &draw_conf);
+
+    wait_3d_idle_clean();
+}
+
+/*
+ * Solid fill test: prepares the solid state, queues three rectangles in
+ * color 0x80ff0000 and draws them in one go.
+ */
+void
+test_solid(adapter_t *adapt)
+{
+    /* x1, y1, x2, y2 of every rectangle to fill */
+    static const int rects[][4] = {
+        {   0,   0, 200, 200 },
+        { 500, 500, 700, 700 },
+        {   0, 300, 300, 400 },
+    };
+    struct r6xx_solid_vertex vb[256];
+    const uint32_t fg = 0x80ff0000;
+    const uint32_t pm = 0xffffffff;
+    const int alu = 3;
+    int index = 0;
+    unsigned int r;
+
+    R600PrepareSolid(adapt, alu, pm, fg);
+
+    // build vertex buffer
+    for (r = 0; r < sizeof(rects) / sizeof(rects[0]); r++)
+        R600Solid(adapt, rects[r][0], rects[r][1], rects[r][2], rects[r][3],
+                  fg, vb, &index);
+
+    R600DoneSolid(adapt, vb, &index);
+}
+
+/*
+ * Build the vertex and pixel shader for copies, upload them and emit
+ * the texture, sampler and render state that R600Copy / R600DoneCopy
+ * rely on.  The adapter's color buffer serves both as source texture
+ * and as render target.
+ *
+ * adapt: adapter to set up
+ * xdir, ydir, rop, planemask: currently not referenced in the body (see
+ * the "fixme" notes on planemask and rops below)
+ */
+static void
+R600PrepareCopy(adapter_t *adapt,
+ int xdir, int ydir,
+ int rop,
+ uint32_t planemask)
+{
+ int i = 0;
+ uint32_t vs[24]; /* 4 CF + 4 ALU instructions (2 dwords each), 2 VTX fetches (4 dwords each) */
+ uint32_t ps[8]; /* 2 CF instructions (2 dwords each), 1 TEX instruction (4 dwords) */
+
+ //0: CF VTX clause covering the two fetches at 8/9 and 10/11
+ vs[i++] = CF_DWORD0(ADDR(8));
+ vs[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ COUNT(2),
+ CALL_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_VTX),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ //1: CF ALU clause covering the four ALU instructions at 4..7
+ vs[i++] = CF_ALU_DWORD0(ADDR(4),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(0));
+ vs[i++] = CF_ALU_DWORD1(KCACHE_MODE1(0),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ COUNT(4),
+ USES_WATERFALL(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ //2: export GPR1 as position
+ vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
+ TYPE(SQ_EXPORT_POS),
+ RW_GPR(1),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0));
+ vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ R6xx_ELEM_LOOP(0),
+ BURST_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ //3: export GPR0 as parameter 0, end of program
+ vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
+ TYPE(SQ_EXPORT_PARAM),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0));
+ vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ R6xx_ELEM_LOOP(0),
+ BURST_COUNT(0),
+ END_OF_PROGRAM(1),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(0));
+ //4: GPR1.x = INT_TO_FLT (GPR1.x)
+ vs[i++] = ALU_DWORD0(SRC0_SEL(1), /* ALU inst 0 */
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ vs[i++] = ALU_DWORD1_OP2(adapt->chipset,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INT_TO_FLT),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+ //5: GPR1.y = INT_TO_FLT (GPR1.y)
+ vs[i++] = ALU_DWORD0(SRC0_SEL(1), /* ALU inst 1 */
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ vs[i++] = ALU_DWORD1_OP2(adapt->chipset,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INT_TO_FLT),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+ //6: GPR0.x = INT_TO_FLT (GPR0.x)
+ vs[i++] = ALU_DWORD0(SRC0_SEL(0), /* ALU inst 2 */
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ vs[i++] = ALU_DWORD1_OP2(adapt->chipset,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INT_TO_FLT),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+ //7: GPR0.y = INT_TO_FLT (GPR0.y)
+ vs[i++] = ALU_DWORD0(SRC0_SEL(0), /* ALU inst 3 */
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ vs[i++] = ALU_DWORD1_OP2(adapt->chipset,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INT_TO_FLT),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+ //8/9: fetch x/y (offset 0, signed 32_32 integers) into GPR1
+ vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ MEGA_FETCH_COUNT(16));
+ vs[i++] = VTX_DWORD1_GPR(DST_GPR(1),
+ DST_REL(0),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_0),
+ DST_SEL_W(SQ_SEL_1),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_32_32), //xxx
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_INT), //xxx
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+ vs[i++] = VTX_DWORD2(OFFSET(0),
+ ENDIAN_SWAP(ENDIAN_NONE),
+ CONST_BUF_NO_STRIDE(0),
+ MEGA_FETCH(1));
+ vs[i++] = VTX_DWORD_PAD;
+ //10/11: fetch s/t (offset 8, signed 32_32 integers) into GPR0
+ vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ MEGA_FETCH_COUNT(8));
+ vs[i++] = VTX_DWORD1_GPR(DST_GPR(0),
+ DST_REL(0),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_0),
+ DST_SEL_W(SQ_SEL_1),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_32_32), //xxx
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_INT), //xxx
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+ vs[i++] = VTX_DWORD2(OFFSET(8),
+ ENDIAN_SWAP(ENDIAN_NONE),
+ CONST_BUF_NO_STRIDE(0),
+ MEGA_FETCH(0));
+ vs[i++] = VTX_DWORD_PAD;
+
+ i = 0;
+
+ // CF INST 0: TEX clause covering the single sample instruction at slot 2
+ ps[i++] = CF_DWORD0(ADDR(2));
+ ps[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ COUNT(1),
+ CALL_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_TEX),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ // CF INST 1: export GPR0 to render target 0, end of program
+ ps[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+ TYPE(SQ_EXPORT_PIXEL),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(1));
+ ps[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ R6xx_ELEM_LOOP(0),
+ BURST_COUNT(1),
+ END_OF_PROGRAM(1),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ // TEX INST 0: sample resource 0 / sampler 0 at GPR0.xy, result in GPR0
+ ps[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+ BC_FRAC_MODE(0),
+ FETCH_WHOLE_QUAD(0),
+ RESOURCE_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ R7xx_ALT_CONST(0));
+ ps[i++] = TEX_DWORD1(DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_Z),
+ DST_SEL_W(SQ_SEL_W),
+ LOD_BIAS(0),
+ COORD_TYPE_X(TEX_UNNORMALIZED),
+ COORD_TYPE_Y(TEX_UNNORMALIZED),
+ COORD_TYPE_Z(TEX_UNNORMALIZED),
+ COORD_TYPE_W(TEX_UNNORMALIZED));
+ ps[i++] = TEX_DWORD2(OFFSET_X(0),
+ OFFSET_Y(0),
+ OFFSET_Z(0),
+ SAMPLER_ID(0),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1));
+ /* Fourth (padding) dword of the TEX instruction.  Without it ps[7]
+ * stays uninitialized, yet the whole array is dumped and uploaded
+ * with sizeof(ps) below. */
+ ps[i++] = 0x00000000;
+
+ cb_config_t cb_conf;
+ tex_resource_t tex_res;
+ tex_sampler_t tex_samp;
+ shader_config_t vs_conf, ps_conf;
+ uint64_t vs_addr, ps_addr;
+
+
+ CLEAR (cb_conf);
+ CLEAR (tex_res);
+ CLEAR (tex_samp);
+ CLEAR (vs_conf);
+ CLEAR (ps_conf);
+
+ if (verbose) {
+ dump_shader (adapt, vs, sizeof(vs), "vertex");
+ dump_shader (adapt, ps, sizeof(ps), "pixel");
+ printf ("\n");
+ }
+
+ /* Init */
+ start_3d(adapt);
+
+ cp_set_surface_sync();
+
+ set_default_state(adapt);
+
+ /* Scissor / viewport */
+ ereg (PA_CL_VTE_CNTL, VTX_XY_FMT_bit);
+ ereg (PA_CL_CLIP_CNTL, CLIP_DISABLE_bit);
+
+ // fixme use gart buffer directly
+ vs_addr = upload (adapt, vs, sizeof(vs), 0);
+ ps_addr = upload (adapt, ps, sizeof(ps), 4096);
+
+ /* Shader */
+ vs_conf.shader_addr = vs_addr;
+ vs_conf.num_gprs = 4;
+ vs_conf.stack_size = 1;
+ vs_setup (adapt, &vs_conf);
+
+ ps_conf.shader_addr = ps_addr;
+ ps_conf.num_gprs = 1;
+ ps_conf.stack_size = 0;
+ ps_conf.uncached_first_inst = 1;
+ ps_conf.clamp_consts = 1;
+ ps_conf.export_mode = 2;
+ ps_setup (adapt, &ps_conf);
+
+
+ /* Texture: the adapter's color buffer is the copy source */
+ tex_res.id = 0;
+ tex_res.w = adapt->color_pitch;
+ tex_res.h = adapt->color_height;
+ tex_res.pitch = adapt->color_pitch; //xxx
+ tex_res.depth = 0;
+ tex_res.dim = 1; //2D
+ tex_res.base = adapt->color_gpu;
+ tex_res.mip_base = adapt->color_gpu;
+ // fix me depth 16, etc.
+ tex_res.format = FMT_8_8_8_8;
+ tex_res.request_size = 2;
+ tex_res.dst_sel_x = 0;
+ tex_res.dst_sel_y = 1;
+ tex_res.dst_sel_z = 2;
+ tex_res.dst_sel_w = 3;
+ /* NOTE(review): base_level is larger than last_level although the
+ * sampler below disables mipmapping - confirm this is intended */
+ tex_res.base_level = 1;
+ tex_res.last_level = 0;
+ tex_res.perf_modulation = 1;
+ set_tex_resource (adapt, &tex_res);
+
+ tex_samp.id = 0;
+ tex_samp.clamp_x = 2;
+ tex_samp.clamp_y = 2;
+ tex_samp.clamp_z = 0;
+ tex_samp.xy_mag_filter = 0; /* 0: point 1:bilinear 2:bicubic */
+ tex_samp.xy_min_filter = 0; /* 0: point 1:bilinear 2:bicubic */
+ tex_samp.z_filter = 0; /* 0: none 1: point 2: linear */
+ tex_samp.mip_filter = 0; /* no mipmap */
+ set_tex_sampler (adapt, &tex_samp);
+
+
+ /* Render setup */
+ //fixme planemask
+ ereg (CB_SHADER_MASK, (0x0f << OUTPUT0_ENABLE_shift));
+ ereg (R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit));
+ // fixme rops
+ ereg (CB_COLOR_CONTROL, (0xcc << ROP3_shift)); /* copy */
+
+ /* Render target 0 is the adapter's color buffer as well */
+ cb_conf.id = 0;
+ cb_conf.w = adapt->color_pitch;
+ cb_conf.h = adapt->color_height;
+ cb_conf.base = adapt->color_gpu;
+ // fix me depth 16, etc.
+ cb_conf.format = COLOR_8_8_8_8;
+ cb_conf.comp_swap = 0;
+ cb_conf.source_format = 1;
+ cb_conf.blend_clamp = 1;
+ set_render_target(adapt, &cb_conf);
+
+ ereg (PA_SU_SC_MODE_CNTL, (FACE_bit |
+ (POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift) |
+ (POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift)));
+ ereg (DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */
+ DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */
+
+ /* Interpolator setup */
+ /* Enabling flat shading needs both FLAT_SHADE_bit in SPI_PS_INPUT_CNTL_x
+ * *and* FLAT_SHADE_ENA_bit in SPI_INTERP_CONTROL_0 */
+ ereg (SPI_PS_IN_CONTROL_0, (((2 - 1) << NUM_INTERP_shift)));
+ ereg (SPI_PS_IN_CONTROL_1, 0);
+ ereg (SPI_PS_INPUT_CNTL_0 + (0 <<2), ((0 << SEMANTIC_shift) |
+ (0x03 << DEFAULT_VAL_shift) |
+ //FLAT_SHADE_bit |
+ SEL_CENTROID_bit));
+ ereg (SPI_INTERP_CONTROL_0, /* FLAT_SHADE_ENA_bit | */ 0);
+
+}
+
+/*
+ * Queue one copy of a w x h area from (srcX,srcY) to (dstX,dstY).
+ * Three vertices are appended to vb starting at *index, for the corners
+ * (0,0), (0,h) and (w,h) of the area, each pairing the destination
+ * position (x/y) with the matching source position (s/t).  *index is
+ * advanced by three.
+ */
+static void
+R600Copy(adapter_t *adapt,
+         int srcX, int srcY,
+         int dstX, int dstY,
+         int w, int h,
+         struct r6xx_copy_vertex *vb, int *index)
+{
+    /* which of the emitted corners are displaced by w resp. h */
+    static const int corner[3][2] = { { 0, 0 }, { 0, 1 }, { 1, 1 } };
+    int c;
+
+    for (c = 0; c < 3; c++) {
+        struct r6xx_copy_vertex *v = &vb[(*index)++];
+        const int dx = corner[c][0] ? w : 0;
+        const int dy = corner[c][1] ? h : 0;
+
+        v->x = dstX + dx;
+        v->y = dstY + dy;
+        v->s = srcX + dx;
+        v->t = srcY + dy;
+    }
+}
+
+/*
+ * Upload the queued copy vertices and draw them as a rectangle list,
+ * then wait for the 3D engine.  Returns without emitting anything when
+ * *index is 0.
+ */
+static void
+R600DoneCopy(adapter_t *adapt, struct r6xx_copy_vertex *vb, int *index)
+{
+    const int vertex_bytes = 16;   /* x, y, s, t: one dword each */
+    uint64_t vb_addr;
+    vtx_resource_t vtx_res;
+    draw_config_t draw_conf;
+
+    CLEAR (draw_conf);
+    CLEAR (vtx_res);
+
+    /* nothing queued, nothing to draw */
+    if (*index == 0)
+        return;
+
+    // fixme use gart buffer directly
+    vb_addr = upload (adapt, vb, (*index) * vertex_bytes, 8192);
+
+    /* Vertex buffer setup */
+    vtx_res.vb_addr = vb_addr;
+    vtx_res.id = SQ_VTX_RESOURCE_vs;
+    vtx_res.mem_req_size = 1;
+    vtx_res.vtx_size_dw = vertex_bytes / 4;
+    vtx_res.vtx_num_entries = (*index) * vertex_bytes / 4;
+    set_vtx_resource (adapt, &vtx_res);
+
+    /* Draw: auto-generated indices, one per uploaded vertex */
+    draw_conf.num_indices = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw;
+    draw_conf.num_instances = 1;
+    draw_conf.index_type = DI_INDEX_SIZE_16_BIT;
+    draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX;
+    draw_conf.prim_type = DI_PT_RECTLIST;
+
+    ereg (VGT_INSTANCE_STEP_RATE_0, 0); /* ? */
+    ereg (VGT_INSTANCE_STEP_RATE_1, 0);
+
+    ereg (VGT_MAX_VTX_INDX, draw_conf.num_indices);
+    ereg (VGT_MIN_VTX_INDX, 0);
+    ereg (VGT_INDX_OFFSET, 0);
+
+    draw_auto(adapt, &draw_conf);
+
+    wait_3d_idle_clean();
+}
+
+/*
+ * Copy test: prepares the copy state, queues three copies of the
+ * 200x200 area at (0,0) to different destinations and draws them in
+ * one go.
+ */
+void
+test_copy(adapter_t *adapt)
+{
+    /* srcX, srcY, dstX, dstY, w, h of every copy */
+    static const int copies[][6] = {
+        { 0, 0, 500, 500, 200, 200 },
+        { 0, 0, 500,   0, 200, 200 },
+        { 0, 0,   0, 500, 200, 200 },
+    };
+    struct r6xx_copy_vertex vb[256];
+    const uint32_t pm = 0xffffffff;
+    const int rop = 3;
+    const int xdir = 0;
+    const int ydir = 0;
+    int index = 0;
+    unsigned int c;
+
+    R600PrepareCopy(adapt, xdir, ydir, rop, pm);
+
+    // build vertex buffer
+    for (c = 0; c < sizeof(copies) / sizeof(copies[0]); c++)
+        R600Copy(adapt, copies[c][0], copies[c][1], copies[c][2], copies[c][3],
+                 copies[c][4], copies[c][5], vb, &index);
+
+    R600DoneCopy(adapt, vb, &index);
+}
+
diff --git a/r600_hwapi.h b/r600_hwapi.h
new file mode 100644
index 0000000..6fa713b
--- /dev/null
+++ b/r600_hwapi.h
@@ -0,0 +1,131 @@
+/*
+ * r600_demo
+ *
+ * Copyright (C) 2008-2009 Matthias Hopf
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __R600_HWAPI_H__
+#define __R600_HWAPI_H__
+
+/*
+ * This defines hardware api related stuff
+ */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <endian.h>
+
+
+/*
+ * Byte swapping helpers: real swaps on big endian hosts, the identity
+ * everywhere else.  The argument is evaluated several times in the big
+ * endian variants, so it must not have side effects.
+ * SWAP32 uses uint32_t from <stdint.h> (included above); no header
+ * included here provides CARD32.
+ */
+#if BYTE_ORDER == BIG_ENDIAN
+# define SWAP16(A) ((((unsigned short)(A) & 0xff00) >> 8) | \
+ (((unsigned short)(A) & 0x00ff) << 8))
+# define SWAP32(A) ((((uint32_t)(A) & 0xff000000) >> 24) | \
+ (((uint32_t)(A) & 0x00ff0000) >> 8) | \
+ (((uint32_t)(A) & 0x0000ff00) << 8) | \
+ (((uint32_t)(A) & 0x000000ff) << 24))
+#else
+# define SWAP16(A) (A)
+# define SWAP32(A) (A)
+#endif
+
+
+#define DEBUG(x) do { x } while (0) /* Always, on r600_demo */
+
+/*
+ * Internal stuff needed for API
+ */
+
+extern volatile void *registers;
+extern int indirect_end, indirect_size;
+extern uint32_t *indirect;
+
+
+void flush_cmds (void);
+
+
+/*
+ * Register access
+ */
+/*
+ * Read the 32 bit register at byte offset reg of the register mapping
+ * and return it after SWAP32.
+ */
+static inline uint32_t reg_read32 (uint32_t reg)
+{
+    /* offset through a byte pointer: arithmetic on void * is a GNU extension */
+    volatile uint8_t *base = (volatile uint8_t *) registers;
+    uint32_t v;
+
+    /* read exactly once; SWAP32 may evaluate its argument several times */
+    v = * (volatile uint32_t *) (base + reg);
+    return SWAP32 (v);
+}
+
+/*
+ * Write SWAP32 (v) to the 32 bit register at byte offset reg of the
+ * register mapping.
+ */
+static inline void reg_write32 (uint32_t reg, uint32_t v)
+{
+    /* offset through a byte pointer: arithmetic on void * is a GNU extension */
+    volatile uint8_t *base = (volatile uint8_t *) registers;
+
+    * (volatile uint32_t *) (base + reg) = SWAP32 (v);
+}
+
+
+/*
+ * Packet Macros + Routines
+ */
+
+/* Preamble. Allocate space for dwords uint32_t values. If not enough space
+ * is available, bail out.
+ * In a production driver this can even be a NOP. Real buffer allocation has
+ * to be done beforehand. */
+/* For r600demo this is trivial. */
+#define CMD_BUFFER_PREAMBLE(dwords) \
+ checkCmdBufferSpace (dwords)
+
+
+/* Preamble, without space checking. Use with care. */
+/* For r600demo this is trivial. */
+#define CMD_BUFFER_NOCHECK_PREAMBLE \
+ ((void) 0)
+
+
+/* Check for additional space for dwords uint32_t values. If not enough space
+ * is available, bail out. */
+/* For r600demo this is trivial. */
+#define CMD_BUFFER_ALLOC(dwords) \
+ checkCmdBufferSpace (dwords)
+
+
+/* Space checker for debugging purposes. Bail out if not enough space. */
+/* Always enabled in r600demo. */
+#define CMD_BUFFER_DEBUG_AVAIL(dwords) \
+ checkCmdBufferSpace (dwords)
+
+
+/* Emit a single uint32_t value.
+ * indirect_end is a byte offset into the indirect buffer: it is shifted
+ * right by two to index the dword array and advanced by four per value. */
+#define E32(dword) \
+ do { \
+ CMD_BUFFER_DEBUG_AVAIL (1); \
+ indirect[indirect_end>>2] = (dword); \
+ indirect_end += 4; \
+ } while (0)
+
+
+/*
+ * Terminate the program unless dwords more 32 bit values fit into the
+ * indirect buffer.  indirect_end and indirect_size are compared in
+ * bytes; a request that would fill the buffer exactly is rejected too.
+ *
+ * dwords: number of uint32_t values about to be emitted
+ */
+static inline void checkCmdBufferSpace (int dwords)
+{
+    if (indirect_end + 4*dwords >= indirect_size) {
+        /* "have" is the remaining room, "need" the request - both in dwords */
+        fprintf (stderr, "Insufficient indirect buffer size (have %d, need %d) - aborting\n",
+                 (indirect_size-indirect_end)/4, dwords);
+        exit (1);
+    }
+}
+
+
+#endif
diff --git a/r600_init.c b/r600_init.c
new file mode 100644
index 0000000..cf3ab05
--- /dev/null
+++ b/r600_init.c
@@ -0,0 +1,1024 @@
+/*
+ * RadeonHD R6xx, R7xx DRI driver
+ *
+ * Copyright (C) 2008-2009 Alexander Deucher
+ * Copyright (C) 2008-2009 Matthias Hopf
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Hardware setup
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include "r600_reg.h"
+#include "r600_emit.h"
+#include "r600_lib.h"
+#include "r600_state.h"
+
+
+/* Define to clear all constants, samplers, etc.
+ * Not necessary for working conditions, but helps debugging. */
+#define CLEAN_SETUP 1
+
+/* Define to use SQ_TEX_SAMPLER_WORD instead of SQ_TEX_SAMPLER_*
+ * during texture sampler setup. Shouldn't change anything, except
+ * that _WORD needs fewer register accesses. */
+#define USE_TEX_SAMPLER_WORD 1
+
+/*
+ * Helpers
+ */
+/* Return how many of the 32 bits of val are set. */
+static inline int count_bits (uint32_t val)
+{
+    int bit, set = 0;
+
+    /* Test each bit position in turn instead of shifting val itself */
+    for (bit = 0; bit < 32; ++bit)
+        set += (int) ((val >> bit) & 1u);
+    return set;
+}
+
+/*
+ * Initialization and cleanup
+ */
+
+/* Emit a PACK0 packet for 8 values starting at CB_COLOR0_INFO, followed by
+ * 8 zero dwords. */
+void reset_cb()
+{
+    int remaining = 8;
+
+    CMD_BUFFER_PREAMBLE (8+1);
+
+    PACK0 (CB_COLOR0_INFO, 8);
+    while (remaining-- > 0)
+        E32 (0);
+}
+
+/* Zero the TD_PS_SAMPLER0_BORDER_RED and TD_VS_SAMPLER0_BORDER_RED register
+ * ranges (4 values per sampler each), bracketed by 3D idle waits. */
+void reset_td_samplers()
+{
+    const int ps_values = 4*TD_PS_SAMPLER0_BORDER_RED_num;
+    const int vs_values = 4*TD_VS_SAMPLER0_BORDER_RED_num;
+    int left;
+
+    CMD_BUFFER_PREAMBLE (2 * (4*TD_PS_SAMPLER0_BORDER_RED_num+1) + 2 * ELEN_WAIT_3D_IDLE);
+
+    EMIT_WAIT_3D_IDLE ();
+
+    /* Pixel shader samplers */
+    PACK0 (TD_PS_SAMPLER0_BORDER_RED, ps_values);
+    for (left = ps_values; left > 0; left--)
+        E32 (0);
+
+    /* Vertex shader samplers */
+    PACK0 (TD_VS_SAMPLER0_BORDER_RED, vs_values);
+    for (left = vs_values; left > 0; left--)
+        E32 (0);
+
+    EMIT_WAIT_3D_IDLE ();
+}
+
+/* Program every texture sampler with the same default values.
+ * With USE_TEX_SAMPLER_WORD defined, three dwords per sampler are written via
+ * SQ_TEX_SAMPLER_WORD; otherwise 16 dwords per sampler go to
+ * SQ_TEX_SAMPLER_REGS. */
+void reset_sampler_const ()
+{
+    int sampler;
+#ifdef USE_TEX_SAMPLER_WORD
+    CMD_BUFFER_PREAMBLE (SQ_TEX_SAMPLER_WORD_all_num * 4);
+
+    sampler = 0;
+    while (sampler < SQ_TEX_SAMPLER_WORD_all_num) {
+        PACK0 (SQ_TEX_SAMPLER_WORD + sampler * SQ_TEX_SAMPLER_WORD_offset, 3);
+        E32 (SQ_TEX_DEPTH_COMPARE_LESSEQUAL << DEPTH_COMPARE_FUNCTION_shift);
+        E32 (MAX_LOD_mask);
+        E32 (0);
+        sampler++;
+    }
+#else
+    CMD_BUFFER_PREAMBLE (SQ_TEX_SAMPLER_REGS_all_num * 17);
+
+    sampler = 0;
+    while (sampler < SQ_TEX_SAMPLER_REGS_all_num) {
+        int zeros = 12;
+        PACK0 (SQ_TEX_SAMPLER_REGS + sampler * SQ_TEX_SAMPLER_REGS_offset, 16);
+        /* The first 12 registers are cleared */
+        while (zeros-- > 0)
+            E32 (0);
+        E32 (SQ_TEX_SAMPLER_MAX_LOD_0__DATA_mask);
+        E32 (SQ_TEX_DEPTH_COMPARE_LESSEQUAL);
+        E32 (0);
+        E32 (0);
+        sampler++;
+    }
+#endif
+}
+
+/* Write 0.0 to the whole SQ_ALU_CONSTANT range with a single PACK0 packet. */
+void reset_dx9_alu_consts()
+{
+    /* Number of dwords: constants times dwords per constant */
+    const int dwords = SQ_ALU_CONSTANT_all_num * (SQ_ALU_CONSTANT_offset >> 2);
+    int left;
+
+    CMD_BUFFER_PREAMBLE (dwords + 1);
+
+    PACK0 (SQ_ALU_CONSTANT, dwords);
+    for (left = dwords; left > 0; left--)
+        EFLOAT (0.0);
+}
+
+/* Zero the boolean constants (one EREG write each, starting at
+ * SQ_BOOL_CONST_0) and then the loop constants (one PACK0 packet at
+ * SQ_LOOP_CONST). */
+void reset_bool_loop_const()
+{
+    int idx;
+
+    CMD_BUFFER_PREAMBLE (SQ_BOOL_CONST_0_num * 2 + SQ_LOOP_CONST_all_num + 1);
+
+    idx = 0;
+    while (idx < SQ_BOOL_CONST_0_num) {
+        EREG (SQ_BOOL_CONST_0 + (idx << 2), 0);
+        idx++;
+    }
+
+    PACK0 (SQ_LOOP_CONST, SQ_LOOP_CONST_all_num);
+    for (idx = SQ_LOOP_CONST_all_num; idx > 0; idx--)
+        E32 (0);
+}
+
+/* Emit the packets that open a 3D command stream.
+ * Chipsets up to and including CHIPSET_RV670 first get an IT_START_3D_CMDBUF
+ * packet; every chipset then gets IT_CONTEXT_CONTROL followed by
+ * EMIT_WAIT_3D_IDLE_CLEAN. */
+void start_3d(adapter_t *adapt)
+{
+ /* Room for the 3-dword IT_CONTEXT_CONTROL packet plus the idle wait */
+ CMD_BUFFER_PREAMBLE (3 + ELEN_WAIT_3D_IDLE_CLEAN);
+
+ if (adapt->chipset <= CHIPSET_RV670) {
+ /* Extra room for the chipset-specific packet */
+ CMD_BUFFER_ALLOC (2);
+ PACK3 (IT_START_3D_CMDBUF, 1);
+ E32 (0);
+ }
+
+ PACK3 (IT_CONTEXT_CONTROL, 2);
+ E32 (0x80000000);
+ E32 (0x80000000);
+
+ EMIT_WAIT_3D_IDLE_CLEAN ();
+}
+
+/*
+ * Setup of functional groups
+ */
+
+// asic stack/thread/gpr limits - need to query the drm
+/* Program the six registers starting at SQ_CONFIG from sq_conf, followed by
+ * the two VGT registers that depend on the number of qd pipes.
+ * adapt selects whether VC_ENABLE_bit is set in SQ_CONFIG. */
+void sq_setup(adapter_t *adapt, sq_config_t *sq_conf)
+{
+    uint32_t config, gpr_mgmt_1, gpr_mgmt_2;
+    uint32_t thread_mgmt, stack_mgmt_1, stack_mgmt_2;
+
+    CMD_BUFFER_PREAMBLE (7 + 2*2);
+
+    /* VC is enabled on everything except these four chipsets */
+    if ((adapt->chipset != CHIPSET_RV610) &&
+        (adapt->chipset != CHIPSET_RV620) &&
+        (adapt->chipset != CHIPSET_RS780) &&
+        (adapt->chipset != CHIPSET_RV710))
+        config = VC_ENABLE_bit;
+    else
+        config = 0;    // no VC
+
+    config |= DX9_CONSTS_bit;
+    config |= ALU_INST_PREFER_VECTOR_bit;
+    config |= (sq_conf->ps_prio << PS_PRIO_shift);
+    config |= (sq_conf->vs_prio << VS_PRIO_shift);
+    config |= (sq_conf->gs_prio << GS_PRIO_shift);
+    config |= (sq_conf->es_prio << ES_PRIO_shift);
+
+    /* GPR budget */
+    gpr_mgmt_1  = (sq_conf->num_ps_gprs << NUM_PS_GPRS_shift);
+    gpr_mgmt_1 |= (sq_conf->num_vs_gprs << NUM_VS_GPRS_shift);
+    gpr_mgmt_1 |= (sq_conf->num_temp_gprs << NUM_CLAUSE_TEMP_GPRS_shift);
+
+    gpr_mgmt_2  = (sq_conf->num_gs_gprs << NUM_GS_GPRS_shift);
+    gpr_mgmt_2 |= (sq_conf->num_es_gprs << NUM_ES_GPRS_shift);
+
+    /* Thread budget */
+    thread_mgmt  = (sq_conf->num_ps_threads << NUM_PS_THREADS_shift);
+    thread_mgmt |= (sq_conf->num_vs_threads << NUM_VS_THREADS_shift);
+    thread_mgmt |= (sq_conf->num_gs_threads << NUM_GS_THREADS_shift);
+    thread_mgmt |= (sq_conf->num_es_threads << NUM_ES_THREADS_shift);
+
+    /* Stack budget */
+    stack_mgmt_1  = (sq_conf->num_ps_stack_entries << NUM_PS_STACK_ENTRIES_shift);
+    stack_mgmt_1 |= (sq_conf->num_vs_stack_entries << NUM_VS_STACK_ENTRIES_shift);
+
+    stack_mgmt_2  = (sq_conf->num_gs_stack_entries << NUM_GS_STACK_ENTRIES_shift);
+    stack_mgmt_2 |= (sq_conf->num_es_stack_entries << NUM_ES_STACK_ENTRIES_shift);
+
+    PACK0 (SQ_CONFIG, 6);
+    E32 (config);
+    E32 (gpr_mgmt_1);
+    E32 (gpr_mgmt_2);
+    E32 (thread_mgmt);
+    E32 (stack_mgmt_1);
+    E32 (stack_mgmt_2);
+
+    /* These regs live in VGT space, but should be programmed according to
+     * the number of available qd pipes, never to be touched again */
+    // XXX: move to drm
+    EREG (VGT_OUT_DEALLOC_CNTL, sq_conf->num_qd_pipes*4);
+    EREG (VGT_VERTEX_REUSE_BLOCK_CNTL, sq_conf->num_qd_pipes*4 - 2);
+}
+
+/* Program the color buffer registers of render target cb_conf->id.
+ * The per-target registers are addressed as <CB_COLOR0_xxx> + 4 * id.
+ * For chipsets strictly between CHIPSET_R600 and CHIPSET_RV770 an
+ * IT_SURFACE_BASE_UPDATE packet follows the base address write. */
+void set_render_target(adapter_t *adapt, cb_config_t *cb_conf)
+{
+    uint32_t info, size;
+
+    CMD_BUFFER_PREAMBLE (7*2);
+
+    /* Multi-bit fields of CB_COLOR0_INFO ... */
+    info  = (cb_conf->endian << ENDIAN_shift);
+    info |= (cb_conf->format << CB_COLOR0_INFO__FORMAT_shift);
+    info |= (cb_conf->array_mode << CB_COLOR0_INFO__ARRAY_MODE_shift);
+    info |= (cb_conf->number_type << NUMBER_TYPE_shift);
+    info |= (cb_conf->comp_swap << COMP_SWAP_shift);
+    info |= (cb_conf->tile_mode << CB_COLOR0_INFO__TILE_MODE_shift);
+
+    /* ... followed by the single-bit switches */
+    info |= cb_conf->read_size ? CB_COLOR0_INFO__READ_SIZE_bit : 0;
+    info |= cb_conf->blend_clamp ? BLEND_CLAMP_bit : 0;
+    info |= cb_conf->clear_color ? CLEAR_COLOR_bit : 0;
+    info |= cb_conf->blend_bypass ? BLEND_BYPASS_bit : 0;
+    info |= cb_conf->blend_float32 ? BLEND_FLOAT32_bit : 0;
+    info |= cb_conf->simple_float ? SIMPLE_FLOAT_bit : 0;
+    info |= cb_conf->round_mode ? CB_COLOR0_INFO__ROUND_MODE_bit : 0;
+    info |= cb_conf->tile_compact ? TILE_COMPACT_bit : 0;
+    info |= cb_conf->source_format ? SOURCE_FORMAT_bit : 0;
+
+    // pitch only for ARRAY_LINEAR_GENERAL, other tiling modes require addrlib
+    size = ((((cb_conf->w / 8) - 1) << PITCH_TILE_MAX_shift) |
+            ((((cb_conf->w * cb_conf->h)/64)-1) << SLICE_TILE_MAX_shift));
+
+    EREG ((CB_COLOR0_BASE + (4 * cb_conf->id)), (cb_conf->base >> 8));
+    // rv6xx workaround
+    if ((adapt->chipset > CHIPSET_R600) &&
+        (adapt->chipset < CHIPSET_RV770)) {
+        CMD_BUFFER_ALLOC (2);
+        PACK3 (IT_SURFACE_BASE_UPDATE, 1);
+        E32 ((2 << cb_conf->id));
+    }
+    EREG ((CB_COLOR0_SIZE + (4 * cb_conf->id)), size);
+    EREG ((CB_COLOR0_VIEW + (4 * cb_conf->id)), ((0 << SLICE_START_shift) |
+                                                 (0 << SLICE_MAX_shift)));
+    EREG ((CB_COLOR0_INFO + (4 * cb_conf->id)), info);
+    EREG ((CB_COLOR0_TILE + (4 * cb_conf->id)), (0 >> 8));    // CMASK per-tile data base/256
+    EREG ((CB_COLOR0_FRAG + (4 * cb_conf->id)), (0 >> 8));    // FMASK per-tile data base/256
+    EREG ((CB_COLOR0_MASK + (4 * cb_conf->id)), ((0 << CMASK_BLOCK_MAX_shift) |
+                                                 (0 << FMASK_TILE_MAX_shift)));
+}
+
+/* Program the depth buffer registers (base, info, size, view) from db_conf.
+ * For chipsets strictly between CHIPSET_R600 and CHIPSET_RV770 an
+ * IT_SURFACE_BASE_UPDATE packet follows the base address write.
+ *
+ * Both size fields are expressed in 8x8 pixel tiles, matching what
+ * set_render_target() does for the color buffer: the pitch is w/8 - 1 and
+ * the slice is (w*h)/64 - 1. */
+void set_depth_target(adapter_t *adapt, db_config_t *db_conf)
+{
+    uint32_t db_depth_info;
+    CMD_BUFFER_PREAMBLE (4*2);
+
+    db_depth_info = ((db_conf->format << DB_DEPTH_INFO__FORMAT_shift) |
+                     (db_conf->array_mode << DB_DEPTH_INFO__ARRAY_MODE_shift));
+    if (db_conf->read_size)
+        db_depth_info |= DB_DEPTH_INFO__READ_SIZE_bit;
+    if (db_conf->tile_surface_en)
+        db_depth_info |= TILE_SURFACE_ENABLE_bit;
+    if (db_conf->tile_compact)
+        db_depth_info |= TILE_COMPACT_bit;
+    if (db_conf->zrange_precision)
+        db_depth_info |= ZRANGE_PRECISION_bit;
+
+    /* The base address is programmed in units of 256 bytes */
+    EREG (DB_DEPTH_BASE, (db_conf->base >> 8));
+    // rv6xx workaround
+    if ((adapt->chipset > CHIPSET_R600) &&
+        (adapt->chipset < CHIPSET_RV770)) {
+        CMD_BUFFER_ALLOC (2);
+        PACK3 (IT_SURFACE_BASE_UPDATE, 1);
+        E32 (DEPTH_BASE);
+    }
+    EREG (DB_DEPTH_INFO, db_depth_info);
+    /* Slice size counts 8x8 tiles, hence the division by 64 */
+    EREG (DB_DEPTH_SIZE, ((((db_conf->w / 8) - 1) << PITCH_TILE_MAX_shift) |
+                          ((((db_conf->w * db_conf->h) / 64) - 1) << SLICE_TILE_MAX_shift)));
+    EREG (DB_DEPTH_VIEW, ((0 << SLICE_START_shift) |
+                          (0 << SLICE_MAX_shift)));
+}
+
+/* Emit a surface sync sequence: program the three CP_COHER_* registers, wait
+ * on CP_COHER_STATUS with IT_WAIT_REG_MEM, then send the PIPELINESTAT_STOP and
+ * PERFCOUNTER_STOP events. The order of the packets is significant. */
+void cp_set_surface_sync()
+{
+ CMD_BUFFER_PREAMBLE (4*2 + 7 + 2 + 2);
+
+ EREG (CP_COHER_CNTL, 0x19800000);
+ EREG (CP_COHER_SIZE, 0xFFFFFFFF);
+ EREG (CP_COHER_BASE, 0x00000000);
+ /* Wait until (CP_COHER_STATUS & STATUS_bit) equals the reference value 0 */
+ PACK3 (IT_WAIT_REG_MEM, 6);
+ E32 (0x00000003); // ME, Register, EqualTo
+ E32 (CP_COHER_STATUS >> 2);
+ E32 (0);
+ E32 (0); // Ref value
+ E32 (STATUS_bit); // Ref mask
+ E32 (10); // Wait interval
+ PACK3 (IT_EVENT_WRITE, 1);
+ E32 (PIPELINESTAT_STOP);
+ PACK3 (IT_EVENT_WRITE, 1);
+ E32 (PERFCOUNTER_STOP);
+}
+
+/* Program the fetch shader registers (start address, resources, CF offset)
+ * from fs_conf. adapt is not used. */
+void fs_setup(adapter_t *adapt, shader_config_t *fs_conf)
+{
+    uint32_t resources = (fs_conf->num_gprs << NUM_GPRS_shift);
+
+    resources |= (fs_conf->stack_size << STACK_SIZE_shift);
+    CMD_BUFFER_PREAMBLE (6);
+
+    resources |= fs_conf->dx10_clamp ? SQ_PGM_RESOURCES_FS__DX10_CLAMP_bit : 0;
+
+    /* The shader address is programmed in units of 256 bytes */
+    EREG (SQ_PGM_START_FS, fs_conf->shader_addr >> 8);
+    EREG (SQ_PGM_RESOURCES_FS, resources);
+    EREG (SQ_PGM_CF_OFFSET_FS, 0);
+}
+
+/* Program the vertex shader registers (start address, resources, CF offset)
+ * from vs_conf. adapt is not used. */
+void vs_setup(adapter_t *adapt, shader_config_t *vs_conf)
+{
+    uint32_t resources = (vs_conf->num_gprs << NUM_GPRS_shift);
+
+    resources |= (vs_conf->stack_size << STACK_SIZE_shift);
+    CMD_BUFFER_PREAMBLE (6);
+
+    resources |= vs_conf->dx10_clamp ? SQ_PGM_RESOURCES_VS__DX10_CLAMP_bit : 0;
+    /* A zero field contributes nothing, so no guard is needed */
+    resources |= (vs_conf->fetch_cache_lines << FETCH_CACHE_LINES_shift);
+    resources |= vs_conf->uncached_first_inst ? UNCACHED_FIRST_INST_bit : 0;
+
+    /* The shader address is programmed in units of 256 bytes */
+    EREG (SQ_PGM_START_VS, vs_conf->shader_addr >> 8);
+    EREG (SQ_PGM_RESOURCES_VS, resources);
+    EREG (SQ_PGM_CF_OFFSET_VS, 0);
+}
+
+/* Program the pixel shader registers (start address, resources, exports,
+ * CF offset) from ps_conf. adapt is not used. */
+void ps_setup(adapter_t *adapt, shader_config_t *ps_conf)
+{
+    uint32_t resources = (ps_conf->num_gprs << NUM_GPRS_shift);
+
+    resources |= (ps_conf->stack_size << STACK_SIZE_shift);
+    CMD_BUFFER_PREAMBLE (8);
+
+    resources |= ps_conf->dx10_clamp ? SQ_PGM_RESOURCES_PS__DX10_CLAMP_bit : 0;
+    /* A zero field contributes nothing, so no guard is needed */
+    resources |= (ps_conf->fetch_cache_lines << FETCH_CACHE_LINES_shift);
+    resources |= ps_conf->uncached_first_inst ? UNCACHED_FIRST_INST_bit : 0;
+    resources |= ps_conf->clamp_consts ? CLAMP_CONSTS_bit : 0;
+
+    /* The shader address is programmed in units of 256 bytes */
+    EREG (SQ_PGM_START_PS, ps_conf->shader_addr >> 8);
+    EREG (SQ_PGM_RESOURCES_PS, resources);
+    EREG (SQ_PGM_EXPORTS_PS, ps_conf->export_mode);
+    EREG (SQ_PGM_CF_OFFSET_PS, 0);
+}
+
+/* Upload count ALU constants starting at constant index offset.
+ * const_buf supplies count * (SQ_ALU_CONSTANT_offset >> 2) floats, which are
+ * emitted in order after a single PACK0 header. */
+void set_alu_consts(int offset, int count, float *const_buf)
+{
+    const int dwords = count * (SQ_ALU_CONSTANT_offset >> 2);
+    int k = 0;
+
+    CMD_BUFFER_PREAMBLE (dwords + 1);
+
+    PACK0 (SQ_ALU_CONSTANT + offset * SQ_ALU_CONSTANT_offset, dwords);
+    while (k < dwords) {
+        EFLOAT (const_buf[k]);
+        k++;
+    }
+}
+
+/* Write the 7-dword vertex resource descriptor for slot res->id.
+ * adapt is not used. */
+void set_vtx_resource(adapter_t *adapt, vtx_resource_t *res)
+{
+    uint32_t word2;
+
+    CMD_BUFFER_PREAMBLE (8);
+
+    /* Word 2: upper address bits, stride in bytes, format and endian swap */
+    word2  = (((res->vb_addr) >> 32) & BASE_ADDRESS_HI_mask);
+    word2 |= ((res->vtx_size_dw << 2) << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift);
+    word2 |= (res->format << SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift);
+    word2 |= (res->num_format_all << SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift);
+    word2 |= (res->endian << SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift);
+
+    word2 |= res->clamp_x ? SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit : 0;
+    word2 |= res->format_comp_all ? SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit : 0;
+    word2 |= res->srf_mode_all ? SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit : 0;
+
+    PACK0 (SQ_VTX_RESOURCE + res->id * SQ_VTX_RESOURCE_offset, 7);
+
+    E32 (res->vb_addr & 0xffffffff);                            // 0: BASE_ADDRESS
+    E32 ((res->vtx_num_entries << 2) - 1);                      // 1: SIZE
+    E32 (word2);                    // 2: BASE_HI, STRIDE, CLAMP, FORMAT, ENDIAN
+    E32 (res->mem_req_size << MEM_REQUEST_SIZE_shift);          // 3: MEM_REQUEST_SIZE ?!?
+    E32 (0);                                                    // 4: n/a
+    E32 (0);                                                    // 5: n/a
+    E32 (SQ_TEX_VTX_VALID_BUFFER << SQ_VTX_CONSTANT_WORD6_0__TYPE_shift); // 6: TYPE
+}
+
+/* Write the 7-dword texture resource descriptor for slot tex_res->id.
+ * Width, height and depth fields are only filled in when the corresponding
+ * value is non-zero. adapt is not used. */
+void set_tex_resource(adapter_t *adapt, tex_resource_t *tex_res)
+{
+    uint32_t word0, word1, word4;
+    uint32_t word5, word6;
+
+    CMD_BUFFER_PREAMBLE (8);
+
+    /* Word 0: dimension, tiling, pitch and width */
+    word0  = (tex_res->dim << DIM_shift);
+    word0 |= (tex_res->tile_mode << SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift);
+    if (tex_res->w) {
+        /* Pitch is stored in units of 8, rounded up, minus one */
+        word0 |= ((((tex_res->pitch + 7) >> 3) - 1) << PITCH_shift);
+        word0 |= ((tex_res->w - 1) << TEX_WIDTH_shift);
+    }
+    word0 |= tex_res->tile_type ? TILE_TYPE_bit : 0;
+
+    /* Word 1: format, height and depth */
+    word1 = (tex_res->format << SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift);
+    if (tex_res->h)
+        word1 |= ((tex_res->h - 1) << TEX_HEIGHT_shift);
+    if (tex_res->depth)
+        word1 |= ((tex_res->depth - 1) << TEX_DEPTH_shift);
+
+    /* Word 4: component formats, swizzle and base level */
+    word4  = (tex_res->format_comp_x << FORMAT_COMP_X_shift);
+    word4 |= (tex_res->format_comp_y << FORMAT_COMP_Y_shift);
+    word4 |= (tex_res->format_comp_z << FORMAT_COMP_Z_shift);
+    word4 |= (tex_res->format_comp_w << FORMAT_COMP_W_shift);
+    word4 |= (tex_res->num_format_all << SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift);
+    word4 |= (tex_res->endian << SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift);
+    word4 |= (tex_res->request_size << REQUEST_SIZE_shift);
+    word4 |= (tex_res->dst_sel_x << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift);
+    word4 |= (tex_res->dst_sel_y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift);
+    word4 |= (tex_res->dst_sel_z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift);
+    word4 |= (tex_res->dst_sel_w << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift);
+    word4 |= (tex_res->base_level << BASE_LEVEL_shift);
+    word4 |= tex_res->srf_mode_all ? SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit : 0;
+    word4 |= tex_res->force_degamma ? SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit : 0;
+
+    /* Word 5: last mip level and array range */
+    word5  = (tex_res->last_level << LAST_LEVEL_shift);
+    word5 |= (tex_res->base_array << BASE_ARRAY_shift);
+    word5 |= (tex_res->last_array << LAST_ARRAY_shift);
+
+    /* Word 6: misc controls and the resource type */
+    word6  = (tex_res->mpeg_clamp << MPEG_CLAMP_shift);
+    word6 |= (tex_res->perf_modulation << PERF_MODULATION_shift);
+    word6 |= (SQ_TEX_VTX_VALID_TEXTURE << SQ_TEX_RESOURCE_WORD6_0__TYPE_shift);
+    word6 |= tex_res->interlaced ? INTERLACED_bit : 0;
+
+    PACK0 (SQ_TEX_RESOURCE + tex_res->id * SQ_TEX_RESOURCE_offset, 7);
+    E32 (word0);
+    E32 (word1);
+    E32 (((tex_res->base) >> 8));       /* word 2: base address / 256 */
+    E32 (((tex_res->mip_base) >> 8));   /* word 3: mip base address / 256 */
+    E32 (word4);
+    E32 (word5);
+    E32 (word6);
+}
+
+/* Write the 3-dword texture sampler descriptor for slot s->id.
+ * adapt is not used. */
+void set_tex_sampler (adapter_t *adapt, tex_sampler_t *s)
+{
+    uint32_t word0, word1, word2;
+
+    CMD_BUFFER_PREAMBLE (4);
+
+    /* Word 0: clamping, filtering, border color and compare function */
+    word0  = (s->clamp_x << SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift);
+    word0 |= (s->clamp_y << CLAMP_Y_shift);
+    word0 |= (s->clamp_z << CLAMP_Z_shift);
+    word0 |= (s->xy_mag_filter << XY_MAG_FILTER_shift);
+    word0 |= (s->xy_min_filter << XY_MIN_FILTER_shift);
+    word0 |= (s->z_filter << Z_FILTER_shift);
+    word0 |= (s->mip_filter << MIP_FILTER_shift);
+    word0 |= (s->border_color << BORDER_COLOR_TYPE_shift);
+    word0 |= (s->depth_compare << DEPTH_COMPARE_FUNCTION_shift);
+    word0 |= (s->chroma_key << CHROMA_KEY_shift);
+    word0 |= s->point_sampling_clamp ? POINT_SAMPLING_CLAMP_bit : 0;
+    word0 |= s->tex_array_override ? TEX_ARRAY_OVERRIDE_bit : 0;
+    word0 |= s->lod_uses_minor_axis ? LOD_USES_MINOR_AXIS_bit : 0;
+
+    /* Word 1: LOD range and bias */
+    word1  = (s->min_lod << MIN_LOD_shift);
+    word1 |= (s->max_lod << MAX_LOD_shift);
+    word1 |= (s->lod_bias << SQ_TEX_SAMPLER_WORD1_0__LOD_BIAS_shift);
+
+    /* Word 2: secondary bias, performance knobs and mode bits */
+    word2  = (s->lod_bias2 << LOD_BIAS_SEC_shift);
+    word2 |= (s->perf_mip << PERF_MIP_shift);
+    word2 |= (s->perf_z << PERF_Z_shift);
+    word2 |= s->mc_coord_truncate ? MC_COORD_TRUNCATE_bit : 0;
+    word2 |= s->force_degamma ? SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit : 0;
+    word2 |= s->high_precision_filter ? HIGH_PRECISION_FILTER_bit : 0;
+    word2 |= s->fetch_4 ? FETCH_4_bit : 0;
+    word2 |= s->sample_is_pcf ? SAMPLE_IS_PCF_bit : 0;
+    word2 |= s->type ? SQ_TEX_SAMPLER_WORD2_0__TYPE_bit : 0;
+
+    PACK0 (SQ_TEX_SAMPLER_WORD + s->id * SQ_TEX_SAMPLER_WORD_offset, 3);
+    E32 (word0);
+    E32 (word1);
+    E32 (word2);
+}
+
+//XXX deal with clip offsets in clip setup
+
+/* Program the screen scissor rectangle: (x1,y1) goes to the TL register,
+ * (x2,y2) to the BR register. */
+void set_screen_scissor(int x1, int y1, int x2, int y2)
+{
+    uint32_t tl, br;
+
+    CMD_BUFFER_PREAMBLE (2 * 2);
+
+    tl = ((x1 << PA_SC_SCREEN_SCISSOR_TL__TL_X_shift) |
+          (y1 << PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift));
+    br = ((x2 << PA_SC_SCREEN_SCISSOR_BR__BR_X_shift) |
+          (y2 << PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift));
+
+    EREG (PA_SC_SCREEN_SCISSOR_TL, tl);
+    EREG (PA_SC_SCREEN_SCISSOR_BR, br);
+}
+
+/* Program the scissor rectangle of viewport id: (x1,y1) goes to the TL
+ * register (with WINDOW_OFFSET_DISABLE_bit set), (x2,y2) to the BR register. */
+void set_vport_scissor(int id, int x1, int y1, int x2, int y2)
+{
+    uint32_t tl, br;
+
+    CMD_BUFFER_PREAMBLE (2 * 2);
+
+    tl = ((x1 << PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift) |
+          (y1 << PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift) |
+          WINDOW_OFFSET_DISABLE_bit);
+    br = ((x2 << PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift) |
+          (y2 << PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift));
+
+    EREG (PA_SC_VPORT_SCISSOR_0_TL + id * PA_SC_VPORT_SCISSOR_0_TL_offset, tl);
+    EREG (PA_SC_VPORT_SCISSOR_0_BR + id * PA_SC_VPORT_SCISSOR_0_BR_offset, br);
+}
+
+/* Program the generic scissor rectangle: (x1,y1) goes to the TL register
+ * (with WINDOW_OFFSET_DISABLE_bit set), (x2,y2) to the BR register.
+ * Each coordinate is shifted with the field constant of the register it is
+ * written to, as in the other scissor setters. */
+void set_generic_scissor(int x1, int y1, int x2, int y2)
+{
+    CMD_BUFFER_PREAMBLE (2 * 2);
+
+    EREG (PA_SC_GENERIC_SCISSOR_TL, ((x1 << PA_SC_GENERIC_SCISSOR_TL__TL_X_shift) |
+                                     (y1 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift) |
+                                     WINDOW_OFFSET_DISABLE_bit));
+    /* y2 belongs to the BR register, so use its BR_Y field shift */
+    EREG (PA_SC_GENERIC_SCISSOR_BR, ((x2 << PA_SC_GENERIC_SCISSOR_BR__BR_X_shift) |
+                                     (y2 << PA_SC_GENERIC_SCISSOR_BR__BR_Y_shift)));
+}
+
+/* Program the window scissor rectangle: (x1,y1) goes to the TL register
+ * (with WINDOW_OFFSET_DISABLE_bit set), (x2,y2) to the BR register. */
+void set_window_scissor(int x1, int y1, int x2, int y2)
+{
+    uint32_t tl, br;
+
+    CMD_BUFFER_PREAMBLE (2 * 2);
+
+    tl = ((x1 << PA_SC_WINDOW_SCISSOR_TL__TL_X_shift) |
+          (y1 << PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift) |
+          WINDOW_OFFSET_DISABLE_bit);
+    br = ((x2 << PA_SC_WINDOW_SCISSOR_BR__BR_X_shift) |
+          (y2 << PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift));
+
+    EREG (PA_SC_WINDOW_SCISSOR_TL, tl);
+    EREG (PA_SC_WINDOW_SCISSOR_BR, br);
+}
+
+/* Program clip rectangle id: (x1,y1) goes to the TL register, (x2,y2) to the
+ * BR register. */
+void set_clip_rect(int id, int x1, int y1, int x2, int y2)
+{
+    uint32_t tl, br;
+
+    CMD_BUFFER_PREAMBLE (2 * 2);
+
+    tl = ((x1 << PA_SC_CLIPRECT_0_TL__TL_X_shift) |
+          (y1 << PA_SC_CLIPRECT_0_TL__TL_Y_shift));
+    br = ((x2 << PA_SC_CLIPRECT_0_BR__BR_X_shift) |
+          (y2 << PA_SC_CLIPRECT_0_BR__BR_Y_shift));
+
+    EREG (PA_SC_CLIPRECT_0_TL + id * PA_SC_CLIPRECT_0_TL_offset, tl);
+    EREG (PA_SC_CLIPRECT_0_BR + id * PA_SC_CLIPRECT_0_BR_offset, br);
+}
+
+/*
+ * Setup of default state
+ */
+
+/* Emit the complete default 3D state into the indirect buffer.
+ *
+ * In order: wait for 3D idle, chipset-specific setup registers, optional
+ * clearing of samplers/constants, SQ configuration (via sq_setup()), then
+ * defaults for the DB, SX, CB, SC, CL, SU, SPI and VGT register groups, a
+ * cleared fetch shader and, optionally, cleared texture resources.
+ *
+ * The optional parts are compiled in when CLEAN_SETUP is defined. All of
+ * them are guarded with #ifdef, in line with the "Define to ..." description
+ * of that macro.
+ *
+ * Besides emitting packets, the function reads CC_GC_SHADER_PIPE_CONFIG
+ * directly through reg_read32() to determine the number of qd pipes. */
+void set_default_state(adapter_t *adapt)
+{
+    tex_resource_t tex_res;
+    shader_config_t fs_conf;
+    sq_config_t sq_conf;
+    int i;
+    uint32_t reg;
+    CMD_BUFFER_PREAMBLE (ELEN_WAIT_3D_IDLE);
+
+    /* Zeroed configs are used below to clear the fetch shader and the
+     * texture resources */
+    memset(&tex_res, 0, sizeof(tex_resource_t));
+    memset(&fs_conf, 0, sizeof(shader_config_t));
+
+    EMIT_WAIT_3D_IDLE();
+
+    // ASIC specific setup, see drm
+    CMD_BUFFER_ALLOC (5*2);
+    if (adapt->chipset <= CHIPSET_RV670) {
+        EREG (TA_CNTL_AUX, (( 3 << GRADIENT_CREDIT_shift) |
+                            (28 << TD_FIFO_CREDIT_shift)));
+        EREG (VC_ENHANCE, 0);
+        EREG (R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0);
+        EREG (DB_DEBUG, 0x82000000); /* ? */
+        EREG (DB_WATERMARKS, ((4 << DEPTH_FREE_shift) |
+                              (16 << DEPTH_FLUSH_shift) |
+                              (0 << FORCE_SUMMARIZE_shift) |
+                              (4 << DEPTH_PENDING_FREE_shift) |
+                              (16 << DEPTH_CACHELINE_FREE_shift) |
+                              0));
+    } else {
+        EREG (TA_CNTL_AUX, (( 2 << GRADIENT_CREDIT_shift) |
+                            (28 << TD_FIFO_CREDIT_shift)));
+        EREG (VC_ENHANCE, 0);
+        EREG (R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, VS_PC_LIMIT_ENABLE_bit);
+        EREG (DB_DEBUG, 0);
+        EREG (DB_WATERMARKS, ((4 << DEPTH_FREE_shift) |
+                              (16 << DEPTH_FLUSH_shift) |
+                              (0 << FORCE_SUMMARIZE_shift) |
+                              (4 << DEPTH_PENDING_FREE_shift) |
+                              (4 << DEPTH_CACHELINE_FREE_shift) |
+                              0));
+    }
+
+#ifdef CLEAN_SETUP
+    reset_td_samplers();
+    reset_dx9_alu_consts();
+    reset_bool_loop_const ();
+    reset_sampler_const ();
+#endif
+
+    // SQ
+    sq_conf.ps_prio = 0;
+    sq_conf.vs_prio = 1;
+    sq_conf.gs_prio = 2;
+    sq_conf.es_prio = 3;
+    // need to set stack/thread/gpr limits based on the asic
+    // for now just set them low enough so any card will work
+    // see r600_cp.c in the drm
+    // maybe add a GET_PARAM for these to the drm
+    if (adapt->chipset <= CHIPSET_RV670) {
+        sq_conf.num_ps_gprs = 100;
+        sq_conf.num_vs_gprs = 24;
+        sq_conf.num_temp_gprs = 4;
+        sq_conf.num_gs_gprs = 0;
+        sq_conf.num_es_gprs = 0;
+        sq_conf.num_ps_threads = 136;
+        sq_conf.num_vs_threads = 48;
+        sq_conf.num_gs_threads = 4;
+        sq_conf.num_es_threads = 4;
+        sq_conf.num_ps_stack_entries = 40;
+        sq_conf.num_vs_stack_entries = 40;
+        sq_conf.num_gs_stack_entries = 0;
+        sq_conf.num_es_stack_entries = 0;
+    } else {
+        sq_conf.num_ps_gprs = 100;
+        sq_conf.num_vs_gprs = 24;
+        sq_conf.num_temp_gprs = 4;
+        sq_conf.num_gs_gprs = 0;
+        sq_conf.num_es_gprs = 0;
+        sq_conf.num_ps_threads = 136;
+        sq_conf.num_vs_threads = 48;
+        sq_conf.num_gs_threads = 0;
+        sq_conf.num_es_threads = 0;
+        sq_conf.num_ps_stack_entries = 128;
+        sq_conf.num_vs_stack_entries = 128;
+        sq_conf.num_gs_stack_entries = 0;
+        sq_conf.num_es_stack_entries = 0;
+    }
+
+    // XXX: move to drm
+    /* Active qd pipes = maximum minus the ones flagged inactive */
+    reg = reg_read32 (CC_GC_SHADER_PIPE_CONFIG);
+    if (adapt->chipset <= CHIPSET_RV770)
+        sq_conf.num_qd_pipes = R6XX_MAX_QD_PIPES - count_bits (reg & INACTIVE_QD_PIPES_mask);
+    else
+        sq_conf.num_qd_pipes = 4; /* TODO: Verify (This is from TCore) */
+
+    sq_setup(adapt, &sq_conf);
+
+    CMD_BUFFER_ALLOC (2*2 + 10 + 9*2 + 2*2);
+
+    EREG (SQ_VTX_BASE_VTX_LOC, 0);
+    EREG (SQ_VTX_START_INST_LOC, 0);
+
+    PACK0 (SQ_ESGS_RING_ITEMSIZE, 9);
+    E32 (0); // SQ_ESGS_RING_ITEMSIZE
+    E32 (0); // SQ_GSVS_RING_ITEMSIZE
+    E32 (0); // SQ_ESTMP_RING_ITEMSIZE
+    E32 (0); // SQ_GSTMP_RING_ITEMSIZE
+    E32 (0); // SQ_VSTMP_RING_ITEMSIZE
+    E32 (0); // SQ_PSTMP_RING_ITEMSIZE
+    E32 (0); // SQ_FBUF_RING_ITEMSIZE
+    E32 (0); // SQ_REDUC_RING_ITEMSIZE
+    E32 (0); // SQ_GS_VERT_ITEMSIZE
+
+    // DB
+    EREG (DB_DEPTH_INFO, 0);
+    EREG (DB_STENCIL_CLEAR, 0);
+    EREG (DB_DEPTH_CLEAR, 0);
+    EREG (DB_STENCILREFMASK, 0);
+    EREG (DB_STENCILREFMASK_BF, 0);
+    EREG (DB_DEPTH_CONTROL, 0);
+    EREG (DB_RENDER_CONTROL, STENCIL_COMPRESS_DISABLE_bit | DEPTH_COMPRESS_DISABLE_bit);
+    if (adapt->chipset <= CHIPSET_RV670)
+        EREG (DB_RENDER_OVERRIDE, FORCE_SHADER_Z_ORDER_bit);
+    else
+        EREG (DB_RENDER_OVERRIDE, 0);
+    EREG (DB_ALPHA_TO_MASK, ((2 << ALPHA_TO_MASK_OFFSET0_shift) |
+                             (2 << ALPHA_TO_MASK_OFFSET1_shift) |
+                             (2 << ALPHA_TO_MASK_OFFSET2_shift) |
+                             (2 << ALPHA_TO_MASK_OFFSET3_shift)));
+
+    // SX
+    EREG (SX_ALPHA_TEST_CONTROL, 0);
+    EREG (SX_ALPHA_REF, 0);
+
+    // CB
+    reset_cb();
+
+    CMD_BUFFER_ALLOC (5 + CB_BLEND0_CONTROL_num+1 + 2);
+
+    PACK0 (CB_BLEND_RED, 4);
+    E32 (0x00000000);
+    E32 (0x00000000);
+    E32 (0x00000000);
+    E32 (0x00000000);
+
+#ifdef CLEAN_SETUP
+    /* CB_COLOR_CONTROL.PER_MRT_BLEND is off */
+    // RV6xx+ have per-MRT blend
+    if (adapt->chipset > CHIPSET_R600) {
+        PACK0 (CB_BLEND0_CONTROL, CB_BLEND0_CONTROL_num);
+        for (i = 0; i < CB_BLEND0_CONTROL_num; i++)
+            E32 (0);
+    }
+#endif
+    EREG (CB_BLEND_CONTROL, 0);
+
+    if (adapt->chipset <= CHIPSET_RV670) {
+        CMD_BUFFER_ALLOC (4);
+        PACK0 (CB_FOG_RED, 3);
+        E32 (0x00000000);
+        E32 (0x00000000);
+        E32 (0x00000000);
+    }
+
+    CMD_BUFFER_ALLOC (2 + 5);
+    EREG (CB_COLOR_CONTROL, 0);
+    PACK0 (CB_CLRCMP_CONTROL, 4);
+    E32 (1 << CLRCMP_FCN_SEL_shift); // CB_CLRCMP_CONTROL: use CLRCMP_FCN_SRC
+    E32 (0); // CB_CLRCMP_SRC
+    E32 (0); // CB_CLRCMP_DST
+    E32 (0); // CB_CLRCMP_MSK
+
+    if (adapt->chipset <= CHIPSET_RV670) {
+        CMD_BUFFER_ALLOC (5);
+        PACK0 (CB_CLEAR_RED, 4);
+        EFLOAT(1.0); /* WTF? */
+        EFLOAT(0.0);
+        EFLOAT(1.0);
+        EFLOAT(1.0);
+    }
+    CMD_BUFFER_ALLOC (2);
+    EREG (CB_TARGET_MASK, (0x0f << TARGET0_ENABLE_shift));
+
+    // SC
+    set_generic_scissor(0, 0, 8192, 8192);
+    set_screen_scissor (0, 0, 8192, 8192);
+    CMD_BUFFER_ALLOC (2);
+    EREG (PA_SC_WINDOW_OFFSET, ((0 << WINDOW_X_OFFSET_shift) |
+                                (0 << WINDOW_Y_OFFSET_shift)));
+    set_window_scissor (0, 0, 8192, 8192);
+
+    CMD_BUFFER_ALLOC (2);
+    EREG (PA_SC_CLIPRECT_RULE, CLIP_RULE_mask);
+
+#ifdef CLEAN_SETUP
+    /* clip boolean is set to always visible -> doesn't matter */
+    for (i = 0; i < PA_SC_CLIPRECT_0_TL_num; i++)
+        set_clip_rect (i, 0, 0, 8192, 8192);
+#endif
+
+    CMD_BUFFER_ALLOC (2);
+    if (adapt->chipset <= CHIPSET_RV670)
+        EREG (R7xx_PA_SC_EDGERULE, 0x00000000);
+    else
+        EREG (R7xx_PA_SC_EDGERULE, 0xAAAAAAAA); /* ? */
+
+    for (i = 0; i < PA_SC_VPORT_SCISSOR_0_TL_num; i++) {
+        set_vport_scissor (i, 0, 0, 8192, 8192);
+        CMD_BUFFER_ALLOC (3);
+        /* ZMIN = 0.0 followed by 1.0 for the next register */
+        PACK0 (PA_SC_VPORT_ZMIN_0 + i * PA_SC_VPORT_ZMIN_0_offset, 2);
+        EFLOAT(0.0);
+        EFLOAT(1.0);
+    }
+
+    CMD_BUFFER_ALLOC (2 + 3*2);
+
+    if (adapt->chipset <= CHIPSET_RV670)
+        EREG (PA_SC_MODE_CNTL, (WALK_ORDER_ENABLE_bit | FORCE_EOV_CNTDWN_ENABLE_bit));
+    else
+        EREG (PA_SC_MODE_CNTL, (FORCE_EOV_CNTDWN_ENABLE_bit | FORCE_EOV_REZ_ENABLE_bit |
+                                0x00500000)); /* ? */
+
+    EREG (PA_SC_LINE_CNTL, 0);
+    EREG (PA_SC_AA_CONFIG, 0);
+    EREG (PA_SC_AA_MASK, 0xFFFFFFFF);
+
+    //XXX: double check this
+    if (adapt->chipset > CHIPSET_R600) {
+        CMD_BUFFER_ALLOC (2*2);
+        EREG (PA_SC_AA_SAMPLE_LOCS_MCTX, 0);
+        EREG (PA_SC_AA_SAMPLE_LOCS_8S_WD1_M, 0);
+    }
+
+    CMD_BUFFER_ALLOC (2*2 + 7 + 4*2 + 5);
+
+    EREG (PA_SC_LINE_STIPPLE, 0);
+    EREG (PA_SC_MPASS_PS_CNTL, 0);
+
+    // CL
+    PACK0 (PA_CL_VPORT_XSCALE_0, 6);
+    EFLOAT (0.0f); // PA_CL_VPORT_XSCALE
+    EFLOAT (0.0f); // PA_CL_VPORT_XOFFSET
+    EFLOAT (0.0f); // PA_CL_VPORT_YSCALE
+    EFLOAT (0.0f); // PA_CL_VPORT_YOFFSET
+    EFLOAT (0.0f); // PA_CL_VPORT_ZSCALE
+    EFLOAT (0.0f); // PA_CL_VPORT_ZOFFSET
+    EREG (PA_CL_CLIP_CNTL, (CLIP_DISABLE_bit | DX_CLIP_SPACE_DEF_bit));
+    EREG (PA_CL_VTE_CNTL, 0);
+    EREG (PA_CL_VS_OUT_CNTL, 0);
+    EREG (PA_CL_NANINF_CNTL, 0);
+    PACK0 (PA_CL_GB_VERT_CLIP_ADJ, 4);
+    EFLOAT (1.0); // PA_CL_GB_VERT_CLIP_ADJ
+    EFLOAT (1.0); // PA_CL_GB_VERT_DISC_ADJ
+    EFLOAT (1.0); // PA_CL_GB_HORZ_CLIP_ADJ
+    EFLOAT (1.0); // PA_CL_GB_HORZ_DISC_ADJ
+
+#ifdef CLEAN_SETUP
+    CMD_BUFFER_ALLOC (25);
+    /* user clipping planes are disabled by default */
+    PACK0 (PA_CL_UCP_0_X, 24);
+    for (i = 0; i < 24; i++)
+        EFLOAT (0.0);
+#endif
+
+    CMD_BUFFER_ALLOC (8*2 + 2*2 + 2 + 2 + 5*2 + SPI_VS_OUT_ID_0_num+1 + 2);
+
+    // SU
+    EREG (PA_SU_SC_MODE_CNTL, FACE_bit);
+    EREG (PA_SU_POINT_SIZE, 0);
+    EREG (PA_SU_POINT_MINMAX, 0);
+    EREG (PA_SU_POLY_OFFSET_DB_FMT_CNTL, 0);
+    EREG (PA_SU_POLY_OFFSET_BACK_SCALE, 0);
+    EREG (PA_SU_POLY_OFFSET_FRONT_SCALE, 0);
+    EREG (PA_SU_POLY_OFFSET_BACK_OFFSET, 0);
+    EREG (PA_SU_POLY_OFFSET_FRONT_OFFSET, 0);
+
+    EREG (PA_SU_LINE_CNTL, (8 << PA_SU_LINE_CNTL__WIDTH_shift)); /* Line width 1 pixel */
+    EREG (PA_SU_VTX_CNTL, ((2 << PA_SU_VTX_CNTL__ROUND_MODE_shift) |
+                           (5 << QUANT_MODE_shift))); /* Round to Even, fixed point 1/256 */
+    EREG (PA_SU_POLY_OFFSET_CLAMP, 0);
+
+    // SPI
+    if (adapt->chipset <= CHIPSET_RV670)
+        EREG (R7xx_SPI_THREAD_GROUPING, 0);
+    else
+        EREG (R7xx_SPI_THREAD_GROUPING, (1 << PS_GROUPING_shift));
+
+    EREG (SPI_INTERP_CONTROL_0, ((2 << PNT_SPRITE_OVRD_X_shift) |
+                                 (3 << PNT_SPRITE_OVRD_Y_shift) |
+                                 (0 << PNT_SPRITE_OVRD_Z_shift) |
+                                 (1 << PNT_SPRITE_OVRD_W_shift))); /* s,t,0,1 */
+    EREG (SPI_INPUT_Z, 0);
+    EREG (SPI_FOG_CNTL, 0);
+    EREG (SPI_FOG_FUNC_SCALE, 0);
+    EREG (SPI_FOG_FUNC_BIAS, 0);
+
+    /* Each register holds four byte-sized ids: 0,1,2,3 then 4,5,6,7 ... */
+    PACK0 (SPI_VS_OUT_ID_0, SPI_VS_OUT_ID_0_num);
+    for (i = 0; i < SPI_VS_OUT_ID_0_num; i++) /* identity mapping */
+        E32 (0x03020100 + i*0x04040404);
+    EREG (SPI_VS_OUT_CONFIG, 0);
+
+    // clear FS
+    fs_setup(adapt, &fs_conf);
+
+    CMD_BUFFER_ALLOC (5*2 + 20*2);
+
+    // VGT
+    EREG (VGT_MAX_VTX_INDX, 0);
+    EREG (VGT_MIN_VTX_INDX, 0);
+    EREG (VGT_INDX_OFFSET, 0);
+    EREG (VGT_INSTANCE_STEP_RATE_0, 0);
+    EREG (VGT_INSTANCE_STEP_RATE_1, 0);
+
+    EREG (VGT_MULTI_PRIM_IB_RESET_INDX, 0);
+    EREG (VGT_OUTPUT_PATH_CNTL, 0);
+    EREG (VGT_GS_MODE, 0);
+    EREG (VGT_HOS_CNTL, 0);
+    EREG (VGT_HOS_MAX_TESS_LEVEL, 0);
+    EREG (VGT_HOS_MIN_TESS_LEVEL, 0);
+    EREG (VGT_HOS_REUSE_DEPTH, 0);
+    EREG (VGT_GROUP_PRIM_TYPE, 0);
+    EREG (VGT_GROUP_FIRST_DECR, 0);
+    EREG (VGT_GROUP_DECR, 0);
+    EREG (VGT_GROUP_VECT_0_CNTL, 0);
+    EREG (VGT_GROUP_VECT_1_CNTL, 0);
+    EREG (VGT_GROUP_VECT_0_FMT_CNTL, 0);
+    EREG (VGT_GROUP_VECT_1_FMT_CNTL, 0);
+    EREG (VGT_PRIMITIVEID_EN, 0);
+    EREG (VGT_MULTI_PRIM_IB_RESET_EN, 0);
+    EREG (VGT_STRMOUT_EN, 0);
+    EREG (VGT_REUSE_OFF, 0);
+    EREG (VGT_VTX_CNT_EN, 0);
+    EREG (VGT_STRMOUT_BUFFER_EN, 0);
+
+#ifdef CLEAN_SETUP
+    // clear tex resources - PS
+    for (i = 0; i < 16; i++) {
+        tex_res.id = i;
+        set_tex_resource(adapt, &tex_res);
+    }
+
+    // clear tex resources - VS
+    for (i = 160; i < 164; i++) {
+        tex_res.id = i;
+        set_tex_resource(adapt, &tex_res);
+    }
+
+    // clear tex resources - FS
+    /* NOTE(review): this clears ids 320..334 (15 entries), unlike the 16
+     * entries of the PS loop; confirm whether 335 was left out on purpose. */
+    for (i = 320; i < 335; i++) {
+        tex_res.id = i;
+        set_tex_resource(adapt, &tex_res);
+    }
+#endif
+
+}
+
+
+/*
+ * Commands
+ */
+
+/* Emit a draw command whose indices are embedded in the command stream.
+ * indices holds draw_conf->num_indices values. With DI_INDEX_SIZE_16_BIT two
+ * consecutive indices are packed into one dword (low half first) and a
+ * trailing odd index is emitted on its own; otherwise every index takes one
+ * dword. adapt is not used. */
+void draw_immd(adapter_t *adapt, draw_config_t *draw_conf, uint32_t *indices)
+{
+    uint32_t idx, payload;
+
+    CMD_BUFFER_PREAMBLE (2 + 2 + 2);
+
+    if (verbose)
+        printf ("Drawing %d vertices\n", draw_conf->num_indices);
+
+    EREG (VGT_PRIMITIVE_TYPE, draw_conf->prim_type);
+    PACK3 (IT_INDEX_TYPE, 1);
+    E32 (draw_conf->index_type);
+    PACK3 (IT_NUM_INSTANCES, 1);
+    E32 (draw_conf->num_instances);
+
+    /* Packet payload: two fixed dwords plus the index data */
+    payload = 2;
+    payload += (draw_conf->index_type == DI_INDEX_SIZE_16_BIT)
+               ? (draw_conf->num_indices + 1) / 2
+               : draw_conf->num_indices;
+
+    CMD_BUFFER_ALLOC (payload +1);
+    PACK3 (IT_DRAW_INDEX_IMMD, payload);
+    E32 (draw_conf->num_indices);
+    E32 (draw_conf->vgt_draw_initiator);
+
+    /* Full-width indices: one dword each */
+    if (draw_conf->index_type != DI_INDEX_SIZE_16_BIT) {
+        for (idx = 0; idx < draw_conf->num_indices; idx++)
+            E32 (indices[idx]);
+        return;
+    }
+
+    /* 16 bit indices: pack pairs, the second index goes to the upper half */
+    for (idx = 0; idx < draw_conf->num_indices; idx += 2) {
+        uint32_t packed = indices[idx];
+
+        if ((idx + 1) != draw_conf->num_indices)
+            packed |= (indices[idx + 1] << 16);
+        E32 (packed);
+    }
+}
+
+/* Emit a draw command without index data: primitive type, index type and
+ * instance count are set first, then IT_DRAW_INDEX_AUTO is issued with the
+ * number of indices and the draw initiator from draw_conf.
+ * adapt is not used. */
+void draw_auto(adapter_t *adapt, draw_config_t *draw_conf)
+{
+ /* 2 (EREG) + 2 + 2 (one-dword PACK3s) + 3 (draw packet) dwords */
+ CMD_BUFFER_PREAMBLE (2 + 2 + 2 + 3);
+
+ EREG (VGT_PRIMITIVE_TYPE, draw_conf->prim_type);
+ PACK3 (IT_INDEX_TYPE, 1);
+ E32 (draw_conf->index_type);
+ PACK3 (IT_NUM_INSTANCES, 1);
+ E32 (draw_conf->num_instances);
+ PACK3 (IT_DRAW_INDEX_AUTO, 2);
+ E32 (draw_conf->num_indices);
+ E32 (draw_conf->vgt_draw_initiator);
+}
+
+/* Program the six viewport transform values starting at
+ * PA_CL_VPORT_XSCALE_0 for a width x height viewport.
+ * X scale and offset are both width/2; Y scale is -height/2 with offset
+ * +height/2. Z scale is fixed at 1 and Z offset at 0; the depth argument is
+ * not used yet. adapt is not used. */
+void set_viewport(adapter_t *adapt, uint32_t width, uint32_t height, uint32_t depth)
+{
+    /* Order matches the register layout: X, Y, Z as scale/offset pairs */
+    float xform[6];
+    int k;
+
+    CMD_BUFFER_PREAMBLE (7);
+
+    xform[0] = 0.5f * (float)width;     /* xscale */
+    xform[1] = 0.0f + xform[0];         /* xoffset */
+    xform[2] = -0.5f * (float)height;   /* yscale */
+    xform[3] = 0.0f - xform[2];         /* yoffset */
+    // fixme depth
+    xform[4] = 1.0f - 0.0f;             /* zscale */
+    xform[5] = 0.0f;                    /* zoffset */
+
+    PACK0 (PA_CL_VPORT_XSCALE_0, 6);
+    for (k = 0; k < 6; k++)
+        EFLOAT (xform[k]);
+}
+
diff --git a/r600_init.h b/r600_init.h
new file mode 100644
index 0000000..e65a346
--- /dev/null
+++ b/r600_init.h
@@ -0,0 +1,71 @@
+/*
+ * RadeonHD R6xx, R7xx DRI driver
+ *
+ * Copyright (C) 2008-2009 Alexander Deucher
+ * Copyright (C) 2008-2009 Matthias Hopf
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __R600_INIT_H__
+#define __R600_INIT_H__
+
+/*
+ * Hardware setup
+ *
+ * Prototypes only.  This header includes nothing itself, so uint32_t,
+ * adapter_t and the *_config_t / *_resource_t / tex_sampler_t types used
+ * below must already be declared by whoever includes it.
+ */
+
+/* Reset helpers */
+void reset_cb (void);
+void reset_td_samplers (void);
+void reset_sampler_const (adapter_t *adapt);
+void reset_dx9_alu_consts (adapter_t *adapt, uint32_t offset, int count);
+void reset_bool_loop_const (adapter_t *adapt);
+
+/* State setup: each takes a configuration struct describing what to program */
+void sq_setup (adapter_t *adapt, sq_config_t *sq_conf);
+void set_render_target (adapter_t *adapt, cb_config_t *cb_conf);
+void set_depth_target (adapter_t *adapt, db_config_t *db_conf);
+void cp_set_surface_sync (void);
+void fs_setup (adapter_t *adapt, shader_config_t *fs_conf);
+void vs_setup (adapter_t *adapt, shader_config_t *vs_conf);
+void ps_setup (adapter_t *adapt, shader_config_t *ps_conf);
+void set_alu_consts (int offset, int count, float *const_buf);
+void set_vtx_resource (adapter_t *adapt, vtx_resource_t *vtx_res);
+void set_tex_resource (adapter_t *adapt, tex_resource_t *tex_res);
+void set_tex_sampler (adapter_t *adapt, tex_sampler_t *tex_samp);
+
+void set_viewport(adapter_t *adapt, uint32_t width, uint32_t height, uint32_t depth);
+
+void set_default_state (adapter_t *adapt);
+
+/* Draw calls: draw_immd takes the index data as an argument, draw_auto takes none */
+void draw_immd (adapter_t *adapt, draw_config_t *draw_conf, uint32_t *indices);
+void draw_auto (adapter_t *adapt, draw_config_t *draw_conf);
+void wait_3d_idle (void);
+void wait_3d_idle_clean (void);
+void start_3d (adapter_t *adapt);
+
+/* Scissor / clip rectangles, given as two corner points (x1,y1)-(x2,y2) */
+void set_screen_scissor(int x1, int y1, int x2, int y2);
+void set_vport_scissor(int id, int x1, int y1, int x2, int y2);
+void set_generic_scissor(int x1, int y1, int x2, int y2);
+void set_window_scissor(int x1, int y1, int x2, int y2);
+void set_clip_rect(int id, int x1, int y1, int x2, int y2);
+
+/*
+ * Sample texture dimensions -- presumably the values meant to be passed to
+ * create_sample_texture(); the unit of TEX_PITCH is not visible here
+ * (TODO confirm against the implementation).
+ */
+#define TEX_WIDTH 16
+#define TEX_HEIGHT 16
+#define TEX_PITCH 32
+uint32_t *create_sample_texture (int width, int height, int pitch);
+
+#endif
diff --git a/r600_lib.c b/r600_lib.c
new file mode 100644
index 0000000..e33b8a2
--- /dev/null
+++ b/r600_lib.c
@@ -0,0 +1,683 @@
+/*
+ * r600_demo
+ *
+ * Copyright (C) 2008-2009 Matthias Hopf
+ *
+ * Based on r300_demo,
+ * Copyright (C) various authors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Helper routines, to be split up
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <math.h>
+#include <assert.h>
+
+#include "r600_reg.h"
+#include "r600_emit.h"
+#include "r600_lib.h"
+#include "r600_shader.h"
+#include "xf86drm.h"
+#include "radeon_drm.h"
+
+/* DRM / buffer related */
+extern int drmFD;
+extern int indirect_start, indirect_end, indirect_size, indirect_idx, ring_size;
+extern drm_context_t context;
+extern void *ring;
+
+extern int verbose;
+
+
+/*
+ * Busy-poll a register until it reads back an expected value.
+ *
+ *   adapt - adapter handle, only used for the show_state() dump on failure
+ *   reg   - register to read with reg_read32()
+ *   v     - value the register has to match exactly
+ *   when  - label used as prefix in the diagnostics
+ *
+ * The register is read up to max_loops times back to back, with no delay in
+ * between.  If it never matches, the last value and the GPU state are
+ * printed and the process exits with status 1.  On success a confirmation
+ * is written to stderr when verbose >= 2.
+ */
+void wait_reg (adapter_t *adapt, uint32_t reg, uint32_t v, const char *when)
+{
+    /* integer bound: keeps the loop test and the timeout test free of
+     * int <-> double conversions */
+    const int max_loops = 1000000;
+    int i;
+
+    for (i = 0; i < max_loops; i++)
+        if (reg_read32 (reg) == v)
+            break;
+
+    if (i == max_loops) {
+        fprintf (stderr, "***** %s: still not set after %d loops: 0x%x, should be 0x%x\n",
+                 when, i, reg_read32 (reg), v);
+        show_state (adapt);
+        fprintf (stderr, "***** FAILED\n\n");
+        exit (1);
+    } else if (verbose >= 2)
+        fprintf (stderr, "%s: set correctly after %d loops: 0x%x\n", when, i, v);
+}
+
+/*
+ * Queue a cache flush-and-invalidate event followed by a WAIT_UNTIL on the
+ * 3D idle-clean condition.  Only appends commands via pack3/e32/ereg; nothing
+ * in this body submits them.
+ *
+ * Declared with (void) so the definition is a real prototype and matches
+ * the declaration in r600_init.h.
+ */
+void wait_3d_idle_clean (void)
+{
+    //flush caches, don't generate timestamp
+    pack3 (IT_EVENT_WRITE, 1);
+    e32 (CACHE_FLUSH_AND_INV_EVENT);
+    // wait for 3D idle clean
+    ereg (WAIT_UNTIL, WAIT_3D_IDLECLEAN_bit);
+}
+
+
+/*
+ * Queue a WAIT_UNTIL on the 3D idle condition.  Only appends the command via
+ * ereg; nothing in this body submits it.
+ *
+ * Declared with (void) so the definition is a real prototype and matches
+ * the declaration in r600_init.h.
+ */
+void wait_3d_idle (void)
+{
+    ereg (WAIT_UNTIL, WAIT_3D_IDLE_bit);
+}
+
+
+/*
+ * Submit the pending part of the indirect buffer through the DRM.
+ *
+ * Issues DRM_RADEON_INDIRECT for buffer indirect_idx, range
+ * [indirect_start, indirect_end), with discard = 0, while holding the DRM
+ * lock.  A failing ioctl is reported with drmError(); in either case the
+ * submitted range is afterwards considered consumed (indirect_start is
+ * moved up to indirect_end).
+ */
+void flush_indirect (void)
+{
+    drm_radeon_indirect_t req;
+    int ret;
+
+    req.discard = 0;
+    req.end     = indirect_end;
+    req.start   = indirect_start;
+    req.idx     = indirect_idx;
+
+    drmGetLock (drmFD, context, DRM_LOCK_READY);
+    ret = drmCommandWriteRead (drmFD, DRM_RADEON_INDIRECT, &req, sizeof (req));
+    drmUnlock (drmFD, context);
+
+    if (ret < 0)
+        drmError (ret, __func__);
+
+    indirect_start = indirect_end;
+}
+
+
+/*
+ * Copy the pending part of the indirect buffer straight into the CP ring
+ * buffer, bypassing the DRM, and advance the hardware write pointer.
+ *
+ * The ring is addressed in dwords: the position starts at the current
+ * CP_RB_WPTR value and wraps to 0 at ring_size>>2.  After every dword the
+ * new position is compared against CP_RB_RPTR; while both are equal the
+ * function spins (printing a message each time).  Once everything is
+ * copied, the written ring range is passed to flush_cache() -- as two
+ * pieces if the position did not end up above where it started -- and
+ * CP_RB_WPTR is updated.  Finally the indirect range is marked consumed.
+ */
+void flush_ib_to_ring (void)
+{
+    int wptr_start = reg_read32 (CP_RB_WPTR);
+    int wptr = wptr_start;
+    uint32_t *ring32 = (uint32_t *) ring;
+    int src;
+
+    for (src = indirect_start>>2; src < indirect_end>>2; src++) {
+        ring32[wptr] = indirect[src];
+        wptr++;
+        if (wptr >= (ring_size>>2))
+            wptr = 0;
+        /* should never happen */
+        while (wptr == reg_read32 (CP_RB_RPTR))
+            fprintf (stderr, "Waiting for ring buffer to become free...\n");
+    }
+
+    if (wptr <= wptr_start) {
+        /* two pieces: from the start position to the end of the ring,
+         * then from the ring base to the new position */
+        flush_cache (ring32 + wptr_start, (char *) ring + ring_size);
+        flush_cache (ring, ring32 + wptr);
+    } else {
+        flush_cache (ring32 + wptr_start, ring32 + wptr);
+    }
+
+    reg_write32 (CP_RB_WPTR, wptr);
+
+    indirect_start = indirect_end;
+}
+
+
+/*
+ * Pad and submit the commands queued in the indirect buffer.
+ *
+ * Steps, in order:
+ *   1. append RADEON_CP_PACKET2 until (indirect_end & 0x3c) is zero
+ *   2. give up (with a message) if indirect_end has one of its two lowest
+ *      bits set, or if it has reached indirect_size; return silently
+ *      (message only at verbose >= 2) if nothing is pending
+ *   3. if verbose, hex-dump the pending range, eight dwords per line
+ *   4. unless do_not_flush is set, hand the range to flush_ib_to_ring()
+ *      or flush_indirect(), depending on use_ring_directly
+ */
+void flush_cmds (void)
+{
+    int dw;
+
+    /* fill up to multiple of 16 dwords */
+    while (indirect_end & 0x3c)
+        e32 (RADEON_CP_PACKET2);
+
+// flush_cache_indirect (); /* Apparently not necessary */
+
+    /* does not end on dword boundary */
+    if (indirect_end & 0x3) {
+        fprintf (stderr, "*** indirect_end & 0x3\n");
+        return;
+    }
+
+    if (indirect_start == indirect_end) {
+        if (verbose >= 2)
+            fprintf (stderr, "indirect empty\n");
+        return;
+    }
+
+    if (indirect_end >= indirect_size) {
+        fprintf (stderr, "*** indirect out of bounds\n");
+        return;
+    }
+
+    if (verbose) {
+        printf ("flushing indirect buffer %d with %d dwords%s:\n",
+                indirect_idx, (indirect_end-indirect_start)>>2,
+                use_ring_directly ? " to ring":"");
+        for (dw = indirect_start>>2; dw < indirect_end>>2; dw += 8)
+            printf (" %08x %08x %08x %08x %08x %08x %08x %08x\n",
+                    indirect[dw],   indirect[dw+1], indirect[dw+2], indirect[dw+3],
+                    indirect[dw+4], indirect[dw+5], indirect[dw+6], indirect[dw+7]);
+    }
+
+    if (do_not_flush)
+        return;
+
+    if (!use_ring_directly)
+        flush_indirect ();      /* Flush IB through DRM */
+    else
+        flush_ib_to_ring ();    /* Copy IB directly to ring w/o DRM intervention */
+}
+
+
+/*
+ * Queue an IT_SURFACE_SYNC packet with the TC, VC, SH and CR0 action bits
+ * set, BASE 0 and SIZE 0xffffffff.  Only appends to the command stream via
+ * pack3/e32; nothing in this body submits it.
+ */
+void flush_gpu_input_cache (void)
+{
+ /* To be used after texture uploads etc. */
+ pack3 (IT_SURFACE_SYNC, 4);
+ e32 (TC_ACTION_ENA_bit | VC_ACTION_ENA_bit | SH_ACTION_ENA_bit | CR0_ACTION_ENA_bit);
+ e32 (0xffffffff); /* SIZE */
+ e32 (0); /* BASE */
+ e32 (1); /* POLL_INTERVAL useful value? */
+}
+
+
+/*
+ * Queue an IT_SURFACE_SYNC packet with the TC, VC, SH and CR0 action bits
+ * set, BASE 0 and SIZE 0xffffffff.  The packet emitted here is identical to
+ * the one emitted by flush_gpu_input_cache().  Only appends to the command
+ * stream via pack3/e32; nothing in this body submits it.
+ */
+void flush_gpu_output_cache (void)
+{
+ /* To be used before readpixels, copy-to-texture etc. */
+ pack3 (IT_SURFACE_SYNC, 4);
+ e32 (TC_ACTION_ENA_bit | VC_ACTION_ENA_bit | SH_ACTION_ENA_bit | CR0_ACTION_ENA_bit);
+ e32 (0xffffffff); /* SIZE */
+ e32 (0); /* BASE */
+ e32 (1); /* POLL_INTERVAL useful value? */
+/* Note on the synchronization scheme (no time stamp packet is emitted here):
+   Submit a time stamp write back packet.
+   The driver polls the write back scratch register. When the write back value
+   is present in the register, the driver knows that synchronization has completed.
+   To synchronize all surfaces (flush all caches), enable all corresponding bits in
+   CP_COHER_CNTL and set CP_COHER_SIZE = 0xFFFFFFFF, CP_COHER_BASE = 0x00000000.
+*/
+}
+
+// store shaders/buffers in vram or sysram
+// vram assumes 1024x768 @ 32 bpp, buffers stored after front buffer
+//#define SYS_MEM 1
+
+/* Write shader/buffer to agreed upon location (with offset) and return address */
+/*
+ * Copy a shader/buffer to the agreed upon location and return its GPU address.
+ *
+ *   adapt  - adapter; supplies framebuffer, framebuffer_gpu, color_pitch and
+ *            color_height (not referenced when SYS_MEM is defined)
+ *   shader - data to copy
+ *   size   - number of bytes to copy
+ *   offset - byte offset added to the base of the destination area
+ *
+ * Without SYS_MEM the destination area starts color_pitch * color_height * 4
+ * bytes into the framebuffer mapping; with SYS_MEM it is vtx / vtx_gpu.
+ * No bounds checking is done here.  At verbose >= 2 the data is hex-dumped
+ * as size/4 dwords, eight per line.
+ */
+uint64_t upload (adapter_t *adapt, void *shader, int size, int offset)
+{
+    const uint32_t *words = (const uint32_t *) shader;
+    uint64_t gpu_addr;
+    int w;
+
+#ifdef SYS_MEM
+    memcpy (((char *)vtx + offset), shader, size);
+    gpu_addr = vtx_gpu + offset;
+#else
+    memcpy (((char *)adapt->framebuffer + (adapt->color_pitch * adapt->color_height * 4) + offset), shader, size);
+    gpu_addr = adapt->framebuffer_gpu + (adapt->color_pitch * adapt->color_height * 4) + offset;
+#endif
+
+    if (verbose < 2)
+        return gpu_addr;
+
+    printf ("Upload %d dwords to offset 0x%x:\n", size/4, offset);
+    for (w = 0; w < size/4; w++)
+        printf (" %08x%s", words[w], (w & 7) != 7 ? "" : "\n");
+    /* terminate a partially filled last line */
+    if (w & 7)
+        printf ("\n");
+
+    return gpu_addr;
+}
+
+/* Dump shader to screen */
+/*
+ * Dump shader to screen.
+ *
+ *   adapt  - unused
+ *   shader - shader data, read as pairs of dwords
+ *   size   - size in bytes; size/8 pairs are printed (reported as "len")
+ *   what   - label printed in the heading
+ *
+ * Four pairs are printed per line.
+ */
+void dump_shader (adapter_t *adapt, uint32_t *shader, int size, char *what)
+{
+    const int npairs = size/8;
+    int n;
+
+    printf (" %s shader (len %d):\n", what, npairs);
+    for (n = 0; n < npairs; n++) {
+        const char *eol = ((n & 3) == 3) ? "\n" : "";
+
+        printf (" %08x %08x%s", shader[2 * n], shader[2 * n + 1], eol);
+    }
+    /* terminate a partially filled last line */
+    if (n & 3)
+        printf ("\n");
+}
+
+
+/*
+ * Soft-reset the GPU through GRBM_SOFT_RESET and restore the CP ring
+ * position afterwards, then print the resulting state with show_state().
+ *
+ * CP_RB_WPTR is sampled before the reset and written back to both
+ * CP_RB_RPTR_WR and CP_RB_WPTR afterwards; CP_ME_CNTL and CP_RB_CNTL are
+ * saved and restored around the values written temporarily.  The meaning of
+ * the literal register values is not visible in this file.
+ */
+void soft_reset (adapter_t *adapt)
+{
+ uint32_t cpptr, cpmecntl, cprbcntl;
+
+ printf ("\n* Resetting\n");
+
+ cpptr = reg_read32 (CP_RB_WPTR); /* propagated by drm, so we mustn't mess it up */
+ /* save CP_ME_CNTL; 0x10000000 presumably halts the micro engine -- TODO confirm */
+ cpmecntl = reg_read32 (CP_ME_CNTL);
+ reg_write32 (CP_ME_CNTL, 0x10000000);
+
+ /* assert reset (0x7fff), wait 50 microseconds, release (0); each write is
+  * followed by a read of the same register (presumably to post it) */
+ reg_write32 (GRBM_SOFT_RESET, 0x7fff);
+ reg_read32 (GRBM_SOFT_RESET);
+ usleep (50);
+ reg_write32 (GRBM_SOFT_RESET, 0);
+ reg_read32 (GRBM_SOFT_RESET);
+
+ /* save CP_RB_CNTL; 0x80000000 is written while the pointers are reprogrammed
+  * (presumably enables writing the read pointer -- TODO confirm) */
+ reg_write32 (CP_RB_WPTR_DELAY, 0);
+ cprbcntl = reg_read32 (CP_RB_CNTL);
+ reg_write32 (CP_RB_CNTL, 0x80000000);
+
+ reg_write32 (CP_RB_RPTR_WR, cpptr); /* Reset CP position to last known */
+ reg_write32 (CP_RB_WPTR, cpptr);
+ /* restore the saved control registers */
+ reg_write32 (CP_RB_CNTL, cprbcntl);
+ reg_write32 (CP_ME_CNTL, cpmecntl);
+
+ show_state (adapt);
+}
+
+
+/*
+ * Print a human readable snapshot of the GPU status registers to stdout.
+ *
+ *   adapt - adapter; adapt->chipset selects which bits/registers are tested
+ *
+ * Each status register is read once into 'reg' and then tested bit by bit;
+ * a line is printed for every test that matches.  The CP pointer summary
+ * and the section headings are printed only at verbose >= 1, the individual
+ * status lines always.
+ *
+ * Side effects besides the register reads: if SRBM_READ_ERROR reports an
+ * error, 1 is written to SRBM_INT_ACK and SRBM_INT_STATUS; if
+ * GRBM_READ_ERROR reports an error, 0 is written to GRBM_READ_ERROR
+ * (presumably to acknowledge/clear the condition -- TODO confirm).
+ *
+ * The STATE_TEST macros defined here stay defined after the function.
+ */
+void show_state (adapter_t *adapt)
+{
+    uint32_t reg;
+
+/*
+ * STATE_TEST (n, x, t): n is either empty or '!'.  Prints the test name
+ * (n and x stringified) and the text t if n (reg & x_bit) is true, i.e. if
+ * the bit is set (empty n) or clear ('!').  STATE_TESTi additionally prints
+ * the loop index i of the enclosing scope.
+ */
+#define STATE_TEST(n,x,t) do { if (n (reg & x ## _bit)) printf (" %-20s: %s\n", #n #x, t); } while (0)
+#define STATE_TESTi(n,x,t) do { if (n (reg & x ## _bit)) printf (" %-20s: %s #%d\n", #n #x, t, i); } while (0)
+
+    printf ("\n* Current GPU state\n\n");
+
+    if (verbose >= 1)
+        printf (" CP Read Ptr: 0x%04x CP Write Ptr: 0x%04x\n"
+                " MEQ Read Ptr: 0x%04x MEQ Write Ptr: 0x%04x\n"
+                " ROQ Read Ptr: 0x%04x ROQ Write Ptr: 0x%04x\n"
+                " ROQ IB1 Read Ptr: 0x%04x ROQ IB1 Write Ptr: 0x%04x\n"
+                " CMDFIFO_AVAIL (0-16): %d\n"
+                " ME STATUS: 0x%02x\n"
+                ,
+                reg_read32 (CP_RB_RPTR), reg_read32 (CP_RB_WPTR),
+                reg_read32 (CP_MEQ_STAT) & MEQ_RPTR_mask, (reg_read32 (CP_MEQ_STAT) & MEQ_WPTR_mask) >> MEQ_WPTR_shift,
+                reg_read32 (CP_ROQ_RB_STAT) & ROQ_RPTR_PRIMARY_mask, (reg_read32 (CP_ROQ_RB_STAT) & ROQ_WPTR_PRIMARY_mask) >> ROQ_WPTR_PRIMARY_shift,
+                reg_read32 (CP_ROQ_IB1_STAT) & ROQ_RPTR_INDIRECT1_mask, (reg_read32 (CP_ROQ_IB1_STAT) & ROQ_WPTR_INDIRECT1_mask) >> ROQ_WPTR_INDIRECT1_shift,
+                reg_read32 (GRBM_STATUS) & CMDFIFO_AVAIL_mask, reg_read32 (CP_ME_STATUS));
+
+    if (verbose >= 1)
+        printf (" GRBM_STATUS*:\n");
+    reg = reg_read32 (GRBM_STATUS);
+// Always on due to GRBM_STATUS request from CPU
+// STATE_TEST ( , SRBM_RQ_PENDING, "System Register Backbone request pending in GRBM");
+/// STATE_TEST ( , CP_RQ_PENDING, "Non-queued request from the CP pending in GRBM");
+    STATE_TEST ( , CF_RQ_PENDING, "Queued request from the CP pending in GRBM");
+    STATE_TEST ( , PF_RQ_PENDING, "Request from the CP`s Pre-Fetch Parser pending in GRBM");
+    STATE_TEST ( , GRBM_EE_BUSY, "GRBM Event Engine is busy");
+/// STATE_TEST ( , GRBM_STATUS__VC_BUSY, "Vertex Cache is busy");
+    STATE_TEST (!, DB03_CLEAN, "At least one of Depth Blocks (DB<0-3>) is not clean");
+    STATE_TEST (!, CB03_CLEAN, "At least one of Color Blocks (CB<0-3>) is not clean");
+    STATE_TEST ( , VGT_BUSY_NO_DMA, "VGT (excluding DMA) is busy");
+    STATE_TEST ( , GRBM_STATUS__VGT_BUSY, "Vertex Grouper Tessellator is busy");
+    if (adapt->chipset <= CHIPSET_RV670) {
+        STATE_TEST ( , TA03_BUSY, "At least one of Texture Pipes is busy");
+        STATE_TEST ( , GRBM_STATUS__TC_BUSY, "Texture Cache is busy");
+    } else {
+        STATE_TEST ( , R7XX_TA_BUSY, "At least one of Texture Pipes is busy");
+    }
+    STATE_TEST ( , SX_BUSY, "Shader Export is busy");
+    STATE_TEST ( , SH_BUSY, "Sequencer is busy");
+    STATE_TEST ( , SPI03_BUSY, "Any of the SPI`s are busy");
+    if (adapt->chipset <= CHIPSET_RV670) {
+        STATE_TEST ( , SMX_BUSY, "Shader Memory Export is busy");
+    }
+    STATE_TEST ( , SC_BUSY, "Scan Converter is busy");
+    STATE_TEST ( , PA_BUSY, "Primitive Assembly is busy");
+    STATE_TEST ( , DB03_BUSY, "Any of the Depth Blocks (DB<0-3>) are busy");
+    if (adapt->chipset <= CHIPSET_RV670) {
+        STATE_TEST ( , CR_BUSY, "Compose Rect block is busy");
+    }
+    STATE_TEST ( , CP_COHERENCY_BUSY, "Surface Coherency Logic is busy");
+    STATE_TEST ( , GRBM_STATUS__CP_BUSY, "Command Processor is busy");
+    STATE_TEST ( , CB03_BUSY, "Any of the Color Blocks (CB<0-3>) are busy");
+    STATE_TEST ( , GUI_ACTIVE, "Graphics Pipe is busy. This includes whether the CP and GRBM also have transactions");
+
+    reg = reg_read32 (GRBM_STATUS2);
+    if (adapt->chipset <= CHIPSET_RV670) {
+        STATE_TEST (!, CR_CLEAN, "Compose Rect block is not clean");
+    }
+    STATE_TEST (!, SMX_CLEAN, "Shader Memory Export is not clean");
+    STATE_TEST ( , SPI0_BUSY, "Shader Pipe Interpolator (SPI0) is busy");
+    STATE_TEST ( , SPI1_BUSY, "Shader Pipe Interpolator (SPI1) is busy");
+    STATE_TEST ( , SPI2_BUSY, "Shader Pipe Interpolator (SPI2) is busy");
+    STATE_TEST ( , SPI3_BUSY, "Shader Pipe Interpolator (SPI3) is busy");
+    STATE_TEST ( , TA0_BUSY, "Texture Pipe (TA0) is busy");
+    STATE_TEST ( , TA1_BUSY, "Texture Pipe (TA1) is busy");
+    STATE_TEST ( , TA2_BUSY, "Texture Pipe (TA2) is busy");
+    STATE_TEST ( , TA3_BUSY, "Texture Pipe (TA3) is busy");
+    STATE_TEST ( , DB0_BUSY, "Depth Block (DB0) is busy");
+    STATE_TEST ( , DB1_BUSY, "Depth Block (DB1) is busy");
+    STATE_TEST ( , DB2_BUSY, "Depth Block (DB2) is busy");
+    STATE_TEST ( , DB3_BUSY, "Depth Block (DB3) is busy");
+    STATE_TEST ( , CB0_BUSY, "Color Block (CB0) is busy");
+    STATE_TEST ( , CB1_BUSY, "Color Block (CB1) is busy");
+    STATE_TEST ( , CB2_BUSY, "Color Block (CB2) is busy");
+    STATE_TEST ( , CB3_BUSY, "Color Block (CB3) is busy");
+
+    if (verbose >= 1)
+        printf (" SRBM_STATUS:\n");
+    reg = reg_read32 (SRBM_STATUS);
+    STATE_TEST ( , RLC_RQ_PENDING , "There is an RLC request pending in the SRBM");
+    STATE_TEST ( , RCU_RQ_PENDING , "There is a RCU request pending in the SRBM");
+    STATE_TEST ( , GRBM_RQ_PENDING , "There is a GRBM request pending in the SRBM");
+// Always on due to SRBM_STATUS request from CPU?
+// STATE_TEST ( , HI_RQ_PENDING , "There is a HI/BIF request pending in the SRBM");
+// Always on due to SRBM_STATUS request from CPU?
+// STATE_TEST ( , IO_EXTERN_SIGNAL , "External IO Signal unconditioned");
+    STATE_TEST ( , VMC_BUSY , "Memory Controller Block VMC is Busy");
+    STATE_TEST ( , MCB_BUSY , "Memory Controller Block MCB is Busy");
+    STATE_TEST ( , MCDZ_BUSY , "Memory Controller Block MCDZ is Busy");
+    STATE_TEST ( , MCDY_BUSY , "Memory Controller Block MCDY is Busy");
+// Always on due to SRBM_STATUS request from CPU?
+// STATE_TEST ( , MCDX_BUSY , "Memory Controller Block MCDX is Busy");
+    STATE_TEST ( , MCDW_BUSY , "Memory Controller Block MCDW is Busy");
+    STATE_TEST ( , SEM_BUSY , "Semaphore Block is Busy");
+    STATE_TEST ( , SRBM_STATUS__RLC_BUSY, "Run List Controller is Busy");
+    STATE_TEST ( , PDMA_BUSY , "Paging DMA Block is Busy");
+    STATE_TEST ( , IH_BUSY , "Interrupt Handler is Busy");
+    STATE_TEST ( , CSC_BUSY , "Context Switch Controller is Busy");
+    STATE_TEST ( , CMC7_BUSY , "Context Memory Client7 is Busy");
+    STATE_TEST ( , CMC6_BUSY , "Context Memory Client6 is Busy");
+    STATE_TEST ( , CMC5_BUSY , "Context Memory Client5 is Busy");
+    STATE_TEST ( , CMC4_BUSY , "Context Memory Client4 is Busy");
+    STATE_TEST ( , CMC3_BUSY , "Context Memory Client3 is Busy");
+    STATE_TEST ( , CMC2_BUSY , "Context Memory Client2 is Busy");
+    STATE_TEST ( , CMC1_BUSY , "Context Memory Client1 is Busy");
+    STATE_TEST ( , CMC0_BUSY , "Context Memory Client0 is Busy");
+// Always on due to SRBM_STATUS request from CPU?
+// STATE_TEST ( , BIF_BUSY , "Bus Interface is Busy");
+    STATE_TEST ( , IDCT_BUSY , "IDCT is Busy");
+
+    if (verbose >= 1)
+        printf (" CP_STAT:\n");
+    reg = reg_read32 (CP_STAT);
+    STATE_TEST ( , CSF_RING_BUSY , "The Ring fetcher still has command buffer data to fetch or the PFP still has data left to process from the reorder queue");
+    STATE_TEST ( , CSF_WPTR_POLL_BUSY, "The Write Pointer polling logic is still polling for an updated write pointer");
+    STATE_TEST ( , CSF_INDIRECT1_BUSY, "The Indirect1 fetcher still has command buffer data to fetch or the PFP still has data left to process from the reorder queue");
+    STATE_TEST ( , CSF_INDIRECT2_BUSY, "The Indirect2 fetcher still has command buffer data to fetch or the PFP still has data left to process from the reorder queue");
+    STATE_TEST ( , CSF_STATE_BUSY , "The State fetcher still has command buffer data to fetch or the PFP still has data left to process from the reorder queue");
+    STATE_TEST ( , CSF_PREDICATE_BUSY, "The Predicate fetcher still has command buffer data to fetch or the PFP still has data left to process from the reorder queue");
+    STATE_TEST ( , CSF_BUSY , "The input FIFOs have command buffers to be fetched, or one or more of the fetchers are busy, or the arbiter has a request to send to the MIU");
+    STATE_TEST ( , MIU_RDREQ_BUSY , "The read path logic inside the MIU is busy");
+    STATE_TEST ( , MIU_WRREQ_BUSY , "The write path logic inside the MIU is busy");
+    STATE_TEST ( , ROQ_RING_BUSY , "The Ring portion of the reorder queue has valid data in it");
+    STATE_TEST ( , ROQ_INDIRECT1_BUSY, "The Indirect1 portion of the reorder queue has valid data in it");
+    STATE_TEST ( , ROQ_INDIRECT2_BUSY, "The Indirect2 portion of the reorder queue has valid data in it");
+    STATE_TEST ( , ROQ_STATE_BUSY , "The State data portion of the reorder queue has valid data in it");
+    STATE_TEST ( , ROQ_PREDICATE_BUSY, "The Predicate data portion of the reorder queue has valid data in it");
+    STATE_TEST ( , ROQ_ALIGN_BUSY , "The alignment logic inside the reorder queue is busy");
+    STATE_TEST ( , PFP_BUSY , "The Prefetch parser is busy processing PM4 packets");
+    STATE_TEST ( , MEQ_BUSY , "The PFP-to-ME queue has valid data in it");
+    STATE_TEST ( , ME_BUSY , "The MicroEngine is busy processing PM4 packets");
+    STATE_TEST ( , QUERY_BUSY , "The Query unit is busy");
+    STATE_TEST ( , SEMAPHORE_BUSY , "The Semaphore unit is busy");
+    STATE_TEST ( , INTERRUPT_BUSY , "The Interrupt unit is busy");
+    STATE_TEST ( , SURFACE_SYNC_BUSY , "The Surface Sync unit is busy");
+    STATE_TEST ( , DMA_BUSY , "The DMA is busy");
+    STATE_TEST ( , RCIU_BUSY , "The RCIU has pending read or write transactions to the GRBM");
+    STATE_TEST ( , CP_STAT__CP_BUSY , "Any block in the CP is busy");
+
+    if (verbose >= 1)
+        printf (" CP_BUSY_STAT:\n");
+    reg = reg_read32 (CP_BUSY_STAT);
+    STATE_TEST ( , REG_BUS_FIFO_BUSY , "There is a register bus transaction for the CP in the input FIFO");
+    STATE_TEST ( , RING_FETCHING_DATA , "There is still data to be fetched from the Ring Buffer");
+    STATE_TEST ( , INDR1_FETCHING_DATA , "There is still data to be fetched from the Indirect1 Command Buffer");
+    STATE_TEST ( , INDR2_FETCHING_DATA , "There is still data to be fetched from the Indirect2 Command Buffer");
+    STATE_TEST ( , STATE_FETCHING_DATA , "There is still Renderstate or Constant data to be fetched");
+    STATE_TEST ( , PRED_FETCHING_DATA , "There is still Predicate data to be fetched");
+    STATE_TEST ( , COHER_CNTR_NEQ_ZERO , "The Coherency Counter for managing Load & Set packet interaction is not zero");
+    STATE_TEST ( , PFP_PARSING_PACKETS , "The PFP`s packet count is not zero and it is therefore parsing a packet");
+    STATE_TEST ( , ME_PARSING_PACKETS , "The ME`s packet count is not zero and it is therefore parsing a packet");
+    STATE_TEST ( , RCIU_PFP_BUSY , "The PFP path in the RCIU has data to send");
+    STATE_TEST ( , RCIU_ME_BUSY , "The ME path in the RCIU has data to send");
+    STATE_TEST ( , OUTSTANDING_READ_TAGS , "The MIU is waiting for read request data to be returned from the MC");
+    STATE_TEST ( , SEM_CMDFIFO_NOT_EMPTY , "There is either Wait or Signal Semaphore commands waiting to be sent");
+    STATE_TEST ( , SEM_FAILED_AND_HOLDING, "The Wait Semaphore failed to pass within the programmed clock cycles and is holding for a context switch");
+    STATE_TEST ( , SEM_POLLING_FOR_PASS , "The Semaphore Unit is currently polling, waiting for its semaphore to get a Pass response");
+    /* Only the 3D test is chipset dependent; the 2D test always runs */
+    if (adapt->chipset > CHIPSET_RV670) { /* Seems to always be set on R6xx with activated 3D context */
+        STATE_TEST ( , _3D_BUSY , "There is at least one context allocated for 3D processing");
+    }
+    STATE_TEST ( , _2D_BUSY , "There is at least one context allocated for 2D processing");
+
+    if (verbose >= 1)
+        printf (" CP_INT_STATUS:\n");
+    reg = reg_read32 (CP_INT_STATUS);
+    STATE_TEST ( , DISABLE_CNTX_SWITCH_INT_STAT, "Interrupt Status for disabling context switching in the RLC. Write: 0 - No effect. 1 - Set Interrupt Status Bit. Default = 0");
+    STATE_TEST ( , ENABLE_CNTX_SWITCH_INT_STAT , "Interrupt Status for enabling context switching in the RLC (see bit 0 for definition)");
+    STATE_TEST ( , SEM_SIGNAL_INT_STAT , "Interrupt Status for Memory Semaphore Signal Interrupt (See bit 31 for definition). Set when CP performs a Signal that increments the memory semaphore from 0 to 1. This informs the scheduler that a consumer process can be scheduled and its WAIT is guaranteed to Pass. Only Valid for RV670");
+    STATE_TEST ( , CNTX_BUSY_INT_STAT , "Interrupt Status for Context Busy (!GUI Idle) Interrupt (See bit 31 for definition)");
+    STATE_TEST ( , CNTX_EMPTY_INT_STAT , "Interrupt Status for Context Empty (GUI Idle) Interrupt (See bit 31 for definition)");
+    STATE_TEST ( , WAITMEM_SEM_INT_STAT , "Interrupt Status for Wait Memory Semaphore Fault (See bit 31 for definition)");
+    STATE_TEST ( , PRIV_INSTR_INT_STAT , "Interrupt Status for Privileged Instruction Fault (See bit 31 for definition)");
+    STATE_TEST ( , PRIV_REG_INT_STAT , "Interrupt Status for Privileged Register Fault (See bit 31 for definition)");
+    STATE_TEST ( , OPCODE_ERROR_INT_STAT , "Interrupt Status for the Opcode Error (See bit 31 for definition)");
+    STATE_TEST ( , SCRATCH_INT_STAT , "Interrupt Status for the Scratch Register Compare Interrupt (See bit 31 for definition)");
+    STATE_TEST ( , TIME_STAMP_INT_STAT , "Interrupt Status for the Time Stamp Interrupt (See bit 31 for definition)");
+    STATE_TEST ( , RESERVED_BIT_ERROR_INT_STAT , "Interrupt Status for Reserved bits != 0 Error (non-functional mode) (See bit 31 for definition)");
+    STATE_TEST ( , DMA_INT_STAT , "Interrupt Status for the DMA Engine (See bit 31 for definition)");
+    STATE_TEST ( , IB2_INT_STAT , "Interrupt Status for CP_INTERRUPT packet in IB2 Stream (See bit 31 for definition)");
+    STATE_TEST ( , IB1_INT_STAT , "Interrupt Status for CP_INTERRUPT packet in IB1 Stream (See bit 31 for definition)");
+    STATE_TEST ( , RB_INT_STAT , "Interrupt Status for CP_INTERRUPT packet in Ring Buffer. Write: 0 - No effect. 1 - Set Interrupt Status Bit. Default = 0");
+
+    if (verbose >= 1)
+        printf (" CP_STALLED_STAT*:\n");
+    reg = reg_read32 (CP_STALLED_STAT1);
+    STATE_TEST ( , RBIU_TO_DMA_NOT_RDY_TO_RCV , "The RBIU cannot write to the DTAQ or SRC, DST, CMD registers");
+    STATE_TEST ( , RBIU_TO_IBS_NOT_RDY_TO_RCV , "The RBIU cannot write to the Indirect Buffer Base & Size registers");
+    STATE_TEST ( , RBIU_TO_SEM_NOT_RDY_TO_RCV , "The RBIU cannot write to the Semaphore units address FIFO");
+    STATE_TEST ( , RBIU_TO_2DREGS_NOT_RDY_TO_RCV, "The RBIU cannot write to the 2D registers in the Scratch RAM");
+    STATE_TEST ( , RBIU_TO_MEMWR_NOT_RDY_TO_RCV , "The RBIU cannot write to the Address & Data FIFOs to initiate a write to memory");
+    STATE_TEST ( , RBIU_TO_MEMRD_NOT_RDY_TO_RCV , "The RBIU cannot write to the Address & Data registers to initiate a read from memory");
+    STATE_TEST ( , RBIU_TO_EOPD_NOT_RDY_TO_RCV , "The RBIU cannot write to the EOP Done FIFO");
+    STATE_TEST ( , RBIU_TO_RECT_NOT_RDY_TO_RCV , "The RBIU cannot write to the Rect Done FIFO");
+    STATE_TEST ( , RBIU_TO_STRMO_NOT_RDY_TO_RCV , "The RBIU cannot write to the Stream Out address FIFO");
+    STATE_TEST ( , RBIU_TO_PSTAT_NOT_RDY_TO_RCV , "The RBIU cannot write to the Pipeline Statistics address FIFO");
+    STATE_TEST ( , MIU_WAITING_ON_RDREQ_FREE , "The MIU read request logic is stalled waiting for Free signals to come back from the MC");
+    STATE_TEST ( , MIU_WAITING_ON_WRREQ_FREE , "The MIU write request logic is stalled waiting for Free signals to come back from the MC");
+    STATE_TEST ( , MIU_NEEDS_AVAIL_WRREQ_PHASE , "The MIU write request logic is stalled waiting for an available phase for write confirmations");
+    STATE_TEST ( , RCIU_WAITING_ON_GRBM_FREE , "The RCIU is stalled waiting for Free signals to come back from the GRBM");
+    STATE_TEST ( , RCIU_WAITING_ON_VGT_FREE , "The RCIU is stalled waiting for VGT Free signals to come back from the GRBM");
+    STATE_TEST ( , RCIU_STALLED_ON_ME_READ , "The RCIU is stalled waiting for register read data to come back for the ME");
+    STATE_TEST ( , RCIU_STALLED_ON_DMA_READ , "The RCIU is stalled waiting for register read data to come back for the DMA");
+    STATE_TEST ( , RCIU_HALTED_BY_REG_VIOLATION , "The RCIU has been halted because of a register violation (a write to a privileged register from a non-privileged command buffer)");
+
+    reg = reg_read32 (CP_STALLED_STAT2);
+    STATE_TEST ( , PFP_TO_CSF_NOT_RDY_TO_RCV , "The PFP is stalled trying to write to the I1, I2, State or Predicate FIFOs");
+    STATE_TEST ( , PFP_TO_MEQ_NOT_RDY_TO_RCV , "The PFP is stalled trying to write to the MEQ");
+    STATE_TEST ( , PFP_TO_VGT_NOT_RDY_TO_RCV , "The PFP is stalled trying to write to the VGT");
+    STATE_TEST ( , PFP_HALTED_BY_INSTR_VIOLATION, "The PFP has been halted because of an instruction violation (Priv IB called from a non-priv IB)");
+    STATE_TEST ( , MULTIPASS_IB_PENDING_IN_PFP , "There is a multipass IB pending in the PFP and it is therefore waiting for the Loop or Continue command from the SC");
+    STATE_TEST ( , ME_BRUSH_WC_NOT_RDY_TO_RCV , "There are 32 pending Brush writes with write confirms pending and the ME is stalled trying to write another Brush write confirm");
+    STATE_TEST ( , ME_STALLED_ON_BRUSH_LOGIC , "The ME is either stalled trying to initiate the Brush logic which is currently active or it is stalled waiting for the Brush logic to finish generating writes before sending another write to the RCIU");
+    STATE_TEST ( , CR_CNTX_NOT_AVAIL_TO_ME , "All CR contexts are active and the ME is stalled waiting for a free CR context");
+    STATE_TEST ( , GFX_CNTX_NOT_AVAIL_TO_ME , "All GFX contexts are active and the ME is stalled waiting for a free GFX context");
+    STATE_TEST ( , ME_RCIU_NOT_RDY_TO_RCV , "The ME is stalled trying to write to the RCIU");
+    STATE_TEST ( , ME_TO_CONST_NOT_RDY_TO_RCV , "The ME is stalled trying to write down its Constant path");
+    if (adapt->chipset <= CHIPSET_RV670) {
+        // On if CP is idle?
+        // STATE_TEST ( , ME_WAITING_DATA_FROM_PFP , "The ME is expecting data from the PFP, but there is none in the PFP-to-ME queue");
+        STATE_TEST (!, ME_WAITING_DATA_FROM_PFP , "The ME is expecting data from the PFP, and there is some in the PFP-to-ME queue. Indicates CP is not idle.");
+    }
+    STATE_TEST ( , ME_WAITING_ON_PARTIAL_FLUSH , "The ME sent out a Partial Flush event and is waiting for a response from the SPI (SPI_CP_partial_flush)");
+    STATE_TEST ( , RECT_FIFO_NEEDS_CR_RECT_DONE , "The Rect Done FIFO has data to send, but it is waiting for a CR_RECT_DONE pulse");
+    STATE_TEST ( , RECT_FIFO_NEEDS_WR_CONFIRM , "The Rect Done FIFO is waiting for a write confirm before popping the FIFO");
+    STATE_TEST ( , EOPD_FIFO_NEEDS_SC_EOP_DONE , "The Eop Done FIFO has data to send, but it is waiting for a SC_EOP_DONE pulse");
+    STATE_TEST ( , EOPD_FIFO_NEEDS_SMX_EOP_DONE , "The Eop Done FIFO has data to send, but it is waiting for a SMX_EOP_DONE pulse");
+    STATE_TEST ( , EOPD_FIFO_NEEDS_WR_CONFIRM , "The Eop Done FIFO is waiting for a write confirm before popping the FIFO");
+    STATE_TEST ( , EOPD_FIFO_NEEDS_SIGNAL_SEM , "The Eop Done FIFO is waiting for Signal Sem results before popping the FIFO");
+    STATE_TEST ( , SO_NUMPRIM_FIFO_NEEDS_SOADDR , "The Stream Out FIFOs have the Num_Prim data and are waiting for an address");
+    STATE_TEST ( , SO_NUMPRIM_FIFO_NEEDS_NUMPRIM, "The Stream Out FIFOs have the address and are waiting for Num_Prim data");
+    STATE_TEST ( , PIPE_STATS_FIFO_NEEDS_SAMPLE , "The Pipeline Statistics FIFOs are waiting for a Sample signal from one of the reporting blocks");
+    STATE_TEST ( , SURF_SYNC_NEEDS_IDLE_CNTXS , "The Surface Sync logic is waiting for all active contexts to go idle");
+    STATE_TEST ( , SURF_SYNC_NEEDS_ALL_CLEAN , "The Surface Sync logic is waiting for all of the Clean signals to be asserted");
+
+    if (verbose >= 1)
+        printf (" VGT_CNTL_STATUS:\n");
+    reg = reg_read32 (VGT_CNTL_STATUS);
+    STATE_TEST ( , VGT_OUT_INDX_BUSY , "If set, the Output Index block within the VGT is busy");
+    STATE_TEST ( , VGT_OUT_BUSY , "If set, the Output block within the VGT is busy");
+    STATE_TEST ( , VGT_PT_BUSY , "If set, the Pass-thru block within the VGT is busy");
+    STATE_TEST ( , VGT_TE_BUSY , "If set, the Tessellation Engine block within the VGT is busy");
+    STATE_TEST ( , VGT_VR_BUSY , "If set, the Vertex Reuse Block within the VGT is busy");
+    STATE_TEST ( , VGT_GRP_BUSY , "If set, the Grouper Block within the VGT is busy");
+    STATE_TEST ( , VGT_DMA_REQ_BUSY , "If set, the VGT DMA is busy requesting");
+    STATE_TEST ( , VGT_DMA_BUSY , "If set, the VGT DMA is busy");
+    STATE_TEST ( , VGT_GS_BUSY , "If set, VGT GS is actively processing");
+    STATE_TEST ( , VGT_BUSY , "If set, VGT is Busy");
+
+    if (verbose >= 1)
+        printf (" Vertex + Texture:\n");
+    if (adapt->chipset >= CHIPSET_RV630) {
+        reg = reg_read32 (VC_CNTL_STATUS);
+        STATE_TEST ( , RP_BUSY, "Vertex Cache Request Processor is busy");
+        STATE_TEST ( , RG_BUSY, "Vertex Cache Request Generator is busy");
+        STATE_TEST ( , VC_BUSY, "Vertex Cache is busy");
+        reg = reg_read32 (TC_STATUS);
+        STATE_TEST ( , TC_BUSY, "Texture Cache is busy");
+    }
+
+    if (adapt->chipset == CHIPSET_R600) {
+        /* Registers apparently not available on RV630 already */
+        int i;
+        /* per-unit status registers, 4 bytes apart */
+        for (i = 0; i < TD0_STATUS_num; i++) {
+            reg = reg_read32 (TD0_STATUS + i * 4);
+            STATE_TESTi ( , BUSY, "Texture Data is busy");
+        }
+        for (i = 0; i < 4; i++) {
+            reg = reg_read32 (TA0_STATUS + i*4);
+            STATE_TESTi ( , FG_PFIFO_EMPTYB , "Gradient FIFO state, pipeline fifo not empty");
+            STATE_TESTi ( , FG_LFIFO_EMPTYB , "Gradient FIFO state, latency fifo not empty");
+            STATE_TESTi ( , FG_SFIFO_EMPTYB , "Gradient FIFO state, state fifo not empty");
+            STATE_TESTi ( , FL_PFIFO_EMPTYB , "LOD FIFO state, pipeline fifo not empty");
+            STATE_TESTi ( , FL_LFIFO_EMPTYB , "LOD FIFO state, latency fifo not empty");
+            STATE_TESTi ( , FL_SFIFO_EMPTYB , "LOD FIFO state, state fifo not empty");
+            STATE_TESTi ( , FA_PFIFO_EMPTYB , "Addresser FIFO state, pipeline fifo not empty");
+            STATE_TESTi ( , FA_LFIFO_EMPTYB , "Addresser FIFO state, latency fifo not empty");
+            STATE_TESTi ( , FA_SFIFO_EMPTYB , "Addresser FIFO state, state fifo not empty");
+            STATE_TESTi ( , IN_BUSY , "Input/LOD(Deriv) busy");
+            STATE_TESTi ( , FG_BUSY , "Gradient FIFO busy");
+            STATE_TESTi ( , FL_BUSY , "LOD FIFO busy");
+            STATE_TESTi ( , TA_BUSY , "Addresser busy");
+            STATE_TESTi ( , FA_BUSY , "Addresser FIFO busy");
+            STATE_TESTi ( , AL_BUSY , "Aligner busy");
+            STATE_TESTi ( , BUSY , "Global TA busy");
+        }
+    }
+
+    if (verbose >= 1)
+        printf (" Various:\n");
+    reg = reg_read32 (PA_CL_CNTL_STATUS);
+    STATE_TEST ( , CL_BUSY, "PA Clipping Unit is busy");
+    reg = reg_read32 (PA_SU_CNTL_STATUS);
+    STATE_TEST ( , SU_BUSY, "PA SU (Rasterizer?) Unit is busy");
+
+    if (verbose >= 1)
+        printf (" Read Errors:\n");
+    reg = reg_read32 (SRBM_READ_ERROR);
+    if (reg & READ_ERROR_bit) {
+        /* the address field is shifted left by 2 for display */
+        printf (" * SRBM READ ERROR indicated:\n Target Adr 0x%04x by %s%s%s%s\n",
+                (reg & READ_ADDRESS_mask) >> READ_ADDRESS_shift << 2,
+                reg & READ_REQUESTER_HI_bit ? "HI/BIF ":"",
+                reg & READ_REQUESTER_GRBM_bit ? "GRBM ":"",
+                reg & READ_REQUESTER_RCU_bit ? "RCU ":"",
+                reg & READ_REQUESTER_RLC_bit ? "RLC ":"");
+        reg_write32 (SRBM_INT_ACK, 1);
+        reg_write32 (SRBM_INT_STATUS, 1);
+    }
+    reg = reg_read32 (GRBM_READ_ERROR);
+    if (reg & READ_ERROR_bit) {
+        printf (" * GRBM READ ERROR indicated:\n Target Adr 0x%04x by %s%s%s\n",
+                (reg & READ_ADDRESS_mask) >> READ_ADDRESS_shift << 2,
+                reg & READ_REQUESTER_SRBM_bit ? "SRBM ":"",
+                reg & READ_REQUESTER_CP_bit ? "CP ":"",
+                reg & READ_REQUESTER_WU_POLL_bit ? "WU_POLL ":"");
+        reg_write32 (GRBM_READ_ERROR, 0);
+    }
+
+    if (verbose >= 1)
+        printf ("\n");
+}
+
+
+/*
+ * Acknowledge SRBM/GRBM read errors that are still latched from an earlier
+ * register access, printing a notice for each one found.  Called before a
+ * new access so that an error seen afterwards belongs to that access.
+ */
+static void clear_stale_read_errors (void)
+{
+    if (reg_read32 (SRBM_READ_ERROR) & READ_ERROR_bit) {
+        printf (" * uncleared SRBM READ ERROR by previous call\n");
+        reg_write32 (SRBM_INT_ACK, 1);
+        reg_write32 (SRBM_INT_STATUS, 1);
+    }
+    if (reg_read32 (GRBM_READ_ERROR) & READ_ERROR_bit) {
+        printf (" * uncleared GRBM READ ERROR by previous call\n");
+        reg_write32 (GRBM_READ_ERROR, 0);
+    }
+}
+
+/*
+ * Check whether the access to register 'reg' that was just performed latched
+ * an SRBM or GRBM read error.  If so, print a "not accessible" line for 'reg'
+ * and acknowledge the error.  SRBM is checked first; when it reports an error
+ * the GRBM register is not read at all.
+ *
+ * Returns 1 if an error was reported, 0 otherwise.
+ */
+static int report_read_error (uint32_t reg)
+{
+    if (reg_read32 (SRBM_READ_ERROR) & READ_ERROR_bit) {
+        printf (" %08x (# %04x) := not accessible (SRBM)\n", reg, reg >> 2);
+        reg_write32 (SRBM_INT_ACK, 1);
+        reg_write32 (SRBM_INT_STATUS, 1);
+        return 1;
+    }
+    if (reg_read32 (GRBM_READ_ERROR) & READ_ERROR_bit) {
+        printf (" %08x (# %04x) := not accessible (GRBM)\n", reg, reg >> 2);
+        reg_write32 (GRBM_READ_ERROR, 0);
+        return 1;
+    }
+    return 0;
+}
+
+
+/*
+ * Read register 'reg' and print its byte offset, its dword index (reg >> 2)
+ * and its value.  The register is selected through MM_INDEX and read through
+ * MM_DATA.  If the read latches an SRBM/GRBM read error, a "not accessible"
+ * line is printed instead and the error is acknowledged.
+ *
+ * 'adapt' is only used by the disabled (#if 0) variant below.
+ */
+void dump_register (adapter_t *adapt, uint32_t reg)
+{
+    uint32_t val;
+
+#if 0
+    if (reg < 65536) {
+        val = reg_read32 (reg);
+    } else {
+        /* Let the GPU copy the register, because we will not be able to access it directly */
+        reg_write32 (SCRATCH_REG6, 0xdeadbeef);
+        pack3 (IT_COPY_DW, 5);
+        e32 (0x00000000); /* reg to reg copy */
+        e32 (reg>>2);
+        e32 (0);
+        e32 (SCRATCH_REG7>>2);
+        e32 (0);
+        ereg (SCRATCH_REG6, 0xfeedface);
+        flush_cmds ();
+        wait_reg (adapt, SCRATCH_REG6, 0xfeedface, "CP+DMA: PACK3: scratch");
+        val = reg_read32 (SCRATCH_REG7);
+    }
+#else
+    clear_stale_read_errors ();
+    reg_write32 (MM_INDEX, reg);
+    val = reg_read32 (MM_DATA);
+    if (report_read_error (reg))
+        return;
+#endif
+
+    printf (" %08x (# %04x) := %08x\n", reg, reg >> 2, val);
+}
+
+
+/*
+ * Write 'val' to register 'reg' and print the new and the previous value.
+ * The register is selected through MM_INDEX; the old value is read from
+ * MM_DATA before the new one is written to it.  The error check runs only
+ * after the write has been issued; if an SRBM/GRBM read error is latched
+ * at that point, a "not accessible" line is printed instead and the error
+ * is acknowledged.
+ */
+void write_register (adapter_t *adapt, uint32_t reg, uint32_t val)
+{
+    uint32_t oldval;
+
+    clear_stale_read_errors ();
+    reg_write32 (MM_INDEX, reg);
+    oldval = reg_read32 (MM_DATA);
+    reg_write32 (MM_DATA, val);
+    if (report_read_error (reg))
+        return;
+
+    printf (" %08x (# %04x) set to %08x (old %08x)\n", reg, reg >> 2, val, oldval);
+}
+
+
diff --git a/r600_lib.h b/r600_lib.h
new file mode 100644
index 0000000..88041d1
--- /dev/null
+++ b/r600_lib.h
@@ -0,0 +1,140 @@
+/*
+ * r600_demo
+ *
+ * Copyright (C) 2008-2009 Matthias Hopf
+ *
+ * Based on r300_demo,
+ * Copyright (C) various authors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __R600_LIB_H__
+#define __R600_LIB_H__
+
+/*
+ * Helper routines, to be split up
+ */
+
+#include <inttypes.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+
+/*
+ * printf conversion specifications for int64_t / uint64_t arguments.
+ * Built from the <inttypes.h> macros so they match the actual 64-bit
+ * typedefs on every platform, instead of guessing from sizeof(long).
+ * Use by string concatenation, e.g.  printf ("addr " PRINTF_UINT64_HEX "\n", a);
+ */
+#define PRINTF_INT64      "%" PRId64
+#define PRINTF_UINT64     "%" PRIu64
+/* Yes, 64bit need 16 characters, but typically 10 are enough (mostly addresses) */
+#define PRINTF_UINT64_HEX "%010" PRIx64
+
+
+/* Zero-fill the object x (pass the object itself, not a pointer to it). */
+#define CLEAR(x) memset (&(x), 0, sizeof(x))
+
+
+/* Options */
+/* Global option flags; defined outside this header. */
+/* NOTE(review): presumably set from the command line by the demo's main program - confirm. */
+extern int verbose; /* verbosity level; e.g. show_state() prints its section headings when >= 1 */
+extern int use_ring_directly;
+extern int vertex_format;
+extern int vertex_load_as_int;
+extern int vertex_unsigned;
+extern int vertex_alu_scale_override;
+extern int do_not_flush;
+
+
+/*
+ * Chipset identifiers, stored in adapter_t.chipset.
+ * The declaration order is significant: code compares these values with
+ * relational operators (e.g. show_state() reads VC_CNTL_STATUS and TC_STATUS
+ * only when chipset >= CHIPSET_RV630), so new entries must be inserted at
+ * the position that keeps such range tests correct.
+ */
+enum chipset_e {
+ CHIPSET_NONE = 0,
+ CHIPSET_R600,
+ CHIPSET_RV610, CHIPSET_RV620, CHIPSET_RS780, // no VC
+ CHIPSET_RV630, CHIPSET_RV635, CHIPSET_RV670,
+ CHIPSET_RV770, CHIPSET_RV710, CHIPSET_RV730
+};
+
+/* TODO: Should go to _hwapi.h, probably renamed context_t */
+/*
+ * Per-device state passed to most routines declared in this header.
+ * NOTE(review): the "void *" / "uint64_t ..._gpu" member pairs presumably follow
+ * the same convention as vtx/tex below (pointer = CPU mapping, uint64_t = GPU
+ * address) - confirm.
+ * NOTE(review): units of the *_pitch members (pixels vs. bytes) are not visible
+ * here - confirm against the users before relying on them.
+ */
+typedef struct {
+ int chipID;
+ enum chipset_e chipset; /* tested with ==, >= to select chip-specific code paths */
+
+ void *framebuffer;
+ uint64_t framebuffer_gpu;
+
+ void *display;
+ uint64_t display_gpu;
+ uint32_t display_pitch;
+ uint32_t display_width;
+ uint32_t display_height;
+
+ uint64_t color_gpu;
+ uint32_t color_pitch;
+ uint32_t color_height;
+ uint64_t depth_gpu;
+ uint32_t depth_pitch;
+ uint32_t depth_height;
+} adapter_t;
+
+/* TODO: Should go to _hwapi.h, probably moved to adapter_t / context_t */
+/* Buffers of at least 64000 bytes, for vertices and textures */
+/* should be allocated in garttex, but due to bugs in DRM currently in buffer space */
+/* pointers are CPU space, uint64 values GPU space */
+extern uint32_t *vtx, *tex;
+extern uint64_t vtx_gpu, tex_gpu;
+
+
+void flush_gpu_input_cache (void);
+void flush_gpu_output_cache (void);
+
+uint64_t upload (adapter_t *adapt, void *shader, int size, int offset);
+void dump_shader (adapter_t *adapt, uint32_t *shader, int size, char *what);
+
+/* FIXME: naming: this waits on the CPU, while the others let the CP wait only */
+/* NOTE(review): presumably waits until register reg reads v; "when" looks like a label used in diagnostics - confirm in r600_lib.c */
+void wait_reg (adapter_t *adapt, uint32_t reg, uint32_t v, const char *when);
+
+void wait_3d_idle_clean(void);
+void wait_3d_idle(void);
+
+void simple_clear (adapter_t* adapt);
+
+void soft_reset (adapter_t *adapt);
+/* Reads GPU status registers (e.g. VGT_CNTL_STATUS), evaluates their bits with STATE_TEST,
+ * and reports and acknowledges latched SRBM/GRBM read errors. */
+void show_state (adapter_t *adapt);
+
+/* Reads register reg through MM_INDEX/MM_DATA and prints its value,
+ * or a "not accessible" line if the read latched an SRBM/GRBM read error. */
+void dump_register (adapter_t *adapt, uint32_t reg);
+/* Writes val to register reg through MM_INDEX/MM_DATA and prints the new and the previous value,
+ * or a "not accessible" line if an SRBM/GRBM read error was latched. */
+void write_register (adapter_t *adapt, uint32_t reg, uint32_t val);
+
+
+/* Demo functions */
+/* r600_basic.c : */
+void test_cp (adapter_t *);
+void test_packets (adapter_t *);
+/* r600_triangles.c : */
+void tri_test_2d (adapter_t *);
+void tri_test_3d (adapter_t *);
+void pm4play_tri_test (adapter_t *);
+void pm4play_clear_test (adapter_t *);
+void pm4play_blit_test (adapter_t *);
+/* r600_texture.c : */
+void quad_test_tex_scaled(adapter_t *);
+void tmp_test (adapter_t *);
+/* r600_exa.c : */
+void test_solid(adapter_t *adapt);
+void test_copy(adapter_t *adapt);
+
+
+#endif
diff --git a/r600_pm4.c b/r600_pm4.c
new file mode 100644
index 0000000..3799786
--- /dev/null
+++ b/r600_pm4.c
@@ -0,0 +1,5699 @@
+/*
+ * RadeonHD R6xx, R7xx DRI driver
+ *
+ * Copyright (C) 2008-2009 Alexander Deucher
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * PM4 tests, captured + cleaned up from test utilities at ATI
+ * Base for all other cleaned up tests.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include "r600_reg.h"
+#include "r600_emit.h"
+#include "r600_lib.h"
+#include "r600_state.h"
+#include "r600_init.h"
+#include "r600_shader.h"
+
+
+/*
+ * Simple triangle test
+ */
+
+// 4 DWs per const
+// 20 constants (80 floats): constant 0, eleven all-zero constants, then two
+// groups of four constants laid out as 4x4 matrices.
+// NOTE(review): the vertex shader in pm4play_tri_test_r6xx() uses SRC1_SEL(272..275)
+// and SRC1_SEL(268), (269), (270), ... in its DOT4 groups.  If constant-file
+// selects start at 256, these are constants 16..19 and 12..15 of this table - confirm.
+static float tri_vs_alu_consts[] = {
+ 0.5, 0.0, 2.0, 4.0, // const 0
+ 0.0, 0.0, 0.0, 0.0, // const 1
+ 0.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 0.0, // const 11
+ 2.0, 0.0, 0.0, -1.0, // const 12: first row of a 4x4 scale/offset matrix
+ 0.0, -2.0, 0.0, 1.0,
+ 0.0, 0.0, -1.0, -0.0,
+ 0.0, 0.0, 0.0, 1.0, // const 15
+ 0.9999999403953552, 0.0, 0.0, -0.0, // const 16: first row of a near-identity 4x4 matrix
+ 0.0, 0.9999998807907104, 0.0, -0.0,
+ 0.0, 0.0, 0.9999999403953552, -0.0,
+ 0.0, 0.0, 0.0, 1.0, // const 19
+};
+
+// A single 4-DW constant with all components 1.0.
+// NOTE(review): presumably uploaded as the pixel shader's ALU constant 0 - its use is not visible here, confirm.
+static float tri_ps_alu_consts[] = {
+ 1.0, 1.0, 1.0, 1.0,
+};
+
+void pm4play_tri_test_r6xx(adapter_t *adapt)
+{
+ uint32_t fs[] = {
+ // CF INST 0
+ CF_DWORD0(ADDR(2)),
+ CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ COUNT(2),
+ CALL_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_VTX),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1)),
+ // CF INST 1
+ CF_DWORD0(ADDR(0)),
+ CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ COUNT(0),
+ CALL_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_RETURN),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1)),
+ // VTX clause INST 0
+ VTX_DWORD0(VTX_INST(SQ_VTX_INST_SEMANTIC),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(160),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ MEGA_FETCH_COUNT(16)),
+ VTX_DWORD1_SEM(SEMANTIC_ID(0),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_Z),
+ DST_SEL_W(SQ_SEL_1),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_32_32_32_FLOAT),
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM),
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)),
+ VTX_DWORD2(OFFSET(0),
+ ENDIAN_SWAP(ENDIAN_NONE),
+ CONST_BUF_NO_STRIDE(0),
+ MEGA_FETCH(1)),
+ VTX_DWORD_PAD,
+ // VTX clause INST 1
+ VTX_DWORD0(VTX_INST(SQ_VTX_INST_SEMANTIC),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(160),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ MEGA_FETCH_COUNT(4)),
+ VTX_DWORD1_SEM(SEMANTIC_ID(10),
+ DST_SEL_X(SQ_SEL_Z),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_X),
+ DST_SEL_W(SQ_SEL_W),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_8_8_8_8),
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM),
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_UNSIGNED),
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)),
+ VTX_DWORD2(OFFSET(12),
+ ENDIAN_SWAP(ENDIAN_NONE),
+ CONST_BUF_NO_STRIDE(0),
+ MEGA_FETCH(0)),
+ VTX_DWORD_PAD,
+ };
+
+ uint32_t vs[] = {
+ // CF INST 0
+ CF_ALU_DWORD0(ADDR(5),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(0)),
+ CF_ALU_DWORD1(KCACHE_MODE1(0),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ COUNT(8),
+ USES_WATERFALL(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1)),
+ // CF INST 1
+ CF_DWORD0(ADDR(0)),
+ CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ COUNT(0),
+ CALL_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_CALL_FS),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(0)),
+ // CF INST 2
+ CF_ALU_DWORD0(ADDR(13),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(0)),
+ CF_ALU_DWORD1(KCACHE_MODE1(0),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ COUNT(36),
+ USES_WATERFALL(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1)),
+ // CF INST 3
+ CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
+ TYPE(SQ_EXPORT_POS),
+ RW_GPR(1),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0)),
+ CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ R6xx_ELEM_LOOP(0),
+ BURST_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1)),
+ // CF INST 4
+ CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
+ TYPE(SQ_EXPORT_PARAM),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0)),
+ CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ R6xx_ELEM_LOOP(0),
+ BURST_COUNT(0),
+ END_OF_PROGRAM(1),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(0)),
+ // ALU clause INST 0
+ ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0)),
+ // ALU clause INST 1
+ ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0)),
+ // ALU clause INST 2
+ ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0)),
+ // ALU clause INST 3
+ ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0)),
+ // ALU clause INST 4
+ ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0)),
+ // ALU clause INST 5
+ ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0)),
+ // ALU clause INST 6
+ ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0)),
+ // ALU clause INST 7
+ ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0)),
+ // ALU clause INST 0
+ ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(272), //cfile
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(127),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0)),
+ ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(272), //cfile
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0)),
+ ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(272), //cfile
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0)),
+ ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(272),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0)),
+ ALU_DWORD0(SRC0_SEL(2),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(1)),
+ ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(273),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0)),
+ ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(273),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_PRED_SEL_OFF),
+ LAST(0)
+ ),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(127),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0)),
+ ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(273),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0)),
+ ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(273),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0)),
+ ALU_DWORD0(SRC0_SEL(2),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_PRED_SEL_OFF),
+ LAST(1)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(1)),
+ ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(274),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0)),
+ ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(274),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0)),
+ ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(274),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(127),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0)),
+ ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(274),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0)),
+ ALU_DWORD0(SRC0_SEL(2),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_PRED_SEL_OFF),
+ LAST(1)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(1)),
+ ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(275),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0)),
+ ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(275),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0)),
+ ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(275),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0)),
+ ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(275),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(127),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0)),
+ ALU_DWORD0(SRC0_SEL(2),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_PRED_SEL_OFF),
+ LAST(1)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(1)),
+ ALU_DWORD0(SRC0_SEL(127),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(268),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0)),
+ ALU_DWORD0(SRC0_SEL(127),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(268),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0)),
+ ALU_DWORD0(SRC0_SEL(127),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(268),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_PRED_SEL_OFF),
+ LAST(0)
+ ),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0)),
+ ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_PV),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(268),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_PRED_SEL_OFF),
+ LAST(1)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0)),
+ ALU_DWORD0(SRC0_SEL(127),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(269),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0)),
+ ALU_DWORD0(SRC0_SEL(127),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(269),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0)),
+ ALU_DWORD0(SRC0_SEL(127),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(269),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0)),
+ ALU_DWORD0(SRC0_SEL(127),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(269),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_PRED_SEL_OFF),
+ LAST(1)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0)),
+ ALU_DWORD0(SRC0_SEL(127),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(270),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0)),
+ ALU_DWORD0(SRC0_SEL(127),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(270),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0)),
+ ALU_DWORD0(SRC0_SEL(127),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(270),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0)),
+ ALU_DWORD0(SRC0_SEL(127),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(270),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_PRED_SEL_OFF),
+ LAST(1)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0)),
+ ALU_DWORD0(SRC0_SEL(127),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(271),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0)),
+ ALU_DWORD0(SRC0_SEL(127),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(271),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0)),
+ ALU_DWORD0(SRC0_SEL(127),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(271),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0)),
+ ALU_DWORD0(SRC0_SEL(127),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(271),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_PRED_SEL_OFF),
+ LAST(1)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0)),
+ };
+
+ uint32_t ps[] = { // pixel shader microcode: 2 CF instructions followed by a 4-instruction ALU clause
+ // CF INST 0: run the ALU clause at ADDR(2) (it follows the two CF instructions; presumably a 64-bit-word offset - confirm); COUNT(4) matches the four ALU instructions below
+ CF_ALU_DWORD0(ADDR(2),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_NOP)),
+ CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ COUNT(4),
+ USES_WATERFALL(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1)),
+ // CF INST 1: pixel export (SQ_EXPORT_PIXEL, EXPORT_DONE) of GPR 0 with an XYZW swizzle to CF_PIXEL_MRT0; END_OF_PROGRAM(1) terminates the shader
+ CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+ TYPE(SQ_EXPORT_PIXEL),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(1)),
+ CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ R6xx_ELEM_LOOP(0),
+ BURST_COUNT(1),
+ END_OF_PROGRAM(1),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1)),
+ // ALU clause INST 0: MOV source select 0, element X -> DST_GPR 0, element X, with CLAMP(1)
+ ALU_DWORD0(SRC0_SEL(0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(1)),
+ // ALU clause INST 1: same MOV for element Y
+ ALU_DWORD0(SRC0_SEL(0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(1)),
+ // ALU clause INST 2: same MOV for element Z
+ ALU_DWORD0(SRC0_SEL(0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(1)),
+ // ALU clause INST 3: same MOV for element W
+ ALU_DWORD0(SRC0_SEL(0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)), // NOTE(review): final instruction of the clause still has LAST(0), while the vs[] tables end each 4-slot group with LAST(1) - confirm this is intended
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(1)),
+ };
+
+ uint32_t vb[] = { // vertex data: three vertices of four 32-bit words each
+ 0x3F000000,0x00000000,0x00000000,0xFFFF0000, // X, Y, Z, ARGB: (0.5, 0.0, 0.0) if the position words are read as IEEE-754 floats; A=FF R=FF G=00 B=00
+ 0x00000000,0x3F800000,0x00000000,0xFF00FF00, // X, Y, Z, ARGB: (0.0, 1.0, 0.0) if the position words are read as IEEE-754 floats; A=FF R=00 G=FF B=00
+ 0x3F800000,0x3F800000,0x00000000,0xFF0000FF, // X, Y, Z, ARGB: (1.0, 1.0, 0.0) if the position words are read as IEEE-754 floats; A=FF R=00 G=00 B=FF
+ };
+
+
+ int surface_w = adapt->color_pitch;
+ int surface_h = adapt->color_height;
+ int i;
+ uint64_t vb_addr;
+ uint64_t fs_addr, vs_addr, ps_addr;
+
+ draw_config_t draw_conf;
+ uint32_t indices[3] = { 0, 1, 2 };
+
+
+ vtx_resource_t vtx_res;
+ cb_config_t cb_conf;
+ tex_resource_t tex_res;
+ tex_sampler_t tex_samp;
+ shader_config_t fs_conf, vs_conf, ps_conf;
+
+
+ printf ("\n* PM4 Play Tri Test r6xx\n\n");
+
+ memset(&fs_conf, 0, sizeof(shader_config_t));
+ memset(&vs_conf, 0, sizeof(shader_config_t));
+ memset(&ps_conf, 0, sizeof(shader_config_t));
+ memset(&cb_conf, 0, sizeof(cb_config_t));
+ memset(&vtx_res, 0, sizeof(vtx_resource_t));
+ memset(&tex_res, 0, sizeof(tex_resource_t));
+ memset(&tex_samp, 0, sizeof(tex_sampler_t));
+
+ draw_conf.prim_type = 0x4; // 0x4 triangle list
+ // 0x11 rect list
+ draw_conf.vgt_draw_initiator = 1;
+ draw_conf.num_instances = 1;
+ draw_conf.num_indices = 3;
+ draw_conf.index_type = 0; // 0 = 16 bit, 1 = 32 bit indices
+
+ fs_addr = upload (adapt, fs, sizeof(fs), 0);
+ vs_addr = upload (adapt, vs, sizeof(vs), 4096);
+ ps_addr = upload (adapt, ps, sizeof(ps), 8192);
+ vb_addr = upload (adapt, vb, sizeof(vb), 12288);
+
+ vtx_res.id = SQ_VTX_RESOURCE_fs;
+ vtx_res.vtx_size_dw = 4;
+ vtx_res.vtx_num_entries = 1024;
+ vtx_res.mem_req_size = 1;
+ vtx_res.vb_addr = vb_addr;
+
+ printf("win tri r6xx\n");
+
+ start_3d(adapt);
+
+ set_default_state(adapt);
+
+ // PS resources
+ for (i = 0; i < 16; i++) {
+ tex_res.id = i;
+ tex_res.dst_sel_x = 4;
+ tex_res.dst_sel_y = 4;
+ tex_res.dst_sel_z = 4;
+ tex_res.dst_sel_w = 5;
+ set_tex_resource(adapt, &tex_res);
+ tex_samp.id = i;
+ set_tex_sampler (adapt, &tex_samp);
+ }
+
+ // VS resources
+ for (i = 0; i < 4; i++) {
+ tex_res.id = 160 + i;
+ tex_res.dst_sel_x = 4;
+ tex_res.dst_sel_y = 4;
+ tex_res.dst_sel_z = 4;
+ tex_res.dst_sel_w = 5;
+ set_tex_resource(adapt, &tex_res);
+ tex_samp.id = 18 + i; /* ??? */
+ set_tex_sampler (adapt, &tex_samp);
+ }
+
+ // PS alu constants
+ set_alu_consts(0x3e0 >> 2, sizeof(tri_ps_alu_consts) / SQ_ALU_CONSTANT_offset, tri_ps_alu_consts);
+
+ ereg (CB_TARGET_MASK, 0x0000FFFF);
+ pack0 (CB_CLEAR_RED, 4);
+ e32 (0x00000000);
+ e32 (0x00000000);
+ e32 (0x00000000);
+ e32 (0x00000000);
+
+ ereg (SX_ALPHA_TEST_CONTROL, 0x00000007);
+ ereg (SX_ALPHA_REF, 0x00000000);
+
+ ereg (DB_ALPHA_TO_MASK, 0x0000AA00);
+ ereg (DB_DEPTH_CONTROL, 0x00700700);
+ ereg (DB_STENCILREFMASK, 0x00FFFF00);
+ ereg (DB_STENCILREFMASK_BF, 0x00FFFF00);
+ ereg (DB_DEPTH_CONTROL, 0x00700700);
+ ereg (DB_DEPTH_CONTROL, 0x00700704);
+ ereg (DB_DEPTH_CONTROL, 0x00700774);
+
+ ereg (PA_SU_SC_MODE_CNTL, 0x00000244);
+ ereg (PA_SU_POLY_OFFSET_BACK_OFFSET, 0x00000000);
+ ereg (PA_SU_POLY_OFFSET_FRONT_OFFSET, 0x00000000);
+ ereg (PA_SU_SC_MODE_CNTL, 0x00000244);
+ ereg (PA_SU_POLY_OFFSET_BACK_SCALE, 0x00000000);
+ ereg (PA_SU_POLY_OFFSET_FRONT_SCALE, 0x00000000);
+
+ set_screen_scissor(0, 0, 8192, 8192);
+ set_vport_scissor(0, 0, 0, 8192, 8192);
+ set_generic_scissor(0, 0, 256, 256);
+ set_window_scissor(0, 0, 256, 256);
+
+ ereg (PA_SC_AA_MASK, 0xFFFFFFFF);
+ ereg (PA_SC_MODE_CNTL, 0x00004010);
+ ereg (PA_SC_AA_CONFIG, 0x00000000);
+
+ ereg (PA_CL_VTE_CNTL, (VPORT_X_SCALE_ENA_bit |
+ VPORT_X_OFFSET_ENA_bit |
+ VPORT_Y_SCALE_ENA_bit |
+ VPORT_Y_OFFSET_ENA_bit |
+ VPORT_Z_SCALE_ENA_bit |
+ VPORT_Z_OFFSET_ENA_bit |
+ //VTX_XY_FMT_bit |
+ //VTX_Z_FMT_bit |
+ VTX_W0_FMT_bit |
+ 0));
+
+ set_viewport(adapt, 256, 256, 0);
+
+ pack0 (PA_CL_GB_VERT_CLIP_ADJ, 4);
+ efloat (3.0f);
+ efloat (1.0f);
+ efloat (3.0f);
+ efloat (1.0f);
+ ereg (PA_CL_CLIP_CNTL, 0x00000000);
+
+ ereg (SPI_FOG_CNTL, 0x00000000);
+ ereg (SPI_INPUT_Z, 0x00000000);
+
+ pack0 (SPI_VS_OUT_ID_0, 10);
+ e32 (0xFFFFFF0A);
+ for (i = 1; i < 10; i++)
+ e32 (0xFFFFFFFF);
+
+ pack0 (CB_FOG_RED, 3);
+ e32 (0x00000000);
+ e32 (0x00000000);
+ e32 (0x00000000);
+
+ ereg (SPI_FOG_CNTL, 0x00000000);
+ ereg (SPI_INPUT_Z, 0x00000000);
+
+ ereg (PA_SU_POINT_SIZE, 0x00080008);
+ ereg (PA_SU_POINT_MINMAX, 0x08000008);
+ ereg (PA_CL_CLIP_CNTL, 0x00080000);
+
+ ereg (VGT_MAX_VTX_INDX, draw_conf.num_indices);
+ ereg (VGT_MIN_VTX_INDX, 0);
+ ereg (VGT_INDX_OFFSET, 0x00000000);
+
+ set_vtx_resource(adapt, &vtx_res);
+
+ ereg (VGT_OUTPUT_PATH_CNTL, 0x00000000);
+
+ fs_conf.shader_addr = fs_addr;
+ fs_setup(adapt, &fs_conf);
+
+ ereg (VGT_INSTANCE_STEP_RATE_0, 0x00000000);
+ ereg (VGT_INSTANCE_STEP_RATE_1, 0x00000000);
+
+ vs_conf.shader_addr = vs_addr;
+ vs_conf.num_gprs = 4;
+ vs_conf.stack_size = 1;
+ vs_setup(adapt, &vs_conf);
+
+ ereg (SQ_VTX_SEMANTIC_CLEAR, 0xFFFFFFFC);
+ ereg (SQ_VTX_SEMANTIC_0, 0x00000000);
+ ereg (SQ_VTX_SEMANTIC_0 + (1 << 2), 0x0000000A);
+
+
+ ereg (VGT_VERTEX_REUSE_BLOCK_CNTL, 0x0000000E);
+ ereg (VGT_OUT_DEALLOC_CNTL, 0x00000010);
+
+ ereg (PA_CL_VS_OUT_CNTL, 0x00000000);
+
+ // VS alu constants
+ set_alu_consts(0x400 >> 2, sizeof(tri_vs_alu_consts) / SQ_ALU_CONSTANT_offset, tri_vs_alu_consts);
+
+ pack0 (SQ_BOOL_CONST_0, 2);
+ e32 (0x00000001);
+ e32 (0x00000000);
+
+ ps_conf.shader_addr = ps_addr;
+ ps_conf.num_gprs = 1;
+ ps_conf.stack_size = 0;
+ ps_conf.uncached_first_inst = 1;
+ ps_conf.clamp_consts = 1;
+ ps_conf.export_mode = 2;
+ ps_setup(adapt, &ps_conf);
+
+ ereg (CB_SHADER_MASK, 0x0000000F);
+
+ ereg (R7xx_CB_SHADER_CONTROL, RT0_ENABLE_bit);
+ ereg (SPI_PS_IN_CONTROL_0, (((2 - 1) << NUM_INTERP_shift) |
+ ((1 << BARYC_SAMPLE_CNTL_shift)) |
+ PERSP_GRADIENT_ENA_bit));
+ ereg (SPI_PS_IN_CONTROL_1, 0);
+ ereg (SPI_PS_INPUT_CNTL_0, 0x00000F0A);
+
+ ereg (CB_COLOR_CONTROL, 0x00CC0000);
+
+ ereg (PA_SU_SC_MODE_CNTL, 0x00000244);
+ ereg (SPI_INTERP_CONTROL_0, 0x00000868);
+ ereg (PA_SC_LINE_CNTL, 0x00000400);
+
+ cb_conf.id = 0;
+ cb_conf.w = surface_w;
+ cb_conf.h = surface_h;
+ cb_conf.base = adapt->display_gpu;
+ cb_conf.format = 0x1a;
+ cb_conf.comp_swap = 1;
+ cb_conf.source_format = 1;
+ cb_conf.blend_clamp = 1;
+ set_render_target(adapt, &cb_conf);
+
+ ereg (PA_SC_GENERIC_SCISSOR_TL, 0x80000000);
+ ereg (PA_SC_GENERIC_SCISSOR_BR, 0x01000100);
+
+ ereg (DB_DEPTH_INFO, 0x00000000);
+ ereg (DB_RENDER_OVERRIDE, 0x00000040);
+ ereg (DB_SHADER_CONTROL, 0x00000200);
+ ereg (PA_SU_SC_MODE_CNTL, 0x00000244);
+
+ ereg (PA_SU_SC_MODE_CNTL, 0x00000244);
+
+ wait_3d_idle();
+
+ ereg (DB_SHADER_CONTROL, 0x00000210);
+
+ draw_immd(adapt, &draw_conf, indices);
+
+ wait_3d_idle_clean();
+
+}
+
+void pm4play_tri_test_r7xx(adapter_t *adapt)
+{
+
+ uint32_t fs[] = { // vertex-fetch microcode: 2 CF instructions followed by a clause of two VTX fetches
+ // CF INST 0: vertex-fetch clause (SQ_CF_INST_VTX) at ADDR(2); COUNT(2) matches the two VTX instructions below
+ CF_DWORD0(ADDR(2)),
+ CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ COUNT(2),
+ CALL_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_VTX),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1)),
+ // CF INST 1: SQ_CF_INST_RETURN (END_OF_PROGRAM stays 0)
+ CF_DWORD0(ADDR(0)),
+ CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ COUNT(1),
+ CALL_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_RETURN),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1)),
+ // VTX INST 0: semantic fetch (SEMANTIC_ID 0) from buffer 160 at OFFSET(0) as FMT_32_32_32_FLOAT; X/Y/Z pass through and W is set via SQ_SEL_1
+ VTX_DWORD0(VTX_INST(SQ_VTX_INST_SEMANTIC),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(160),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ MEGA_FETCH_COUNT(16)),
+ VTX_DWORD1_SEM(SEMANTIC_ID(0),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_Z),
+ DST_SEL_W(SQ_SEL_1),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_32_32_32_FLOAT),
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM),
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)),
+ VTX_DWORD2(OFFSET(0),
+ ENDIAN_SWAP(ENDIAN_NONE),
+ CONST_BUF_NO_STRIDE(0),
+ MEGA_FETCH(1)),
+ // VTX INST 1 (the VTX_DWORD_PAD on the next line follows VTX INST 0's three dwords; presumably a filler word - confirm): semantic fetch (SEMANTIC_ID 10) from buffer 160 at OFFSET(12) as FMT_8_8_8_8, with X and Z exchanged by the DST_SEL swizzle
+ VTX_DWORD_PAD,
+ VTX_DWORD0(VTX_INST(SQ_VTX_INST_SEMANTIC),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(160),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ MEGA_FETCH_COUNT(4)),
+ VTX_DWORD1_SEM(SEMANTIC_ID(10),
+ DST_SEL_X(SQ_SEL_Z),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_X),
+ DST_SEL_W(SQ_SEL_W),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_8_8_8_8),
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM),
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_UNSIGNED),
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)),
+ VTX_DWORD2(OFFSET(12),
+ ENDIAN_SWAP(ENDIAN_NONE),
+ CONST_BUF_NO_STRIDE(0),
+ MEGA_FETCH(0)),
+ VTX_DWORD_PAD
+ };
+
+ uint32_t vs[] = {
+ // CF INST 0
+ CF_DWORD0(ADDR(0)),
+ CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ COUNT(1),
+ CALL_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_CALL_FS),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(0)),
+ // CF INST 1
+ CF_ALU_DWORD0(ADDR(4),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_NOP)),
+ CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ COUNT(36),
+ USES_WATERFALL(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1)),
+ // CF INST 2
+ CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
+ TYPE(SQ_EXPORT_POS),
+ RW_GPR(1),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0)),
+ CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ R6xx_ELEM_LOOP(0),
+ BURST_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1)),
+ // CF INST 3
+ CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
+ TYPE(SQ_EXPORT_PARAM),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0)),
+ CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ R6xx_ELEM_LOOP(0),
+ BURST_COUNT(0),
+ END_OF_PROGRAM(1),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(0)),
+ // ALU INST 0
+ ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(272),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_PRED_SETE_PUSH),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(127),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0)),
+ // ALU INST 1
+ ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(272),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_PRED_SETE_PUSH),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0)),
+ // ALU INST 2
+ ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(272),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_PRED_SETE_PUSH),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0)),
+ // ALU INST 3
+ ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(272),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_PRED_SETE_PUSH),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0)),
+ // ALU INST 4
+ ALU_DWORD0(SRC0_SEL(2),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_M4),
+ ALU_INST(SQ_OP2_INST_SETE_DX10),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(1)),
+ // ALU INST 5
+ ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(273),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_PRED_SETE_PUSH),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0)),
+ // ALU INST 6
+ ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(273),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_PRED_SETE_PUSH),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(127),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0)),
+ // ALU INST 7
+ ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(273),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_PRED_SETE_PUSH),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0)),
+ // ALU INST 8
+ ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(273),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_PRED_SETE_PUSH),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0)),
+ // ALU INST 9
+ ALU_DWORD0(SRC0_SEL(2),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_M4),
+ ALU_INST(SQ_OP2_INST_SETE_DX10),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(1)),
+ // ALU INST 10
+ ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(274),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_PRED_SETE_PUSH),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0)),
+ // ALU INST 11
+ ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(274),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_PRED_SETE_PUSH),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0)),
+ // ALU INST 12
+ ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(274),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_PRED_SETE_PUSH),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(127),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0)),
+ // ALU INST 13
+ ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(274),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_PRED_SETE_PUSH),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0)),
+ // ALU INST 14
+ ALU_DWORD0(SRC0_SEL(2),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_M4),
+ ALU_INST(SQ_OP2_INST_SETE_DX10),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(1)),
+ // ALU INST 15
+ ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(275),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_PRED_SETE_PUSH),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0)),
+ // ALU INST 16
+ ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(275),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_PRED_SETE_PUSH),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0)),
+ // ALU INST 17
+ ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(275),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_PRED_SETE_PUSH),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0)),
+ // ALU INST 18
+ ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(275),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_PRED_SETE_PUSH),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(127),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0)),
+ // ALU INST 19
+ ALU_DWORD0(SRC0_SEL(2),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_M4),
+ ALU_INST(SQ_OP2_INST_SETE_DX10),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(1)),
+ // ALU INST 20
+ ALU_DWORD0(SRC0_SEL(127),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(268),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_PRED_SETE_PUSH),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0)),
+ // ALU INST 21
+ ALU_DWORD0(SRC0_SEL(127),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(268),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_PRED_SETE_PUSH),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0)),
+ // ALU INST 22
+ ALU_DWORD0(SRC0_SEL(127),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(268),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_PRED_SETE_PUSH),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0)),
+ // ALU INST 23
+ ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_PV),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(268),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_PRED_SETE_PUSH),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0)),
+ // ALU INST 24
+ ALU_DWORD0(SRC0_SEL(127),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(269),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_PRED_SETE_PUSH),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0)),
+ // ALU INST 25
+ ALU_DWORD0(SRC0_SEL(127),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(269),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_PRED_SETE_PUSH),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0)),
+ // ALU INST 26
+ ALU_DWORD0(SRC0_SEL(127),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(269),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_PRED_SETE_PUSH),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0)),
+ // ALU INST 27
+ ALU_DWORD0(SRC0_SEL(127),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(269),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_PRED_SETE_PUSH),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0)),
+ // ALU INST 28
+ ALU_DWORD0(SRC0_SEL(127),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(270),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_PRED_SETE_PUSH),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0)),
+ // ALU INST 29
+ ALU_DWORD0(SRC0_SEL(127),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(270),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_PRED_SETE_PUSH),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0)),
+ // ALU INST 30
+ ALU_DWORD0(SRC0_SEL(127),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(270),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_PRED_SETE_PUSH),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0)),
+ // ALU INST 31
+ ALU_DWORD0(SRC0_SEL(127),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(270),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_PRED_SETE_PUSH),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0)),
+ // ALU INST 32
+ ALU_DWORD0(SRC0_SEL(127),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(271),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_PRED_SETE_PUSH),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0)),
+ // ALU INST 33
+ ALU_DWORD0(SRC0_SEL(127),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(271),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_PRED_SETE_PUSH),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0)),
+ // ALU INST 34
+ ALU_DWORD0(SRC0_SEL(127),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(271),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_PRED_SETE_PUSH),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0)),
+ // ALU INST 35
+ ALU_DWORD0(SRC0_SEL(127),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(271),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_PRED_SETE_PUSH),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0)),
+ };
+
+ uint32_t ps[] = {
+ // CF INST 0
+ CF_ALU_DWORD0(ADDR(2),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_NOP)),
+ CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ COUNT(4),
+ USES_WATERFALL(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1)),
+ // CF INST 1
+ CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+ TYPE(SQ_EXPORT_PIXEL),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0)),
+ CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ R6xx_ELEM_LOOP(0),
+ BURST_COUNT(0),
+ END_OF_PROGRAM(1),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1)),
+ // ALU INST 0
+ ALU_DWORD0(SRC0_SEL(0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_M4),
+ ALU_INST(SQ_OP2_INST_SETE_DX10),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(1)),
+ // ALU INST 1
+ ALU_DWORD0(SRC0_SEL(0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_M4),
+ ALU_INST(SQ_OP2_INST_SETE_DX10),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(1)),
+ // ALU INST 2
+ ALU_DWORD0(SRC0_SEL(0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_M4),
+ ALU_INST(SQ_OP2_INST_SETE_DX10),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(1)),
+ // ALU INST 3
+ ALU_DWORD0(SRC0_SEL(0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_M4),
+ ALU_INST(SQ_OP2_INST_SETE_DX10),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(1)),
+ };
+
+ uint32_t vb[] = {
+ 0x3F000000,0x00000000,0x00000000,0xFFFF0000, // X, Y, Z, ARGB
+ 0x00000000,0x3F800000,0x00000000,0xFF00FF00, // X, Y, Z, ARGB
+ 0x3F800000,0x3F800000,0x00000000,0xFF0000FF, // X, Y, Z, ARGB
+ };
+
+ int surface_w = adapt->color_pitch;
+ int surface_h = adapt->color_height;
+ int i;
+ uint64_t vb_addr;
+ uint64_t fs_addr, vs_addr, ps_addr;
+
+ draw_config_t draw_conf;
+ uint32_t indices[3] = { 0, 1, 2 };
+
+ cb_config_t cb_conf;
+ vtx_resource_t vtx_res;
+ tex_resource_t tex_res;
+ tex_sampler_t tex_samp;
+ shader_config_t fs_conf, vs_conf, ps_conf;
+
+
+ printf ("\n* PM4 Play Tri Test r7xx\n\n");
+
+ memset(&fs_conf, 0, sizeof(shader_config_t));
+ memset(&vs_conf, 0, sizeof(shader_config_t));
+ memset(&ps_conf, 0, sizeof(shader_config_t));
+ memset(&cb_conf, 0, sizeof(cb_config_t));
+ memset(&vtx_res, 0, sizeof(vtx_resource_t));
+ memset(&tex_res, 0, sizeof(tex_resource_t));
+ memset(&tex_samp, 0, sizeof(tex_sampler_t));
+
+ draw_conf.prim_type = 0x4; // 0x4 triangle list
+ // 0x11 rect list
+ draw_conf.vgt_draw_initiator = 1;
+ draw_conf.num_instances = 1;
+ draw_conf.num_indices = 3;
+ draw_conf.index_type = 0; // 0 = 16 bit, 1= 32 bit indicies
+
+ fs_addr = upload (adapt, fs, sizeof(fs), 0);
+ vs_addr = upload (adapt, vs, sizeof(vs), 4096);
+ ps_addr = upload (adapt, ps, sizeof(ps), 8192);
+ vb_addr = upload (adapt, vb, sizeof(vb), 12288);
+
+ vtx_res.id = SQ_VTX_RESOURCE_fs;
+ vtx_res.vtx_size_dw = 4;
+ vtx_res.vtx_num_entries = 1024;
+ vtx_res.mem_req_size = 1;
+ vtx_res.vb_addr = vb_addr;
+
+ printf("win tri r7xx\n");
+
+
+ start_3d(adapt);
+
+ pack3 (IT_EVENT_WRITE, 1);
+ e32 (0x0000002C);
+
+ wait_3d_idle();
+
+ pack3 (IT_CONTEXT_CONTROL, 2);
+ e32 (0x80000000);
+ e32 (0x80000000);
+
+ wait_3d_idle();
+
+ set_default_state(adapt);
+
+ pack3 (IT_CONTEXT_CONTROL, 2);
+ e32 (0x80000000);
+ e32 (0x80000000);
+
+ // PS resources
+ for (i = 0; i < 16; i++) {
+ tex_res.id = i;
+ tex_res.dst_sel_x = 4;
+ tex_res.dst_sel_y = 4;
+ tex_res.dst_sel_z = 4;
+ tex_res.dst_sel_w = 5;
+ set_tex_resource(adapt, &tex_res);
+ tex_samp.id = i;
+ set_tex_sampler (adapt, &tex_samp);
+ }
+
+ // VS resources
+ for (i = 0; i < 4; i++) {
+ tex_res.id = 160 + i;
+ tex_res.dst_sel_x = 4;
+ tex_res.dst_sel_y = 4;
+ tex_res.dst_sel_z = 4;
+ tex_res.dst_sel_w = 5;
+ set_tex_resource(adapt, &tex_res);
+ tex_samp.id = 18 + i;
+ set_tex_sampler (adapt, &tex_samp);
+ }
+
+ // PS alu constants
+ set_alu_consts(0x3e0 >> 2, sizeof(tri_ps_alu_consts) / SQ_ALU_CONSTANT_offset, tri_ps_alu_consts);
+
+ ereg (CB_TARGET_MASK, 0x0000FFFF);
+
+ ereg (SX_ALPHA_TEST_CONTROL, 0x00000007);
+ ereg (SX_ALPHA_REF, 0x00000000);
+
+ ereg (DB_ALPHA_TO_MASK, 0x0000AA00);
+ ereg (DB_DEPTH_CONTROL, 0x00700700);
+ ereg (DB_STENCILREFMASK, 0x00FFFF00);
+ ereg (DB_STENCILREFMASK_BF, 0x00FFFF00);
+ ereg (DB_DEPTH_CONTROL, 0x00700700);
+ ereg (DB_DEPTH_CONTROL, 0x00700704);
+ ereg (DB_DEPTH_CONTROL, 0x00700774);
+
+ ereg (PA_SU_SC_MODE_CNTL, 0x00000244);
+ ereg (PA_SU_POLY_OFFSET_BACK_OFFSET, 0x00000000);
+ ereg (PA_SU_POLY_OFFSET_FRONT_OFFSET, 0x00000000);
+ ereg (PA_SU_SC_MODE_CNTL, 0x00000244);
+ ereg (PA_SU_POLY_OFFSET_BACK_SCALE, 0x00000000);
+ ereg (PA_SU_POLY_OFFSET_FRONT_SCALE, 0x00000000);
+
+ set_screen_scissor(0, 0, 8192, 8192);
+ set_vport_scissor(0, 0, 0, 8192, 8192);
+ set_generic_scissor(0, 0, 256, 256);
+ set_window_scissor(0, 0, 256, 256);
+
+ ereg (PA_SC_AA_MASK, 0xFFFFFFFF);
+ ereg (PA_SC_MODE_CNTL, 0x00514000);
+ ereg (PA_SC_AA_CONFIG, 0x00000000);
+
+ pack0 (SPI_VS_OUT_ID_0, 10);
+ e32 (0xFFFFFF0A);
+ for (i = 1; i < 10; i++)
+ e32 (0xFFFFFFFF);
+
+ ereg (SPI_FOG_CNTL, 0x00000000);
+ ereg (SPI_INPUT_Z, 0x00000000);
+ ereg (SPI_FOG_CNTL, 0x00000000);
+ ereg (SPI_INPUT_Z, 0x00000000);
+
+ ereg (PA_SU_POINT_SIZE, 0x00080008);
+ ereg (PA_SU_POINT_MINMAX, 0x08000008);
+ ereg (PA_CL_CLIP_CNTL, 0x00000000);
+
+ ereg (VGT_MAX_VTX_INDX, draw_conf.num_indices);
+ ereg (VGT_MIN_VTX_INDX, 0);
+ ereg (VGT_INDX_OFFSET, 0x00000000);
+
+ set_vtx_resource(adapt, &vtx_res);
+
+ ereg (VGT_OUTPUT_PATH_CNTL, 0x00000000);
+
+ fs_conf.shader_addr = fs_addr;
+ fs_setup(adapt, &fs_conf);
+
+ ereg (VGT_INSTANCE_STEP_RATE_0, 0x00000000);
+ ereg (VGT_INSTANCE_STEP_RATE_1, 0x00000000);
+
+ vs_conf.shader_addr = vs_addr;
+ vs_conf.num_gprs = 4;
+ vs_conf.stack_size = 1;
+ vs_setup(adapt, &vs_conf);
+
+ ereg (SQ_VTX_SEMANTIC_CLEAR, 0xFFFFFFFC);
+ ereg (SQ_VTX_SEMANTIC_0, 0x00000000);
+ ereg (SQ_VTX_SEMANTIC_0 + (1 << 2), 0x0000000A);
+
+ ereg (VGT_VERTEX_REUSE_BLOCK_CNTL, 0x0000000E);
+ ereg (VGT_OUT_DEALLOC_CNTL, 0x00000010);
+
+ ereg (PA_CL_VS_OUT_CNTL, 0x00000000);
+
+ // VS alu constants
+ set_alu_consts(0x400 >> 2, sizeof(tri_vs_alu_consts) / SQ_ALU_CONSTANT_offset, tri_vs_alu_consts);
+
+ pack0 (SQ_BOOL_CONST_0, 2);
+ e32 (0x00000001);
+ e32 (0x00000000);
+
+ ereg (PA_CL_VTE_CNTL, (VPORT_X_SCALE_ENA_bit |
+ VPORT_X_OFFSET_ENA_bit |
+ VPORT_Y_SCALE_ENA_bit |
+ VPORT_Y_OFFSET_ENA_bit |
+ VPORT_Z_SCALE_ENA_bit |
+ VPORT_Z_OFFSET_ENA_bit |
+ //VTX_XY_FMT_bit |
+ //VTX_Z_FMT_bit |
+ VTX_W0_FMT_bit |
+ 0));
+
+ set_viewport(adapt, 256, 256, 0);
+
+ pack0 (0x00028C0C, 4);
+ efloat (62.9921875f); // PA_CL_GB_VERT_CLIP_ADJ = 0x0000A303
+ efloat (1.0f); // PA_CL_GB_VERT_DISC_ADJ = 0x0000A304
+ efloat (62.9921875f); // PA_CL_GB_HORZ_CLIP_ADJ = 0x0000A305
+ efloat (1.0f); // PA_CL_GB_HORZ_DISC_ADJ = 0x0000A306
+ ereg (PA_CL_CLIP_CNTL, 0x00080000);
+
+ ps_conf.shader_addr = ps_addr;
+ ps_conf.num_gprs = 1;
+ ps_conf.stack_size = 0;
+ ps_conf.clamp_consts = 1;
+ ps_conf.export_mode = 2;
+ ps_setup(adapt, &ps_conf);
+
+ ereg (CB_SHADER_MASK, 0x0000000F);
+ ereg (R7xx_CB_SHADER_CONTROL, RT0_ENABLE_bit);
+
+ ereg (SPI_PS_IN_CONTROL_0, (((2 - 1) << NUM_INTERP_shift) |
+ ((1 << BARYC_SAMPLE_CNTL_shift)) |
+ PERSP_GRADIENT_ENA_bit));
+ ereg (SPI_PS_IN_CONTROL_1, 0);
+ ereg (SPI_PS_INPUT_CNTL_0, 0x00000F0A);
+ ereg (CB_COLOR_CONTROL, 0x00CC0000);
+
+ ereg (PA_SU_SC_MODE_CNTL, 0x00000244);
+
+ ereg (SPI_INTERP_CONTROL_0, 0x00000868);
+
+ ereg (PA_SC_LINE_CNTL, 0x00000400);
+
+ cb_conf.id = 0;
+ cb_conf.w = surface_w;
+ cb_conf.h = surface_h;
+ cb_conf.base = adapt->display_gpu;
+ cb_conf.format = 0x1a;
+ cb_conf.comp_swap = 1;
+ cb_conf.source_format = 1;
+ cb_conf.blend_clamp = 1;
+ set_render_target(adapt, &cb_conf);
+
+ set_generic_scissor(0, 0, 256, 256);
+
+ ereg (DB_DEPTH_INFO, 0x00000000);
+ ereg (DB_RENDER_OVERRIDE, 0x00000000);
+ ereg (DB_SHADER_CONTROL, 0x00000210);
+
+ draw_immd(adapt, &draw_conf, indices);
+
+ wait_3d_idle_clean();
+
+}
+
+/*
+ * Run the triangle test variant that matches the adapter's chipset.
+ *
+ * Chipsets comparing less than or equal to CHIPSET_RV670 take the r6xx
+ * path; every other chipset takes the r7xx path.
+ */
+void pm4play_tri_test(adapter_t *adapt)
+{
+    /* Anything past RV670 is handled by the r7xx variant. */
+    if (adapt->chipset > CHIPSET_RV670) {
+        pm4play_tri_test_r7xx(adapt);
+        return;
+    }
+
+    pm4play_tri_test_r6xx(adapt);
+}
+
+
+/*
+ * Clear test
+ */
+
+/*
+ * R6xx variant of the clear test.
+ *
+ * Uploads the vertex and pixel shader defined below, binds the buffer at
+ * adapt->display_gpu as depth target (set_depth_target), programs the
+ * pipeline state and issues a single auto-indexed rect-list draw of four
+ * vertices (draw_auto).  Colour writes are masked off (CB_TARGET_MASK = 0)
+ * while the depth test is set to "always" with Z writes enabled.
+ *
+ * adapt: adapter to run the test on.  color_pitch, color_height and
+ *        display_gpu are read here; the adapter is also handed to the
+ *        upload / setup / draw helpers.
+ *
+ * The register writes replay a fixed sequence; their order is intentional
+ * and must not be rearranged.
+ */
+void pm4play_clear_test_r6xx(adapter_t *adapt)
+{
+    int surface_w = adapt->color_pitch;
+    int surface_h = adapt->color_height;
+    uint64_t vs_addr, ps_addr;
+    draw_config_t draw_conf;
+    db_config_t db_conf;
+    shader_config_t vs_conf, ps_conf;
+
+    // 4 DWs per const
+    // The vertex shader addresses this table as C[0 + AR]; only every third
+    // constant carries a position (the corners of a 256x256 rectangle).
+    float vs_alu_consts[] = {
+        0.0, 0.0, 0.0, 1.0, // X, Y, Z, W
+        0.0, 0.0, 0.0, 0.0,
+        0.0, 0.0, 0.0, 0.0,
+        256.0, 0.0, 0.0, 1.0, // X, Y, Z, W
+        0.0, 0.0, 0.0, 0.0,
+        0.0, 0.0, 0.0, 0.0,
+        256.0, 256.0, 0.0, 1.0, // X, Y, Z, W
+        0.0, 0.0, 0.0, 0.0,
+        0.0, 0.0, 0.0, 0.0,
+        0.0, 256.0, 0.0, 1.0, // X, Y, Z, W
+        0.0, 0.0, 0.0, 0.0,
+        0.0, 0.0, 0.0, 0.0,
+    };
+
+    /*
+     * Vertex shader: three CF instructions followed by the ALU clause.
+     *
+     * The ALU clause converts R0.X to float, computes PS * 2.0 + PS (i.e.
+     * three times that value), floors the result into AR and then copies
+     * constant C[0 + AR] into R0, which the first export writes out as
+     * position.
+     * NOTE(review): R0.X is presumably the auto-generated vertex index --
+     * confirm against the VGT setup.
+     */
+    uint32_t vs[] = {
+        // CF: ALU clause
+        CF_ALU_DWORD0(ADDR(3),
+                      KCACHE_BANK0(0),
+                      KCACHE_BANK1(0),
+                      KCACHE_MODE0(SQ_CF_KCACHE_NOP)),
+        CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+                      KCACHE_ADDR0(0),
+                      KCACHE_ADDR1(0),
+                      COUNT(8),
+                      USES_WATERFALL(1),
+                      CF_INST(SQ_CF_INST_ALU),
+                      WHOLE_QUAD_MODE(0),
+                      BARRIER(1)),
+        // CF: export R0 as position
+        CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
+                                TYPE(SQ_EXPORT_POS),
+                                RW_GPR(0),
+                                RW_REL(ABSOLUTE),
+                                INDEX_GPR(0),
+                                ELEM_SIZE(0)),
+        CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+                                     SRC_SEL_Y(SQ_SEL_Y),
+                                     SRC_SEL_Z(SQ_SEL_Z),
+                                     SRC_SEL_W(SQ_SEL_W),
+                                     R6xx_ELEM_LOOP(0),
+                                     BURST_COUNT(0),
+                                     END_OF_PROGRAM(0),
+                                     VALID_PIXEL_MODE(0),
+                                     CF_INST(SQ_CF_INST_EXPORT_DONE),
+                                     WHOLE_QUAD_MODE(0),
+                                     BARRIER(1)),
+        // CF: parameter export with all components masked, ends the program
+        CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
+                                TYPE(SQ_EXPORT_PARAM),
+                                RW_GPR(0),
+                                RW_REL(ABSOLUTE),
+                                INDEX_GPR(0),
+                                ELEM_SIZE(0)),
+        CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_MASK),
+                                     SRC_SEL_Y(SQ_SEL_MASK),
+                                     SRC_SEL_Z(SQ_SEL_MASK),
+                                     SRC_SEL_W(SQ_SEL_MASK),
+                                     R6xx_ELEM_LOOP(0),
+                                     BURST_COUNT(0),
+                                     END_OF_PROGRAM(1),
+                                     VALID_PIXEL_MODE(0),
+                                     CF_INST(SQ_CF_INST_EXPORT_DONE),
+                                     WHOLE_QUAD_MODE(0),
+                                     BARRIER(1)),
+        // (float) R0.X -> PS
+        ALU_DWORD0(SRC0_SEL(0),
+                   SRC0_REL(ABSOLUTE),
+                   SRC0_ELEM(ELEM_X),
+                   SRC0_NEG(0),
+                   SRC1_SEL(SQ_ALU_SRC_0),
+                   SRC1_REL(ABSOLUTE),
+                   SRC1_ELEM(ELEM_X),
+                   SRC1_NEG(0),
+                   INDEX_MODE(SQ_INDEX_AR_X),
+                   PRED_SEL(SQ_PRED_SEL_OFF),
+                   LAST(1)),
+        ALU_DWORD1_OP2(CHIPSET_R600,
+                       SRC0_ABS(0),
+                       SRC1_ABS(0),
+                       UPDATE_EXECUTE_MASK(0),
+                       UPDATE_PRED(0),
+                       WRITE_MASK(0),
+                       FOG_MERGE(0),
+                       OMOD(SQ_ALU_OMOD_OFF),
+                       ALU_INST(SQ_OP2_INST_INT_TO_FLT),
+                       BANK_SWIZZLE(SQ_ALU_VEC_012),
+                       DST_GPR(0),
+                       DST_REL(ABSOLUTE),
+                       DST_ELEM(ELEM_X),
+                       CLAMP(0)),
+        // MAD PS SRC_LITERAL.X PS -> R127.X, PV
+        ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_PS),
+                   SRC0_REL(ABSOLUTE),
+                   SRC0_ELEM(ELEM_X),
+                   SRC0_NEG(0),
+                   SRC1_SEL(SQ_ALU_SRC_LITERAL),
+                   SRC1_REL(ABSOLUTE),
+                   SRC1_ELEM(ELEM_X),
+                   SRC1_NEG(0),
+                   INDEX_MODE(SQ_INDEX_AR_X),
+                   PRED_SEL(SQ_PRED_SEL_OFF),
+                   LAST(1)),
+        ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PS),
+                       SRC2_REL(ABSOLUTE),
+                       SRC2_ELEM(ELEM_X),
+                       SRC2_NEG(0),
+                       ALU_INST(SQ_OP3_INST_MULADD),
+                       BANK_SWIZZLE(SQ_ALU_VEC_012),
+                       DST_GPR(127),
+                       DST_REL(ABSOLUTE),
+                       DST_ELEM(ELEM_X),
+                       CLAMP(0)),
+        // SRC_LITERAL.X (2.0)
+        0x40000000,
+        // SRC_LITERAL.Y (0.0)
+        0x00000000,
+        // MOVA_FLOOR PV.X -> AR
+        ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_PV),
+                   SRC0_REL(ABSOLUTE),
+                   SRC0_ELEM(ELEM_X),
+                   SRC0_NEG(0),
+                   SRC1_SEL(SQ_ALU_SRC_0),
+                   SRC1_REL(ABSOLUTE),
+                   SRC1_ELEM(ELEM_X),
+                   SRC1_NEG(0),
+                   INDEX_MODE(SQ_INDEX_AR_X),
+                   PRED_SEL(SQ_PRED_SEL_OFF),
+                   LAST(1)),
+        ALU_DWORD1_OP2(CHIPSET_R600,
+                       SRC0_ABS(0),
+                       SRC1_ABS(0),
+                       UPDATE_EXECUTE_MASK(0),
+                       UPDATE_PRED(0),
+                       WRITE_MASK(0),
+                       FOG_MERGE(0),
+                       OMOD(SQ_ALU_OMOD_OFF),
+                       ALU_INST(SQ_OP2_INST_MOVA_FLOOR),
+                       BANK_SWIZZLE(SQ_ALU_VEC_012),
+                       DST_GPR(0),
+                       DST_REL(ABSOLUTE),
+                       DST_ELEM(ELEM_X),
+                       CLAMP(0)),
+        // MOV C[0+AR].X -> R0.X
+        ALU_DWORD0(SRC0_SEL(256), //cfile
+                   SRC0_REL(RELATIVE),
+                   SRC0_ELEM(ELEM_X),
+                   SRC0_NEG(0),
+                   SRC1_SEL(SQ_ALU_SRC_0),
+                   SRC1_REL(ABSOLUTE),
+                   SRC1_ELEM(ELEM_X),
+                   SRC1_NEG(0),
+                   INDEX_MODE(SQ_INDEX_AR_X),
+                   PRED_SEL(SQ_PRED_SEL_OFF),
+                   LAST(0)),
+        ALU_DWORD1_OP2(CHIPSET_R600,
+                       SRC0_ABS(0),
+                       SRC1_ABS(0),
+                       UPDATE_EXECUTE_MASK(0),
+                       UPDATE_PRED(0),
+                       WRITE_MASK(1),
+                       FOG_MERGE(0),
+                       OMOD(SQ_ALU_OMOD_OFF),
+                       ALU_INST(SQ_OP2_INST_MOV),
+                       BANK_SWIZZLE(SQ_ALU_VEC_012),
+                       DST_GPR(0),
+                       DST_REL(ABSOLUTE),
+                       DST_ELEM(ELEM_X),
+                       CLAMP(0)),
+        // MOV C[0+AR].Y -> R0.Y
+        ALU_DWORD0(SRC0_SEL(256), //cfile
+                   SRC0_REL(RELATIVE),
+                   SRC0_ELEM(ELEM_Y),
+                   SRC0_NEG(0),
+                   SRC1_SEL(SQ_ALU_SRC_0),
+                   SRC1_REL(ABSOLUTE),
+                   SRC1_ELEM(ELEM_X),
+                   SRC1_NEG(0),
+                   INDEX_MODE(SQ_INDEX_AR_X),
+                   PRED_SEL(SQ_PRED_SEL_OFF),
+                   LAST(0)),
+        ALU_DWORD1_OP2(CHIPSET_R600,
+                       SRC0_ABS(0),
+                       SRC1_ABS(0),
+                       UPDATE_EXECUTE_MASK(0),
+                       UPDATE_PRED(0),
+                       WRITE_MASK(1),
+                       FOG_MERGE(0),
+                       OMOD(SQ_ALU_OMOD_OFF),
+                       ALU_INST(SQ_OP2_INST_MOV),
+                       BANK_SWIZZLE(SQ_ALU_VEC_012),
+                       DST_GPR(0),
+                       DST_REL(ABSOLUTE),
+                       DST_ELEM(ELEM_Y),
+                       CLAMP(0)),
+        // MOV C[0+AR].Z -> R0.Z
+        ALU_DWORD0(SRC0_SEL(256), //cfile
+                   SRC0_REL(RELATIVE),
+                   SRC0_ELEM(ELEM_Z),
+                   SRC0_NEG(0),
+                   SRC1_SEL(SQ_ALU_SRC_0),
+                   SRC1_REL(ABSOLUTE),
+                   SRC1_ELEM(ELEM_X),
+                   SRC1_NEG(0),
+                   INDEX_MODE(SQ_INDEX_AR_X),
+                   PRED_SEL(SQ_PRED_SEL_OFF),
+                   LAST(0)),
+        ALU_DWORD1_OP2(CHIPSET_R600,
+                       SRC0_ABS(0),
+                       SRC1_ABS(0),
+                       UPDATE_EXECUTE_MASK(0),
+                       UPDATE_PRED(0),
+                       WRITE_MASK(1),
+                       FOG_MERGE(0),
+                       OMOD(SQ_ALU_OMOD_OFF),
+                       ALU_INST(SQ_OP2_INST_MOV),
+                       BANK_SWIZZLE(SQ_ALU_VEC_012),
+                       DST_GPR(0),
+                       DST_REL(ABSOLUTE),
+                       DST_ELEM(ELEM_Z),
+                       CLAMP(0)),
+        // MOV C[0+AR].W -> R0.W
+        ALU_DWORD0(SRC0_SEL(256), //cfile
+                   SRC0_REL(RELATIVE),
+                   SRC0_ELEM(ELEM_W),
+                   SRC0_NEG(0),
+                   SRC1_SEL(SQ_ALU_SRC_0),
+                   SRC1_REL(ABSOLUTE),
+                   SRC1_ELEM(ELEM_X),
+                   SRC1_NEG(0),
+                   INDEX_MODE(SQ_INDEX_AR_X),
+                   PRED_SEL(SQ_PRED_SEL_OFF),
+                   LAST(1)),
+        ALU_DWORD1_OP2(CHIPSET_R600,
+                       SRC0_ABS(0),
+                       SRC1_ABS(0),
+                       UPDATE_EXECUTE_MASK(0),
+                       UPDATE_PRED(0),
+                       WRITE_MASK(1),
+                       FOG_MERGE(0),
+                       OMOD(SQ_ALU_OMOD_OFF),
+                       ALU_INST(SQ_OP2_INST_MOV),
+                       BANK_SWIZZLE(SQ_ALU_VEC_012),
+                       DST_GPR(0),
+                       DST_REL(ABSOLUTE),
+                       DST_ELEM(ELEM_W),
+                       CLAMP(0)),
+    };
+
+    /*
+     * Pixel shader: copies constant C[0] into R0 component by component and
+     * exports R0 to MRT0.
+     * NOTE(review): no PS ALU constants are uploaded in this function, and
+     * CB_TARGET_MASK is programmed to 0 below, so the exported colour is
+     * presumably irrelevant for this test -- confirm.
+     */
+    uint32_t ps[] = {
+        // CF: ALU clause
+        CF_ALU_DWORD0(ADDR(2),
+                      KCACHE_BANK0(0),
+                      KCACHE_BANK1(0),
+                      KCACHE_MODE0(SQ_CF_KCACHE_NOP)),
+        CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+                      KCACHE_ADDR0(0),
+                      KCACHE_ADDR1(0),
+                      COUNT(4),
+                      USES_WATERFALL(0),
+                      CF_INST(SQ_CF_INST_ALU),
+                      WHOLE_QUAD_MODE(0),
+                      BARRIER(1)),
+        // CF: export R0 to MRT0, ends the program
+        CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+                                TYPE(SQ_EXPORT_PIXEL),
+                                RW_GPR(0),
+                                RW_REL(ABSOLUTE),
+                                INDEX_GPR(0),
+                                ELEM_SIZE(0)),
+        CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+                                     SRC_SEL_Y(SQ_SEL_Y),
+                                     SRC_SEL_Z(SQ_SEL_Z),
+                                     SRC_SEL_W(SQ_SEL_W),
+                                     R6xx_ELEM_LOOP(0),
+                                     BURST_COUNT(0),
+                                     END_OF_PROGRAM(1),
+                                     VALID_PIXEL_MODE(0),
+                                     CF_INST(SQ_CF_INST_EXPORT_DONE),
+                                     WHOLE_QUAD_MODE(0),
+                                     BARRIER(1)),
+        // MOV C[0].X -> R0.X
+        ALU_DWORD0(SRC0_SEL(256), //cfile
+                   SRC0_REL(ABSOLUTE),
+                   SRC0_ELEM(ELEM_X),
+                   SRC0_NEG(0),
+                   SRC1_SEL(SQ_ALU_SRC_0),
+                   SRC1_REL(ABSOLUTE),
+                   SRC1_ELEM(ELEM_X),
+                   SRC1_NEG(0),
+                   INDEX_MODE(SQ_INDEX_AR_X),
+                   PRED_SEL(SQ_PRED_SEL_OFF),
+                   LAST(0)),
+        ALU_DWORD1_OP2(CHIPSET_R600,
+                       SRC0_ABS(0),
+                       SRC1_ABS(0),
+                       UPDATE_EXECUTE_MASK(0),
+                       UPDATE_PRED(0),
+                       WRITE_MASK(1),
+                       FOG_MERGE(0),
+                       OMOD(SQ_ALU_OMOD_OFF),
+                       ALU_INST(SQ_OP2_INST_MOV),
+                       BANK_SWIZZLE(SQ_ALU_VEC_012),
+                       DST_GPR(0),
+                       DST_REL(ABSOLUTE),
+                       DST_ELEM(ELEM_X),
+                       CLAMP(0)),
+        // MOV C[0].Y -> R0.Y
+        ALU_DWORD0(SRC0_SEL(256), //cfile
+                   SRC0_REL(ABSOLUTE),
+                   SRC0_ELEM(ELEM_Y),
+                   SRC0_NEG(0),
+                   SRC1_SEL(SQ_ALU_SRC_0),
+                   SRC1_REL(ABSOLUTE),
+                   SRC1_ELEM(ELEM_X),
+                   SRC1_NEG(0),
+                   INDEX_MODE(SQ_INDEX_AR_X),
+                   PRED_SEL(SQ_PRED_SEL_OFF),
+                   LAST(0)),
+        ALU_DWORD1_OP2(CHIPSET_R600,
+                       SRC0_ABS(0),
+                       SRC1_ABS(0),
+                       UPDATE_EXECUTE_MASK(0),
+                       UPDATE_PRED(0),
+                       WRITE_MASK(1),
+                       FOG_MERGE(0),
+                       OMOD(SQ_ALU_OMOD_OFF),
+                       ALU_INST(SQ_OP2_INST_MOV),
+                       BANK_SWIZZLE(SQ_ALU_VEC_012),
+                       DST_GPR(0),
+                       DST_REL(ABSOLUTE),
+                       DST_ELEM(ELEM_Y),
+                       CLAMP(0)),
+        // MOV C[0].Z -> R0.Z
+        ALU_DWORD0(SRC0_SEL(256), //cfile
+                   SRC0_REL(ABSOLUTE),
+                   SRC0_ELEM(ELEM_Z),
+                   SRC0_NEG(0),
+                   SRC1_SEL(SQ_ALU_SRC_0),
+                   SRC1_REL(ABSOLUTE),
+                   SRC1_ELEM(ELEM_X),
+                   SRC1_NEG(0),
+                   INDEX_MODE(SQ_INDEX_AR_X),
+                   PRED_SEL(SQ_PRED_SEL_OFF),
+                   LAST(0)),
+        ALU_DWORD1_OP2(CHIPSET_R600,
+                       SRC0_ABS(0),
+                       SRC1_ABS(0),
+                       UPDATE_EXECUTE_MASK(0),
+                       UPDATE_PRED(0),
+                       WRITE_MASK(1),
+                       FOG_MERGE(0),
+                       OMOD(SQ_ALU_OMOD_OFF),
+                       ALU_INST(SQ_OP2_INST_MOV),
+                       BANK_SWIZZLE(SQ_ALU_VEC_012),
+                       DST_GPR(0),
+                       DST_REL(ABSOLUTE),
+                       DST_ELEM(ELEM_Z),
+                       CLAMP(0)),
+        // MOV C[0].W -> R0.W
+        ALU_DWORD0(SRC0_SEL(256), //cfile
+                   SRC0_REL(ABSOLUTE),
+                   SRC0_ELEM(ELEM_W),
+                   SRC0_NEG(0),
+                   SRC1_SEL(SQ_ALU_SRC_0),
+                   SRC1_REL(ABSOLUTE),
+                   SRC1_ELEM(ELEM_X),
+                   SRC1_NEG(0),
+                   INDEX_MODE(SQ_INDEX_AR_X),
+                   PRED_SEL(SQ_PRED_SEL_OFF),
+                   LAST(1)),
+        ALU_DWORD1_OP2(CHIPSET_R600,
+                       SRC0_ABS(0),
+                       SRC1_ABS(0),
+                       UPDATE_EXECUTE_MASK(0),
+                       UPDATE_PRED(0),
+                       WRITE_MASK(1),
+                       FOG_MERGE(0),
+                       OMOD(SQ_ALU_OMOD_OFF),
+                       ALU_INST(SQ_OP2_INST_MOV),
+                       BANK_SWIZZLE(SQ_ALU_VEC_012),
+                       DST_GPR(0),
+                       DST_REL(ABSOLUTE),
+                       DST_ELEM(ELEM_W),
+                       CLAMP(0)),
+    };
+
+    printf ("\n* PM4 Play Clear Test r6xx\n\n");
+
+    // Start every config struct from a known all-zero state
+    memset(&vs_conf, 0, sizeof(shader_config_t));
+    memset(&ps_conf, 0, sizeof(shader_config_t));
+    memset(&db_conf, 0, sizeof(db_config_t));
+    memset(&draw_conf, 0, sizeof(draw_config_t));
+
+    // One auto-indexed rect list made of four vertices
+    draw_conf.prim_type = DI_PT_RECTLIST;
+    draw_conf.num_indices = 4;
+    draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX;
+    draw_conf.index_type = DI_INDEX_SIZE_32_BIT;
+    draw_conf.num_instances = 1;
+
+    // Shaders are uploaded at distinct offsets (0 and 4096)
+    vs_addr = upload (adapt, vs, sizeof(vs), 0);
+    ps_addr = upload (adapt, ps, sizeof(ps), 4096);
+
+    printf("win clear r6xx: requires tiled framebuffer!!!\n");
+
+    start_3d(adapt);
+
+    wait_3d_idle_clean();
+
+    cp_set_surface_sync();
+
+    set_default_state(adapt);
+
+    // Depth target: the display buffer itself
+    db_conf.base = adapt->display_gpu;
+    db_conf.w = surface_w;
+    db_conf.h = surface_h;
+    db_conf.format = 6;
+    db_conf.array_mode = 4; //2;
+    set_depth_target(adapt, &db_conf);
+
+    ereg (DB_PREFETCH_LIMIT, ((surface_h / 8) - 1));
+    ereg (DB_HTILE_DATA_BASE, 0);
+    ereg (DB_HTILE_SURFACE, 0);
+    ereg (DB_PRELOAD_CONTROL, 0);
+
+    set_generic_scissor(0, 0, 8192, 8192);
+
+    // Vertex shader setup
+    vs_conf.shader_addr = vs_addr;
+    vs_conf.num_gprs = 1;
+    vs_conf.stack_size = 0;
+    vs_setup(adapt, &vs_conf);
+
+    ereg (SPI_VS_OUT_CONFIG, (VS_PER_COMPONENT_bit |
+                              ((1 - 1) << VS_EXPORT_COUNT_shift)));
+
+    ereg (VGT_VERTEX_REUSE_BLOCK_CNTL, (14 << VTX_REUSE_DEPTH_shift));
+    ereg (VGT_OUT_DEALLOC_CNTL, (16 << DEALLOC_DIST_shift));
+
+    // Pixel shader setup
+    ps_conf.shader_addr = ps_addr;
+    ps_conf.num_gprs = 1;
+    ps_conf.stack_size = 0;
+    ps_conf.uncached_first_inst = 1;
+    ps_setup(adapt, &ps_conf);
+
+    ereg (SPI_PS_IN_CONTROL_0, (((1 - 1) << NUM_INTERP_shift) |
+                                (1 << BARYC_SAMPLE_CNTL_shift)));
+    ereg (SPI_PS_IN_CONTROL_1, 0);
+
+    ereg (CB_SHADER_MASK, OUTPUT0_ENABLE_mask);
+    ereg (DB_SHADER_CONTROL, (1 << Z_ORDER_shift)); /* EARLY_Z_THEN_LATE_Z */
+
+    wait_3d_idle();
+
+    // NOTE(review): each IT_PRED_EXEC payload below looks like
+    // (device select << 24) | dword count -- confirm against the PM4
+    // packet documentation before changing any of these values.
+    pack3 (IT_PRED_EXEC, 1);
+    e32 (0x01000003);
+
+    ereg (DB_DEBUG, PREZ_MUST_WAIT_FOR_POSTZ_DONE);
+    pack3 (IT_PRED_EXEC, 1);
+    e32 (0x01000003);
+
+    ereg (PA_SC_MULTI_CHIP_CNTL, 0);
+    pack3 (IT_PRED_EXEC, 1);
+    e32 (0x02000003);
+    ereg (PA_SC_MULTI_CHIP_CNTL, 0);
+    pack3 (IT_PRED_EXEC, 1);
+    e32 (0x04000003);
+    ereg (PA_SC_MULTI_CHIP_CNTL, 0);
+    pack3 (IT_PRED_EXEC, 1);
+    e32 (0x08000003);
+    ereg (PA_SC_MULTI_CHIP_CNTL, 0);
+
+    // Vertex grouper / tessellator state
+    ereg (VGT_OUTPUT_PATH_CNTL, (0 << PATH_SELECT_shift)); // VGT_OUTPATH_VTX_REUSE
+    ereg (VGT_PRIMITIVEID_EN, 0);
+    ereg (VGT_MULTI_PRIM_IB_RESET_EN, 0); // IB-based prims disabled
+    ereg (VGT_STRMOUT_EN, 0); // strmout off
+    ereg (VGT_REUSE_OFF, 0); // reuse on
+    ereg (VGT_VTX_CNT_EN, 0); // auto index gen off
+    ereg (VGT_STRMOUT_BUFFER_EN, 0); // all strmout buffers disabled
+    ereg (VGT_GS_MODE, 0); // GS off
+    ereg (VGT_MAX_VTX_INDX, draw_conf.num_indices);
+    ereg (VGT_MIN_VTX_INDX, 0);
+    ereg (VGT_INDX_OFFSET, 0);
+    ereg (VGT_INSTANCE_STEP_RATE_0, 0);
+    ereg (VGT_INSTANCE_STEP_RATE_1, 0);
+    ereg (VGT_MULTI_PRIM_IB_RESET_INDX, 0);
+
+    // Scissors and scan converter state
+    set_screen_scissor(0, 0, 256, 256);
+    ereg (PA_SC_WINDOW_OFFSET, 0);
+    set_window_scissor(0, 0, 8192, 8192);
+
+    pack3 (IT_PRED_EXEC, 1);
+    e32 (0x01000003);
+    ereg (PA_SC_MODE_CNTL, FORCE_EOV_CNTDWN_ENABLE_bit);
+    pack3 (IT_PRED_EXEC, 1);
+    e32 (0x02000003);
+    ereg (PA_SC_MODE_CNTL, FORCE_EOV_CNTDWN_ENABLE_bit);
+    pack3 (IT_PRED_EXEC, 1);
+    e32 (0x04000003);
+    ereg (PA_SC_MODE_CNTL, FORCE_EOV_CNTDWN_ENABLE_bit);
+    pack3 (IT_PRED_EXEC, 1);
+    e32 (0x08000003);
+    ereg (PA_SC_MODE_CNTL, FORCE_EOV_CNTDWN_ENABLE_bit);
+    ereg (PA_SC_AA_CONFIG, 0);
+    ereg (PA_SC_AA_MASK, 0xFFFFFFFF);
+    ereg (PA_SC_CLIPRECT_RULE, 0x0000FFFF);
+
+    set_vport_scissor(0, 0, 0, 8192, 8192);
+
+    // Clipper / setup unit state
+    ereg (PA_SC_VPORT_ZMIN_0, 0x00000000); // 0.0
+    ereg (PA_SC_VPORT_ZMAX_0, 0x3F800000); // 1.0
+    ereg (PA_CL_CLIP_CNTL, CLIP_DISABLE_bit);
+    ereg (PA_SU_SC_MODE_CNTL, FACE_bit);
+    ereg (PA_CL_VTE_CNTL, (VTX_XY_FMT_bit |
+                           VTX_Z_FMT_bit));
+
+    ereg (PA_SU_POINT_SIZE, 0);
+    ereg (PA_SU_POINT_MINMAX, 0);
+    pack0 (0x00028C0C, 4);
+    efloat(1.0f); // PA_CL_GB_VERT_CLIP_ADJ = 0x0000A303
+    efloat(1.0f); // PA_CL_GB_VERT_DISC_ADJ = 0x0000A304
+    efloat(1.0f); // PA_CL_GB_HORZ_CLIP_ADJ = 0x0000A305
+    efloat(1.0f); // PA_CL_GB_HORZ_DISC_ADJ = 0x0000A306
+    ereg (PA_SC_LINE_STIPPLE, 0);
+    ereg (PA_SC_MPASS_PS_CNTL, 0);
+    ereg (PA_SU_LINE_CNTL, (8 << PA_SU_LINE_CNTL__WIDTH_shift)); // 16.0 fixed
+    ereg (PA_SU_VTX_CNTL, ((PIX_CENTER_bit) |
+                           (2 << PA_SU_VTX_CNTL__ROUND_MODE_shift)));
+    ereg (PA_SU_POLY_OFFSET_CLAMP, 0);
+    ereg (PA_CL_VS_OUT_CNTL, 0);
+
+    // Depth block: test always passes, Z writes enabled
+    ereg (DB_ALPHA_TO_MASK, 0);
+    ereg (DB_DEPTH_CONTROL, ((Z_ENABLE_bit) |
+                             (Z_WRITE_ENABLE_bit) |
+                             (7 << ZFUNC_shift))); // FRAG_ALWAYS
+    ereg (DB_RENDER_OVERRIDE, DISABLE_VIEWPORT_CLAMP_bit);
+    ereg (DB_SHADER_CONTROL, DUAL_EXPORT_ENABLE_bit);
+    ereg (DB_STENCILREFMASK, 0);
+    ereg (DB_STENCILREFMASK_BF, 0);
+    ereg (DB_DEPTH_CLEAR, 0); // float 0.0
+    ereg (DB_STENCIL_CLEAR, 0);
+    ereg (DB_RENDER_CONTROL, ZPASS_INCREMENT_DISABLE_bit);
+    ereg (DB_SRESULTS_COMPARE_STATE0, 0);
+    ereg (DB_SRESULTS_COMPARE_STATE1, 0);
+
+    // Colour block: all colour writes masked off
+    ereg (CB_TARGET_MASK, 0);
+    ereg (CB_SHADER_MASK, 0);
+    ereg (CB_CLRCMP_CONTROL, ((0 << CLRCMP_FCN_SRC_shift) |
+                              (0 << CLRCMP_FCN_DST_shift) |
+                              (2 << CLRCMP_FCN_SEL_shift)));
+    ereg (CB_CLRCMP_DST, 0x00000000);
+    ereg (CB_CLRCMP_MSK, 0xFFFFFFFF);
+    pack0 (CB_BLEND_RED, 4);
+    efloat(0.0f); // CB_BLEND_RED = 0x0000A105
+    efloat(0.0f); // CB_BLEND_GREEN = 0x0000A106
+    efloat(0.0f); // CB_BLEND_BLUE = 0x0000A107
+    efloat(0.0f); // CB_BLEND_ALPHA = 0x0000A108
+
+    ereg (SPI_FOG_FUNC_BIAS + (39 << 2), 0x00000001); // SPI_FOG_FUNC_BIAS + 0x27
+    ereg (SPI_FOG_FUNC_BIAS + (40 << 2), 0x00000001); // SPI_FOG_FUNC_BIAS + 0x28
+    ereg (SPI_FOG_FUNC_BIAS + (41 << 2), 0x00000001); // SPI_FOG_FUNC_BIAS + 0x29
+    ereg (SPI_FOG_FUNC_BIAS + (42 << 2), 0x00000001); // SPI_FOG_FUNC_BIAS + 0x2A
+    ereg (SPI_FOG_FUNC_BIAS + (43 << 2), 0x00000001); // SPI_FOG_FUNC_BIAS + 0x2B
+    ereg (SPI_FOG_FUNC_BIAS + (44 << 2), 0x00000001); // SPI_FOG_FUNC_BIAS + 0x2C
+    ereg (SPI_FOG_FUNC_BIAS + (45 << 2), 0x00000001); // SPI_FOG_FUNC_BIAS + 0x2D
+    ereg (SPI_FOG_FUNC_BIAS + (46 << 2), 0x00000001); // SPI_FOG_FUNC_BIAS + 0x2E
+
+    ereg (CB_COLOR_CONTROL, (0xcc << ROP3_shift)); // copy
+
+    // VS alu constants
+    set_alu_consts(0x400 >> 2, sizeof(vs_alu_consts) / SQ_ALU_CONSTANT_offset, vs_alu_consts);
+
+    draw_auto(adapt, &draw_conf);
+
+    wait_3d_idle_clean();
+
+}
+
+void pm4play_clear_test_r7xx(adapter_t *adapt)
+{ /*
+ * R7xx variant of the PM4 clear test; pm4play_clear_test() calls it when
+ * adapt->chipset is above CHIPSET_RV670.
+ *
+ * What the code below does, in order:
+ *  - defines ALU constants plus a small vertex and pixel shader as dword arrays,
+ *  - uploads both shaders and configures a depth target whose base is
+ *    adapt->display_gpu,
+ *  - writes a long, fixed sequence of registers via ereg()/pack0()/pack3(),
+ *  - issues one auto-indexed RECTLIST draw (4 indices, 1 instance) with
+ *    Z test and Z write enabled (ZFUNC 7) while CB_TARGET_MASK is 0,
+ *  - waits via wait_3d_idle_clean().
+ *
+ * adapt: adapter to run on; color_pitch, color_height and display_gpu are
+ *        read directly here, the rest is handed to the helper calls.
+ *
+ * NOTE(review): every ALU_DWORD1_OP2 below uses the CHIPSET_R600 layout and
+ * carries a FIXME saying the arguments are wrong for CHIPSET_RV770, so the
+ * opcode/OMOD names in those macros may not describe what the hardware
+ * actually executes -- confirm against the ISA documentation.
+ * NOTE(review): several registers are written more than once with different
+ * values; this looks like a replayed command capture, so keep the order
+ * unless that is confirmed to be unnecessary.
+ */
+ int surface_w = adapt->color_pitch; // used as the depth surface width (db_conf.w)
+ int surface_h = adapt->color_height; // depth surface height; also feeds DB_PREFETCH_LIMIT
+ uint64_t vs_addr, ps_addr; // values returned by upload() for the two shaders
+ draw_config_t draw_conf;
+ cb_config_t cb_conf; // NOTE(review): zeroed below but never passed to any call in this function
+ db_config_t db_conf;
+ shader_config_t vs_conf, ps_conf;
+ tex_sampler_t tex_samp;
+ int i; // loop index for the CB_BLEND<i>_CONTROL writes
+
+ // 4 DWs per const
+ float ps_alu_consts[] = { // one constant; passed to set_alu_consts() at offset 0x0 >> 2
+ 0.6509804129600525, 0.7921569347381592, 0.9411765336990356, 0.0,
+ };
+
+ float vs_alu_consts[] = { // 12 constants; passed to set_alu_consts() at offset 0x400 >> 2. Appears to be 4 groups of 3 rows (x/y/z/w, an RGB-like row, a zero row) -- TODO confirm
+ 0.0, 0.0, 0.0, 1.0,
+ 0.03921568766236305, 0.1411764770746231, 0.41568630933761597, 0.0,
+ 0.0, 0.0, 0.0, 0.0,
+ 256.0, 0.0, 0.0, 1.0,
+ 0.6509804129600525, 0.7921569347381592, 0.9411765336990356, 0.0,
+ 0.0, 0.0, 0.0, 0.0,
+ 256.0, 256.0, 0.0, 1.0,
+ 0.6509804129600525, 0.7921569347381592, 0.9411765336990356, 0.0,
+ 0.0, 0.0, 0.0, 0.0,
+ 0.0, 256.0, 0.0, 1.0,
+ 0.03921568766236305, 0.1411764770746231, 0.41568630933761597, 0.0,
+ 0.0, 0.0, 0.0, 0.0,
+ };
+
+ uint32_t vs[] = { // vertex shader: 3 two-dword CF entries, then 8 two-dword ALU slots (7 instructions + 1 literal pair)
+ CF_ALU_DWORD0(ADDR(3), // CF entry 0: ALU clause; 3 matches the number of two-dword CF entries preceding the ALU code
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_NOP)),
+ CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ COUNT(8),
+ USES_WATERFALL(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1)),
+ CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), // CF entry 1: position export from GPR 0, swizzle X/Y/Z/W
+ TYPE(SQ_EXPORT_POS),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0)),
+ CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ R6xx_ELEM_LOOP(0),
+ BURST_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1)),
+ CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), // CF entry 2: parameter export 0 with all four channels set to SQ_SEL_MASK; END_OF_PROGRAM(1)
+ TYPE(SQ_EXPORT_PARAM),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0)),
+ CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_MASK),
+ SRC_SEL_Y(SQ_SEL_MASK),
+ SRC_SEL_Z(SQ_SEL_MASK),
+ SRC_SEL_W(SQ_SEL_MASK),
+ R6xx_ELEM_LOOP(0),
+ BURST_COUNT(0),
+ END_OF_PROGRAM(1),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1)),
+ ALU_DWORD0(SRC0_SEL(0), // ALU slot 0: source SEL 0 element X; WRITE_MASK(0), so no GPR is written
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MAX_INT),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0)),
+ ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_PS), // ALU slot 1: MULADD with sources PS, literal X, PS; destination GPR 127 element X
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_LITERAL),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1)),
+ ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PS),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_X),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(127),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0)),
+ 0x40000000, // ALU slot 2, first literal dword: IEEE-754 bit pattern of 2.0f
+ 0x00000000, // second literal dword: 0
+ ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_PV), // ALU slot 3: reads PV element X; WRITE_MASK(0)
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_SETNE),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0)),
+ ALU_DWORD0(SRC0_SEL(256), // ALU slots 4-7: SEL 256 with RELATIVE addressing, one channel each (X, Y, Z, W) written to GPR 0
+ SRC0_REL(RELATIVE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_M4),
+ ALU_INST(SQ_OP2_INST_SETE_DX10),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0)),
+ ALU_DWORD0(SRC0_SEL(256),
+ SRC0_REL(RELATIVE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_M4),
+ ALU_INST(SQ_OP2_INST_SETE_DX10),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0)),
+ ALU_DWORD0(SRC0_SEL(256),
+ SRC0_REL(RELATIVE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_M4),
+ ALU_INST(SQ_OP2_INST_SETE_DX10),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0)),
+ ALU_DWORD0(SRC0_SEL(256),
+ SRC0_REL(RELATIVE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_M4),
+ ALU_INST(SQ_OP2_INST_SETE_DX10),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0)),
+ };
+
+ uint32_t ps[] = { // pixel shader: 2 two-dword CF entries, then 4 two-dword ALU slots
+ CF_ALU_DWORD0(ADDR(2), // CF entry 0: ALU clause; 2 matches the number of two-dword CF entries preceding the ALU code
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_NOP)),
+ CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ COUNT(4),
+ USES_WATERFALL(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1)),
+ CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), // CF entry 1: pixel export of GPR 0 to MRT0; END_OF_PROGRAM(1)
+ TYPE(SQ_EXPORT_PIXEL),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0)),
+ CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ R6xx_ELEM_LOOP(0),
+ BURST_COUNT(0),
+ END_OF_PROGRAM(1),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1)),
+ ALU_DWORD0(SRC0_SEL(256), // ALU slots 0-3: SEL 256 with ABSOLUTE addressing, one channel each (X, Y, Z, W) written to GPR 0
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_M4),
+ ALU_INST(SQ_OP2_INST_SETE_DX10),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0)),
+ ALU_DWORD0(SRC0_SEL(256),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_M4),
+ ALU_INST(SQ_OP2_INST_SETE_DX10),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0)),
+ ALU_DWORD0(SRC0_SEL(256),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_M4),
+ ALU_INST(SQ_OP2_INST_SETE_DX10),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0)),
+ ALU_DWORD0(SRC0_SEL(256),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_M4),
+ ALU_INST(SQ_OP2_INST_SETE_DX10),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0)),
+ };
+
+ printf ("\n* PM4 Play Clear Test r7xx\n\n");
+ // Zero the config structs; draw_conf is not zeroed, its fields are assigned one by one below
+ memset(&vs_conf, 0, sizeof(shader_config_t));
+ memset(&ps_conf, 0, sizeof(shader_config_t));
+ memset(&cb_conf, 0, sizeof(cb_config_t));
+ memset(&db_conf, 0, sizeof(db_config_t));
+ memset(&tex_samp, 0, sizeof(tex_sampler_t));
+ // Draw description: one auto-indexed rectangle list, 4 indices, 1 instance
+ draw_conf.prim_type = DI_PT_RECTLIST;
+ draw_conf.num_indices = 4;
+ draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX;
+ draw_conf.index_type = DI_INDEX_SIZE_32_BIT;
+ draw_conf.num_instances = 1;
+ // Upload both shaders; the last argument (0 vs 4096) is presumably a byte offset -- TODO confirm against upload()
+ vs_addr = upload (adapt, vs, sizeof(vs), 0);
+ ps_addr = upload (adapt, ps, sizeof(ps), 4096);
+
+ printf("win clear r7xx: requires tiled framebuffer!!!\n");
+
+ start_3d(adapt);
+
+ wait_3d_idle_clean();
+
+ cp_set_surface_sync();
+
+ set_default_state(adapt);
+ // Depth target: base is the display buffer address, dimensions come from the color surface
+ db_conf.base = adapt->display_gpu;
+ db_conf.w = surface_w;
+ db_conf.h = surface_h;
+ db_conf.format = 6; // raw enum value; its meaning is not visible in this part of the file
+ db_conf.array_mode = 4; //2;
+ set_depth_target(adapt, &db_conf);
+
+ ereg (DB_PREFETCH_LIMIT, ((surface_h / 8) - 1));
+ ereg (DB_HTILE_DATA_BASE, 0);
+ ereg (DB_HTILE_SURFACE, 0);
+ ereg (DB_PRELOAD_CONTROL, 0);
+
+ set_generic_scissor(0, 0, 8192, 8192);
+ // Vertex shader: 1 GPR, stack size 0
+ vs_conf.shader_addr = vs_addr;
+ vs_conf.num_gprs = 1;
+ vs_conf.stack_size = 0;
+ vs_setup(adapt, &vs_conf);
+
+ ereg (SPI_VS_OUT_CONFIG, (VS_PER_COMPONENT_bit |
+ ((1 - 1) << VS_EXPORT_COUNT_shift)));
+
+ ereg (VGT_VERTEX_REUSE_BLOCK_CNTL, (14 << VTX_REUSE_DEPTH_shift));
+ ereg (VGT_OUT_DEALLOC_CNTL, (16 << DEALLOC_DIST_shift));
+ // Pixel shader: 1 GPR, stack size 0
+ ps_conf.shader_addr = ps_addr;
+ ps_conf.num_gprs = 1;
+ ps_conf.stack_size = 0;
+ ps_conf.uncached_first_inst = 1;
+ ps_conf.export_mode = 2;
+ ps_setup(adapt, &ps_conf);
+
+ ereg (SPI_PS_IN_CONTROL_0, (((1 - 1) << NUM_INTERP_shift) |
+ ((1 << BARYC_SAMPLE_CNTL_shift))));
+ ereg (SPI_PS_IN_CONTROL_1, 0);
+
+ ereg (CB_SHADER_MASK, OUTPUT0_ENABLE_mask);
+ ereg (R7xx_CB_SHADER_CONTROL, RT0_ENABLE_bit);
+
+ ereg (DB_SHADER_CONTROL, (1 << Z_ORDER_shift)); /* EARLY_Z_THEN_LATE_Z */
+
+ wait_3d_idle();
+ // Each IT_PRED_EXEC packet below carries one dword (0x01/0x02/0x04/0x08 in the top byte, 3 in the low bits) ahead of a register write; presumably a device-select mask plus a dword count -- TODO confirm against the PM4 packet documentation
+ pack3 (IT_PRED_EXEC, 1);
+ e32 (0x01000003);
+ ereg (DB_DEBUG, 0);
+ pack3 (IT_PRED_EXEC, 1);
+ e32 (0x01000003);
+
+ ereg (PA_SC_MULTI_CHIP_CNTL, 0);
+ pack3 (IT_PRED_EXEC, 1);
+ e32 (0x02000003);
+ ereg (PA_SC_MULTI_CHIP_CNTL, 0);
+ pack3 (IT_PRED_EXEC, 1);
+ e32 (0x04000003);
+ ereg (PA_SC_MULTI_CHIP_CNTL, 0);
+ pack3 (IT_PRED_EXEC, 1);
+ e32 (0x08000003);
+ ereg (PA_SC_MULTI_CHIP_CNTL, 0);
+
+ ereg (R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, VS_PC_LIMIT_ENABLE_bit);
+ // VGT_* registers; see the per-line notes for what each zero selects
+ ereg (VGT_OUTPUT_PATH_CNTL, (0 << PATH_SELECT_shift)); // VGT_OUTPATH_VTX_REUSE
+ ereg (VGT_PRIMITIVEID_EN, 0);
+ ereg (VGT_MULTI_PRIM_IB_RESET_EN, 0); // IB-based prims disabled
+ ereg (VGT_STRMOUT_EN, 0); // strmout off
+ ereg (VGT_REUSE_OFF, 0); // reuse on
+ ereg (VGT_VTX_CNT_EN, 0); // auto index gen off
+ ereg (VGT_STRMOUT_BUFFER_EN, 0); // all strmout buffers disabled
+ ereg (VGT_GS_MODE, 0); // GS off
+ ereg (VGT_MAX_VTX_INDX, draw_conf.num_indices);
+ ereg (VGT_MIN_VTX_INDX, 0);
+ ereg (VGT_INDX_OFFSET, 0);
+ ereg (VGT_INSTANCE_STEP_RATE_0, 0);
+ ereg (VGT_INSTANCE_STEP_RATE_1, 0);
+ ereg (VGT_MULTI_PRIM_IB_RESET_INDX, 0);
+ // Scissors: the screen scissor call gets 0,0,256,256; window and viewport scissors get 0,0,8192,8192
+ set_screen_scissor(0, 0, 256, 256);
+ ereg (PA_SC_WINDOW_OFFSET, 0);
+ set_window_scissor(0, 0, 8192, 8192);
+
+ pack3 (IT_PRED_EXEC, 1);
+ e32 (0x01000003);
+ ereg (PA_SC_MODE_CNTL, (FORCE_EOV_CNTDWN_ENABLE_bit |
+ FORCE_EOV_REZ_ENABLE_bit));
+ pack3 (IT_PRED_EXEC, 1);
+ e32 (0x02000003);
+ ereg (PA_SC_MODE_CNTL, (FORCE_EOV_CNTDWN_ENABLE_bit |
+ FORCE_EOV_REZ_ENABLE_bit));
+ pack3 (IT_PRED_EXEC, 1);
+ e32 (0x04000003);
+ ereg (PA_SC_MODE_CNTL, (FORCE_EOV_CNTDWN_ENABLE_bit |
+ FORCE_EOV_REZ_ENABLE_bit));
+ pack3 (IT_PRED_EXEC, 1);
+ e32 (0x08000003);
+ ereg (PA_SC_MODE_CNTL, (FORCE_EOV_CNTDWN_ENABLE_bit |
+ FORCE_EOV_REZ_ENABLE_bit));
+ ereg (PA_SC_AA_CONFIG, 0);
+ ereg (PA_SC_AA_MASK, 0xFFFFFFFF);
+ ereg (PA_SC_CLIPRECT_RULE, 0x0000FFFF);
+
+ set_vport_scissor(0, 0, 0, 8192, 8192);
+
+ ereg (PA_SC_VPORT_ZMIN_0, 0x00000000); // 0.0
+ ereg (PA_SC_VPORT_ZMAX_0, 0x3F800000); // 1.0
+ ereg (PA_CL_CLIP_CNTL, CLIP_DISABLE_bit);
+ ereg (PA_SU_SC_MODE_CNTL, FACE_bit);
+ ereg (PA_CL_VTE_CNTL, (VTX_XY_FMT_bit |
+ VTX_Z_FMT_bit));
+
+ ereg (PA_SU_POINT_SIZE, 0);
+ ereg (PA_SU_POINT_MINMAX, 0);
+ pack0 (0x00028C0C, 4); // 0x00028C0C == 0xA303 << 2, i.e. the first of the four registers named on the next lines
+ efloat(1.0f); // PA_CL_GB_VERT_CLIP_ADJ = 0x0000A303
+ efloat(1.0f); // PA_CL_GB_VERT_DISC_ADJ = 0x0000A304
+ efloat(1.0f); // PA_CL_GB_HORZ_CLIP_ADJ = 0x0000A305
+ efloat(1.0f); // PA_CL_GB_HORZ_DISC_ADJ = 0x0000A306
+ ereg (PA_SC_LINE_STIPPLE, 0);
+ ereg (PA_SC_MPASS_PS_CNTL, 0);
+ ereg (PA_SU_LINE_CNTL, (8 << PA_SU_LINE_CNTL__WIDTH_shift)); // 16.0 fixed
+ ereg (PA_SU_VTX_CNTL, ((PIX_CENTER_bit) |
+ (2 << PA_SU_VTX_CNTL__ROUND_MODE_shift)));
+ ereg (PA_SU_POLY_OFFSET_CLAMP, 0);
+ ereg (PA_CL_VS_OUT_CNTL, 0);
+ // SPI interpolation / fog registers, all written as 0
+ ereg (SPI_INTERP_CONTROL_0, 0);
+ ereg (SPI_INPUT_Z, 0);
+ ereg (SPI_FOG_CNTL, 0);
+ ereg (SPI_FOG_FUNC_SCALE, 0x00000000);
+ ereg (SPI_FOG_FUNC_BIAS, 0x00000000);
+
+ ereg (SX_ALPHA_TEST_CONTROL, 0);
+ ereg (SX_ALPHA_REF, 0x00000000); // 0.0
+ // Depth state: Z test and Z write enabled with ZFUNC 7; stencil reference/mask and clear values are 0
+ ereg (DB_ALPHA_TO_MASK, 0);
+ ereg (DB_DEPTH_CONTROL, ((Z_ENABLE_bit) |
+ (Z_WRITE_ENABLE_bit) |
+ (7 << ZFUNC_shift))); // FRAG_ALWAYS
+ ereg (DB_RENDER_OVERRIDE, DISABLE_VIEWPORT_CLAMP_bit);
+ ereg (DB_SHADER_CONTROL, DUAL_EXPORT_ENABLE_bit); // NOTE(review): second write to DB_SHADER_CONTROL in this function (the first one sets Z_ORDER)
+ ereg (DB_STENCILREFMASK, 0);
+ ereg (DB_STENCILREFMASK_BF, 0);
+ ereg (DB_DEPTH_CLEAR, 0); // float 0.0
+ ereg (DB_STENCIL_CLEAR, 0);
+ ereg (DB_RENDER_CONTROL, ZPASS_INCREMENT_DISABLE_bit);
+ ereg (DB_SRESULTS_COMPARE_STATE0, 0);
+ ereg (DB_SRESULTS_COMPARE_STATE1, 0);
+ // Color state: target mask and shader mask are both written as 0 here
+ ereg (CB_TARGET_MASK, 0);
+ ereg (CB_SHADER_MASK, 0); // NOTE(review): CB_SHADER_MASK was written with OUTPUT0_ENABLE_mask earlier in this function
+ ereg (CB_CLRCMP_CONTROL, ((0 << CLRCMP_FCN_SRC_shift) |
+ (0 << CLRCMP_FCN_DST_shift) |
+ (2 << CLRCMP_FCN_SEL_shift)));
+ ereg (CB_CLRCMP_DST, 0x00000000);
+ ereg (CB_CLRCMP_MSK, 0xFFFFFFFF);
+ pack0 (0x00028414, 4); // 0x00028414 == 0xA105 << 2; the r6xx clear test passes the symbol CB_BLEND_RED at this point
+ efloat(0.0f); // CB_BLEND_RED = 0x0000A105
+ efloat(0.0f); // CB_BLEND_GREEN = 0x0000A106
+ efloat(0.0f); // CB_BLEND_BLUE = 0x0000A107
+ efloat(0.0f); // CB_BLEND_ALPHA = 0x0000A108
+
+ for (i = 0; i < CB_BLEND0_CONTROL_num; i++) // same value for every CB_BLEND<i>_CONTROL register, addressed 4 bytes apart
+ ereg (CB_BLEND0_CONTROL + (i << 2), (1 << COLOR_SRCBLEND_shift)); // BLEND_ONE
+ ereg (CB_COLOR_CONTROL, ((1 << SPECIAL_OP_shift) | // SPECIAL_DISABLE
+ (0xcc << ROP3_shift))); // copy
+
+ ereg (R7xx_PA_SC_EDGERULE, 0xAAAAAAAA);
+ ereg (R7xx_CB_SHADER_CONTROL, RT0_ENABLE_bit); // repeats the earlier R7xx_CB_SHADER_CONTROL write with the same value
+
+ ereg (R7xx_SPI_THREAD_GROUPING, (1 << PS_GROUPING_shift));
+
+ tex_samp.id = 0; // sampler 0; every other field stays zero from the memset above
+ set_tex_sampler (adapt, &tex_samp);
+
+ // PS alu constants
+ set_alu_consts(0x0 >> 2, sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts);
+
+ // VS alu constants
+ set_alu_consts(0x400 >> 2, sizeof(vs_alu_consts) / SQ_ALU_CONSTANT_offset, vs_alu_consts);
+ // Issue the draw described by draw_conf, then wait via wait_3d_idle_clean()
+ draw_auto(adapt, &draw_conf);
+
+ wait_3d_idle_clean();
+
+}
+
+void pm4play_clear_test(adapter_t *adapt)
+{
+    /*
+     * Run the clear test that matches the chip generation: chipsets up to
+     * and including CHIPSET_RV670 take the r6xx path, anything above it
+     * takes the r7xx path.
+     */
+    if (adapt->chipset > CHIPSET_RV670) {
+	pm4play_clear_test_r7xx(adapt);
+	return;
+    }
+
+    pm4play_clear_test_r6xx(adapt);
+}
+
+/*
+ * Blit test
+ */
+
+void pm4play_blit_test_r6xx(adapter_t *adapt)
+{
+ int surface_w = adapt->color_pitch;
+ int surface_h = adapt->color_height;
+ int tex_w = TEX_WIDTH;
+ int tex_h = TEX_HEIGHT;
+ int tex_p = TEX_PITCH;
+ uint32_t *tex;
+ uint64_t vs_addr, ps_addr, tex_addr;
+ draw_config_t draw_conf;
+ cb_config_t cb_conf;
+ tex_resource_t tex_res;
+ tex_sampler_t tex_samp;
+ shader_config_t vs_conf, ps_conf;
+
+ // 4 DWs per const
+ float vs_alu_consts[] = {
+ 3.0, 22.0, 0.0, 1.0, // X, Y, Z, W
+ 0.0, 0.0, 0.0, 0.0, // S, T
+ 0.0, 0.0, 0.0, 0.0,
+ 259.0, 22.0, 0.0, 1.0, // X, Y, Z, W
+ 1.0, 0.0, 0.0, 0.0, // S, T
+ 0.0, 0.0, 0.0, 0.0,
+ 259.0, 278.0, 0.0, 1.0, // X, Y, Z, W
+ 1.0, 1.0, 0.0, 0.0, // S, T
+ 0.0, 0.0, 0.0, 0.0,
+ 3.0, 278.0, 0.0, 1.0, // X, Y, Z, W
+ 0.0, 1.0, 0.0, 0.0, // S, T
+ 0.0, 0.0, 0.0, 0.0,
+ };
+
+ uint32_t vs[] = {
+ CF_ALU_DWORD0(ADDR(3),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_NOP)),
+ CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ COUNT(12),
+ USES_WATERFALL(1),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1)),
+ CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
+ TYPE(SQ_EXPORT_POS),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0)),
+ CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ R6xx_ELEM_LOOP(0),
+ BURST_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1)),
+ CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
+ TYPE(SQ_EXPORT_PARAM),
+ RW_GPR(1),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0)),
+ CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ R6xx_ELEM_LOOP(0),
+ BURST_COUNT(0),
+ END_OF_PROGRAM(1),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(0)),
+ // (float) R0.X -> PS
+ ALU_DWORD0(SRC0_SEL(0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INT_TO_FLT),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0)),
+ // MAD PS SRC_LITERAL.X PS -> R127.X, PV
+ ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_PS),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_LITERAL),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1)),
+ ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PS),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_X),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(127),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0)),
+ // SRC_LITERAL.X (2.0)
+ 0x40000000,
+ // SRC_LITERAL.Y (0.0)
+ 0x00000000,
+ // MOVA_FLOOR PV.X -> AR
+ ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_PV),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOVA_FLOOR),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0)),
+ // MOV (C[0+AR].X) -> R0.X
+ ALU_DWORD0(SRC0_SEL(256), // cfile
+ SRC0_REL(RELATIVE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0)),
+ // MOV (C[0+AR].Y) -> R0.Y
+ ALU_DWORD0(SRC0_SEL(256), //cfile
+ SRC0_REL(RELATIVE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0)),
+ // MOV (C[0+AR].Z) -> R0.Z
+ ALU_DWORD0(SRC0_SEL(256), //cfile
+ SRC0_REL(RELATIVE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0)),
+ // MOV (C[0+AR].W) -> R0.W
+ ALU_DWORD0(SRC0_SEL(256), //cfile
+ SRC0_REL(RELATIVE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0)),
+ // MOV (C[1+AR].X) -> R1.X
+ ALU_DWORD0(SRC0_SEL(257), //cfile
+ SRC0_REL(RELATIVE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0)),
+ // MOV (C[1+AR].Y) -> R1.Y
+ ALU_DWORD0(SRC0_SEL(257), //cfile
+ SRC0_REL(RELATIVE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0)),
+ // MOV (C[1+AR].Z) -> R1.Z
+ ALU_DWORD0(SRC0_SEL(257), //cfile
+ SRC0_REL(RELATIVE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0)),
+ // MOV (C[1+AR].W) -> R1.W
+ ALU_DWORD0(SRC0_SEL(257), //cfile
+ SRC0_REL(RELATIVE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1)),
+ ALU_DWORD1_OP2(CHIPSET_R600,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0)),
+ };
+
+ uint32_t ps[] = {
+ CF_DWORD0(ADDR(2)),
+ CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ COUNT(1),
+ CALL_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(1),
+ CF_INST(SQ_CF_INST_TEX),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1)),
+ CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+ TYPE(SQ_EXPORT_PIXEL),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0)),
+ CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ R6xx_ELEM_LOOP(0),
+ BURST_COUNT(0),
+ END_OF_PROGRAM(1),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1)),
+ TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+ BC_FRAC_MODE(0),
+ FETCH_WHOLE_QUAD(0),
+ RESOURCE_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ R7xx_ALT_CONST(0)),
+ TEX_DWORD1(DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_Z),
+ DST_SEL_W(SQ_SEL_W),
+ LOD_BIAS(0),
+ COORD_TYPE_X(TEX_NORMALIZED),
+ COORD_TYPE_Y(TEX_NORMALIZED),
+ COORD_TYPE_Z(TEX_NORMALIZED),
+ COORD_TYPE_W(TEX_NORMALIZED)),
+ TEX_DWORD2(OFFSET_X(0),
+ OFFSET_Y(0),
+ OFFSET_Z(0),
+ SAMPLER_ID(0),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_X),
+ SRC_SEL_W(SQ_SEL_X)),
+ TEX_DWORD_PAD,
+ };
+
+ printf ("\n* PM4 Play Blit Test r6xx\n\n");
+
+ memset(&vs_conf, 0, sizeof(shader_config_t));
+ memset(&ps_conf, 0, sizeof(shader_config_t));
+ memset(&cb_conf, 0, sizeof(cb_config_t));
+ memset(&tex_res, 0, sizeof(tex_resource_t));
+ memset(&tex_samp, 0, sizeof(tex_sampler_t));
+
+ draw_conf.prim_type = DI_PT_RECTLIST;
+ draw_conf.num_indices = 4;
+ draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX;
+ draw_conf.index_type = DI_INDEX_SIZE_32_BIT;
+ draw_conf.num_instances = 1;
+
+ vs_addr = upload (adapt, vs, sizeof(vs), 0);
+ ps_addr = upload (adapt, ps, sizeof(ps), 4096);
+
+ tex = create_sample_texture (tex_w, tex_h, tex_p);
+ tex_addr = upload (adapt, tex, tex_p * tex_h * sizeof(uint32_t), 8192);
+ free (tex);
+
+ printf("win blit r6xx\n");
+
+ start_3d(adapt);
+
+ wait_3d_idle_clean();
+
+ cp_set_surface_sync();
+
+ set_default_state(adapt);
+
+ ereg (CB_TARGET_MASK, TARGET0_ENABLE_mask);
+ ereg (CB_SHADER_MASK, OUTPUT0_ENABLE_mask);
+
+
+ cb_conf.id = 0;
+ cb_conf.w = surface_w;
+ cb_conf.h = surface_h;
+ cb_conf.base = adapt->display_gpu;
+ cb_conf.format = 0x1a;
+ cb_conf.comp_swap = 1;
+ cb_conf.source_format = 1;
+ cb_conf.blend_clamp = 1;
+ set_render_target(adapt, &cb_conf);
+
+ ereg (DB_DEPTH_INFO, 0);
+
+ tex_res.id = 0;
+ tex_res.w = tex_w;
+ tex_res.h = tex_h;
+ tex_res.pitch = tex_p;
+ tex_res.depth = 0;
+ tex_res.dim = 1; //2D
+ tex_res.base = tex_addr;
+ tex_res.mip_base = tex_addr;
+ tex_res.format = FMT_8_8_8_8;
+ tex_res.request_size = 2;
+ tex_res.dst_sel_x = 0;
+ tex_res.dst_sel_y = 1;
+ tex_res.dst_sel_z = 2;
+ tex_res.dst_sel_w = 3;
+ tex_res.last_level = 1;
+ tex_res.perf_modulation = 1;
+ set_tex_resource(adapt, &tex_res);
+
+ set_generic_scissor(0, 0, 8192, 8192);
+
+ vs_conf.shader_addr = vs_addr;
+ vs_conf.num_gprs = 2;
+ vs_conf.stack_size = 0;
+ vs_setup(adapt, &vs_conf);
+
+ ereg (SPI_VS_OUT_CONFIG, (VS_PER_COMPONENT_bit |
+ ((1 - 1) << VS_EXPORT_COUNT_shift)));
+
+ ereg (VGT_VERTEX_REUSE_BLOCK_CNTL, (14 << VTX_REUSE_DEPTH_shift));
+ ereg (VGT_OUT_DEALLOC_CNTL, (16 << DEALLOC_DIST_shift));
+
+ ereg (SPI_VS_OUT_ID_0, 0);
+
+ ps_conf.shader_addr = ps_addr;
+ ps_conf.num_gprs = 1;
+ ps_conf.stack_size = 0;
+ ps_conf.uncached_first_inst = 1;
+ ps_conf.export_mode = 2;
+ ps_setup(adapt, &ps_conf);
+
+ ereg (SPI_PS_IN_CONTROL_0, (((2 - 1) << NUM_INTERP_shift) |
+ ((1 << BARYC_SAMPLE_CNTL_shift)) |
+ PERSP_GRADIENT_ENA_bit));
+ ereg (SPI_PS_IN_CONTROL_1, 0);
+
+ ereg (CB_SHADER_MASK, OUTPUT0_ENABLE_mask);
+
+ ereg (DB_SHADER_CONTROL, (1 << Z_ORDER_shift)); /* EARLY_Z_THEN_LATE_Z */
+
+ ereg (SPI_PS_INPUT_CNTL_0, (1 << DEFAULT_VAL_shift));
+
+ tex_samp.id = 0;
+ set_tex_sampler (adapt, &tex_samp);
+
+ wait_3d_idle();
+
+ pack3 (IT_PRED_EXEC, 1);
+ e32 (0x01000003);
+ ereg (DB_DEBUG, PREZ_MUST_WAIT_FOR_POSTZ_DONE);
+ pack3 (IT_PRED_EXEC, 1);
+ e32 (0x01000003);
+ ereg (PA_SC_MULTI_CHIP_CNTL, 0);
+ pack3 (IT_PRED_EXEC, 1);
+ e32 (0x02000003);
+ ereg (PA_SC_MULTI_CHIP_CNTL, 0);
+ pack3 (IT_PRED_EXEC, 1);
+ e32 (0x04000003);
+ ereg (PA_SC_MULTI_CHIP_CNTL, 0);
+ pack3 (IT_PRED_EXEC, 1);
+ e32 (0x08000003);
+ ereg (PA_SC_MULTI_CHIP_CNTL, 0);
+
+ ereg (VGT_OUTPUT_PATH_CNTL, (0 << PATH_SELECT_shift)); // VGT_OUTPATH_VTX_REUSE
+ ereg (VGT_PRIMITIVEID_EN, 0);
+ ereg (VGT_MULTI_PRIM_IB_RESET_EN, 0); // IB-based prims disabled
+ ereg (VGT_STRMOUT_EN, 0); // strmout off
+ ereg (VGT_REUSE_OFF, 0); // reuse on
+ ereg (VGT_VTX_CNT_EN, 0); // auto index gen off
+ ereg (VGT_STRMOUT_BUFFER_EN, 0); // all strmout buffers disabled
+ ereg (VGT_GS_MODE, 0); // GS off
+ ereg (VGT_MAX_VTX_INDX, draw_conf.num_indices);
+ ereg (VGT_MIN_VTX_INDX, 0);
+ ereg (VGT_INDX_OFFSET, 0);
+ ereg (VGT_INSTANCE_STEP_RATE_0, 0);
+ ereg (VGT_INSTANCE_STEP_RATE_1, 0);
+ ereg (VGT_MULTI_PRIM_IB_RESET_INDX, 0);
+
+ set_screen_scissor(0, 0, 8192, 8192);
+ ereg (PA_SC_WINDOW_OFFSET, 0);
+ set_window_scissor(0, 0, 8192, 8192);
+
+ pack3 (IT_PRED_EXEC, 1);
+ e32 (0x01000003);
+ ereg (PA_SC_MODE_CNTL, FORCE_EOV_CNTDWN_ENABLE_bit);
+ pack3 (IT_PRED_EXEC, 1);
+ e32 (0x02000003);
+ ereg (PA_SC_MODE_CNTL, FORCE_EOV_CNTDWN_ENABLE_bit);
+ pack3 (IT_PRED_EXEC, 1);
+ e32 (0x04000003);
+ ereg (PA_SC_MODE_CNTL, FORCE_EOV_CNTDWN_ENABLE_bit);
+ pack3 (IT_PRED_EXEC, 1);
+ e32 (0x08000003);
+ ereg (PA_SC_MODE_CNTL, FORCE_EOV_CNTDWN_ENABLE_bit);
+ ereg (PA_SC_AA_CONFIG, 0);
+ ereg (PA_SC_AA_MASK, 0xFFFFFFFF);
+ ereg (PA_SC_CLIPRECT_RULE, 0x0000FFFF);
+
+ set_vport_scissor(0, 0, 0, 8192, 8192);
+
+ ereg (PA_SC_VPORT_ZMIN_0, 0x00000000); // 0.0
+ ereg (PA_SC_VPORT_ZMAX_0, 0x3F800000); // 1.0
+ ereg (PA_CL_CLIP_CNTL, CLIP_DISABLE_bit);
+ ereg (PA_SU_SC_MODE_CNTL, FACE_bit);
+ ereg (PA_CL_VTE_CNTL, (VTX_XY_FMT_bit |
+ VTX_Z_FMT_bit));
+
+ ereg (PA_SU_POINT_SIZE, 0);
+ ereg (PA_SU_POINT_MINMAX, 0);
+ pack0 (0x00028C0C, 4);
+ efloat(1.0f); // PA_CL_GB_VERT_CLIP_ADJ = 0x0000A303
+ efloat(1.0f); // PA_CL_GB_VERT_DISC_ADJ = 0x0000A304
+ efloat(1.0f); // PA_CL_GB_HORZ_CLIP_ADJ = 0x0000A305
+ efloat(1.0f); // PA_CL_GB_HORZ_DISC_ADJ = 0x0000A306
+ ereg (PA_SC_LINE_STIPPLE, 0);
+ ereg (PA_SC_MPASS_PS_CNTL, 0);
+ ereg (PA_SU_LINE_CNTL, (8 << PA_SU_LINE_CNTL__WIDTH_shift)); // 16.0 fixed
+ ereg (PA_SU_VTX_CNTL, ((PIX_CENTER_bit) |
+ (2 << PA_SU_VTX_CNTL__ROUND_MODE_shift)));
+ ereg (PA_SU_POLY_OFFSET_CLAMP, 0);
+ ereg (PA_CL_VS_OUT_CNTL, 0);
+
+ ereg (SPI_INTERP_CONTROL_0, 0);
+
+ ereg (DB_ALPHA_TO_MASK, 0);
+ ereg (DB_DEPTH_CONTROL, BACKFACE_ENABLE_bit);
+ ereg (DB_RENDER_OVERRIDE, 0);
+ ereg (DB_SHADER_CONTROL, DUAL_EXPORT_ENABLE_bit);
+ ereg (DB_STENCILREFMASK, 0);
+ ereg (DB_STENCILREFMASK_BF, 0);
+ ereg (DB_DEPTH_CLEAR, 0); // float 0.0
+ ereg (DB_STENCIL_CLEAR, 0);
+ ereg (DB_RENDER_CONTROL, ZPASS_INCREMENT_DISABLE_bit);
+ ereg (DB_SRESULTS_COMPARE_STATE0, 0);
+ ereg (DB_SRESULTS_COMPARE_STATE1, 0);
+
+ ereg (CB_TARGET_MASK, TARGET0_ENABLE_mask);
+ ereg (CB_SHADER_MASK, OUTPUT0_ENABLE_mask);
+ ereg (CB_CLRCMP_CONTROL, ((0 << CLRCMP_FCN_SRC_shift) |
+ (0 << CLRCMP_FCN_DST_shift) |
+ (2 << CLRCMP_FCN_SEL_shift)));
+ ereg (CB_CLRCMP_DST, 0x00000000);
+ ereg (CB_CLRCMP_MSK, 0xFFFFFFFF);
+
+ ereg (SPI_FOG_FUNC_BIAS + (39 << 2), 0x00000001); // SPI_FOG_FUNC_BIAS + 0x27
+ ereg (SPI_FOG_FUNC_BIAS + (40 << 2), 0x00000001); // SPI_FOG_FUNC_BIAS + 0x28
+ ereg (SPI_FOG_FUNC_BIAS + (41 << 2), 0x00000001); // SPI_FOG_FUNC_BIAS + 0x29
+ ereg (SPI_FOG_FUNC_BIAS + (42 << 2), 0x00000001); // SPI_FOG_FUNC_BIAS + 0x2A
+ ereg (SPI_FOG_FUNC_BIAS + (43 << 2), 0x00000001); // SPI_FOG_FUNC_BIAS + 0x2B
+ ereg (SPI_FOG_FUNC_BIAS + (44 << 2), 0x00000001); // SPI_FOG_FUNC_BIAS + 0x2C
+ ereg (SPI_FOG_FUNC_BIAS + (45 << 2), 0x00000001); // SPI_FOG_FUNC_BIAS + 0x2D
+ ereg (SPI_FOG_FUNC_BIAS + (46 << 2), 0x00000001); // SPI_FOG_FUNC_BIAS + 0x2E
+
+ ereg (CB_COLOR_CONTROL, (0xcc << ROP3_shift)); // copy
+
+ // VS alu constants
+ set_alu_consts(0x400 >> 2, sizeof(vs_alu_consts) / SQ_ALU_CONSTANT_offset, vs_alu_consts);
+
+ draw_auto(adapt, &draw_conf);
+
+ wait_3d_idle_clean();
+
+}
+
+void pm4play_blit_test_r7xx(adapter_t *adapt) // textured RECTLIST blit test; pm4play_blit_test() picks this variant when adapt->chipset > CHIPSET_RV670
+{
+ int surface_w = adapt->color_pitch; // the color pitch is used as the render target width (cb_conf.w below)
+ int surface_h = adapt->color_height;
+ int tex_w = TEX_WIDTH;
+ int tex_h = TEX_HEIGHT;
+ int tex_p = TEX_PITCH;
+ uint32_t *tex; // buffer returned by create_sample_texture(); free()d right after upload
+ uint64_t vs_addr, ps_addr, tex_addr; // values returned by upload() for the two shaders and the texture
+ draw_config_t draw_conf;
+ cb_config_t cb_conf;
+ tex_resource_t tex_res;
+ tex_sampler_t tex_samp;
+ shader_config_t vs_conf, ps_conf;
+ int i;
+
+ // 4 DWs per const
+ float ps_alu_consts[] = { // a single constant, loaded at 0x0 >> 2 near the end of the function
+ 0.6509804129600525, 0.7921569347381592, 0.9411765336990356, 0.0,
+ };
+
+ float vs_alu_consts[] = { // 12 constants, loaded at 0x400 >> 2; looks like 4 groups of 3 (x/y/z/w, a second vector, zeros) — TODO confirm meaning
+ 3.0, 22.0, 0.0, 1.0,
+ 0.0, 0.0, 0.41568630933761597, 0.0,
+ 0.0, 0.0, 0.0, 0.0,
+ 259.0, 22.0, 0.0, 1.0,
+ 1.0, 0.0, 0.9411765336990356, 0.0,
+ 0.0, 0.0, 0.0, 0.0,
+ 259.0, 278.0, 0.0, 1.0,
+ 1.0, 1.0, 0.9411765336990356, 0.0,
+ 0.0, 0.0, 0.0, 0.0,
+ 3.0, 278.0, 0.0, 1.0,
+ 0.0, 1.0, 0.41568630933761597, 0.0,
+ 0.0, 0.0, 0.0, 0.0,
+ };
+
+ uint32_t vs[] = { // vertex shader: one ALU clause, then a position export and a parameter export
+ CF_ALU_DWORD0(ADDR(3),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_NOP)),
+ CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ COUNT(12),
+ USES_WATERFALL(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1)),
+ CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), // export GPR 0 as position
+ TYPE(SQ_EXPORT_POS),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0)),
+ CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ R6xx_ELEM_LOOP(0),
+ BURST_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1)),
+ CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), // export GPR 1 as parameter 0; this export carries END_OF_PROGRAM(1)
+ TYPE(SQ_EXPORT_PARAM),
+ RW_GPR(1),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0)),
+ CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ R6xx_ELEM_LOOP(0),
+ BURST_COUNT(0),
+ END_OF_PROGRAM(1),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(0)),
+ ALU_DWORD0(SRC0_SEL(0), // first ALU instruction of the clause
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MAX_INT),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0)),
+ ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_PS),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_LITERAL),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1)),
+ ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PS),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_X),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(127),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0)),
+ 0x40000000, // raw dword pair right after the SQ_ALU_SRC_LITERAL use; 0x40000000 is the bit pattern of 2.0f — presumably the literal operand, TODO confirm
+ 0x00000000,
+ ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_PV),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_SETNE),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0)),
+ ALU_DWORD0(SRC0_SEL(256), // start of a 4-slot group (X, Y, Z, W; LAST set on W) that writes GPR 0 from relative source select 256
+ SRC0_REL(RELATIVE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_M4),
+ ALU_INST(SQ_OP2_INST_SETE_DX10),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0)),
+ ALU_DWORD0(SRC0_SEL(256),
+ SRC0_REL(RELATIVE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_M4),
+ ALU_INST(SQ_OP2_INST_SETE_DX10),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0)),
+ ALU_DWORD0(SRC0_SEL(256),
+ SRC0_REL(RELATIVE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_M4),
+ ALU_INST(SQ_OP2_INST_SETE_DX10),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0)),
+ ALU_DWORD0(SRC0_SEL(256),
+ SRC0_REL(RELATIVE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_M4),
+ ALU_INST(SQ_OP2_INST_SETE_DX10),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0)),
+ ALU_DWORD0(SRC0_SEL(257), // second 4-slot group: same pattern, writes GPR 1 from relative source select 257
+ SRC0_REL(RELATIVE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_M4),
+ ALU_INST(SQ_OP2_INST_SETE_DX10),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0)),
+ ALU_DWORD0(SRC0_SEL(257),
+ SRC0_REL(RELATIVE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_M4),
+ ALU_INST(SQ_OP2_INST_SETE_DX10),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0)),
+ ALU_DWORD0(SRC0_SEL(257),
+ SRC0_REL(RELATIVE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_M4),
+ ALU_INST(SQ_OP2_INST_SETE_DX10),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0)),
+ ALU_DWORD0(SRC0_SEL(257),
+ SRC0_REL(RELATIVE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1)),
+ ALU_DWORD1_OP2(CHIPSET_R600, /* FIXME: Macro arguments are wrong for CHIPSET_RV770 */
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_M4),
+ ALU_INST(SQ_OP2_INST_SETE_DX10),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0)),
+ };
+
+ uint32_t ps[] = { // pixel shader: one TEX clause holding a single SAMPLE, then an export of GPR 0 to MRT0
+ CF_DWORD0(ADDR(2)),
+ CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ COUNT(1),
+ CALL_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(1),
+ CF_INST(SQ_CF_INST_TEX),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1)),
+ CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+ TYPE(SQ_EXPORT_PIXEL),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0)),
+ CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ R6xx_ELEM_LOOP(0),
+ BURST_COUNT(0),
+ END_OF_PROGRAM(1),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1)),
+ TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), // sample resource 0 / sampler 0 with coordinates from GPR 0, result back into GPR 0
+ BC_FRAC_MODE(0),
+ FETCH_WHOLE_QUAD(0),
+ RESOURCE_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ R7xx_ALT_CONST(0)),
+ TEX_DWORD1(DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_Z),
+ DST_SEL_W(SQ_SEL_W),
+ LOD_BIAS(0),
+ COORD_TYPE_X(TEX_NORMALIZED),
+ COORD_TYPE_Y(TEX_NORMALIZED),
+ COORD_TYPE_Z(TEX_NORMALIZED),
+ COORD_TYPE_W(TEX_NORMALIZED)),
+ TEX_DWORD2(OFFSET_X(0),
+ OFFSET_Y(0),
+ OFFSET_Z(0),
+ SAMPLER_ID(0),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_X), // the Z and W source selects both reuse X
+ SRC_SEL_W(SQ_SEL_X)),
+ TEX_DWORD_PAD,
+ };
+
+ printf ("\n* PM4 Play Blit Test r7xx\n\n");
+
+ memset(&vs_conf, 0, sizeof(shader_config_t)); // config structs start zeroed; draw_conf is not cleared, its fields are assigned one by one below
+ memset(&ps_conf, 0, sizeof(shader_config_t));
+ memset(&cb_conf, 0, sizeof(cb_config_t));
+ memset(&tex_res, 0, sizeof(tex_resource_t));
+ memset(&tex_samp, 0, sizeof(tex_sampler_t));
+
+ draw_conf.prim_type = DI_PT_RECTLIST; // one rectangle-list draw with 4 auto-generated indices
+ draw_conf.num_indices = 4;
+ draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX;
+ draw_conf.index_type = DI_INDEX_SIZE_32_BIT;
+ draw_conf.num_instances = 1;
+
+ vs_addr = upload (adapt, vs, sizeof(vs), 0); // last argument differs per upload (0, 4096, 8192) — presumably a byte offset into the upload area, TODO confirm
+ ps_addr = upload (adapt, ps, sizeof(ps), 4096);
+
+ tex = create_sample_texture (tex_w, tex_h, tex_p);
+ tex_addr = upload (adapt, tex, tex_p * tex_h * sizeof(uint32_t), 8192); // size is pitch * height 32-bit texels
+ free (tex);
+
+ printf("win blit r7xx\n");
+
+ start_3d(adapt);
+
+ wait_3d_idle_clean();
+
+ cp_set_surface_sync();
+
+ set_default_state(adapt);
+
+ ereg (CB_TARGET_MASK, TARGET0_ENABLE_mask);
+ ereg (CB_SHADER_MASK, OUTPUT0_ENABLE_mask);
+
+ cb_conf.id = 0; // render target 0 at adapt->display_gpu
+ cb_conf.w = surface_w;
+ cb_conf.h = surface_h;
+ cb_conf.base = adapt->display_gpu;
+ cb_conf.format = 0x1a; // NOTE(review): magic number — presumably the 8_8_8_8 color format code, confirm against the register header
+ cb_conf.comp_swap = 1;
+ cb_conf.source_format = 1;
+ cb_conf.blend_clamp = 1;
+ set_render_target(adapt, &cb_conf);
+
+ ereg (DB_DEPTH_INFO, 0);
+
+ tex_res.id = 0; // texture resource 0 describes the sample texture uploaded above
+ tex_res.w = tex_w;
+ tex_res.h = tex_h;
+ tex_res.pitch = tex_p;
+ tex_res.depth = 0;
+ tex_res.dim = 1; //2D
+ tex_res.base = tex_addr;
+ tex_res.mip_base = tex_addr; // same address as base
+ tex_res.format = FMT_8_8_8_8;
+ tex_res.request_size = 2;
+ tex_res.dst_sel_x = 0;
+ tex_res.dst_sel_y = 1;
+ tex_res.dst_sel_z = 2;
+ tex_res.dst_sel_w = 3;
+ tex_res.last_level = 1;
+ tex_res.perf_modulation = 1;
+ set_tex_resource(adapt, &tex_res);
+
+ set_generic_scissor(0, 0, 8192, 8192);
+
+ vs_conf.shader_addr = vs_addr;
+ vs_conf.num_gprs = 2; // the vertex shader above writes GPR 0 and GPR 1
+ vs_conf.stack_size = 0;
+ vs_setup(adapt, &vs_conf);
+
+ ereg (SPI_VS_OUT_CONFIG, (VS_PER_COMPONENT_bit |
+ ((1 - 1) << VS_EXPORT_COUNT_shift))); // field is written as (count - 1) with count = 1
+
+ ereg (VGT_VERTEX_REUSE_BLOCK_CNTL, (14 << VTX_REUSE_DEPTH_shift));
+ ereg (VGT_OUT_DEALLOC_CNTL, (16 << DEALLOC_DIST_shift));
+
+ ereg (SPI_VS_OUT_ID_0, 0x00000000);
+
+ ps_conf.shader_addr = ps_addr;
+ ps_conf.num_gprs = 1; // the pixel shader above only touches GPR 0
+ ps_conf.stack_size = 0;
+ ps_conf.uncached_first_inst = 1;
+ ps_conf.export_mode = 2;
+ ps_setup(adapt, &ps_conf);
+
+ ereg (SPI_PS_IN_CONTROL_0, (((1 - 1) << NUM_INTERP_shift) |
+ (1 << BARYC_SAMPLE_CNTL_shift)));
+ ereg (SPI_PS_IN_CONTROL_1, 0);
+
+ ereg (CB_SHADER_MASK, OUTPUT0_ENABLE_mask);
+ ereg (R7xx_CB_SHADER_CONTROL, RT0_ENABLE_bit);
+
+ ereg (DB_SHADER_CONTROL, (1 << Z_ORDER_shift)); /* EARLY_Z_THEN_LATE_Z */
+
+ ereg (SPI_PS_INPUT_CNTL_0, (1 << DEFAULT_VAL_shift));
+
+ tex_samp.id = 0; // sampler 0; every other field stays zero from the memset
+ set_tex_sampler (adapt, &tex_samp);
+
+ wait_3d_idle();
+
+ pack3 (IT_PRED_EXEC, 1); // each IT_PRED_EXEC packet carries one dword; its top byte steps through 0x01/0x02/0x04/0x08 below — presumably a device-select mask, TODO confirm
+ e32 (0x01000003);
+
+ ereg (DB_DEBUG, 0);
+
+ pack3 (IT_PRED_EXEC, 1);
+ e32 (0x01000003);
+
+ ereg (PA_SC_MULTI_CHIP_CNTL, 0);
+ pack3 (IT_PRED_EXEC, 1);
+ e32 (0x02000003);
+ ereg (PA_SC_MULTI_CHIP_CNTL, 0);
+ pack3 (IT_PRED_EXEC, 1);
+ e32 (0x04000003);
+ ereg (PA_SC_MULTI_CHIP_CNTL, 0);
+ pack3 (IT_PRED_EXEC, 1);
+ e32 (0x08000003);
+ ereg (PA_SC_MULTI_CHIP_CNTL, 0);
+
+
+ ereg (R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, VS_PC_LIMIT_ENABLE_bit);
+
+ ereg (VGT_OUTPUT_PATH_CNTL, (0 << PATH_SELECT_shift)); // VGT_OUTPATH_VTX_REUSE
+ ereg (VGT_PRIMITIVEID_EN, 0);
+ ereg (VGT_MULTI_PRIM_IB_RESET_EN, 0); // IB-based prims disabled
+ ereg (VGT_STRMOUT_EN, 0); // strmout off
+ ereg (VGT_REUSE_OFF, 0); // reuse on
+ ereg (VGT_VTX_CNT_EN, 0); // auto index gen off
+ ereg (VGT_STRMOUT_BUFFER_EN, 0); // all strmout buffers disabled
+ ereg (VGT_GS_MODE, 0); // GS off
+ ereg (VGT_MAX_VTX_INDX, draw_conf.num_indices);
+ ereg (VGT_MIN_VTX_INDX, 0);
+ ereg (VGT_INDX_OFFSET, 0);
+ ereg (VGT_INSTANCE_STEP_RATE_0, 0);
+ ereg (VGT_INSTANCE_STEP_RATE_1, 0);
+ ereg (VGT_MULTI_PRIM_IB_RESET_INDX, 0);
+
+ set_screen_scissor(0, 0, 8192, 8192); // screen, window and viewport scissors all get the same 8192 x 8192 rectangle
+ ereg (PA_SC_WINDOW_OFFSET, 0);
+ set_window_scissor(0, 0, 8192, 8192);
+
+ pack3 (IT_PRED_EXEC, 1); // same PA_SC_MODE_CNTL value is written once per IT_PRED_EXEC selector
+ e32 (0x01000003);
+ ereg (PA_SC_MODE_CNTL, (FORCE_EOV_CNTDWN_ENABLE_bit |
+ FORCE_EOV_REZ_ENABLE_bit));
+ pack3 (IT_PRED_EXEC, 1);
+ e32 (0x02000003);
+ ereg (PA_SC_MODE_CNTL, (FORCE_EOV_CNTDWN_ENABLE_bit |
+ FORCE_EOV_REZ_ENABLE_bit));
+ pack3 (IT_PRED_EXEC, 1);
+ e32 (0x04000003);
+ ereg (PA_SC_MODE_CNTL, (FORCE_EOV_CNTDWN_ENABLE_bit |
+ FORCE_EOV_REZ_ENABLE_bit));
+ pack3 (IT_PRED_EXEC, 1);
+ e32 (0x08000003);
+ ereg (PA_SC_MODE_CNTL, (FORCE_EOV_CNTDWN_ENABLE_bit |
+ FORCE_EOV_REZ_ENABLE_bit));
+ ereg (PA_SC_AA_CONFIG, 0);
+ ereg (PA_SC_AA_MASK, 0xFFFFFFFF);
+ ereg (PA_SC_CLIPRECT_RULE, 0x0000FFFF);
+
+ set_vport_scissor(0, 0, 0, 8192, 8192);
+
+ ereg (PA_SC_VPORT_ZMIN_0, 0x00000000); // 0.0
+ ereg (PA_SC_VPORT_ZMAX_0, 0x3F800000); // 1.0
+ ereg (PA_CL_CLIP_CNTL, CLIP_DISABLE_bit);
+ ereg (PA_SU_SC_MODE_CNTL, FACE_bit);
+ ereg (PA_CL_VTE_CNTL, (VTX_XY_FMT_bit |
+ VTX_Z_FMT_bit));
+
+ ereg (PA_SU_POINT_SIZE, 0);
+ ereg (PA_SU_POINT_MINMAX, 0);
+ pack0 (0x00028C0C, 4); // presumably starts a 4-dword write at register 0x28C0C; the four efloat() values follow — TODO confirm pack0 semantics
+ efloat(1.0f); // PA_CL_GB_VERT_CLIP_ADJ = 0x0000A303
+ efloat(1.0f); // PA_CL_GB_VERT_DISC_ADJ = 0x0000A304
+ efloat(1.0f); // PA_CL_GB_HORZ_CLIP_ADJ = 0x0000A305
+ efloat(1.0f); // PA_CL_GB_HORZ_DISC_ADJ = 0x0000A306
+ ereg (PA_SC_LINE_STIPPLE, 0);
+ ereg (PA_SC_MPASS_PS_CNTL, 0);
+ ereg (PA_SU_LINE_CNTL, (8 << PA_SU_LINE_CNTL__WIDTH_shift)); // 16.0 fixed
+ ereg (PA_SU_VTX_CNTL, ((PIX_CENTER_bit) |
+ (2 << PA_SU_VTX_CNTL__ROUND_MODE_shift)));
+ ereg (PA_SU_POLY_OFFSET_CLAMP, 0);
+ ereg (PA_CL_VS_OUT_CNTL, 0);
+
+ ereg (SPI_INTERP_CONTROL_0, 0);
+ ereg (SPI_INPUT_Z, 0);
+ ereg (SPI_FOG_CNTL, 0);
+ ereg (SPI_FOG_FUNC_SCALE, 0x00000000);
+ ereg (SPI_FOG_FUNC_BIAS, 0x00000000);
+
+ ereg (SX_ALPHA_TEST_CONTROL, 0);
+ ereg (SX_ALPHA_REF, 0x00000000); // 0.0
+
+ ereg (DB_ALPHA_TO_MASK, 0);
+ ereg (DB_DEPTH_CONTROL, BACKFACE_ENABLE_bit);
+ ereg (DB_RENDER_OVERRIDE, 0);
+ ereg (DB_SHADER_CONTROL, DUAL_EXPORT_ENABLE_bit); // second write to DB_SHADER_CONTROL in this function; the earlier one set only the Z_ORDER field
+ ereg (DB_STENCILREFMASK, 0);
+ ereg (DB_STENCILREFMASK_BF, 0);
+ ereg (DB_DEPTH_CLEAR, 0); // float 0.0
+ ereg (DB_STENCIL_CLEAR, 0);
+ ereg (DB_RENDER_CONTROL, ZPASS_INCREMENT_DISABLE_bit);
+ ereg (DB_SRESULTS_COMPARE_STATE0, 0);
+ ereg (DB_SRESULTS_COMPARE_STATE1, 0);
+
+ ereg (CB_TARGET_MASK, TARGET0_ENABLE_mask); // CB_TARGET_MASK / CB_SHADER_MASK are written again with the same values as earlier
+ ereg (CB_SHADER_MASK, OUTPUT0_ENABLE_mask);
+ ereg (CB_CLRCMP_CONTROL, ((0 << CLRCMP_FCN_SRC_shift) |
+ (0 << CLRCMP_FCN_DST_shift) |
+ (2 << CLRCMP_FCN_SEL_shift)));
+ ereg (CB_CLRCMP_DST, 0x00000000);
+ ereg (CB_CLRCMP_MSK, 0xFFFFFFFF);
+ pack0 (0x00028414, 4); // presumably starts a 4-dword write at register 0x28414; the four efloat() values follow — TODO confirm pack0 semantics
+ efloat(0.0f); // CB_BLEND_RED = 0x0000A105
+ efloat(0.0f); // CB_BLEND_GREEN = 0x0000A106
+ efloat(0.0f); // CB_BLEND_BLUE = 0x0000A107
+ efloat(0.0f); // CB_BLEND_ALPHA = 0x0000A108
+
+ for (i = 0; i < CB_BLEND0_CONTROL_num; i++) // same value for each of the CB_BLEND0_CONTROL_num registers, spaced 4 bytes apart
+ ereg (CB_BLEND0_CONTROL + (i << 2), (1 << COLOR_SRCBLEND_shift)); // BLEND_ONE
+ ereg (CB_COLOR_CONTROL, (0xcc << ROP3_shift)); // copy
+
+ ereg (R7xx_PA_SC_EDGERULE, 0xAAAAAAAA);
+ ereg (R7xx_CB_SHADER_CONTROL, RT0_ENABLE_bit);
+
+ ereg (R7xx_SPI_THREAD_GROUPING, (1 << PS_GROUPING_shift));
+
+ // PS alu constants
+ set_alu_consts(0x0 >> 2, sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts);
+
+ // VS alu constants
+ set_alu_consts(0x400 >> 2, sizeof(vs_alu_consts) / SQ_ALU_CONSTANT_offset, vs_alu_consts);
+
+ draw_auto(adapt, &draw_conf); // submit the draw described by draw_conf
+
+ wait_3d_idle_clean();
+
+}
+
+void pm4play_blit_test(adapter_t *adapt)
+{
+    /*
+     * Run the blit test variant that matches the adapter: chipset ids above
+     * CHIPSET_RV670 use the r7xx code path, every other id the r6xx one.
+     */
+    if (adapt->chipset > CHIPSET_RV670)
+        pm4play_blit_test_r7xx(adapt);
+    else
+        pm4play_blit_test_r6xx(adapt);
+}
diff --git a/r600_reg.h b/r600_reg.h
new file mode 100644
index 0000000..43b2d45
--- /dev/null
+++ b/r600_reg.h
@@ -0,0 +1,126 @@
+/*
+ * RadeonHD R6xx, R7xx Register documentation
+ *
+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
+ * Copyright (C) 2008-2009 Matthias Hopf
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _R600_REG_H_
+#define _R600_REG_H_
+
+/*
+ * Register definitions
+ */
+
+#include "r600_reg_auto_r6xx.h"
+#include "r600_reg_r6xx.h"
+#include "r600_reg_r7xx.h"
+
+
+/* Start (_offset) and end (_end) addresses of the SET_* register windows */
+enum {
+    SET_CONFIG_REG_offset  = 0x08000,
+    SET_CONFIG_REG_end     = 0x0ac00,
+
+    SET_CONTEXT_REG_offset = 0x28000,
+    SET_CONTEXT_REG_end    = 0x29000,
+
+    SET_ALU_CONST_offset   = 0x30000,
+    SET_ALU_CONST_end      = 0x32000,
+
+    SET_RESOURCE_offset    = 0x38000,
+    SET_RESOURCE_end       = 0x3c000,
+
+    /* The remaining windows are contiguous: each starts at the previous _end. */
+    SET_SAMPLER_offset     = SET_RESOURCE_end,
+    SET_SAMPLER_end        = 0x3cff0,
+
+    SET_CTL_CONST_offset   = SET_SAMPLER_end,
+    SET_CTL_CONST_end      = 0x3e200,
+
+    SET_LOOP_CONST_offset  = SET_CTL_CONST_end,
+    SET_LOOP_CONST_end     = 0x3e380,
+
+    SET_BOOL_CONST_offset  = SET_LOOP_CONST_end,
+    SET_BOOL_CONST_end     = 0x40000,
+};
+
+/* Single-bit flags for the packet3 IT_SURFACE_BASE_UPDATE command (bits 0..14) */
+enum {
+    DEPTH_BASE    = 0x0001,
+    /* color bases 0..7 occupy bits 1..8 */
+    COLOR0_BASE   = 0x0002,
+    COLOR1_BASE   = 0x0004,
+    COLOR2_BASE   = 0x0008,
+    COLOR3_BASE   = 0x0010,
+    COLOR4_BASE   = 0x0020,
+    COLOR5_BASE   = 0x0040,
+    COLOR6_BASE   = 0x0080,
+    COLOR7_BASE   = 0x0100,
+    /* stream-out bases 0..3 occupy bits 9..12 */
+    STRMOUT_BASE0 = 0x0200,
+    STRMOUT_BASE1 = 0x0400,
+    STRMOUT_BASE2 = 0x0800,
+    STRMOUT_BASE3 = 0x1000,
+    /* coher bases 0..1 occupy bits 13..14 */
+    COHER_BASE0   = 0x2000,
+    COHER_BASE1   = 0x4000,
+};
+
+/* CP packet types: the type number (0..3) placed in bits 31:30 */
+enum {
+    RADEON_CP_PACKET0 = (0 << 30),
+    RADEON_CP_PACKET1 = (1 << 30),
+    RADEON_CP_PACKET2 = (2u << 30),    /* unsigned operand: the value does not fit a signed 32-bit int */
+    RADEON_CP_PACKET3 = (3u << 30),
+};
+
+/* Packet3 command codes (IT_*), listed in ascending numeric order */
+enum {
+    IT_NOP                   = 0x10,
+    IT_INDIRECT_BUFFER_END   = 0x17,
+
+    IT_SET_PREDICATION       = 0x20,
+    IT_REG_RMW               = 0x21,
+    IT_COND_EXEC             = 0x22,
+    IT_PRED_EXEC             = 0x23,
+    IT_START_3D_CMDBUF       = 0x24,
+    IT_DRAW_INDEX_2          = 0x27,
+    IT_CONTEXT_CONTROL       = 0x28,
+    IT_DRAW_INDEX_IMMD_BE    = 0x29,
+    IT_INDEX_TYPE            = 0x2a,
+    IT_DRAW_INDEX            = 0x2b,
+    IT_DRAW_INDEX_AUTO       = 0x2d,
+    IT_DRAW_INDEX_IMMD       = 0x2e,
+    IT_NUM_INSTANCES         = 0x2f,
+
+    IT_INDIRECT_BUFFER       = 0x32,
+    IT_STRMOUT_BUFFER_UPDATE = 0x34,
+    IT_INDIRECT_BUFFER_MP    = 0x38,
+    IT_MEM_SEMAPHORE         = 0x39,
+    IT_MPEG_INDEX            = 0x3a,
+    IT_WAIT_REG_MEM          = 0x3c,
+    IT_MEM_WRITE             = 0x3d,
+
+    IT_CP_INTERRUPT          = 0x40,
+    IT_SURFACE_SYNC          = 0x43,
+    IT_ME_INITIALIZE         = 0x44,
+    IT_COND_WRITE            = 0x45,
+    IT_EVENT_WRITE           = 0x46,
+    IT_EVENT_WRITE_EOP       = 0x47,
+
+    IT_ONE_REG_WRITE         = 0x57,
+
+    IT_SET_CONFIG_REG        = 0x68,
+    IT_SET_CONTEXT_REG       = 0x69,
+    IT_SET_ALU_CONST         = 0x6a,
+    IT_SET_BOOL_CONST        = 0x6b,
+    IT_SET_LOOP_CONST        = 0x6c,
+    IT_SET_RESOURCE          = 0x6d,
+    IT_SET_SAMPLER           = 0x6e,
+    IT_SET_CTL_CONST         = 0x6f,
+
+    IT_SURFACE_BASE_UPDATE   = 0x73,
+};
+
+#endif
diff --git a/r600_reg_auto_r6xx.h b/r600_reg_auto_r6xx.h
new file mode 100644
index 0000000..9d5aa3c
--- /dev/null
+++ b/r600_reg_auto_r6xx.h
@@ -0,0 +1,3087 @@
+/*
+ * RadeonHD R6xx, R7xx Register documentation
+ *
+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
+ * Copyright (C) 2008-2009 Matthias Hopf
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _AUTOREGS
+#define _AUTOREGS
+
+enum {
+
+ VGT_VTX_VECT_EJECT_REG = 0x000088b0,
+ PRIM_COUNT_mask = 0x3ff << 0,
+ PRIM_COUNT_shift = 0,
+ VGT_LAST_COPY_STATE = 0x000088c0,
+ SRC_STATE_ID_mask = 0x07 << 0,
+ SRC_STATE_ID_shift = 0,
+ DST_STATE_ID_mask = 0x07 << 16,
+ DST_STATE_ID_shift = 16,
+ VGT_CACHE_INVALIDATION = 0x000088c4,
+ CACHE_INVALIDATION_mask = 0x03 << 0,
+ CACHE_INVALIDATION_shift = 0,
+ VC_ONLY = 0x00,
+ TC_ONLY = 0x01,
+ VC_AND_TC = 0x02,
+ VS_NO_EXTRA_BUFFER_bit = 1 << 5,
+ VGT_GS_PER_ES = 0x000088c8,
+ VGT_ES_PER_GS = 0x000088cc,
+ VGT_GS_VERTEX_REUSE = 0x000088d4,
+ VERT_REUSE_mask = 0x1f << 0,
+ VERT_REUSE_shift = 0,
+ VGT_MC_LAT_CNTL = 0x000088d8,
+ MC_TIME_STAMP_RES_mask = 0x03 << 0,
+ MC_TIME_STAMP_RES_shift = 0,
+ X_0_992_MAX_LATENCY = 0x00,
+ X_0_496_MAX_LATENCY = 0x01,
+ X_0_248_MAX_LATENCY = 0x02,
+ X_0_124_MAX_LATENCY = 0x03,
+ VGT_GS_PER_VS = 0x000088e8,
+ GS_PER_VS_mask = 0x0f << 0,
+ GS_PER_VS_shift = 0,
+ VGT_CNTL_STATUS = 0x000088f0,
+ VGT_OUT_INDX_BUSY_bit = 1 << 0,
+ VGT_OUT_BUSY_bit = 1 << 1,
+ VGT_PT_BUSY_bit = 1 << 2,
+ VGT_TE_BUSY_bit = 1 << 3,
+ VGT_VR_BUSY_bit = 1 << 4,
+ VGT_GRP_BUSY_bit = 1 << 5,
+ VGT_DMA_REQ_BUSY_bit = 1 << 6,
+ VGT_DMA_BUSY_bit = 1 << 7,
+ VGT_GS_BUSY_bit = 1 << 8,
+ VGT_BUSY_bit = 1 << 9,
+ VGT_PRIMITIVE_TYPE = 0x00008958,
+ VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask = 0x3f << 0,
+ VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift = 0,
+ DI_PT_NONE = 0x00,
+ DI_PT_POINTLIST = 0x01,
+ DI_PT_LINELIST = 0x02,
+ DI_PT_LINESTRIP = 0x03,
+ DI_PT_TRILIST = 0x04,
+ DI_PT_TRIFAN = 0x05,
+ DI_PT_TRISTRIP = 0x06,
+ DI_PT_UNUSED_0 = 0x07,
+ DI_PT_UNUSED_1 = 0x08,
+ DI_PT_UNUSED_2 = 0x09,
+ DI_PT_LINELIST_ADJ = 0x0a,
+ DI_PT_LINESTRIP_ADJ = 0x0b,
+ DI_PT_TRILIST_ADJ = 0x0c,
+ DI_PT_TRISTRIP_ADJ = 0x0d,
+ DI_PT_UNUSED_3 = 0x0e,
+ DI_PT_UNUSED_4 = 0x0f,
+ DI_PT_TRI_WITH_WFLAGS = 0x10,
+ DI_PT_RECTLIST = 0x11,
+ DI_PT_LINELOOP = 0x12,
+ DI_PT_QUADLIST = 0x13,
+ DI_PT_QUADSTRIP = 0x14,
+ DI_PT_POLYGON = 0x15,
+ DI_PT_2D_COPY_RECT_LIST_V0 = 0x16,
+ DI_PT_2D_COPY_RECT_LIST_V1 = 0x17,
+ DI_PT_2D_COPY_RECT_LIST_V2 = 0x18,
+ DI_PT_2D_COPY_RECT_LIST_V3 = 0x19,
+ DI_PT_2D_FILL_RECT_LIST = 0x1a,
+ DI_PT_2D_LINE_STRIP = 0x1b,
+ DI_PT_2D_TRI_STRIP = 0x1c,
+ VGT_INDEX_TYPE = 0x0000895c,
+ INDEX_TYPE_mask = 0x03 << 0,
+ INDEX_TYPE_shift = 0,
+ DI_INDEX_SIZE_16_BIT = 0x00,
+ DI_INDEX_SIZE_32_BIT = 0x01,
+ VGT_STRMOUT_BUFFER_FILLED_SIZE_0 = 0x00008960,
+ VGT_STRMOUT_BUFFER_FILLED_SIZE_1 = 0x00008964,
+ VGT_STRMOUT_BUFFER_FILLED_SIZE_2 = 0x00008968,
+ VGT_STRMOUT_BUFFER_FILLED_SIZE_3 = 0x0000896c,
+ VGT_NUM_INDICES = 0x00008970,
+ VGT_NUM_INSTANCES = 0x00008974,
+ PA_CL_CNTL_STATUS = 0x00008a10,
+ CL_BUSY_bit = 1 << 31,
+ PA_CL_ENHANCE = 0x00008a14,
+ CLIP_VTX_REORDER_ENA_bit = 1 << 0,
+ NUM_CLIP_SEQ_mask = 0x03 << 1,
+ NUM_CLIP_SEQ_shift = 1,
+ CLIPPED_PRIM_SEQ_STALL_bit = 1 << 3,
+ VE_NAN_PROC_DISABLE_bit = 1 << 4,
+ PA_SU_CNTL_STATUS = 0x00008a50,
+ SU_BUSY_bit = 1 << 31,
+ PA_SC_LINE_STIPPLE_STATE = 0x00008b10,
+ CURRENT_PTR_mask = 0x0f << 0,
+ CURRENT_PTR_shift = 0,
+ CURRENT_COUNT_mask = 0xff << 8,
+ CURRENT_COUNT_shift = 8,
+ PA_SC_MULTI_CHIP_CNTL = 0x00008b20,
+ LOG2_NUM_CHIPS_mask = 0x07 << 0,
+ LOG2_NUM_CHIPS_shift = 0,
+ MULTI_CHIP_TILE_SIZE_mask = 0x03 << 3,
+ MULTI_CHIP_TILE_SIZE_shift = 3,
+ X_16_X_16_PIXEL_TILE_PER_CHIP = 0x00,
+ X_32_X_32_PIXEL_TILE_PER_CHIP = 0x01,
+ X_64_X_64_PIXEL_TILE_PER_CHIP = 0x02,
+ X_128X128_PIXEL_TILE_PER_CHIP = 0x03,
+ CHIP_TILE_X_LOC_mask = 0x07 << 5,
+ CHIP_TILE_X_LOC_shift = 5,
+ CHIP_TILE_Y_LOC_mask = 0x07 << 8,
+ CHIP_TILE_Y_LOC_shift = 8,
+ CHIP_SUPER_TILE_B_bit = 1 << 11,
+ PA_SC_AA_SAMPLE_LOCS_2S = 0x00008b40,
+ S0_X_mask = 0x0f << 0,
+ S0_X_shift = 0,
+ S0_Y_mask = 0x0f << 4,
+ S0_Y_shift = 4,
+ S1_X_mask = 0x0f << 8,
+ S1_X_shift = 8,
+ S1_Y_mask = 0x0f << 12,
+ S1_Y_shift = 12,
+ PA_SC_AA_SAMPLE_LOCS_4S = 0x00008b44,
+/* S0_X_mask = 0x0f << 0, */
+/* S0_X_shift = 0, */
+/* S0_Y_mask = 0x0f << 4, */
+/* S0_Y_shift = 4, */
+/* S1_X_mask = 0x0f << 8, */
+/* S1_X_shift = 8, */
+/* S1_Y_mask = 0x0f << 12, */
+/* S1_Y_shift = 12, */
+ S2_X_mask = 0x0f << 16,
+ S2_X_shift = 16,
+ S2_Y_mask = 0x0f << 20,
+ S2_Y_shift = 20,
+ S3_X_mask = 0x0f << 24,
+ S3_X_shift = 24,
+ S3_Y_mask = 0x0f << 28,
+ S3_Y_shift = 28,
+ PA_SC_AA_SAMPLE_LOCS_8S_WD0 = 0x00008b48,
+/* S0_X_mask = 0x0f << 0, */
+/* S0_X_shift = 0, */
+/* S0_Y_mask = 0x0f << 4, */
+/* S0_Y_shift = 4, */
+/* S1_X_mask = 0x0f << 8, */
+/* S1_X_shift = 8, */
+/* S1_Y_mask = 0x0f << 12, */
+/* S1_Y_shift = 12, */
+/* S2_X_mask = 0x0f << 16, */
+/* S2_X_shift = 16, */
+/* S2_Y_mask = 0x0f << 20, */
+/* S2_Y_shift = 20, */
+/* S3_X_mask = 0x0f << 24, */
+/* S3_X_shift = 24, */
+/* S3_Y_mask = 0x0f << 28, */
+/* S3_Y_shift = 28, */
+ PA_SC_AA_SAMPLE_LOCS_8S_WD1 = 0x00008b4c,
+ S4_X_mask = 0x0f << 0,
+ S4_X_shift = 0,
+ S4_Y_mask = 0x0f << 4,
+ S4_Y_shift = 4,
+ S5_X_mask = 0x0f << 8,
+ S5_X_shift = 8,
+ S5_Y_mask = 0x0f << 12,
+ S5_Y_shift = 12,
+ S6_X_mask = 0x0f << 16,
+ S6_X_shift = 16,
+ S6_Y_mask = 0x0f << 20,
+ S6_Y_shift = 20,
+ S7_X_mask = 0x0f << 24,
+ S7_X_shift = 24,
+ S7_Y_mask = 0x0f << 28,
+ S7_Y_shift = 28,
+ PA_SC_CNTL_STATUS = 0x00008be0,
+ MPASS_OVERFLOW_bit = 1 << 30,
+ PA_SC_ENHANCE = 0x00008bf0,
+ FORCE_EOV_MAX_CLK_CNT_mask = 0xfff << 0,
+ FORCE_EOV_MAX_CLK_CNT_shift = 0,
+ FORCE_EOV_MAX_TILE_CNT_mask = 0xfff << 12,
+ FORCE_EOV_MAX_TILE_CNT_shift = 12,
+ SQ_CONFIG = 0x00008c00,
+ VC_ENABLE_bit = 1 << 0,
+ EXPORT_SRC_C_bit = 1 << 1,
+ DX9_CONSTS_bit = 1 << 2,
+ ALU_INST_PREFER_VECTOR_bit = 1 << 3,
+ SQ_CONFIG__DX10_CLAMP_bit = 1 << 4,
+ ALU_PREFER_ONE_WATERFALL_bit = 1 << 5,
+ ALU_MAX_ONE_WATERFALL_bit = 1 << 6,
+ CLAUSE_SEQ_PRIO_mask = 0x03 << 8,
+ CLAUSE_SEQ_PRIO_shift = 8,
+ SQ_CL_PRIO_RND_ROBIN = 0x00,
+ SQ_CL_PRIO_MACRO_SEQ = 0x01,
+ SQ_CL_PRIO_NONE = 0x02,
+ PS_PRIO_mask = 0x03 << 24,
+ PS_PRIO_shift = 24,
+ VS_PRIO_mask = 0x03 << 26,
+ VS_PRIO_shift = 26,
+ GS_PRIO_mask = 0x03 << 28,
+ GS_PRIO_shift = 28,
+ ES_PRIO_mask = 0x03 << 30,
+ ES_PRIO_shift = 30,
+ SQ_GPR_RESOURCE_MGMT_1 = 0x00008c04,
+ NUM_PS_GPRS_mask = 0xff << 0,
+ NUM_PS_GPRS_shift = 0,
+ NUM_VS_GPRS_mask = 0xff << 16,
+ NUM_VS_GPRS_shift = 16,
+ NUM_CLAUSE_TEMP_GPRS_mask = 0x0f << 28,
+ NUM_CLAUSE_TEMP_GPRS_shift = 28,
+ SQ_GPR_RESOURCE_MGMT_2 = 0x00008c08,
+ NUM_GS_GPRS_mask = 0xff << 0,
+ NUM_GS_GPRS_shift = 0,
+ NUM_ES_GPRS_mask = 0xff << 16,
+ NUM_ES_GPRS_shift = 16,
+ SQ_THREAD_RESOURCE_MGMT = 0x00008c0c,
+ NUM_PS_THREADS_mask = 0xff << 0,
+ NUM_PS_THREADS_shift = 0,
+ NUM_VS_THREADS_mask = 0xff << 8,
+ NUM_VS_THREADS_shift = 8,
+ NUM_GS_THREADS_mask = 0xff << 16,
+ NUM_GS_THREADS_shift = 16,
+ NUM_ES_THREADS_mask = 0xff << 24,
+ NUM_ES_THREADS_shift = 24,
+ SQ_STACK_RESOURCE_MGMT_1 = 0x00008c10,
+ NUM_PS_STACK_ENTRIES_mask = 0xfff << 0,
+ NUM_PS_STACK_ENTRIES_shift = 0,
+ NUM_VS_STACK_ENTRIES_mask = 0xfff << 16,
+ NUM_VS_STACK_ENTRIES_shift = 16,
+ SQ_STACK_RESOURCE_MGMT_2 = 0x00008c14,
+ NUM_GS_STACK_ENTRIES_mask = 0xfff << 0,
+ NUM_GS_STACK_ENTRIES_shift = 0,
+ NUM_ES_STACK_ENTRIES_mask = 0xfff << 16,
+ NUM_ES_STACK_ENTRIES_shift = 16,
+ SQ_ESGS_RING_BASE = 0x00008c40,
+ SQ_ESGS_RING_SIZE = 0x00008c44,
+ SQ_GSVS_RING_BASE = 0x00008c48,
+ SQ_GSVS_RING_SIZE = 0x00008c4c,
+ SQ_ESTMP_RING_BASE = 0x00008c50,
+ SQ_ESTMP_RING_SIZE = 0x00008c54,
+ SQ_GSTMP_RING_BASE = 0x00008c58,
+ SQ_GSTMP_RING_SIZE = 0x00008c5c,
+ SQ_VSTMP_RING_BASE = 0x00008c60,
+ SQ_VSTMP_RING_SIZE = 0x00008c64,
+ SQ_PSTMP_RING_BASE = 0x00008c68,
+ SQ_PSTMP_RING_SIZE = 0x00008c6c,
+ SQ_FBUF_RING_BASE = 0x00008c70,
+ SQ_FBUF_RING_SIZE = 0x00008c74,
+ SQ_REDUC_RING_BASE = 0x00008c78,
+ SQ_REDUC_RING_SIZE = 0x00008c7c,
+ SQ_ALU_WORD1_OP3 = 0x00008dfc, /* same value as the other SQ_*_WORD* entries nearby (e.g. SQ_TEX_WORD2, SQ_ALU_WORD0); presumably a placeholder for an instruction-word layout rather than a distinct register -- confirm */
+ SRC2_SEL_mask = 0x1ff << 0, /* 9-bit field, bits 8:0 */
+ SRC2_SEL_shift = 0,
+ SQ_ALU_SRC_0 = 0xf8, /* named selector values 0xf8..0xff; presumably for the SRCn_SEL fields -- confirm */
+ SQ_ALU_SRC_1 = 0xf9,
+ SQ_ALU_SRC_1_INT = 0xfa,
+ SQ_ALU_SRC_M_1_INT = 0xfb,
+ SQ_ALU_SRC_0_5 = 0xfc,
+ SQ_ALU_SRC_LITERAL = 0xfd,
+ SQ_ALU_SRC_PV = 0xfe,
+ SQ_ALU_SRC_PS = 0xff,
+ SRC2_REL_bit = 1 << 9,
+ SRC2_CHAN_mask = 0x03 << 10, /* 2-bit field, bits 11:10 */
+ SRC2_CHAN_shift = 10,
+ SQ_CHAN_X = 0x00,
+ SQ_CHAN_Y = 0x01,
+ SQ_CHAN_Z = 0x02,
+ SQ_CHAN_W = 0x03,
+ SRC2_NEG_bit = 1 << 12,
+ SQ_ALU_WORD1_OP3__ALU_INST_mask = 0x1f << 13, /* 5-bit field, bits 17:13 */
+ SQ_ALU_WORD1_OP3__ALU_INST_shift = 13,
+ SQ_OP3_INST_MUL_LIT = 0x0c, /* unshifted opcode values; the list runs 0x0c..0x1e and 0x1b is not named */
+ SQ_OP3_INST_MUL_LIT_M2 = 0x0d,
+ SQ_OP3_INST_MUL_LIT_M4 = 0x0e,
+ SQ_OP3_INST_MUL_LIT_D2 = 0x0f,
+ SQ_OP3_INST_MULADD = 0x10,
+ SQ_OP3_INST_MULADD_M2 = 0x11,
+ SQ_OP3_INST_MULADD_M4 = 0x12,
+ SQ_OP3_INST_MULADD_D2 = 0x13,
+ SQ_OP3_INST_MULADD_IEEE = 0x14,
+ SQ_OP3_INST_MULADD_IEEE_M2 = 0x15,
+ SQ_OP3_INST_MULADD_IEEE_M4 = 0x16,
+ SQ_OP3_INST_MULADD_IEEE_D2 = 0x17,
+ SQ_OP3_INST_CNDE = 0x18,
+ SQ_OP3_INST_CNDGT = 0x19,
+ SQ_OP3_INST_CNDGE = 0x1a,
+ SQ_OP3_INST_CNDE_INT = 0x1c,
+ SQ_OP3_INST_CNDGT_INT = 0x1d,
+ SQ_OP3_INST_CNDGE_INT = 0x1e,
+ SQ_TEX_WORD2 = 0x00008dfc, /* same value as the other SQ_*_WORD* entries nearby; presumably a placeholder, not a distinct register -- confirm */
+ OFFSET_X_mask = 0x1f << 0, /* OFFSET_X/Y/Z and SAMPLER_ID are 5 bits each, packed into bits 19:0 */
+ OFFSET_X_shift = 0,
+ OFFSET_Y_mask = 0x1f << 5,
+ OFFSET_Y_shift = 5,
+ OFFSET_Z_mask = 0x1f << 10,
+ OFFSET_Z_shift = 10,
+ SAMPLER_ID_mask = 0x1f << 15,
+ SAMPLER_ID_shift = 15,
+ SQ_TEX_WORD2__SRC_SEL_X_mask = 0x07 << 20, /* 3-bit selector, bits 22:20 */
+ SQ_TEX_WORD2__SRC_SEL_X_shift = 20,
+ SQ_SEL_X = 0x00, /* live definitions of the SQ_SEL_* values; the identical lists further down are comments only */
+ SQ_SEL_Y = 0x01,
+ SQ_SEL_Z = 0x02,
+ SQ_SEL_W = 0x03,
+ SQ_SEL_0 = 0x04,
+ SQ_SEL_1 = 0x05,
+ SRC_SEL_Y_mask = 0x07 << 23, /* the list below repeats SQ_SEL_* as comments; an enumerator can be defined only once, so the live ones are those above */
+ SRC_SEL_Y_shift = 23,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+ SRC_SEL_Z_mask = 0x07 << 26,
+ SRC_SEL_Z_shift = 26,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+ SRC_SEL_W_mask = 0x07 << 29, /* NOTE(review): 0x07 << 29 does not fit a 32-bit signed int; confirm the compiler yields the intended 0xe0000000 pattern */
+ SRC_SEL_W_shift = 29,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+ SQ_CF_ALLOC_EXPORT_WORD1 = 0x00008dfc,
+ BURST_COUNT_mask = 0x0f << 17,
+ BURST_COUNT_shift = 17,
+ END_OF_PROGRAM_bit = 1 << 21,
+ VALID_PIXEL_MODE_bit = 1 << 22,
+ SQ_CF_ALLOC_EXPORT_WORD1__CF_INST_mask = 0x7f << 23,
+ SQ_CF_ALLOC_EXPORT_WORD1__CF_INST_shift = 23,
+ SQ_CF_INST_MEM_STREAM0 = 0x20,
+ SQ_CF_INST_MEM_STREAM1 = 0x21,
+ SQ_CF_INST_MEM_STREAM2 = 0x22,
+ SQ_CF_INST_MEM_STREAM3 = 0x23,
+ SQ_CF_INST_MEM_SCRATCH = 0x24,
+ SQ_CF_INST_MEM_REDUCTION = 0x25,
+ SQ_CF_INST_MEM_RING = 0x26,
+ SQ_CF_INST_EXPORT = 0x27,
+ SQ_CF_INST_EXPORT_DONE = 0x28,
+ WHOLE_QUAD_MODE_bit = 1 << 30,
+ BARRIER_bit = 1 << 31,
+ SQ_CF_ALU_WORD1 = 0x00008dfc,
+ KCACHE_MODE1_mask = 0x03 << 0,
+ KCACHE_MODE1_shift = 0,
+ SQ_CF_KCACHE_NOP = 0x00,
+ SQ_CF_KCACHE_LOCK_1 = 0x01,
+ SQ_CF_KCACHE_LOCK_2 = 0x02,
+ SQ_CF_KCACHE_LOCK_LOOP_INDEX = 0x03,
+ KCACHE_ADDR0_mask = 0xff << 2,
+ KCACHE_ADDR0_shift = 2,
+ KCACHE_ADDR1_mask = 0xff << 10,
+ KCACHE_ADDR1_shift = 10,
+ SQ_CF_ALU_WORD1__COUNT_mask = 0x7f << 18,
+ SQ_CF_ALU_WORD1__COUNT_shift = 18,
+ SQ_CF_ALU_WORD1__ALT_CONST_bit = 1 << 25,
+ SQ_CF_ALU_WORD1__CF_INST_mask = 0x0f << 26,
+ SQ_CF_ALU_WORD1__CF_INST_shift = 26,
+ SQ_CF_INST_ALU = 0x08,
+ SQ_CF_INST_ALU_PUSH_BEFORE = 0x09,
+ SQ_CF_INST_ALU_POP_AFTER = 0x0a,
+ SQ_CF_INST_ALU_POP2_AFTER = 0x0b,
+ SQ_CF_INST_ALU_CONTINUE = 0x0d,
+ SQ_CF_INST_ALU_BREAK = 0x0e,
+ SQ_CF_INST_ALU_ELSE_AFTER = 0x0f,
+/* WHOLE_QUAD_MODE_bit = 1 << 30, */
+/* BARRIER_bit = 1 << 31, */
+ SQ_TEX_WORD1 = 0x00008dfc,
+ SQ_TEX_WORD1__DST_GPR_mask = 0x7f << 0,
+ SQ_TEX_WORD1__DST_GPR_shift = 0,
+ SQ_TEX_WORD1__DST_REL_bit = 1 << 7,
+ SQ_TEX_WORD1__DST_SEL_X_mask = 0x07 << 9,
+ SQ_TEX_WORD1__DST_SEL_X_shift = 9,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+ SQ_SEL_MASK = 0x07,
+ SQ_TEX_WORD1__DST_SEL_Y_mask = 0x07 << 12,
+ SQ_TEX_WORD1__DST_SEL_Y_shift = 12,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SQ_TEX_WORD1__DST_SEL_Z_mask = 0x07 << 15,
+ SQ_TEX_WORD1__DST_SEL_Z_shift = 15,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SQ_TEX_WORD1__DST_SEL_W_mask = 0x07 << 18,
+ SQ_TEX_WORD1__DST_SEL_W_shift = 18,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SQ_TEX_WORD1__LOD_BIAS_mask = 0x7f << 21,
+ SQ_TEX_WORD1__LOD_BIAS_shift = 21,
+ COORD_TYPE_X_bit = 1 << 28,
+ COORD_TYPE_Y_bit = 1 << 29,
+ COORD_TYPE_Z_bit = 1 << 30,
+ COORD_TYPE_W_bit = 1 << 31,
+ SQ_VTX_WORD0 = 0x00008dfc, /* same value as the other SQ_*_WORD* entries nearby; presumably a placeholder, not a distinct register -- confirm */
+ VTX_INST_mask = 0x1f << 0, /* 5-bit field, bits 4:0 */
+ VTX_INST_shift = 0,
+ SQ_VTX_INST_FETCH = 0x00, /* unshifted values; presumably for VTX_INST, which they directly follow -- confirm */
+ SQ_VTX_INST_SEMANTIC = 0x01,
+ FETCH_TYPE_mask = 0x03 << 5, /* 2-bit field, bits 6:5; only three of its four encodings are named below */
+ FETCH_TYPE_shift = 5,
+ SQ_VTX_FETCH_VERTEX_DATA = 0x00,
+ SQ_VTX_FETCH_INSTANCE_DATA = 0x01,
+ SQ_VTX_FETCH_NO_INDEX_OFFSET = 0x02,
+ FETCH_WHOLE_QUAD_bit = 1 << 7,
+ BUFFER_ID_mask = 0xff << 8,
+ BUFFER_ID_shift = 8,
+ SRC_GPR_mask = 0x7f << 16,
+ SRC_GPR_shift = 16,
+ SRC_REL_bit = 1 << 23,
+ SQ_VTX_WORD0__SRC_SEL_X_mask = 0x03 << 24, /* only 2 bits wide (bits 25:24), unlike the 3-bit SQ_TEX_WORD2 selectors; hence the list below stops at SQ_SEL_W */
+ SQ_VTX_WORD0__SRC_SEL_X_shift = 24,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+ MEGA_FETCH_COUNT_mask = 0x3f << 26, /* 6-bit field, bits 31:26. NOTE(review): 0x3f << 26 does not fit a 32-bit signed int -- confirm compiler behaviour */
+ MEGA_FETCH_COUNT_shift = 26,
+ SQ_CF_ALLOC_EXPORT_WORD1_SWIZ = 0x00008dfc,
+ SEL_X_mask = 0x07 << 0,
+ SEL_X_shift = 0,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SEL_Y_mask = 0x07 << 3,
+ SEL_Y_shift = 3,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SEL_Z_mask = 0x07 << 6,
+ SEL_Z_shift = 6,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SEL_W_mask = 0x07 << 9,
+ SEL_W_shift = 9,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SQ_ALU_WORD1 = 0x00008dfc,
+ ENCODING_mask = 0x07 << 15,
+ ENCODING_shift = 15,
+ BANK_SWIZZLE_mask = 0x07 << 18,
+ BANK_SWIZZLE_shift = 18,
+ SQ_ALU_VEC_012 = 0x00,
+ SQ_ALU_VEC_021 = 0x01,
+ SQ_ALU_VEC_120 = 0x02,
+ SQ_ALU_VEC_102 = 0x03,
+ SQ_ALU_VEC_201 = 0x04,
+ SQ_ALU_VEC_210 = 0x05,
+ SQ_ALU_WORD1__DST_GPR_mask = 0x7f << 21,
+ SQ_ALU_WORD1__DST_GPR_shift = 21,
+ SQ_ALU_WORD1__DST_REL_bit = 1 << 28,
+ DST_CHAN_mask = 0x03 << 29,
+ DST_CHAN_shift = 29,
+ CHAN_X = 0x00,
+ CHAN_Y = 0x01,
+ CHAN_Z = 0x02,
+ CHAN_W = 0x03,
+ SQ_ALU_WORD1__CLAMP_bit = 1 << 31,
+ SQ_CF_ALU_WORD0 = 0x00008dfc,
+ SQ_CF_ALU_WORD0__ADDR_mask = 0x3fffff << 0,
+ SQ_CF_ALU_WORD0__ADDR_shift = 0,
+ KCACHE_BANK0_mask = 0x0f << 22,
+ KCACHE_BANK0_shift = 22,
+ KCACHE_BANK1_mask = 0x0f << 26,
+ KCACHE_BANK1_shift = 26,
+ KCACHE_MODE0_mask = 0x03 << 30,
+ KCACHE_MODE0_shift = 30,
+/* SQ_CF_KCACHE_NOP = 0x00, */
+/* SQ_CF_KCACHE_LOCK_1 = 0x01, */
+/* SQ_CF_KCACHE_LOCK_2 = 0x02, */
+/* SQ_CF_KCACHE_LOCK_LOOP_INDEX = 0x03, */
+ SQ_VTX_WORD2 = 0x00008dfc,
+ SQ_VTX_WORD2__OFFSET_mask = 0xffff << 0,
+ SQ_VTX_WORD2__OFFSET_shift = 0,
+ SQ_VTX_WORD2__ENDIAN_SWAP_mask = 0x03 << 16,
+ SQ_VTX_WORD2__ENDIAN_SWAP_shift = 16,
+ SQ_ENDIAN_NONE = 0x00,
+ SQ_ENDIAN_8IN16 = 0x01,
+ SQ_ENDIAN_8IN32 = 0x02,
+ CONST_BUF_NO_STRIDE_bit = 1 << 18,
+ MEGA_FETCH_bit = 1 << 19,
+ SQ_VTX_WORD2__ALT_CONST_bit = 1 << 20,
+ SQ_ALU_WORD1_OP2_V2 = 0x00008dfc, /* variant of SQ_ALU_WORD1_OP2 with a different bit layout: no FOG_MERGE bit, OMOD one bit lower, ALU_INST one bit wider */
+ SRC0_ABS_bit = 1 << 0,
+ SRC1_ABS_bit = 1 << 1,
+ UPDATE_EXECUTE_MASK_bit = 1 << 2,
+ UPDATE_PRED_bit = 1 << 3,
+ WRITE_MASK_bit = 1 << 4,
+ SQ_ALU_WORD1_OP2_V2__OMOD_mask = 0x03 << 5, /* bits 6:5 here; SQ_ALU_WORD1_OP2__OMOD is at bits 7:6 */
+ SQ_ALU_WORD1_OP2_V2__OMOD_shift = 5,
+ SQ_ALU_OMOD_OFF = 0x00, /* unshifted OMOD values; the same names are listed (commented out) under SQ_ALU_WORD1_OP2 */
+ SQ_ALU_OMOD_M2 = 0x01,
+ SQ_ALU_OMOD_M4 = 0x02,
+ SQ_ALU_OMOD_D2 = 0x03,
+ SQ_ALU_WORD1_OP2_V2__ALU_INST_mask = 0x7ff << 7, /* 11-bit field, bits 17:7; SQ_ALU_WORD1_OP2__ALU_INST is 10 bits at 17:8 */
+ SQ_ALU_WORD1_OP2_V2__ALU_INST_shift = 7,
+ SQ_OP2_INST_ADD = 0x00,
+ SQ_OP2_INST_MUL = 0x01,
+ SQ_OP2_INST_MUL_IEEE = 0x02,
+ SQ_OP2_INST_MAX = 0x03,
+ SQ_OP2_INST_MIN = 0x04,
+ SQ_OP2_INST_MAX_DX10 = 0x05,
+ SQ_OP2_INST_MIN_DX10 = 0x06,
+ SQ_OP2_INST_SETE = 0x08,
+ SQ_OP2_INST_SETGT = 0x09,
+ SQ_OP2_INST_SETGE = 0x0a,
+ SQ_OP2_INST_SETNE = 0x0b,
+ SQ_OP2_INST_SETE_DX10 = 0x0c,
+ SQ_OP2_INST_SETGT_DX10 = 0x0d,
+ SQ_OP2_INST_SETGE_DX10 = 0x0e,
+ SQ_OP2_INST_SETNE_DX10 = 0x0f,
+ SQ_OP2_INST_FRACT = 0x10,
+ SQ_OP2_INST_TRUNC = 0x11,
+ SQ_OP2_INST_CEIL = 0x12,
+ SQ_OP2_INST_RNDNE = 0x13,
+ SQ_OP2_INST_FLOOR = 0x14,
+ SQ_OP2_INST_MOVA = 0x15,
+ SQ_OP2_INST_MOVA_FLOOR = 0x16,
+ SQ_OP2_INST_MOVA_INT = 0x18,
+ SQ_OP2_INST_MOV = 0x19,
+ SQ_OP2_INST_NOP = 0x1a,
+ SQ_OP2_INST_PRED_SETGT_UINT = 0x1e,
+ SQ_OP2_INST_PRED_SETGE_UINT = 0x1f,
+ SQ_OP2_INST_PRED_SETE = 0x20,
+ SQ_OP2_INST_PRED_SETGT = 0x21,
+ SQ_OP2_INST_PRED_SETGE = 0x22,
+ SQ_OP2_INST_PRED_SETNE = 0x23,
+ SQ_OP2_INST_PRED_SET_INV = 0x24,
+ SQ_OP2_INST_PRED_SET_POP = 0x25,
+ SQ_OP2_INST_PRED_SET_CLR = 0x26,
+ SQ_OP2_INST_PRED_SET_RESTORE = 0x27,
+ SQ_OP2_INST_PRED_SETE_PUSH = 0x28,
+ SQ_OP2_INST_PRED_SETGT_PUSH = 0x29,
+ SQ_OP2_INST_PRED_SETGE_PUSH = 0x2a,
+ SQ_OP2_INST_PRED_SETNE_PUSH = 0x2b,
+ SQ_OP2_INST_KILLE = 0x2c,
+ SQ_OP2_INST_KILLGT = 0x2d,
+ SQ_OP2_INST_KILLGE = 0x2e,
+ SQ_OP2_INST_KILLNE = 0x2f,
+ SQ_OP2_INST_AND_INT = 0x30,
+ SQ_OP2_INST_OR_INT = 0x31,
+ SQ_OP2_INST_XOR_INT = 0x32,
+ SQ_OP2_INST_NOT_INT = 0x33,
+ SQ_OP2_INST_ADD_INT = 0x34,
+ SQ_OP2_INST_SUB_INT = 0x35,
+ SQ_OP2_INST_MAX_INT = 0x36,
+ SQ_OP2_INST_MIN_INT = 0x37,
+ SQ_OP2_INST_MAX_UINT = 0x38,
+ SQ_OP2_INST_MIN_UINT = 0x39,
+ SQ_OP2_INST_SETE_INT = 0x3a,
+ SQ_OP2_INST_SETGT_INT = 0x3b,
+ SQ_OP2_INST_SETGE_INT = 0x3c,
+ SQ_OP2_INST_SETNE_INT = 0x3d,
+ SQ_OP2_INST_SETGT_UINT = 0x3e,
+ SQ_OP2_INST_SETGE_UINT = 0x3f,
+ SQ_OP2_INST_KILLGT_UINT = 0x40,
+ SQ_OP2_INST_KILLGE_UINT = 0x41,
+ SQ_OP2_INST_PRED_SETE_INT = 0x42,
+ SQ_OP2_INST_PRED_SETGT_INT = 0x43,
+ SQ_OP2_INST_PRED_SETGE_INT = 0x44,
+ SQ_OP2_INST_PRED_SETNE_INT = 0x45,
+ SQ_OP2_INST_KILLE_INT = 0x46,
+ SQ_OP2_INST_KILLGT_INT = 0x47,
+ SQ_OP2_INST_KILLGE_INT = 0x48,
+ SQ_OP2_INST_KILLNE_INT = 0x49,
+ SQ_OP2_INST_PRED_SETE_PUSH_INT = 0x4a,
+ SQ_OP2_INST_PRED_SETGT_PUSH_INT = 0x4b,
+ SQ_OP2_INST_PRED_SETGE_PUSH_INT = 0x4c,
+ SQ_OP2_INST_PRED_SETNE_PUSH_INT = 0x4d,
+ SQ_OP2_INST_PRED_SETLT_PUSH_INT = 0x4e,
+ SQ_OP2_INST_PRED_SETLE_PUSH_INT = 0x4f,
+ SQ_OP2_INST_DOT4 = 0x50,
+ SQ_OP2_INST_DOT4_IEEE = 0x51,
+ SQ_OP2_INST_CUBE = 0x52,
+ SQ_OP2_INST_MAX4 = 0x53,
+ SQ_OP2_INST_MOVA_GPR_INT = 0x60,
+ SQ_OP2_INST_EXP_IEEE = 0x61,
+ SQ_OP2_INST_LOG_CLAMPED = 0x62,
+ SQ_OP2_INST_LOG_IEEE = 0x63,
+ SQ_OP2_INST_RECIP_CLAMPED = 0x64,
+ SQ_OP2_INST_RECIP_FF = 0x65,
+ SQ_OP2_INST_RECIP_IEEE = 0x66,
+ SQ_OP2_INST_RECIPSQRT_CLAMPED = 0x67,
+ SQ_OP2_INST_RECIPSQRT_FF = 0x68,
+ SQ_OP2_INST_RECIPSQRT_IEEE = 0x69,
+ SQ_OP2_INST_SQRT_IEEE = 0x6a,
+ SQ_OP2_INST_FLT_TO_INT = 0x6b,
+ SQ_OP2_INST_INT_TO_FLT = 0x6c,
+ SQ_OP2_INST_UINT_TO_FLT = 0x6d,
+ SQ_OP2_INST_SIN = 0x6e,
+ SQ_OP2_INST_COS = 0x6f,
+ SQ_OP2_INST_ASHR_INT = 0x70,
+ SQ_OP2_INST_LSHR_INT = 0x71,
+ SQ_OP2_INST_LSHL_INT = 0x72,
+ SQ_OP2_INST_MULLO_INT = 0x73,
+ SQ_OP2_INST_MULHI_INT = 0x74,
+ SQ_OP2_INST_MULLO_UINT = 0x75,
+ SQ_OP2_INST_MULHI_UINT = 0x76,
+ SQ_OP2_INST_RECIP_INT = 0x77,
+ SQ_OP2_INST_RECIP_UINT = 0x78,
+ SQ_OP2_INST_FLT_TO_UINT = 0x79,
+ SQ_CF_ALLOC_EXPORT_WORD1_BUF = 0x00008dfc,
+ ARRAY_SIZE_mask = 0xfff << 0,
+ ARRAY_SIZE_shift = 0,
+ COMP_MASK_mask = 0x0f << 12,
+ COMP_MASK_shift = 12,
+ SQ_CF_WORD0 = 0x00008dfc,
+ SQ_CF_ALLOC_EXPORT_WORD0 = 0x00008dfc,
+ ARRAY_BASE_mask = 0x1fff << 0,
+ ARRAY_BASE_shift = 0,
+ SQ_CF_ALLOC_EXPORT_WORD0__TYPE_mask = 0x03 << 13,
+ SQ_CF_ALLOC_EXPORT_WORD0__TYPE_shift = 13,
+ SQ_EXPORT_PIXEL = 0x00,
+ SQ_EXPORT_POS = 0x01,
+ SQ_EXPORT_PARAM = 0x02,
+ X_UNUSED_FOR_SX_EXPORTS = 0x03,
+ RW_GPR_mask = 0x7f << 15,
+ RW_GPR_shift = 15,
+ RW_REL_bit = 1 << 22,
+ INDEX_GPR_mask = 0x7f << 23,
+ INDEX_GPR_shift = 23,
+ ELEM_SIZE_mask = 0x03 << 30,
+ ELEM_SIZE_shift = 30,
+ SQ_VTX_WORD1 = 0x00008dfc,
+ SQ_VTX_WORD1__DST_SEL_X_mask = 0x07 << 9,
+ SQ_VTX_WORD1__DST_SEL_X_shift = 9,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SQ_VTX_WORD1__DST_SEL_Y_mask = 0x07 << 12,
+ SQ_VTX_WORD1__DST_SEL_Y_shift = 12,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SQ_VTX_WORD1__DST_SEL_Z_mask = 0x07 << 15,
+ SQ_VTX_WORD1__DST_SEL_Z_shift = 15,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SQ_VTX_WORD1__DST_SEL_W_mask = 0x07 << 18,
+ SQ_VTX_WORD1__DST_SEL_W_shift = 18,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ USE_CONST_FIELDS_bit = 1 << 21,
+ SQ_VTX_WORD1__DATA_FORMAT_mask = 0x3f << 22,
+ SQ_VTX_WORD1__DATA_FORMAT_shift = 22,
+ SQ_VTX_WORD1__NUM_FORMAT_ALL_mask = 0x03 << 28,
+ SQ_VTX_WORD1__NUM_FORMAT_ALL_shift = 28,
+ SQ_NUM_FORMAT_NORM = 0x00,
+ SQ_NUM_FORMAT_INT = 0x01,
+ SQ_NUM_FORMAT_SCALED = 0x02,
+ SQ_VTX_WORD1__FORMAT_COMP_ALL_bit = 1 << 30,
+ SQ_VTX_WORD1__SRF_MODE_ALL_bit = 1 << 31,
+ SQ_ALU_WORD1_OP2 = 0x00008dfc, /* bits 4:0 match SQ_ALU_WORD1_OP2_V2 (names commented out below because they are already defined there); the layout differs from bit 5 upward */
+/* SRC0_ABS_bit = 1 << 0, */
+/* SRC1_ABS_bit = 1 << 1, */
+/* UPDATE_EXECUTE_MASK_bit = 1 << 2, */
+/* UPDATE_PRED_bit = 1 << 3, */
+/* WRITE_MASK_bit = 1 << 4, */
+ FOG_MERGE_bit = 1 << 5, /* bit 5 is FOG_MERGE here; in the _V2 layout bit 5 is the low bit of OMOD */
+ SQ_ALU_WORD1_OP2__OMOD_mask = 0x03 << 6, /* bits 7:6 here; the _V2 OMOD field is at bits 6:5 */
+ SQ_ALU_WORD1_OP2__OMOD_shift = 6,
+/* SQ_ALU_OMOD_OFF = 0x00, */
+/* SQ_ALU_OMOD_M2 = 0x01, */
+/* SQ_ALU_OMOD_M4 = 0x02, */
+/* SQ_ALU_OMOD_D2 = 0x03, */
+ SQ_ALU_WORD1_OP2__ALU_INST_mask = 0x3ff << 8, /* 10-bit field, bits 17:8; the _V2 ALU_INST field is 11 bits at 17:7 */
+ SQ_ALU_WORD1_OP2__ALU_INST_shift = 8,
+/* SQ_OP2_INST_ADD = 0x00, */
+/* SQ_OP2_INST_MUL = 0x01, */
+/* SQ_OP2_INST_MUL_IEEE = 0x02, */
+/* SQ_OP2_INST_MAX = 0x03, */
+/* SQ_OP2_INST_MIN = 0x04, */
+/* SQ_OP2_INST_MAX_DX10 = 0x05, */
+/* SQ_OP2_INST_MIN_DX10 = 0x06, */
+/* SQ_OP2_INST_SETE = 0x08, */
+/* SQ_OP2_INST_SETGT = 0x09, */
+/* SQ_OP2_INST_SETGE = 0x0a, */
+/* SQ_OP2_INST_SETNE = 0x0b, */
+/* SQ_OP2_INST_SETE_DX10 = 0x0c, */
+/* SQ_OP2_INST_SETGT_DX10 = 0x0d, */
+/* SQ_OP2_INST_SETGE_DX10 = 0x0e, */
+/* SQ_OP2_INST_SETNE_DX10 = 0x0f, */
+/* SQ_OP2_INST_FRACT = 0x10, */
+/* SQ_OP2_INST_TRUNC = 0x11, */
+/* SQ_OP2_INST_CEIL = 0x12, */
+/* SQ_OP2_INST_RNDNE = 0x13, */
+/* SQ_OP2_INST_FLOOR = 0x14, */
+/* SQ_OP2_INST_MOVA = 0x15, */
+/* SQ_OP2_INST_MOVA_FLOOR = 0x16, */
+/* SQ_OP2_INST_MOVA_INT = 0x18, */
+/* SQ_OP2_INST_MOV = 0x19, */
+/* SQ_OP2_INST_NOP = 0x1a, */
+/* SQ_OP2_INST_PRED_SETGT_UINT = 0x1e, */
+/* SQ_OP2_INST_PRED_SETGE_UINT = 0x1f, */
+/* SQ_OP2_INST_PRED_SETE = 0x20, */
+/* SQ_OP2_INST_PRED_SETGT = 0x21, */
+/* SQ_OP2_INST_PRED_SETGE = 0x22, */
+/* SQ_OP2_INST_PRED_SETNE = 0x23, */
+/* SQ_OP2_INST_PRED_SET_INV = 0x24, */
+/* SQ_OP2_INST_PRED_SET_POP = 0x25, */
+/* SQ_OP2_INST_PRED_SET_CLR = 0x26, */
+/* SQ_OP2_INST_PRED_SET_RESTORE = 0x27, */
+/* SQ_OP2_INST_PRED_SETE_PUSH = 0x28, */
+/* SQ_OP2_INST_PRED_SETGT_PUSH = 0x29, */
+/* SQ_OP2_INST_PRED_SETGE_PUSH = 0x2a, */
+/* SQ_OP2_INST_PRED_SETNE_PUSH = 0x2b, */
+/* SQ_OP2_INST_KILLE = 0x2c, */
+/* SQ_OP2_INST_KILLGT = 0x2d, */
+/* SQ_OP2_INST_KILLGE = 0x2e, */
+/* SQ_OP2_INST_KILLNE = 0x2f, */
+/* SQ_OP2_INST_AND_INT = 0x30, */
+/* SQ_OP2_INST_OR_INT = 0x31, */
+/* SQ_OP2_INST_XOR_INT = 0x32, */
+/* SQ_OP2_INST_NOT_INT = 0x33, */
+/* SQ_OP2_INST_ADD_INT = 0x34, */
+/* SQ_OP2_INST_SUB_INT = 0x35, */
+/* SQ_OP2_INST_MAX_INT = 0x36, */
+/* SQ_OP2_INST_MIN_INT = 0x37, */
+/* SQ_OP2_INST_MAX_UINT = 0x38, */
+/* SQ_OP2_INST_MIN_UINT = 0x39, */
+/* SQ_OP2_INST_SETE_INT = 0x3a, */
+/* SQ_OP2_INST_SETGT_INT = 0x3b, */
+/* SQ_OP2_INST_SETGE_INT = 0x3c, */
+/* SQ_OP2_INST_SETNE_INT = 0x3d, */
+/* SQ_OP2_INST_SETGT_UINT = 0x3e, */
+/* SQ_OP2_INST_SETGE_UINT = 0x3f, */
+/* SQ_OP2_INST_KILLGT_UINT = 0x40, */
+/* SQ_OP2_INST_KILLGE_UINT = 0x41, */
+/* SQ_OP2_INST_PRED_SETE_INT = 0x42, */
+/* SQ_OP2_INST_PRED_SETGT_INT = 0x43, */
+/* SQ_OP2_INST_PRED_SETGE_INT = 0x44, */
+/* SQ_OP2_INST_PRED_SETNE_INT = 0x45, */
+/* SQ_OP2_INST_KILLE_INT = 0x46, */
+/* SQ_OP2_INST_KILLGT_INT = 0x47, */
+/* SQ_OP2_INST_KILLGE_INT = 0x48, */
+/* SQ_OP2_INST_KILLNE_INT = 0x49, */
+/* SQ_OP2_INST_PRED_SETE_PUSH_INT = 0x4a, */
+/* SQ_OP2_INST_PRED_SETGT_PUSH_INT = 0x4b, */
+/* SQ_OP2_INST_PRED_SETGE_PUSH_INT = 0x4c, */
+/* SQ_OP2_INST_PRED_SETNE_PUSH_INT = 0x4d, */
+/* SQ_OP2_INST_PRED_SETLT_PUSH_INT = 0x4e, */
+/* SQ_OP2_INST_PRED_SETLE_PUSH_INT = 0x4f, */
+/* SQ_OP2_INST_DOT4 = 0x50, */
+/* SQ_OP2_INST_DOT4_IEEE = 0x51, */
+/* SQ_OP2_INST_CUBE = 0x52, */
+/* SQ_OP2_INST_MAX4 = 0x53, */
+/* SQ_OP2_INST_MOVA_GPR_INT = 0x60, */
+/* SQ_OP2_INST_EXP_IEEE = 0x61, */
+/* SQ_OP2_INST_LOG_CLAMPED = 0x62, */
+/* SQ_OP2_INST_LOG_IEEE = 0x63, */
+/* SQ_OP2_INST_RECIP_CLAMPED = 0x64, */
+/* SQ_OP2_INST_RECIP_FF = 0x65, */
+/* SQ_OP2_INST_RECIP_IEEE = 0x66, */
+/* SQ_OP2_INST_RECIPSQRT_CLAMPED = 0x67, */
+/* SQ_OP2_INST_RECIPSQRT_FF = 0x68, */
+/* SQ_OP2_INST_RECIPSQRT_IEEE = 0x69, */
+/* SQ_OP2_INST_SQRT_IEEE = 0x6a, */
+/* SQ_OP2_INST_FLT_TO_INT = 0x6b, */
+/* SQ_OP2_INST_INT_TO_FLT = 0x6c, */
+/* SQ_OP2_INST_UINT_TO_FLT = 0x6d, */
+/* SQ_OP2_INST_SIN = 0x6e, */
+/* SQ_OP2_INST_COS = 0x6f, */
+/* SQ_OP2_INST_ASHR_INT = 0x70, */
+/* SQ_OP2_INST_LSHR_INT = 0x71, */
+/* SQ_OP2_INST_LSHL_INT = 0x72, */
+/* SQ_OP2_INST_MULLO_INT = 0x73, */
+/* SQ_OP2_INST_MULHI_INT = 0x74, */
+/* SQ_OP2_INST_MULLO_UINT = 0x75, */
+/* SQ_OP2_INST_MULHI_UINT = 0x76, */
+/* SQ_OP2_INST_RECIP_INT = 0x77, */
+/* SQ_OP2_INST_RECIP_UINT = 0x78, */
+/* SQ_OP2_INST_FLT_TO_UINT = 0x79, */
+ SQ_CF_WORD1 = 0x00008dfc,
+ POP_COUNT_mask = 0x07 << 0,
+ POP_COUNT_shift = 0,
+ CF_CONST_mask = 0x1f << 3,
+ CF_CONST_shift = 3,
+ COND_mask = 0x03 << 8,
+ COND_shift = 8,
+ SQ_CF_COND_ACTIVE = 0x00,
+ SQ_CF_COND_FALSE = 0x01,
+ SQ_CF_COND_BOOL = 0x02,
+ SQ_CF_COND_NOT_BOOL = 0x03,
+ SQ_CF_WORD1__COUNT_mask = 0x07 << 10,
+ SQ_CF_WORD1__COUNT_shift = 10,
+ CALL_COUNT_mask = 0x3f << 13,
+ CALL_COUNT_shift = 13,
+ COUNT_3_bit = 1 << 19,
+/* END_OF_PROGRAM_bit = 1 << 21, */
+/* VALID_PIXEL_MODE_bit = 1 << 22, */
+ SQ_CF_WORD1__CF_INST_mask = 0x7f << 23,
+ SQ_CF_WORD1__CF_INST_shift = 23,
+ SQ_CF_INST_NOP = 0x00,
+ SQ_CF_INST_TEX = 0x01,
+ SQ_CF_INST_VTX = 0x02,
+ SQ_CF_INST_VTX_TC = 0x03,
+ SQ_CF_INST_LOOP_START = 0x04,
+ SQ_CF_INST_LOOP_END = 0x05,
+ SQ_CF_INST_LOOP_START_DX10 = 0x06,
+ SQ_CF_INST_LOOP_START_NO_AL = 0x07,
+ SQ_CF_INST_LOOP_CONTINUE = 0x08,
+ SQ_CF_INST_LOOP_BREAK = 0x09,
+ SQ_CF_INST_JUMP = 0x0a,
+ SQ_CF_INST_PUSH = 0x0b,
+ SQ_CF_INST_PUSH_ELSE = 0x0c,
+ SQ_CF_INST_ELSE = 0x0d,
+ SQ_CF_INST_POP = 0x0e,
+ SQ_CF_INST_POP_JUMP = 0x0f,
+ SQ_CF_INST_POP_PUSH = 0x10,
+ SQ_CF_INST_POP_PUSH_ELSE = 0x11,
+ SQ_CF_INST_CALL = 0x12,
+ SQ_CF_INST_CALL_FS = 0x13,
+ SQ_CF_INST_RETURN = 0x14,
+ SQ_CF_INST_EMIT_VERTEX = 0x15,
+ SQ_CF_INST_EMIT_CUT_VERTEX = 0x16,
+ SQ_CF_INST_CUT_VERTEX = 0x17,
+ SQ_CF_INST_KILL = 0x18,
+/* WHOLE_QUAD_MODE_bit = 1 << 30, */
+/* BARRIER_bit = 1 << 31, */
+ SQ_VTX_WORD1_SEM = 0x00008dfc,
+ SEMANTIC_ID_mask = 0xff << 0,
+ SEMANTIC_ID_shift = 0,
+ SQ_TEX_WORD0 = 0x00008dfc,
+ TEX_INST_mask = 0x1f << 0,
+ TEX_INST_shift = 0,
+ SQ_TEX_INST_VTX_FETCH = 0x00,
+ SQ_TEX_INST_VTX_SEMANTIC = 0x01,
+ SQ_TEX_INST_LD = 0x03,
+ SQ_TEX_INST_GET_TEXTURE_RESINFO = 0x04,
+ SQ_TEX_INST_GET_NUMBER_OF_SAMPLES = 0x05,
+ SQ_TEX_INST_GET_LOD = 0x06,
+ SQ_TEX_INST_GET_GRADIENTS_H = 0x07,
+ SQ_TEX_INST_GET_GRADIENTS_V = 0x08,
+ SQ_TEX_INST_GET_LERP = 0x09,
+ SQ_TEX_INST_RESERVED_10 = 0x0a,
+ SQ_TEX_INST_SET_GRADIENTS_H = 0x0b,
+ SQ_TEX_INST_SET_GRADIENTS_V = 0x0c,
+ SQ_TEX_INST_PASS = 0x0d,
+ X_Z_SET_INDEX_FOR_ARRAY_OF_CUBEMAPS = 0x0e,
+ SQ_TEX_INST_SAMPLE = 0x10,
+ SQ_TEX_INST_SAMPLE_L = 0x11,
+ SQ_TEX_INST_SAMPLE_LB = 0x12,
+ SQ_TEX_INST_SAMPLE_LZ = 0x13,
+ SQ_TEX_INST_SAMPLE_G = 0x14,
+ SQ_TEX_INST_SAMPLE_G_L = 0x15,
+ SQ_TEX_INST_SAMPLE_G_LB = 0x16,
+ SQ_TEX_INST_SAMPLE_G_LZ = 0x17,
+ SQ_TEX_INST_SAMPLE_C = 0x18,
+ SQ_TEX_INST_SAMPLE_C_L = 0x19,
+ SQ_TEX_INST_SAMPLE_C_LB = 0x1a,
+ SQ_TEX_INST_SAMPLE_C_LZ = 0x1b,
+ SQ_TEX_INST_SAMPLE_C_G = 0x1c,
+ SQ_TEX_INST_SAMPLE_C_G_L = 0x1d,
+ SQ_TEX_INST_SAMPLE_C_G_LB = 0x1e,
+ SQ_TEX_INST_SAMPLE_C_G_LZ = 0x1f,
+ BC_FRAC_MODE_bit = 1 << 5,
+/* FETCH_WHOLE_QUAD_bit = 1 << 7, */
+ RESOURCE_ID_mask = 0xff << 8,
+ RESOURCE_ID_shift = 8,
+/* SRC_GPR_mask = 0x7f << 16, */
+/* SRC_GPR_shift = 16, */
+/* SRC_REL_bit = 1 << 23, */
+ SQ_TEX_WORD0__ALT_CONST_bit = 1 << 24,
+ SQ_VTX_WORD1_GPR = 0x00008dfc,
+ SQ_VTX_WORD1_GPR__DST_GPR_mask = 0x7f << 0,
+ SQ_VTX_WORD1_GPR__DST_GPR_shift = 0,
+ SQ_VTX_WORD1_GPR__DST_REL_bit = 1 << 7,
+ SQ_ALU_WORD0 = 0x00008dfc, /* same value as the other SQ_*_WORD* entries nearby; presumably a placeholder, not a distinct register -- confirm */
+ SRC0_SEL_mask = 0x1ff << 0, /* 9-bit field, bits 8:0 */
+ SRC0_SEL_shift = 0,
+/* SQ_ALU_SRC_0 = 0xf8, */
+/* SQ_ALU_SRC_1 = 0xf9, */
+/* SQ_ALU_SRC_1_INT = 0xfa, */
+/* SQ_ALU_SRC_M_1_INT = 0xfb, */
+/* SQ_ALU_SRC_0_5 = 0xfc, */
+/* SQ_ALU_SRC_LITERAL = 0xfd, */
+/* SQ_ALU_SRC_PV = 0xfe, */
+/* SQ_ALU_SRC_PS = 0xff, */
+ SRC0_REL_bit = 1 << 9,
+ SRC0_CHAN_mask = 0x03 << 10, /* 2-bit field, bits 11:10 */
+ SRC0_CHAN_shift = 10,
+/* SQ_CHAN_X = 0x00, */
+/* SQ_CHAN_Y = 0x01, */
+/* SQ_CHAN_Z = 0x02, */
+/* SQ_CHAN_W = 0x03, */
+ SRC0_NEG_bit = 1 << 12,
+ SRC1_SEL_mask = 0x1ff << 13, /* 9-bit field, bits 21:13; the SRC1 group repeats the SRC0 layout 13 bits higher */
+ SRC1_SEL_shift = 13,
+/* SQ_ALU_SRC_0 = 0xf8, */
+/* SQ_ALU_SRC_1 = 0xf9, */
+/* SQ_ALU_SRC_1_INT = 0xfa, */
+/* SQ_ALU_SRC_M_1_INT = 0xfb, */
+/* SQ_ALU_SRC_0_5 = 0xfc, */
+/* SQ_ALU_SRC_LITERAL = 0xfd, */
+/* SQ_ALU_SRC_PV = 0xfe, */
+/* SQ_ALU_SRC_PS = 0xff, */
+ SRC1_REL_bit = 1 << 22,
+ SRC1_CHAN_mask = 0x03 << 23, /* 2-bit field, bits 24:23 */
+ SRC1_CHAN_shift = 23,
+/* SQ_CHAN_X = 0x00, */
+/* SQ_CHAN_Y = 0x01, */
+/* SQ_CHAN_Z = 0x02, */
+/* SQ_CHAN_W = 0x03, */
+ SRC1_NEG_bit = 1 << 25,
+ INDEX_MODE_mask = 0x07 << 26, /* 3-bit field, bits 28:26; five of its eight encodings are named below */
+ INDEX_MODE_shift = 26,
+ SQ_INDEX_AR_X = 0x00,
+ SQ_INDEX_AR_Y = 0x01,
+ SQ_INDEX_AR_Z = 0x02,
+ SQ_INDEX_AR_W = 0x03,
+ SQ_INDEX_LOOP = 0x04,
+ PRED_SEL_mask = 0x03 << 29, /* 2-bit field, bits 30:29 */
+ PRED_SEL_shift = 29,
+ SQ_PRED_SEL_OFF = 0x00, /* note the list skips 0x01: only 0x00, 0x02 and 0x03 are named */
+ SQ_PRED_SEL_ZERO = 0x02,
+ SQ_PRED_SEL_ONE = 0x03,
+ LAST_bit = 1 << 31, /* NOTE(review): 1 << 31 overflows a 32-bit signed int; confirm the compiler yields the intended 0x80000000 pattern */
+ SX_EXPORT_BUFFER_SIZES = 0x0000900c,
+ COLOR_BUFFER_SIZE_mask = 0xff << 0,
+ COLOR_BUFFER_SIZE_shift = 0,
+ POSITION_BUFFER_SIZE_mask = 0xff << 8,
+ POSITION_BUFFER_SIZE_shift = 8,
+ SMX_BUFFER_SIZE_mask = 0xff << 16,
+ SMX_BUFFER_SIZE_shift = 16,
+ SX_MEMORY_EXPORT_BASE = 0x00009010,
+ SX_MEMORY_EXPORT_SIZE = 0x00009014,
+ SPI_CONFIG_CNTL = 0x00009100,
+ GPR_WRITE_PRIORITY_mask = 0x1f << 0,
+ GPR_WRITE_PRIORITY_shift = 0,
+ X_PRIORITY_ORDER = 0x00,
+ X_PRIORITY_ORDER_VS = 0x01,
+ DISABLE_INTERP_1_bit = 1 << 5,
+ DEBUG_THREAD_TYPE_SEL_mask = 0x03 << 6,
+ DEBUG_THREAD_TYPE_SEL_shift = 6,
+ DEBUG_GROUP_SEL_mask = 0x1f << 8,
+ DEBUG_GROUP_SEL_shift = 8,
+ DEBUG_GRBM_OVERRIDE_bit = 1 << 13,
+ SPI_CONFIG_CNTL_1 = 0x0000913c,
+ VTX_DONE_DELAY_mask = 0x0f << 0, /* 4-bit field, bits 3:0; all 16 encodings are named below */
+ VTX_DONE_DELAY_shift = 0,
+ X_DELAY_10_CLKS = 0x00, /* encoding is not monotonic: going by the names, 0x00..0x07 mean 10..17 clocks and 0x08..0x0f mean 2..9 clocks */
+ X_DELAY_11_CLKS = 0x01,
+ X_DELAY_12_CLKS = 0x02,
+ X_DELAY_13_CLKS = 0x03,
+ X_DELAY_14_CLKS = 0x04,
+ X_DELAY_15_CLKS = 0x05,
+ X_DELAY_16_CLKS = 0x06,
+ X_DELAY_17_CLKS = 0x07,
+ X_DELAY_2_CLKS = 0x08, /* shortest named delay; do not compare these values numerically to order delays */
+ X_DELAY_3_CLKS = 0x09,
+ X_DELAY_4_CLKS = 0x0a,
+ X_DELAY_5_CLKS = 0x0b,
+ X_DELAY_6_CLKS = 0x0c,
+ X_DELAY_7_CLKS = 0x0d,
+ X_DELAY_8_CLKS = 0x0e,
+ X_DELAY_9_CLKS = 0x0f,
+ INTERP_ONE_PRIM_PER_ROW_bit = 1 << 4,
+ TD_FILTER4 = 0x00009400,
+ WEIGHT_1_mask = 0x7ff << 0, /* 11-bit field, bits 10:0; note WEIGHT_1 is the LOW field and WEIGHT_0 sits above it */
+ WEIGHT_1_shift = 0,
+ WEIGHT_0_mask = 0x7ff << 11, /* 11-bit field, bits 21:11 */
+ WEIGHT_0_shift = 11,
+ WEIGHT_PAIR_bit = 1 << 22,
+ PHASE_mask = 0x0f << 23, /* 4-bit field, bits 26:23 */
+ PHASE_shift = 23,
+ DIRECTION_bit = 1 << 27,
+ TD_FILTER4_1 = 0x00009404, /* immediately follows TD_FILTER4 (0x9400 + 4) */
+ TD_FILTER4_1_num = 35, /* presumably the number of consecutive registers in this array -- confirm against users of *_num */
+/* WEIGHT_1_mask = 0x7ff << 0, */
+/* WEIGHT_1_shift = 0, */
+/* WEIGHT_0_mask = 0x7ff << 11, */
+/* WEIGHT_0_shift = 11, */
+ TD_CNTL = 0x00009490,
+ SYNC_PHASE_SH_mask = 0x03 << 0,
+ SYNC_PHASE_SH_shift = 0,
+ SYNC_PHASE_VC_SMX_mask = 0x03 << 4,
+ SYNC_PHASE_VC_SMX_shift = 4,
+ TD0_CNTL = 0x00009494,
+ TD0_CNTL_num = 4,
+ ID_OVERRIDE_mask = 0x03 << 28,
+ ID_OVERRIDE_shift = 28,
+ TD0_STATUS = 0x000094a4,
+ TD0_STATUS_num = 4,
+ BUSY_bit = 1 << 31,
+ TA_CNTL = 0x00009504,
+ GRADIENT_CREDIT_mask = 0x1f << 0,
+ GRADIENT_CREDIT_shift = 0,
+ WALKER_CREDIT_mask = 0x1f << 8,
+ WALKER_CREDIT_shift = 8,
+ ALIGNER_CREDIT_mask = 0x1f << 16,
+ ALIGNER_CREDIT_shift = 16,
+ TD_FIFO_CREDIT_mask = 0x3ff << 22,
+ TD_FIFO_CREDIT_shift = 22,
+ TA_CNTL_AUX = 0x00009508,
+ DISABLE_CUBE_WRAP_bit = 1 << 0,
+ SYNC_GRADIENT_bit = 1 << 24,
+ SYNC_WALKER_bit = 1 << 25,
+ SYNC_ALIGNER_bit = 1 << 26,
+ BILINEAR_PRECISION_bit = 1 << 31,
+ TA0_CNTL = 0x00009510,
+/* ID_OVERRIDE_mask = 0x03 << 28, */
+/* ID_OVERRIDE_shift = 28, */
+ TA1_CNTL = 0x00009514,
+/* ID_OVERRIDE_mask = 0x03 << 28, */
+/* ID_OVERRIDE_shift = 28, */
+ TA2_CNTL = 0x00009518,
+/* ID_OVERRIDE_mask = 0x03 << 28, */
+/* ID_OVERRIDE_shift = 28, */
+ TA3_CNTL = 0x0000951c,
+/* ID_OVERRIDE_mask = 0x03 << 28, */
+/* ID_OVERRIDE_shift = 28, */
+ TA0_STATUS = 0x00009520,
+ FG_PFIFO_EMPTYB_bit = 1 << 12,
+ FG_LFIFO_EMPTYB_bit = 1 << 13,
+ FG_SFIFO_EMPTYB_bit = 1 << 14,
+ FL_PFIFO_EMPTYB_bit = 1 << 16,
+ FL_LFIFO_EMPTYB_bit = 1 << 17,
+ FL_SFIFO_EMPTYB_bit = 1 << 18,
+ FA_PFIFO_EMPTYB_bit = 1 << 20,
+ FA_LFIFO_EMPTYB_bit = 1 << 21,
+ FA_SFIFO_EMPTYB_bit = 1 << 22,
+ IN_BUSY_bit = 1 << 24,
+ FG_BUSY_bit = 1 << 25,
+ FL_BUSY_bit = 1 << 27,
+ TA_BUSY_bit = 1 << 28,
+ FA_BUSY_bit = 1 << 29,
+ AL_BUSY_bit = 1 << 30,
+/* BUSY_bit = 1 << 31, */
+ TA1_STATUS = 0x00009524,
+/* FG_PFIFO_EMPTYB_bit = 1 << 12, */
+/* FG_LFIFO_EMPTYB_bit = 1 << 13, */
+/* FG_SFIFO_EMPTYB_bit = 1 << 14, */
+/* FL_PFIFO_EMPTYB_bit = 1 << 16, */
+/* FL_LFIFO_EMPTYB_bit = 1 << 17, */
+/* FL_SFIFO_EMPTYB_bit = 1 << 18, */
+/* FA_PFIFO_EMPTYB_bit = 1 << 20, */
+/* FA_LFIFO_EMPTYB_bit = 1 << 21, */
+/* FA_SFIFO_EMPTYB_bit = 1 << 22, */
+/* IN_BUSY_bit = 1 << 24, */
+/* FG_BUSY_bit = 1 << 25, */
+/* FL_BUSY_bit = 1 << 27, */
+/* TA_BUSY_bit = 1 << 28, */
+/* FA_BUSY_bit = 1 << 29, */
+/* AL_BUSY_bit = 1 << 30, */
+/* BUSY_bit = 1 << 31, */
+ TA2_STATUS = 0x00009528,
+/* FG_PFIFO_EMPTYB_bit = 1 << 12, */
+/* FG_LFIFO_EMPTYB_bit = 1 << 13, */
+/* FG_SFIFO_EMPTYB_bit = 1 << 14, */
+/* FL_PFIFO_EMPTYB_bit = 1 << 16, */
+/* FL_LFIFO_EMPTYB_bit = 1 << 17, */
+/* FL_SFIFO_EMPTYB_bit = 1 << 18, */
+/* FA_PFIFO_EMPTYB_bit = 1 << 20, */
+/* FA_LFIFO_EMPTYB_bit = 1 << 21, */
+/* FA_SFIFO_EMPTYB_bit = 1 << 22, */
+/* IN_BUSY_bit = 1 << 24, */
+/* FG_BUSY_bit = 1 << 25, */
+/* FL_BUSY_bit = 1 << 27, */
+/* TA_BUSY_bit = 1 << 28, */
+/* FA_BUSY_bit = 1 << 29, */
+/* AL_BUSY_bit = 1 << 30, */
+/* BUSY_bit = 1 << 31, */
+ TA3_STATUS = 0x0000952c,
+/* FG_PFIFO_EMPTYB_bit = 1 << 12, */
+/* FG_LFIFO_EMPTYB_bit = 1 << 13, */
+/* FG_SFIFO_EMPTYB_bit = 1 << 14, */
+/* FL_PFIFO_EMPTYB_bit = 1 << 16, */
+/* FL_LFIFO_EMPTYB_bit = 1 << 17, */
+/* FL_SFIFO_EMPTYB_bit = 1 << 18, */
+/* FA_PFIFO_EMPTYB_bit = 1 << 20, */
+/* FA_LFIFO_EMPTYB_bit = 1 << 21, */
+/* FA_SFIFO_EMPTYB_bit = 1 << 22, */
+/* IN_BUSY_bit = 1 << 24, */
+/* FG_BUSY_bit = 1 << 25, */
+/* FL_BUSY_bit = 1 << 27, */
+/* TA_BUSY_bit = 1 << 28, */
+/* FA_BUSY_bit = 1 << 29, */
+/* AL_BUSY_bit = 1 << 30, */
+/* BUSY_bit = 1 << 31, */
+ TC_STATUS = 0x00009600,
+ TC_BUSY_bit = 1 << 0,
+ TC_INVALIDATE = 0x00009604,
+ START_bit = 1 << 0,
+ TC_CNTL = 0x00009608,
+ FORCE_HIT_bit = 1 << 0,
+ FORCE_MISS_bit = 1 << 1,
+ L2_SIZE_mask = 0x0f << 5, /* 4-bit field, bits 8:5; only encodings 0x00..0x07 are named below */
+ L2_SIZE_shift = 5,
+ _256K = 0x00, /* NOTE(review): C reserves identifiers that begin with an underscore at file scope; these names cannot be changed here without breaking users, but may collide with implementation names */
+ _224K = 0x01, /* going by the names, each step up in value is 32K smaller */
+ _192K = 0x02,
+ _160K = 0x03,
+ _128K = 0x04,
+ _96K = 0x05,
+ _64K = 0x06,
+ _32K = 0x07,
+ L2_DISABLE_LATE_HIT_bit = 1 << 9,
+ DISABLE_VERT_PERF_bit = 1 << 10,
+ DISABLE_INVAL_BUSY_bit = 1 << 11,
+ DISABLE_INVAL_SAME_SURFACE_bit = 1 << 12,
+ PARTITION_MODE_mask = 0x03 << 13, /* 2-bit field, bits 14:13; only one of its four encodings is named here */
+ PARTITION_MODE_shift = 13,
+ X_VERTEX = 0x00,
+ MISS_ARB_MODE_bit = 1 << 15,
+ HIT_ARB_MODE_bit = 1 << 16,
+ DISABLE_WRITE_DELAY_bit = 1 << 17,
+ HIT_FIFO_DEPTH_bit = 1 << 18,
+ VC_CNTL = 0x00009700,
+ L2_INVALIDATE_bit = 1 << 0,
+ RESERVED_bit = 1 << 1,
+ CC_FORCE_MISS_bit = 1 << 2,
+ MI_CHAN_SEL_mask = 0x03 << 3, /* 2-bit field, bits 4:3 */
+ MI_CHAN_SEL_shift = 3,
+ X_MC0_USES_CH_0_1 = 0x00,
+ X_MC0_USES_CH_0_3 = 0x01,
+ X_VC_MC0_IS_ACTIVE = 0x02,
+ X_VC_MC1_IS_DISABLED = 0x03,
+ MI_STEER_DISABLE_bit = 1 << 5,
+ MI_CREDIT_CTR_mask = 0x0f << 6, /* 4-bit field, bits 9:6 */
+ MI_CREDIT_CTR_shift = 6,
+ MI_CREDIT_WE_bit = 1 << 10,
+ MI_REQ_STALL_THLD_mask = 0x07 << 11, /* 3-bit field, bits 13:11; all eight encodings are named below */
+ MI_REQ_STALL_THLD_shift = 11,
+ X_LATENCY_EXCEEDS_399_CLOCKS = 0x00, /* going by the names, the threshold rises by 16 clocks per step (399..511) */
+ X_LATENCY_EXCEEDS_415_CLOCKS = 0x01,
+ X_LATENCY_EXCEEDS_431_CLOCKS = 0x02,
+ X_LATENCY_EXCEEDS_447_CLOCKS = 0x03,
+ X_LATENCY_EXCEEDS_463_CLOCKS = 0x04,
+ X_LATENCY_EXCEEDS_479_CLOCKS = 0x05,
+ X_LATENCY_EXCEEDS_495_CLOCKS = 0x06,
+ X_LATENCY_EXCEEDS_511_CLOCKS = 0x07,
+ VC_CNTL__MI_TIMESTAMP_RES_mask = 0x1f << 14, /* 5-bit field, bits 18:14; only encodings 0x00..0x0f are named below */
+ VC_CNTL__MI_TIMESTAMP_RES_shift = 14,
+ X_1X_SYSTEM_CLOCK = 0x00, /* going by the names, the multiplier doubles with each step */
+ X_2X_SYSTEM_CLOCK = 0x01,
+ X_4X_SYSTEM_CLOCK = 0x02,
+ X_8X_SYSTEM_CLOCK = 0x03,
+ X_16X_SYSTEM_CLOCK = 0x04,
+ X_32X_SYSTEM_CLOCK = 0x05,
+ X_64X_SYSTEM_CLOCK = 0x06,
+ X_128X_SYSTEM_CLOCK = 0x07,
+ X_256X_SYSTEM_CLOCK = 0x08,
+ X_512X_SYSTEM_CLOCK = 0x09,
+ X_1024X_SYSTEM_CLOCK = 0x0a,
+ X_2048X_SYSTEM_CLOCK = 0x0b,
+ X_4092X_SYSTEM_CLOCK = 0x0c, /* NOTE(review): sits between 2048X and 8192X, so the name is presumably a typo for 4096X; kept as-is because renaming would break users */
+ X_8192X_SYSTEM_CLOCK = 0x0d,
+ X_16384X_SYSTEM_CLOCK = 0x0e,
+ X_32768X_SYSTEM_CLOCK = 0x0f,
+ VC_CNTL_STATUS = 0x00009704,
+ RP_BUSY_bit = 1 << 0,
+ RG_BUSY_bit = 1 << 1,
+ VC_BUSY_bit = 1 << 2,
+ CLAMP_DETECT_bit = 1 << 3,
+ VC_CONFIG = 0x00009718,
+ WRITE_DIS_bit = 1 << 0,
+ GPR_DATA_PHASE_ADJ_mask = 0x07 << 1,
+ GPR_DATA_PHASE_ADJ_shift = 1,
+ X_LATENCY_BASE_0_CYCLES = 0x00,
+ X_LATENCY_BASE_1_CYCLES = 0x01,
+ X_LATENCY_BASE_2_CYCLES = 0x02,
+ X_LATENCY_BASE_3_CYCLES = 0x03,
+ TD_SIMD_SYNC_ADJ_mask = 0x07 << 4,
+ TD_SIMD_SYNC_ADJ_shift = 4,
+ X_0_CYCLES_DELAY = 0x00,
+ X_1_CYCLES_DELAY = 0x01,
+ X_2_CYCLES_DELAY = 0x02,
+ X_3_CYCLES_DELAY = 0x03,
+ X_4_CYCLES_DELAY = 0x04,
+ X_5_CYCLES_DELAY = 0x05,
+ X_6_CYCLES_DELAY = 0x06,
+ X_7_CYCLES_DELAY = 0x07,
+ SMX_DC_CTL0 = 0x0000a020,
+ WR_GATHER_STREAM0_bit = 1 << 0,
+ WR_GATHER_STREAM1_bit = 1 << 1,
+ WR_GATHER_STREAM2_bit = 1 << 2,
+ WR_GATHER_STREAM3_bit = 1 << 3,
+ WR_GATHER_SCRATCH_bit = 1 << 4,
+ WR_GATHER_REDUC_BUF_bit = 1 << 5,
+ WR_GATHER_RING_BUF_bit = 1 << 6,
+ WR_GATHER_F_BUF_bit = 1 << 7,
+ DISABLE_CACHES_bit = 1 << 8,
+ AUTO_FLUSH_INVAL_EN_bit = 1 << 10,
+ AUTO_FLUSH_EN_bit = 1 << 11,
+ AUTO_FLUSH_CNT_mask = 0xffff << 12,
+ AUTO_FLUSH_CNT_shift = 12,
+ MC_RD_STALL_FACTOR_mask = 0x03 << 28,
+ MC_RD_STALL_FACTOR_shift = 28,
+ MC_WR_STALL_FACTOR_mask = 0x03 << 30,
+ MC_WR_STALL_FACTOR_shift = 30,
+ SMX_DC_CTL1 = 0x0000a024,
+ OP_FIFO_SKID_mask = 0x7f << 0,
+ OP_FIFO_SKID_shift = 0,
+ CACHE_LINE_SIZE_bit = 1 << 8,
+ MULTI_FLUSH_MODE_bit = 1 << 9,
+ MULTI_FLUSH_REQ_ABORT_IDX_FIFO_SKID_mask = 0x0f << 10,
+ MULTI_FLUSH_REQ_ABORT_IDX_FIFO_SKID_shift = 10,
+ DISABLE_WR_GATHER_RD_HIT_FORCE_EVICT_bit = 1 << 16,
+ DISABLE_WR_GATHER_RD_HIT_COMP_VLDS_CHECK_bit = 1 << 17,
+ DISABLE_FLUSH_ES_ALSO_INVALS_bit = 1 << 18,
+ DISABLE_FLUSH_GS_ALSO_INVALS_bit = 1 << 19,
+ SMX_DC_CTL2 = 0x0000a028,
+ INVALIDATE_CACHES_bit = 1 << 0,
+ CACHES_INVALID_bit = 1 << 1,
+ CACHES_DIRTY_bit = 1 << 2,
+ FLUSH_ALL_bit = 1 << 4,
+ FLUSH_GS_THREADS_bit = 1 << 8,
+ FLUSH_ES_THREADS_bit = 1 << 9,
+ SMX_DC_MC_INTF_CTL = 0x0000a02c,
+ MC_RD_REQ_CRED_mask = 0xff << 0,
+ MC_RD_REQ_CRED_shift = 0,
+ MC_WR_REQ_CRED_mask = 0xff << 16,
+ MC_WR_REQ_CRED_shift = 16,
+ TD_PS_SAMPLER0_BORDER_RED = 0x0000a400, /* RED/GREEN/BLUE/ALPHA sit at +0x0/+0x4/+0x8/+0xc, i.e. one 16-byte group */
+ TD_PS_SAMPLER0_BORDER_RED_num = 18, /* presumably the number of instances of this register - TODO confirm */
+ TD_PS_SAMPLER0_BORDER_RED_offset = 16, /* presumably the byte stride between instances; 16 equals the size of the four-register group above */
+ TD_PS_SAMPLER0_BORDER_GREEN = 0x0000a404,
+ TD_PS_SAMPLER0_BORDER_GREEN_num = 18,
+ TD_PS_SAMPLER0_BORDER_GREEN_offset = 16,
+ TD_PS_SAMPLER0_BORDER_BLUE = 0x0000a408,
+ TD_PS_SAMPLER0_BORDER_BLUE_num = 18,
+ TD_PS_SAMPLER0_BORDER_BLUE_offset = 16,
+ TD_PS_SAMPLER0_BORDER_ALPHA = 0x0000a40c,
+ TD_PS_SAMPLER0_BORDER_ALPHA_num = 18,
+ TD_PS_SAMPLER0_BORDER_ALPHA_offset = 16, /* 0xa400 + 18 * 16 = 0xa520, which stays below the VS block at 0xa600 */
+ TD_VS_SAMPLER0_BORDER_RED = 0x0000a600,
+ TD_VS_SAMPLER0_BORDER_RED_num = 18,
+ TD_VS_SAMPLER0_BORDER_RED_offset = 16,
+ TD_VS_SAMPLER0_BORDER_GREEN = 0x0000a604,
+ TD_VS_SAMPLER0_BORDER_GREEN_num = 18,
+ TD_VS_SAMPLER0_BORDER_GREEN_offset = 16,
+ TD_VS_SAMPLER0_BORDER_BLUE = 0x0000a608,
+ TD_VS_SAMPLER0_BORDER_BLUE_num = 18,
+ TD_VS_SAMPLER0_BORDER_BLUE_offset = 16,
+ TD_VS_SAMPLER0_BORDER_ALPHA = 0x0000a60c,
+ TD_VS_SAMPLER0_BORDER_ALPHA_num = 18,
+ TD_VS_SAMPLER0_BORDER_ALPHA_offset = 16,
+ TD_GS_SAMPLER0_BORDER_RED = 0x0000a800,
+ TD_GS_SAMPLER0_BORDER_RED_num = 18,
+ TD_GS_SAMPLER0_BORDER_RED_offset = 16,
+ TD_GS_SAMPLER0_BORDER_GREEN = 0x0000a804,
+ TD_GS_SAMPLER0_BORDER_GREEN_num = 18,
+ TD_GS_SAMPLER0_BORDER_GREEN_offset = 16,
+ TD_GS_SAMPLER0_BORDER_BLUE = 0x0000a808,
+ TD_GS_SAMPLER0_BORDER_BLUE_num = 18,
+ TD_GS_SAMPLER0_BORDER_BLUE_offset = 16,
+ TD_GS_SAMPLER0_BORDER_ALPHA = 0x0000a80c,
+ TD_GS_SAMPLER0_BORDER_ALPHA_num = 18,
+ TD_GS_SAMPLER0_BORDER_ALPHA_offset = 16,
+ TD_PS_SAMPLER0_CLEARTYPE_KERNEL = 0x0000aa00,
+ TD_PS_SAMPLER0_CLEARTYPE_KERNEL_num = 18,
+ TD_PS_SAMPLER0_CLEARTYPE_KERNEL__WIDTH_mask = 0x07 << 0,
+ TD_PS_SAMPLER0_CLEARTYPE_KERNEL__WIDTH_shift = 0,
+ TD_PS_SAMPLER0_CLEARTYPE_KERNEL__HEIGHT_mask = 0x07 << 3,
+ TD_PS_SAMPLER0_CLEARTYPE_KERNEL__HEIGHT_shift = 3,
+ DB_DEPTH_SIZE = 0x00028000, /* the unprefixed field names below are listed again, commented out, under CB_COLOR0_SIZE with the same values */
+ PITCH_TILE_MAX_mask = 0x3ff << 0, /* 10-bit field, bits 0..9 */
+ PITCH_TILE_MAX_shift = 0,
+ SLICE_TILE_MAX_mask = 0xfffff << 10, /* 20-bit field, bits 10..29 */
+ SLICE_TILE_MAX_shift = 10,
+ DB_DEPTH_VIEW = 0x00028004,
+ SLICE_START_mask = 0x7ff << 0,
+ SLICE_START_shift = 0,
+ SLICE_MAX_mask = 0x7ff << 13,
+ SLICE_MAX_shift = 13,
+ DB_DEPTH_BASE = 0x0002800c,
+ DB_DEPTH_INFO = 0x00028010, /* FORMAT, READ_SIZE and ARRAY_MODE carry a DB_DEPTH_INFO__ prefix because CB_COLOR0_INFO defines fields of the same names at different bit positions */
+ DB_DEPTH_INFO__FORMAT_mask = 0x07 << 0, /* 3-bit field, bits 0..2; the DEPTH_* entries below fill its whole 0..7 range */
+ DB_DEPTH_INFO__FORMAT_shift = 0,
+ DEPTH_INVALID = 0x00,
+ DEPTH_16 = 0x01,
+ DEPTH_X8_24 = 0x02,
+ DEPTH_8_24 = 0x03,
+ DEPTH_X8_24_FLOAT = 0x04,
+ DEPTH_8_24_FLOAT = 0x05,
+ DEPTH_32_FLOAT = 0x06,
+ DEPTH_X24_8_32_FLOAT = 0x07,
+ DB_DEPTH_INFO__READ_SIZE_bit = 1 << 3,
+ DB_DEPTH_INFO__ARRAY_MODE_mask = 0x0f << 15, /* 4-bit field, bits 15..18 */
+ DB_DEPTH_INFO__ARRAY_MODE_shift = 15,
+ ARRAY_2D_TILED_THIN1 = 0x04, /* presumably an unshifted field value: 0x04 lies outside the mask until shifted by ARRAY_MODE_shift */
+ TILE_SURFACE_ENABLE_bit = 1 << 25,
+ TILE_COMPACT_bit = 1 << 26,
+ ZRANGE_PRECISION_bit = 1 << 31, /* NOTE(review): 1 << 31 overflows a 32-bit signed int - confirm the compiler yields the intended 0x80000000 bit pattern */
+ DB_HTILE_DATA_BASE = 0x00028014,
+ DB_STENCIL_CLEAR = 0x00028028,
+ DB_STENCIL_CLEAR__CLEAR_mask = 0xff << 0,
+ DB_STENCIL_CLEAR__CLEAR_shift = 0,
+ MIN_mask = 0xff << 16,
+ MIN_shift = 16,
+ DB_DEPTH_CLEAR = 0x0002802c,
+ PA_SC_SCREEN_SCISSOR_TL = 0x00028030,
+ PA_SC_SCREEN_SCISSOR_TL__TL_X_mask = 0x7fff << 0,
+ PA_SC_SCREEN_SCISSOR_TL__TL_X_shift = 0,
+ PA_SC_SCREEN_SCISSOR_TL__TL_Y_mask = 0x7fff << 16,
+ PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift = 16,
+ PA_SC_SCREEN_SCISSOR_BR = 0x00028034,
+ PA_SC_SCREEN_SCISSOR_BR__BR_X_mask = 0x7fff << 0,
+ PA_SC_SCREEN_SCISSOR_BR__BR_X_shift = 0,
+ PA_SC_SCREEN_SCISSOR_BR__BR_Y_mask = 0x7fff << 16,
+ PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift = 16,
+ CB_COLOR0_BASE = 0x00028040,
+ CB_COLOR0_BASE_num = 8, /* presumably 8 consecutive instances: 8 * 4 bytes = 0x20, which is exactly the gap to CB_COLOR0_SIZE */
+ CB_COLOR0_SIZE = 0x00028060, /* field constants are commented out below because identical names and values already appear under DB_DEPTH_SIZE */
+ CB_COLOR0_SIZE_num = 8,
+/* PITCH_TILE_MAX_mask = 0x3ff << 0, */
+/* PITCH_TILE_MAX_shift = 0, */
+/* SLICE_TILE_MAX_mask = 0xfffff << 10, */
+/* SLICE_TILE_MAX_shift = 10, */
+ CB_COLOR0_VIEW = 0x00028080, /* field constants are commented out below because identical names and values already appear under DB_DEPTH_VIEW */
+ CB_COLOR0_VIEW_num = 8,
+/* SLICE_START_mask = 0x7ff << 0, */
+/* SLICE_START_shift = 0, */
+/* SLICE_MAX_mask = 0x7ff << 13, */
+/* SLICE_MAX_shift = 13, */
+ CB_COLOR0_INFO = 0x000280a0,
+ CB_COLOR0_INFO_num = 8,
+ ENDIAN_mask = 0x03 << 0,
+ ENDIAN_shift = 0,
+ ENDIAN_NONE = 0x00,
+ ENDIAN_8IN16 = 0x01,
+ ENDIAN_8IN32 = 0x02,
+ ENDIAN_8IN64 = 0x03,
+ CB_COLOR0_INFO__FORMAT_mask = 0x3f << 2,
+ CB_COLOR0_INFO__FORMAT_shift = 2,
+ COLOR_INVALID = 0x00,
+ COLOR_8 = 0x01,
+ COLOR_4_4 = 0x02,
+ COLOR_3_3_2 = 0x03,
+ COLOR_16 = 0x05,
+ COLOR_16_FLOAT = 0x06,
+ COLOR_8_8 = 0x07,
+ COLOR_5_6_5 = 0x08,
+ COLOR_6_5_5 = 0x09,
+ COLOR_1_5_5_5 = 0x0a,
+ COLOR_4_4_4_4 = 0x0b,
+ COLOR_5_5_5_1 = 0x0c,
+ COLOR_32 = 0x0d,
+ COLOR_32_FLOAT = 0x0e,
+ COLOR_16_16 = 0x0f,
+ COLOR_16_16_FLOAT = 0x10,
+ COLOR_8_24 = 0x11,
+ COLOR_8_24_FLOAT = 0x12,
+ COLOR_24_8 = 0x13,
+ COLOR_24_8_FLOAT = 0x14,
+ COLOR_10_11_11 = 0x15,
+ COLOR_10_11_11_FLOAT = 0x16,
+ COLOR_11_11_10 = 0x17,
+ COLOR_11_11_10_FLOAT = 0x18,
+ COLOR_2_10_10_10 = 0x19,
+ COLOR_8_8_8_8 = 0x1a,
+ COLOR_10_10_10_2 = 0x1b,
+ COLOR_X24_8_32_FLOAT = 0x1c,
+ COLOR_32_32 = 0x1d,
+ COLOR_32_32_FLOAT = 0x1e,
+ COLOR_16_16_16_16 = 0x1f,
+ COLOR_16_16_16_16_FLOAT = 0x20,
+ COLOR_32_32_32_32 = 0x22,
+ COLOR_32_32_32_32_FLOAT = 0x23,
+ CB_COLOR0_INFO__ARRAY_MODE_mask = 0x0f << 8,
+ CB_COLOR0_INFO__ARRAY_MODE_shift = 8,
+ ARRAY_LINEAR_GENERAL = 0x00,
+ ARRAY_LINEAR_ALIGNED = 0x01,
+/* ARRAY_2D_TILED_THIN1 = 0x04, */
+ NUMBER_TYPE_mask = 0x07 << 12,
+ NUMBER_TYPE_shift = 12,
+ NUMBER_UNORM = 0x00,
+ NUMBER_SNORM = 0x01,
+ NUMBER_USCALED = 0x02,
+ NUMBER_SSCALED = 0x03,
+ NUMBER_UINT = 0x04,
+ NUMBER_SINT = 0x05,
+ NUMBER_SRGB = 0x06,
+ NUMBER_FLOAT = 0x07,
+ CB_COLOR0_INFO__READ_SIZE_bit = 1 << 15,
+ COMP_SWAP_mask = 0x03 << 16,
+ COMP_SWAP_shift = 16,
+ SWAP_STD = 0x00,
+ SWAP_ALT = 0x01,
+ SWAP_STD_REV = 0x02,
+ SWAP_ALT_REV = 0x03,
+ CB_COLOR0_INFO__TILE_MODE_mask = 0x03 << 18,
+ CB_COLOR0_INFO__TILE_MODE_shift = 18,
+ TILE_DISABLE = 0x00,
+ TILE_CLEAR_ENABLE = 0x01,
+ TILE_FRAG_ENABLE = 0x02,
+ BLEND_CLAMP_bit = 1 << 20,
+ CLEAR_COLOR_bit = 1 << 21,
+ BLEND_BYPASS_bit = 1 << 22,
+ BLEND_FLOAT32_bit = 1 << 23,
+ SIMPLE_FLOAT_bit = 1 << 24,
+ CB_COLOR0_INFO__ROUND_MODE_bit = 1 << 25,
+/* TILE_COMPACT_bit = 1 << 26, */
+ SOURCE_FORMAT_bit = 1 << 27,
+ CB_COLOR0_TILE = 0x000280c0,
+ CB_COLOR0_TILE_num = 8,
+ CB_COLOR0_FRAG = 0x000280e0,
+ CB_COLOR0_FRAG_num = 8,
+ CB_COLOR0_MASK = 0x00028100,
+ CB_COLOR0_MASK_num = 8,
+ CMASK_BLOCK_MAX_mask = 0xfff << 0,
+ CMASK_BLOCK_MAX_shift = 0,
+ FMASK_TILE_MAX_mask = 0xfffff << 12,
+ FMASK_TILE_MAX_shift = 12,
+ CB_CLEAR_RED = 0x00028120,
+ CB_CLEAR_GREEN = 0x00028124,
+ CB_CLEAR_BLUE = 0x00028128,
+ CB_CLEAR_ALPHA = 0x0002812c,
+ SQ_ALU_CONST_BUFFER_SIZE_PS_0 = 0x00028140,
+ SQ_ALU_CONST_BUFFER_SIZE_PS_0_num = 16,
+ SQ_ALU_CONST_BUFFER_SIZE_PS_0__DATA_mask = 0x1ff << 0,
+ SQ_ALU_CONST_BUFFER_SIZE_PS_0__DATA_shift = 0,
+ SQ_ALU_CONST_BUFFER_SIZE_VS_0 = 0x00028180,
+ SQ_ALU_CONST_BUFFER_SIZE_VS_0_num = 16,
+ SQ_ALU_CONST_BUFFER_SIZE_VS_0__DATA_mask = 0x1ff << 0,
+ SQ_ALU_CONST_BUFFER_SIZE_VS_0__DATA_shift = 0,
+ SQ_ALU_CONST_BUFFER_SIZE_GS_0 = 0x000281c0,
+ SQ_ALU_CONST_BUFFER_SIZE_GS_0_num = 16,
+ SQ_ALU_CONST_BUFFER_SIZE_GS_0__DATA_mask = 0x1ff << 0,
+ SQ_ALU_CONST_BUFFER_SIZE_GS_0__DATA_shift = 0,
+ PA_SC_WINDOW_OFFSET = 0x00028200,
+ WINDOW_X_OFFSET_mask = 0x7fff << 0,
+ WINDOW_X_OFFSET_shift = 0,
+ WINDOW_Y_OFFSET_mask = 0x7fff << 16,
+ WINDOW_Y_OFFSET_shift = 16,
+ PA_SC_WINDOW_SCISSOR_TL = 0x00028204,
+ PA_SC_WINDOW_SCISSOR_TL__TL_X_mask = 0x3fff << 0,
+ PA_SC_WINDOW_SCISSOR_TL__TL_X_shift = 0,
+ PA_SC_WINDOW_SCISSOR_TL__TL_Y_mask = 0x3fff << 16,
+ PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift = 16,
+ WINDOW_OFFSET_DISABLE_bit = 1 << 31,
+ PA_SC_WINDOW_SCISSOR_BR = 0x00028208,
+ PA_SC_WINDOW_SCISSOR_BR__BR_X_mask = 0x3fff << 0,
+ PA_SC_WINDOW_SCISSOR_BR__BR_X_shift = 0,
+ PA_SC_WINDOW_SCISSOR_BR__BR_Y_mask = 0x3fff << 16,
+ PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift = 16,
+ PA_SC_CLIPRECT_RULE = 0x0002820c,
+ CLIP_RULE_mask = 0xffff << 0,
+ CLIP_RULE_shift = 0,
+ PA_SC_CLIPRECT_0_TL = 0x00028210, /* TL and BR are 4 bytes apart and both use _offset 8, so the instances are presumably interleaved TL/BR pairs - TODO confirm */
+ PA_SC_CLIPRECT_0_TL_num = 4, /* presumably the number of clip rectangles */
+ PA_SC_CLIPRECT_0_TL_offset = 8, /* presumably the byte stride from one rectangle's TL to the next */
+ PA_SC_CLIPRECT_0_TL__TL_X_mask = 0x3fff << 0, /* 14-bit field, bits 0..13 */
+ PA_SC_CLIPRECT_0_TL__TL_X_shift = 0,
+ PA_SC_CLIPRECT_0_TL__TL_Y_mask = 0x3fff << 16, /* 14-bit field, bits 16..29 */
+ PA_SC_CLIPRECT_0_TL__TL_Y_shift = 16,
+ PA_SC_CLIPRECT_0_BR = 0x00028214,
+ PA_SC_CLIPRECT_0_BR_num = 4,
+ PA_SC_CLIPRECT_0_BR_offset = 8,
+ PA_SC_CLIPRECT_0_BR__BR_X_mask = 0x3fff << 0, /* 14-bit field, bits 0..13 */
+ PA_SC_CLIPRECT_0_BR__BR_X_shift = 0,
+ PA_SC_CLIPRECT_0_BR__BR_Y_mask = 0x3fff << 16, /* 14-bit field, bits 16..29 */
+ PA_SC_CLIPRECT_0_BR__BR_Y_shift = 16,
+ CB_TARGET_MASK = 0x00028238,
+ TARGET0_ENABLE_mask = 0x0f << 0,
+ TARGET0_ENABLE_shift = 0,
+ TARGET1_ENABLE_mask = 0x0f << 4,
+ TARGET1_ENABLE_shift = 4,
+ TARGET2_ENABLE_mask = 0x0f << 8,
+ TARGET2_ENABLE_shift = 8,
+ TARGET3_ENABLE_mask = 0x0f << 12,
+ TARGET3_ENABLE_shift = 12,
+ TARGET4_ENABLE_mask = 0x0f << 16,
+ TARGET4_ENABLE_shift = 16,
+ TARGET5_ENABLE_mask = 0x0f << 20,
+ TARGET5_ENABLE_shift = 20,
+ TARGET6_ENABLE_mask = 0x0f << 24,
+ TARGET6_ENABLE_shift = 24,
+ TARGET7_ENABLE_mask = 0x0f << 28,
+ TARGET7_ENABLE_shift = 28,
+ CB_SHADER_MASK = 0x0002823c,
+ OUTPUT0_ENABLE_mask = 0x0f << 0,
+ OUTPUT0_ENABLE_shift = 0,
+ OUTPUT1_ENABLE_mask = 0x0f << 4,
+ OUTPUT1_ENABLE_shift = 4,
+ OUTPUT2_ENABLE_mask = 0x0f << 8,
+ OUTPUT2_ENABLE_shift = 8,
+ OUTPUT3_ENABLE_mask = 0x0f << 12,
+ OUTPUT3_ENABLE_shift = 12,
+ OUTPUT4_ENABLE_mask = 0x0f << 16,
+ OUTPUT4_ENABLE_shift = 16,
+ OUTPUT5_ENABLE_mask = 0x0f << 20,
+ OUTPUT5_ENABLE_shift = 20,
+ OUTPUT6_ENABLE_mask = 0x0f << 24,
+ OUTPUT6_ENABLE_shift = 24,
+ OUTPUT7_ENABLE_mask = 0x0f << 28,
+ OUTPUT7_ENABLE_shift = 28,
+ PA_SC_GENERIC_SCISSOR_TL = 0x00028240,
+ PA_SC_GENERIC_SCISSOR_TL__TL_X_mask = 0x3fff << 0,
+ PA_SC_GENERIC_SCISSOR_TL__TL_X_shift = 0,
+ PA_SC_GENERIC_SCISSOR_TL__TL_Y_mask = 0x3fff << 16,
+ PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift = 16,
+/* WINDOW_OFFSET_DISABLE_bit = 1 << 31, */
+ PA_SC_GENERIC_SCISSOR_BR = 0x00028244,
+ PA_SC_GENERIC_SCISSOR_BR__BR_X_mask = 0x3fff << 0,
+ PA_SC_GENERIC_SCISSOR_BR__BR_X_shift = 0,
+ PA_SC_GENERIC_SCISSOR_BR__BR_Y_mask = 0x3fff << 16,
+ PA_SC_GENERIC_SCISSOR_BR__BR_Y_shift = 16,
+ PA_SC_VPORT_SCISSOR_0_TL = 0x00028250,
+ PA_SC_VPORT_SCISSOR_0_TL_num = 16,
+ PA_SC_VPORT_SCISSOR_0_TL_offset = 8,
+ PA_SC_VPORT_SCISSOR_0_TL__TL_X_mask = 0x3fff << 0,
+ PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift = 0,
+ PA_SC_VPORT_SCISSOR_0_TL__TL_Y_mask = 0x3fff << 16,
+ PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift = 16,
+/* WINDOW_OFFSET_DISABLE_bit = 1 << 31, */
+ PA_SC_VPORT_SCISSOR_0_BR = 0x00028254,
+ PA_SC_VPORT_SCISSOR_0_BR_num = 16,
+ PA_SC_VPORT_SCISSOR_0_BR_offset = 8,
+ PA_SC_VPORT_SCISSOR_0_BR__BR_X_mask = 0x3fff << 0,
+ PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift = 0,
+ PA_SC_VPORT_SCISSOR_0_BR__BR_Y_mask = 0x3fff << 16,
+ PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift = 16,
+ PA_SC_VPORT_ZMIN_0 = 0x000282d0,
+ PA_SC_VPORT_ZMIN_0_num = 16,
+ PA_SC_VPORT_ZMIN_0_offset = 8,
+ PA_SC_VPORT_ZMAX_0 = 0x000282d4,
+ PA_SC_VPORT_ZMAX_0_num = 16,
+ PA_SC_VPORT_ZMAX_0_offset = 8,
+ SX_MISC = 0x00028350,
+ MULTIPASS_bit = 1 << 0,
+ SQ_VTX_SEMANTIC_0 = 0x00028380,
+ SQ_VTX_SEMANTIC_0_num = 32,
+/* SEMANTIC_ID_mask = 0xff << 0, */
+/* SEMANTIC_ID_shift = 0, */
+ VGT_MAX_VTX_INDX = 0x00028400,
+ VGT_MIN_VTX_INDX = 0x00028404,
+ VGT_INDX_OFFSET = 0x00028408,
+ VGT_MULTI_PRIM_IB_RESET_INDX = 0x0002840c,
+ SX_ALPHA_TEST_CONTROL = 0x00028410,
+ ALPHA_FUNC_mask = 0x07 << 0,
+ ALPHA_FUNC_shift = 0,
+ REF_NEVER = 0x00,
+ REF_LESS = 0x01,
+ REF_EQUAL = 0x02,
+ REF_LEQUAL = 0x03,
+ REF_GREATER = 0x04,
+ REF_NOTEQUAL = 0x05,
+ REF_GEQUAL = 0x06,
+ REF_ALWAYS = 0x07,
+ ALPHA_TEST_ENABLE_bit = 1 << 3,
+ ALPHA_TEST_BYPASS_bit = 1 << 8,
+ CB_BLEND_RED = 0x00028414,
+ CB_BLEND_GREEN = 0x00028418,
+ CB_BLEND_BLUE = 0x0002841c,
+ CB_BLEND_ALPHA = 0x00028420,
+ CB_FOG_RED = 0x00028424,
+ CB_FOG_GREEN = 0x00028428,
+ CB_FOG_BLUE = 0x0002842c,
+ DB_STENCILREFMASK = 0x00028430,
+ STENCILREF_mask = 0xff << 0,
+ STENCILREF_shift = 0,
+ STENCILMASK_mask = 0xff << 8,
+ STENCILMASK_shift = 8,
+ STENCILWRITEMASK_mask = 0xff << 16,
+ STENCILWRITEMASK_shift = 16,
+ DB_STENCILREFMASK_BF = 0x00028434,
+ STENCILREF_BF_mask = 0xff << 0,
+ STENCILREF_BF_shift = 0,
+ STENCILMASK_BF_mask = 0xff << 8,
+ STENCILMASK_BF_shift = 8,
+ STENCILWRITEMASK_BF_mask = 0xff << 16,
+ STENCILWRITEMASK_BF_shift = 16,
+ SX_ALPHA_REF = 0x00028438,
+ PA_CL_VPORT_XSCALE_0 = 0x0002843c,
+ PA_CL_VPORT_XSCALE_0_num = 16,
+ PA_CL_VPORT_XSCALE_0_offset = 24,
+ PA_CL_VPORT_XOFFSET_0 = 0x00028440,
+ PA_CL_VPORT_XOFFSET_0_num = 16,
+ PA_CL_VPORT_XOFFSET_0_offset = 24,
+ PA_CL_VPORT_YSCALE_0 = 0x00028444,
+ PA_CL_VPORT_YSCALE_0_num = 16,
+ PA_CL_VPORT_YSCALE_0_offset = 24,
+ PA_CL_VPORT_YOFFSET_0 = 0x00028448,
+ PA_CL_VPORT_YOFFSET_0_num = 16,
+ PA_CL_VPORT_YOFFSET_0_offset = 24,
+ PA_CL_VPORT_ZSCALE_0 = 0x0002844c,
+ PA_CL_VPORT_ZSCALE_0_num = 16,
+ PA_CL_VPORT_ZSCALE_0_offset = 24,
+ PA_CL_VPORT_ZOFFSET_0 = 0x00028450,
+ PA_CL_VPORT_ZOFFSET_0_num = 16,
+ PA_CL_VPORT_ZOFFSET_0_offset = 24,
+ SPI_VS_OUT_ID_0 = 0x00028614,
+ SPI_VS_OUT_ID_0_num = 10,
+ SEMANTIC_0_mask = 0xff << 0,
+ SEMANTIC_0_shift = 0,
+ SEMANTIC_1_mask = 0xff << 8,
+ SEMANTIC_1_shift = 8,
+ SEMANTIC_2_mask = 0xff << 16,
+ SEMANTIC_2_shift = 16,
+ SEMANTIC_3_mask = 0xff << 24,
+ SEMANTIC_3_shift = 24,
+ SPI_PS_INPUT_CNTL_0 = 0x00028644,
+ SPI_PS_INPUT_CNTL_0_num = 32,
+ SEMANTIC_mask = 0xff << 0,
+ SEMANTIC_shift = 0,
+ DEFAULT_VAL_mask = 0x03 << 8,
+ DEFAULT_VAL_shift = 8,
+ X_0_0F = 0x00,
+ FLAT_SHADE_bit = 1 << 10,
+ SEL_CENTROID_bit = 1 << 11,
+ SEL_LINEAR_bit = 1 << 12,
+ CYL_WRAP_mask = 0x0f << 13,
+ CYL_WRAP_shift = 13,
+ PT_SPRITE_TEX_bit = 1 << 17,
+ SEL_SAMPLE_bit = 1 << 18,
+ SPI_VS_OUT_CONFIG = 0x000286c4,
+ VS_PER_COMPONENT_bit = 1 << 0,
+ VS_EXPORT_COUNT_mask = 0x1f << 1,
+ VS_EXPORT_COUNT_shift = 1,
+ VS_EXPORTS_FOG_bit = 1 << 8,
+ VS_OUT_FOG_VEC_ADDR_mask = 0x1f << 9,
+ VS_OUT_FOG_VEC_ADDR_shift = 9,
+ SPI_PS_IN_CONTROL_0 = 0x000286cc,
+ NUM_INTERP_mask = 0x3f << 0,
+ NUM_INTERP_shift = 0,
+ POSITION_ENA_bit = 1 << 8,
+ POSITION_CENTROID_bit = 1 << 9,
+ POSITION_ADDR_mask = 0x1f << 10,
+ POSITION_ADDR_shift = 10,
+ PARAM_GEN_mask = 0x0f << 15,
+ PARAM_GEN_shift = 15,
+ PARAM_GEN_ADDR_mask = 0x7f << 19,
+ PARAM_GEN_ADDR_shift = 19,
+ BARYC_SAMPLE_CNTL_mask = 0x03 << 26,
+ BARYC_SAMPLE_CNTL_shift = 26,
+ CENTROIDS_ONLY = 0x00,
+ CENTERS_ONLY = 0x01,
+ CENTROIDS_AND_CENTERS = 0x02,
+ UNDEF = 0x03,
+ PERSP_GRADIENT_ENA_bit = 1 << 28,
+ LINEAR_GRADIENT_ENA_bit = 1 << 29,
+ POSITION_SAMPLE_bit = 1 << 30,
+ BARYC_AT_SAMPLE_ENA_bit = 1 << 31,
+ SPI_PS_IN_CONTROL_1 = 0x000286d0,
+ GEN_INDEX_PIX_bit = 1 << 0,
+ GEN_INDEX_PIX_ADDR_mask = 0x7f << 1,
+ GEN_INDEX_PIX_ADDR_shift = 1,
+ FRONT_FACE_ENA_bit = 1 << 8,
+ FRONT_FACE_CHAN_mask = 0x03 << 9,
+ FRONT_FACE_CHAN_shift = 9,
+ FRONT_FACE_ALL_BITS_bit = 1 << 11,
+ FRONT_FACE_ADDR_mask = 0x1f << 12,
+ FRONT_FACE_ADDR_shift = 12,
+ FOG_ADDR_mask = 0x7f << 17,
+ FOG_ADDR_shift = 17,
+ FIXED_PT_POSITION_ENA_bit = 1 << 24,
+ FIXED_PT_POSITION_ADDR_mask = 0x1f << 25,
+ FIXED_PT_POSITION_ADDR_shift = 25,
+ SPI_INTERP_CONTROL_0 = 0x000286d4,
+ FLAT_SHADE_ENA_bit = 1 << 0,
+ PNT_SPRITE_ENA_bit = 1 << 1,
+ PNT_SPRITE_OVRD_X_mask = 0x07 << 2,
+ PNT_SPRITE_OVRD_X_shift = 2,
+ SPI_PNT_SPRITE_SEL_0 = 0x00,
+ SPI_PNT_SPRITE_SEL_1 = 0x01,
+ SPI_PNT_SPRITE_SEL_S = 0x02,
+ SPI_PNT_SPRITE_SEL_T = 0x03,
+ SPI_PNT_SPRITE_SEL_NONE = 0x04,
+ PNT_SPRITE_OVRD_Y_mask = 0x07 << 5,
+ PNT_SPRITE_OVRD_Y_shift = 5,
+/* SPI_PNT_SPRITE_SEL_0 = 0x00, */
+/* SPI_PNT_SPRITE_SEL_1 = 0x01, */
+/* SPI_PNT_SPRITE_SEL_S = 0x02, */
+/* SPI_PNT_SPRITE_SEL_T = 0x03, */
+/* SPI_PNT_SPRITE_SEL_NONE = 0x04, */
+ PNT_SPRITE_OVRD_Z_mask = 0x07 << 8,
+ PNT_SPRITE_OVRD_Z_shift = 8,
+/* SPI_PNT_SPRITE_SEL_0 = 0x00, */
+/* SPI_PNT_SPRITE_SEL_1 = 0x01, */
+/* SPI_PNT_SPRITE_SEL_S = 0x02, */
+/* SPI_PNT_SPRITE_SEL_T = 0x03, */
+/* SPI_PNT_SPRITE_SEL_NONE = 0x04, */
+ PNT_SPRITE_OVRD_W_mask = 0x07 << 11,
+ PNT_SPRITE_OVRD_W_shift = 11,
+/* SPI_PNT_SPRITE_SEL_0 = 0x00, */
+/* SPI_PNT_SPRITE_SEL_1 = 0x01, */
+/* SPI_PNT_SPRITE_SEL_S = 0x02, */
+/* SPI_PNT_SPRITE_SEL_T = 0x03, */
+/* SPI_PNT_SPRITE_SEL_NONE = 0x04, */
+ PNT_SPRITE_TOP_1_bit = 1 << 14,
+ SPI_INPUT_Z = 0x000286d8,
+ PROVIDE_Z_TO_SPI_bit = 1 << 0,
+ SPI_FOG_CNTL = 0x000286dc,
+ PASS_FOG_THROUGH_PS_bit = 1 << 0,
+ PIXEL_FOG_FUNC_mask = 0x03 << 1,
+ PIXEL_FOG_FUNC_shift = 1,
+ SPI_FOG_NONE = 0x00,
+ SPI_FOG_EXP = 0x01,
+ SPI_FOG_EXP2 = 0x02,
+ SPI_FOG_LINEAR = 0x03,
+ PIXEL_FOG_SRC_SEL_bit = 1 << 3,
+ VS_FOG_CLAMP_DISABLE_bit = 1 << 4,
+ SPI_FOG_FUNC_SCALE = 0x000286e0,
+ SPI_FOG_FUNC_BIAS = 0x000286e4,
+ CB_BLEND0_CONTROL = 0x00028780,
+ CB_BLEND0_CONTROL_num = 8,
+ COLOR_SRCBLEND_mask = 0x1f << 0,
+ COLOR_SRCBLEND_shift = 0,
+ COLOR_COMB_FCN_mask = 0x07 << 5,
+ COLOR_COMB_FCN_shift = 5,
+ COLOR_DESTBLEND_mask = 0x1f << 8,
+ COLOR_DESTBLEND_shift = 8,
+ OPACITY_WEIGHT_bit = 1 << 13,
+ ALPHA_SRCBLEND_mask = 0x1f << 16,
+ ALPHA_SRCBLEND_shift = 16,
+ ALPHA_COMB_FCN_mask = 0x07 << 21,
+ ALPHA_COMB_FCN_shift = 21,
+ ALPHA_DESTBLEND_mask = 0x1f << 24,
+ ALPHA_DESTBLEND_shift = 24,
+ SEPARATE_ALPHA_BLEND_bit = 1 << 29,
+ VGT_DMA_BASE_HI = 0x000287e4,
+ VGT_DMA_BASE_HI__BASE_ADDR_mask = 0xff << 0,
+ VGT_DMA_BASE_HI__BASE_ADDR_shift = 0,
+ VGT_DMA_BASE = 0x000287e8,
+ VGT_DRAW_INITIATOR = 0x000287f0,
+ SOURCE_SELECT_mask = 0x03 << 0,
+ SOURCE_SELECT_shift = 0,
+ DI_SRC_SEL_DMA = 0x00,
+ DI_SRC_SEL_IMMEDIATE = 0x01,
+ DI_SRC_SEL_AUTO_INDEX = 0x02,
+ DI_SRC_SEL_RESERVED = 0x03,
+ MAJOR_MODE_mask = 0x03 << 2,
+ MAJOR_MODE_shift = 2,
+ DI_MAJOR_MODE_0 = 0x00,
+ DI_MAJOR_MODE_1 = 0x01,
+ SPRITE_EN_bit = 1 << 4,
+ NOT_EOP_bit = 1 << 5,
+ USE_OPAQUE_bit = 1 << 6,
+ VGT_IMMED_DATA = 0x000287f4,
+ VGT_EVENT_ADDRESS_REG = 0x000287f8,
+ ADDRESS_LOW_mask = 0xfffffff << 0,
+ ADDRESS_LOW_shift = 0,
+ DB_DEPTH_CONTROL = 0x00028800,
+ STENCIL_ENABLE_bit = 1 << 0,
+ Z_ENABLE_bit = 1 << 1,
+ Z_WRITE_ENABLE_bit = 1 << 2,
+ ZFUNC_mask = 0x07 << 4,
+ ZFUNC_shift = 4,
+ FRAG_NEVER = 0x00,
+ FRAG_LESS = 0x01,
+ FRAG_EQUAL = 0x02,
+ FRAG_LEQUAL = 0x03,
+ FRAG_GREATER = 0x04,
+ FRAG_NOTEQUAL = 0x05,
+ FRAG_GEQUAL = 0x06,
+ FRAG_ALWAYS = 0x07,
+ BACKFACE_ENABLE_bit = 1 << 7,
+ STENCILFUNC_mask = 0x07 << 8,
+ STENCILFUNC_shift = 8,
+/* REF_NEVER = 0x00, */
+/* REF_LESS = 0x01, */
+/* REF_EQUAL = 0x02, */
+/* REF_LEQUAL = 0x03, */
+/* REF_GREATER = 0x04, */
+/* REF_NOTEQUAL = 0x05, */
+/* REF_GEQUAL = 0x06, */
+/* REF_ALWAYS = 0x07, */
+ STENCILFAIL_mask = 0x07 << 11,
+ STENCILFAIL_shift = 11,
+ STENCIL_KEEP = 0x00,
+ STENCIL_ZERO = 0x01,
+ STENCIL_REPLACE = 0x02,
+ STENCIL_INCR_CLAMP = 0x03,
+ STENCIL_DECR_CLAMP = 0x04,
+ STENCIL_INVERT = 0x05,
+ STENCIL_INCR_WRAP = 0x06,
+ STENCIL_DECR_WRAP = 0x07,
+ STENCILZPASS_mask = 0x07 << 14,
+ STENCILZPASS_shift = 14,
+/* STENCIL_KEEP = 0x00, */
+/* STENCIL_ZERO = 0x01, */
+/* STENCIL_REPLACE = 0x02, */
+/* STENCIL_INCR_CLAMP = 0x03, */
+/* STENCIL_DECR_CLAMP = 0x04, */
+/* STENCIL_INVERT = 0x05, */
+/* STENCIL_INCR_WRAP = 0x06, */
+/* STENCIL_DECR_WRAP = 0x07, */
+ STENCILZFAIL_mask = 0x07 << 17,
+ STENCILZFAIL_shift = 17,
+/* STENCIL_KEEP = 0x00, */
+/* STENCIL_ZERO = 0x01, */
+/* STENCIL_REPLACE = 0x02, */
+/* STENCIL_INCR_CLAMP = 0x03, */
+/* STENCIL_DECR_CLAMP = 0x04, */
+/* STENCIL_INVERT = 0x05, */
+/* STENCIL_INCR_WRAP = 0x06, */
+/* STENCIL_DECR_WRAP = 0x07, */
+ STENCILFUNC_BF_mask = 0x07 << 20,
+ STENCILFUNC_BF_shift = 20,
+/* REF_NEVER = 0x00, */
+/* REF_LESS = 0x01, */
+/* REF_EQUAL = 0x02, */
+/* REF_LEQUAL = 0x03, */
+/* REF_GREATER = 0x04, */
+/* REF_NOTEQUAL = 0x05, */
+/* REF_GEQUAL = 0x06, */
+/* REF_ALWAYS = 0x07, */
+ STENCILFAIL_BF_mask = 0x07 << 23,
+ STENCILFAIL_BF_shift = 23,
+/* STENCIL_KEEP = 0x00, */
+/* STENCIL_ZERO = 0x01, */
+/* STENCIL_REPLACE = 0x02, */
+/* STENCIL_INCR_CLAMP = 0x03, */
+/* STENCIL_DECR_CLAMP = 0x04, */
+/* STENCIL_INVERT = 0x05, */
+/* STENCIL_INCR_WRAP = 0x06, */
+/* STENCIL_DECR_WRAP = 0x07, */
+ STENCILZPASS_BF_mask = 0x07 << 26,
+ STENCILZPASS_BF_shift = 26,
+/* STENCIL_KEEP = 0x00, */
+/* STENCIL_ZERO = 0x01, */
+/* STENCIL_REPLACE = 0x02, */
+/* STENCIL_INCR_CLAMP = 0x03, */
+/* STENCIL_DECR_CLAMP = 0x04, */
+/* STENCIL_INVERT = 0x05, */
+/* STENCIL_INCR_WRAP = 0x06, */
+/* STENCIL_DECR_WRAP = 0x07, */
+ STENCILZFAIL_BF_mask = 0x07 << 29,
+ STENCILZFAIL_BF_shift = 29,
+/* STENCIL_KEEP = 0x00, */
+/* STENCIL_ZERO = 0x01, */
+/* STENCIL_REPLACE = 0x02, */
+/* STENCIL_INCR_CLAMP = 0x03, */
+/* STENCIL_DECR_CLAMP = 0x04, */
+/* STENCIL_INVERT = 0x05, */
+/* STENCIL_INCR_WRAP = 0x06, */
+/* STENCIL_DECR_WRAP = 0x07, */
+ CB_BLEND_CONTROL = 0x00028804,
+/* COLOR_SRCBLEND_mask = 0x1f << 0, */
+/* COLOR_SRCBLEND_shift = 0, */
+ BLEND_ZERO = 0x00,
+ BLEND_ONE = 0x01,
+ BLEND_SRC_COLOR = 0x02,
+ BLEND_ONE_MINUS_SRC_COLOR = 0x03,
+ BLEND_SRC_ALPHA = 0x04,
+ BLEND_ONE_MINUS_SRC_ALPHA = 0x05,
+ BLEND_DST_ALPHA = 0x06,
+ BLEND_ONE_MINUS_DST_ALPHA = 0x07,
+ BLEND_DST_COLOR = 0x08,
+ BLEND_ONE_MINUS_DST_COLOR = 0x09,
+ BLEND_SRC_ALPHA_SATURATE = 0x0a,
+ BLEND_BOTH_SRC_ALPHA = 0x0b,
+ BLEND_BOTH_INV_SRC_ALPHA = 0x0c,
+ BLEND_CONSTANT_COLOR = 0x0d,
+ BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0e,
+ BLEND_SRC1_COLOR = 0x0f,
+ BLEND_INV_SRC1_COLOR = 0x10,
+ BLEND_SRC1_ALPHA = 0x11,
+ BLEND_INV_SRC1_ALPHA = 0x12,
+ BLEND_CONSTANT_ALPHA = 0x13,
+ BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x14,
+/* COLOR_COMB_FCN_mask = 0x07 << 5, */
+/* COLOR_COMB_FCN_shift = 5, */
+ COMB_DST_PLUS_SRC = 0x00,
+ COMB_SRC_MINUS_DST = 0x01,
+ COMB_MIN_DST_SRC = 0x02,
+ COMB_MAX_DST_SRC = 0x03,
+ COMB_DST_MINUS_SRC = 0x04,
+/* COLOR_DESTBLEND_mask = 0x1f << 8, */
+/* COLOR_DESTBLEND_shift = 8, */
+/* BLEND_ZERO = 0x00, */
+/* BLEND_ONE = 0x01, */
+/* BLEND_SRC_COLOR = 0x02, */
+/* BLEND_ONE_MINUS_SRC_COLOR = 0x03, */
+/* BLEND_SRC_ALPHA = 0x04, */
+/* BLEND_ONE_MINUS_SRC_ALPHA = 0x05, */
+/* BLEND_DST_ALPHA = 0x06, */
+/* BLEND_ONE_MINUS_DST_ALPHA = 0x07, */
+/* BLEND_DST_COLOR = 0x08, */
+/* BLEND_ONE_MINUS_DST_COLOR = 0x09, */
+/* BLEND_SRC_ALPHA_SATURATE = 0x0a, */
+/* BLEND_BOTH_SRC_ALPHA = 0x0b, */
+/* BLEND_BOTH_INV_SRC_ALPHA = 0x0c, */
+/* BLEND_CONSTANT_COLOR = 0x0d, */
+/* BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0e, */
+/* BLEND_SRC1_COLOR = 0x0f, */
+/* BLEND_INV_SRC1_COLOR = 0x10, */
+/* BLEND_SRC1_ALPHA = 0x11, */
+/* BLEND_INV_SRC1_ALPHA = 0x12, */
+/* BLEND_CONSTANT_ALPHA = 0x13, */
+/* BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x14, */
+/* OPACITY_WEIGHT_bit = 1 << 13, */
+/* ALPHA_SRCBLEND_mask = 0x1f << 16, */
+/* ALPHA_SRCBLEND_shift = 16, */
+/* BLEND_ZERO = 0x00, */
+/* BLEND_ONE = 0x01, */
+/* BLEND_SRC_COLOR = 0x02, */
+/* BLEND_ONE_MINUS_SRC_COLOR = 0x03, */
+/* BLEND_SRC_ALPHA = 0x04, */
+/* BLEND_ONE_MINUS_SRC_ALPHA = 0x05, */
+/* BLEND_DST_ALPHA = 0x06, */
+/* BLEND_ONE_MINUS_DST_ALPHA = 0x07, */
+/* BLEND_DST_COLOR = 0x08, */
+/* BLEND_ONE_MINUS_DST_COLOR = 0x09, */
+/* BLEND_SRC_ALPHA_SATURATE = 0x0a, */
+/* BLEND_BOTH_SRC_ALPHA = 0x0b, */
+/* BLEND_BOTH_INV_SRC_ALPHA = 0x0c, */
+/* BLEND_CONSTANT_COLOR = 0x0d, */
+/* BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0e, */
+/* BLEND_SRC1_COLOR = 0x0f, */
+/* BLEND_INV_SRC1_COLOR = 0x10, */
+/* BLEND_SRC1_ALPHA = 0x11, */
+/* BLEND_INV_SRC1_ALPHA = 0x12, */
+/* BLEND_CONSTANT_ALPHA = 0x13, */
+/* BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x14, */
+/* ALPHA_COMB_FCN_mask = 0x07 << 21, */
+/* ALPHA_COMB_FCN_shift = 21, */
+/* COMB_DST_PLUS_SRC = 0x00, */
+/* COMB_SRC_MINUS_DST = 0x01, */
+/* COMB_MIN_DST_SRC = 0x02, */
+/* COMB_MAX_DST_SRC = 0x03, */
+/* COMB_DST_MINUS_SRC = 0x04, */
+/* ALPHA_DESTBLEND_mask = 0x1f << 24, */
+/* ALPHA_DESTBLEND_shift = 24, */
+/* BLEND_ZERO = 0x00, */
+/* BLEND_ONE = 0x01, */
+/* BLEND_SRC_COLOR = 0x02, */
+/* BLEND_ONE_MINUS_SRC_COLOR = 0x03, */
+/* BLEND_SRC_ALPHA = 0x04, */
+/* BLEND_ONE_MINUS_SRC_ALPHA = 0x05, */
+/* BLEND_DST_ALPHA = 0x06, */
+/* BLEND_ONE_MINUS_DST_ALPHA = 0x07, */
+/* BLEND_DST_COLOR = 0x08, */
+/* BLEND_ONE_MINUS_DST_COLOR = 0x09, */
+/* BLEND_SRC_ALPHA_SATURATE = 0x0a, */
+/* BLEND_BOTH_SRC_ALPHA = 0x0b, */
+/* BLEND_BOTH_INV_SRC_ALPHA = 0x0c, */
+/* BLEND_CONSTANT_COLOR = 0x0d, */
+/* BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0e, */
+/* BLEND_SRC1_COLOR = 0x0f, */
+/* BLEND_INV_SRC1_COLOR = 0x10, */
+/* BLEND_SRC1_ALPHA = 0x11, */
+/* BLEND_INV_SRC1_ALPHA = 0x12, */
+/* BLEND_CONSTANT_ALPHA = 0x13, */
+/* BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x14, */
+/* SEPARATE_ALPHA_BLEND_bit = 1 << 29, */
+ CB_COLOR_CONTROL = 0x00028808,
+ FOG_ENABLE_bit = 1 << 0,
+ MULTIWRITE_ENABLE_bit = 1 << 1,
+ DITHER_ENABLE_bit = 1 << 2,
+ DEGAMMA_ENABLE_bit = 1 << 3,
+ SPECIAL_OP_mask = 0x07 << 4,
+ SPECIAL_OP_shift = 4,
+ SPECIAL_NORMAL = 0x00,
+ SPECIAL_DISABLE = 0x01,
+ SPECIAL_FAST_CLEAR = 0x02,
+ SPECIAL_FORCE_CLEAR = 0x03,
+ SPECIAL_EXPAND_COLOR = 0x04,
+ SPECIAL_EXPAND_TEXTURE = 0x05,
+ SPECIAL_EXPAND_SAMPLES = 0x06,
+ SPECIAL_RESOLVE_BOX = 0x07,
+ PER_MRT_BLEND_bit = 1 << 7,
+ TARGET_BLEND_ENABLE_mask = 0xff << 8,
+ TARGET_BLEND_ENABLE_shift = 8,
+ ROP3_mask = 0xff << 16,
+ ROP3_shift = 16,
+ DB_SHADER_CONTROL = 0x0002880c,
+ Z_EXPORT_ENABLE_bit = 1 << 0,
+ STENCIL_REF_EXPORT_ENABLE_bit = 1 << 1,
+ Z_ORDER_mask = 0x03 << 4,
+ Z_ORDER_shift = 4,
+ LATE_Z = 0x00,
+ EARLY_Z_THEN_LATE_Z = 0x01,
+ RE_Z = 0x02,
+ EARLY_Z_THEN_RE_Z = 0x03,
+ KILL_ENABLE_bit = 1 << 6,
+ COVERAGE_TO_MASK_ENABLE_bit = 1 << 7,
+ MASK_EXPORT_ENABLE_bit = 1 << 8,
+ DUAL_EXPORT_ENABLE_bit = 1 << 9,
+ EXEC_ON_HIER_FAIL_bit = 1 << 10,
+ EXEC_ON_NOOP_bit = 1 << 11,
+ PA_CL_CLIP_CNTL = 0x00028810,
+ UCP_ENA_0_bit = 1 << 0,
+ UCP_ENA_1_bit = 1 << 1,
+ UCP_ENA_2_bit = 1 << 2,
+ UCP_ENA_3_bit = 1 << 3,
+ UCP_ENA_4_bit = 1 << 4,
+ UCP_ENA_5_bit = 1 << 5,
+ PS_UCP_Y_SCALE_NEG_bit = 1 << 13,
+ PS_UCP_MODE_mask = 0x03 << 14,
+ PS_UCP_MODE_shift = 14,
+ CLIP_DISABLE_bit = 1 << 16,
+ UCP_CULL_ONLY_ENA_bit = 1 << 17,
+ BOUNDARY_EDGE_FLAG_ENA_bit = 1 << 18,
+ DX_CLIP_SPACE_DEF_bit = 1 << 19,
+ DIS_CLIP_ERR_DETECT_bit = 1 << 20,
+ VTX_KILL_OR_bit = 1 << 21,
+ DX_LINEAR_ATTR_CLIP_ENA_bit = 1 << 24,
+ VTE_VPORT_PROVOKE_DISABLE_bit = 1 << 25,
+ ZCLIP_NEAR_DISABLE_bit = 1 << 26,
+ ZCLIP_FAR_DISABLE_bit = 1 << 27,
+ PA_SU_SC_MODE_CNTL = 0x00028814,
+ CULL_FRONT_bit = 1 << 0,
+ CULL_BACK_bit = 1 << 1,
+ FACE_bit = 1 << 2,
+ POLY_MODE_mask = 0x03 << 3,
+ POLY_MODE_shift = 3,
+ X_DISABLE_POLY_MODE = 0x00,
+ X_DUAL_MODE = 0x01,
+ POLYMODE_FRONT_PTYPE_mask = 0x07 << 5,
+ POLYMODE_FRONT_PTYPE_shift = 5,
+ X_DRAW_POINTS = 0x00,
+ X_DRAW_LINES = 0x01,
+ X_DRAW_TRIANGLES = 0x02,
+ POLYMODE_BACK_PTYPE_mask = 0x07 << 8,
+ POLYMODE_BACK_PTYPE_shift = 8,
+/* X_DRAW_POINTS = 0x00, */
+/* X_DRAW_LINES = 0x01, */
+/* X_DRAW_TRIANGLES = 0x02, */
+ POLY_OFFSET_FRONT_ENABLE_bit = 1 << 11,
+ POLY_OFFSET_BACK_ENABLE_bit = 1 << 12,
+ POLY_OFFSET_PARA_ENABLE_bit = 1 << 13,
+ VTX_WINDOW_OFFSET_ENABLE_bit = 1 << 16,
+ PROVOKING_VTX_LAST_bit = 1 << 19,
+ PERSP_CORR_DIS_bit = 1 << 20,
+ MULTI_PRIM_IB_ENA_bit = 1 << 21,
+ PA_CL_VTE_CNTL = 0x00028818,
+ VPORT_X_SCALE_ENA_bit = 1 << 0,
+ VPORT_X_OFFSET_ENA_bit = 1 << 1,
+ VPORT_Y_SCALE_ENA_bit = 1 << 2,
+ VPORT_Y_OFFSET_ENA_bit = 1 << 3,
+ VPORT_Z_SCALE_ENA_bit = 1 << 4,
+ VPORT_Z_OFFSET_ENA_bit = 1 << 5,
+ VTX_XY_FMT_bit = 1 << 8,
+ VTX_Z_FMT_bit = 1 << 9,
+ VTX_W0_FMT_bit = 1 << 10,
+ PERFCOUNTER_REF_bit = 1 << 11,
+ PA_CL_VS_OUT_CNTL = 0x0002881c,
+ CLIP_DIST_ENA_0_bit = 1 << 0,
+ CLIP_DIST_ENA_1_bit = 1 << 1,
+ CLIP_DIST_ENA_2_bit = 1 << 2,
+ CLIP_DIST_ENA_3_bit = 1 << 3,
+ CLIP_DIST_ENA_4_bit = 1 << 4,
+ CLIP_DIST_ENA_5_bit = 1 << 5,
+ CLIP_DIST_ENA_6_bit = 1 << 6,
+ CLIP_DIST_ENA_7_bit = 1 << 7,
+ CULL_DIST_ENA_0_bit = 1 << 8,
+ CULL_DIST_ENA_1_bit = 1 << 9,
+ CULL_DIST_ENA_2_bit = 1 << 10,
+ CULL_DIST_ENA_3_bit = 1 << 11,
+ CULL_DIST_ENA_4_bit = 1 << 12,
+ CULL_DIST_ENA_5_bit = 1 << 13,
+ CULL_DIST_ENA_6_bit = 1 << 14,
+ CULL_DIST_ENA_7_bit = 1 << 15,
+ USE_VTX_POINT_SIZE_bit = 1 << 16,
+ USE_VTX_EDGE_FLAG_bit = 1 << 17,
+ USE_VTX_RENDER_TARGET_INDX_bit = 1 << 18,
+ USE_VTX_VIEWPORT_INDX_bit = 1 << 19,
+ USE_VTX_KILL_FLAG_bit = 1 << 20,
+ VS_OUT_MISC_VEC_ENA_bit = 1 << 21,
+ VS_OUT_CCDIST0_VEC_ENA_bit = 1 << 22,
+ VS_OUT_CCDIST1_VEC_ENA_bit = 1 << 23,
+ PA_CL_NANINF_CNTL = 0x00028820,
+ VTE_XY_INF_DISCARD_bit = 1 << 0,
+ VTE_Z_INF_DISCARD_bit = 1 << 1,
+ VTE_W_INF_DISCARD_bit = 1 << 2,
+ VTE_0XNANINF_IS_0_bit = 1 << 3,
+ VTE_XY_NAN_RETAIN_bit = 1 << 4,
+ VTE_Z_NAN_RETAIN_bit = 1 << 5,
+ VTE_W_NAN_RETAIN_bit = 1 << 6,
+ VTE_W_RECIP_NAN_IS_0_bit = 1 << 7,
+ VS_XY_NAN_TO_INF_bit = 1 << 8,
+ VS_XY_INF_RETAIN_bit = 1 << 9,
+ VS_Z_NAN_TO_INF_bit = 1 << 10,
+ VS_Z_INF_RETAIN_bit = 1 << 11,
+ VS_W_NAN_TO_INF_bit = 1 << 12,
+ VS_W_INF_RETAIN_bit = 1 << 13,
+ VS_CLIP_DIST_INF_DISCARD_bit = 1 << 14,
+ VTE_NO_OUTPUT_NEG_0_bit = 1 << 20,
+ SQ_PGM_START_PS = 0x00028840,
+ SQ_PGM_RESOURCES_PS = 0x00028850, /* the unprefixed field names below are listed again, commented out, under the VS/GS/ES/FS variants of this register */
+ NUM_GPRS_mask = 0xff << 0, /* 8-bit field, bits 0..7 */
+ NUM_GPRS_shift = 0,
+ STACK_SIZE_mask = 0xff << 8, /* 8-bit field, bits 8..15 */
+ STACK_SIZE_shift = 8,
+ SQ_PGM_RESOURCES_PS__DX10_CLAMP_bit = 1 << 21, /* each SQ_PGM_RESOURCES_* variant defines its own prefixed DX10_CLAMP bit, all at bit 21 */
+ FETCH_CACHE_LINES_mask = 0x07 << 24, /* 3-bit field, bits 24..26 */
+ FETCH_CACHE_LINES_shift = 24,
+ UNCACHED_FIRST_INST_bit = 1 << 28,
+ CLAMP_CONSTS_bit = 1 << 31, /* defined only in this PS list; NOTE(review): 1 << 31 overflows a 32-bit signed int - confirm the compiler yields the intended 0x80000000 bit pattern */
+ SQ_PGM_EXPORTS_PS = 0x00028854,
+ EXPORT_MODE_mask = 0x1f << 0,
+ EXPORT_MODE_shift = 0,
+ SQ_PGM_START_VS = 0x00028858,
+ SQ_PGM_RESOURCES_VS = 0x00028868,
+/* NUM_GPRS_mask = 0xff << 0, */
+/* NUM_GPRS_shift = 0, */
+/* STACK_SIZE_mask = 0xff << 8, */
+/* STACK_SIZE_shift = 8, */
+ SQ_PGM_RESOURCES_VS__DX10_CLAMP_bit = 1 << 21,
+/* FETCH_CACHE_LINES_mask = 0x07 << 24, */
+/* FETCH_CACHE_LINES_shift = 24, */
+/* UNCACHED_FIRST_INST_bit = 1 << 28, */
+ SQ_PGM_START_GS = 0x0002886c,
+ SQ_PGM_RESOURCES_GS = 0x0002887c,
+/* NUM_GPRS_mask = 0xff << 0, */
+/* NUM_GPRS_shift = 0, */
+/* STACK_SIZE_mask = 0xff << 8, */
+/* STACK_SIZE_shift = 8, */
+ SQ_PGM_RESOURCES_GS__DX10_CLAMP_bit = 1 << 21,
+/* FETCH_CACHE_LINES_mask = 0x07 << 24, */
+/* FETCH_CACHE_LINES_shift = 24, */
+/* UNCACHED_FIRST_INST_bit = 1 << 28, */
+ SQ_PGM_START_ES = 0x00028880,
+ SQ_PGM_RESOURCES_ES = 0x00028890,
+/* NUM_GPRS_mask = 0xff << 0, */
+/* NUM_GPRS_shift = 0, */
+/* STACK_SIZE_mask = 0xff << 8, */
+/* STACK_SIZE_shift = 8, */
+ SQ_PGM_RESOURCES_ES__DX10_CLAMP_bit = 1 << 21,
+/* FETCH_CACHE_LINES_mask = 0x07 << 24, */
+/* FETCH_CACHE_LINES_shift = 24, */
+/* UNCACHED_FIRST_INST_bit = 1 << 28, */
+ SQ_PGM_START_FS = 0x00028894,
+ SQ_PGM_RESOURCES_FS = 0x000288a4,
+/* NUM_GPRS_mask = 0xff << 0, */
+/* NUM_GPRS_shift = 0, */
+/* STACK_SIZE_mask = 0xff << 8, */
+/* STACK_SIZE_shift = 8, */
+ SQ_PGM_RESOURCES_FS__DX10_CLAMP_bit = 1 << 21,
+ SQ_ESGS_RING_ITEMSIZE = 0x000288a8, /* first of nine *_ITEMSIZE entries (0x288a8..0x288c8); the later ones repeat the ITEMSIZE field as commented-out lines */
+ ITEMSIZE_mask = 0x7fff << 0, /* 15-bit field, bits 0..14 */
+ ITEMSIZE_shift = 0,
+ SQ_GSVS_RING_ITEMSIZE = 0x000288ac,
+/* ITEMSIZE_mask = 0x7fff << 0, */
+/* ITEMSIZE_shift = 0, */
+ SQ_ESTMP_RING_ITEMSIZE = 0x000288b0,
+/* ITEMSIZE_mask = 0x7fff << 0, */
+/* ITEMSIZE_shift = 0, */
+ SQ_GSTMP_RING_ITEMSIZE = 0x000288b4,
+/* ITEMSIZE_mask = 0x7fff << 0, */
+/* ITEMSIZE_shift = 0, */
+ SQ_VSTMP_RING_ITEMSIZE = 0x000288b8,
+/* ITEMSIZE_mask = 0x7fff << 0, */
+/* ITEMSIZE_shift = 0, */
+ SQ_PSTMP_RING_ITEMSIZE = 0x000288bc,
+/* ITEMSIZE_mask = 0x7fff << 0, */
+/* ITEMSIZE_shift = 0, */
+ SQ_FBUF_RING_ITEMSIZE = 0x000288c0,
+/* ITEMSIZE_mask = 0x7fff << 0, */
+/* ITEMSIZE_shift = 0, */
+ SQ_REDUC_RING_ITEMSIZE = 0x000288c4,
+/* ITEMSIZE_mask = 0x7fff << 0, */
+/* ITEMSIZE_shift = 0, */
+ SQ_GS_VERT_ITEMSIZE = 0x000288c8,
+/* ITEMSIZE_mask = 0x7fff << 0, */
+/* ITEMSIZE_shift = 0, */
+ SQ_PGM_CF_OFFSET_PS = 0x000288cc,
+ PGM_CF_OFFSET_mask = 0xfffff << 0,
+ PGM_CF_OFFSET_shift = 0,
+ SQ_PGM_CF_OFFSET_VS = 0x000288d0,
+/* PGM_CF_OFFSET_mask = 0xfffff << 0, */
+/* PGM_CF_OFFSET_shift = 0, */
+ SQ_PGM_CF_OFFSET_GS = 0x000288d4,
+/* PGM_CF_OFFSET_mask = 0xfffff << 0, */
+/* PGM_CF_OFFSET_shift = 0, */
+ SQ_PGM_CF_OFFSET_ES = 0x000288d8,
+/* PGM_CF_OFFSET_mask = 0xfffff << 0, */
+/* PGM_CF_OFFSET_shift = 0, */
+ SQ_PGM_CF_OFFSET_FS = 0x000288dc,
+/* PGM_CF_OFFSET_mask = 0xfffff << 0, */
+/* PGM_CF_OFFSET_shift = 0, */
+ SQ_VTX_SEMANTIC_CLEAR = 0x000288e0,
+ SQ_ALU_CONST_CACHE_PS_0 = 0x00028940,
+ SQ_ALU_CONST_CACHE_PS_0_num = 16,
+ SQ_ALU_CONST_CACHE_VS_0 = 0x00028980,
+ SQ_ALU_CONST_CACHE_VS_0_num = 16,
+ SQ_ALU_CONST_CACHE_GS_0 = 0x000289c0,
+ SQ_ALU_CONST_CACHE_GS_0_num = 16,
+ PA_SU_POINT_SIZE = 0x00028a00,
+ PA_SU_POINT_SIZE__HEIGHT_mask = 0xffff << 0,
+ PA_SU_POINT_SIZE__HEIGHT_shift = 0,
+ PA_SU_POINT_SIZE__WIDTH_mask = 0xffff << 16,
+ PA_SU_POINT_SIZE__WIDTH_shift = 16,
+ PA_SU_POINT_MINMAX = 0x00028a04,
+ MIN_SIZE_mask = 0xffff << 0,
+ MIN_SIZE_shift = 0,
+ MAX_SIZE_mask = 0xffff << 16,
+ MAX_SIZE_shift = 16,
+ PA_SU_LINE_CNTL = 0x00028a08,
+ PA_SU_LINE_CNTL__WIDTH_mask = 0xffff << 0,
+ PA_SU_LINE_CNTL__WIDTH_shift = 0,
+ PA_SC_LINE_STIPPLE = 0x00028a0c,
+ LINE_PATTERN_mask = 0xffff << 0,
+ LINE_PATTERN_shift = 0,
+ REPEAT_COUNT_mask = 0xff << 16,
+ REPEAT_COUNT_shift = 16,
+ PATTERN_BIT_ORDER_bit = 1 << 28,
+ AUTO_RESET_CNTL_mask = 0x03 << 29,
+ AUTO_RESET_CNTL_shift = 29,
+ VGT_OUTPUT_PATH_CNTL = 0x00028a10,
+ PATH_SELECT_mask = 0x03 << 0,
+ PATH_SELECT_shift = 0,
+ VGT_OUTPATH_VTX_REUSE = 0x00,
+ VGT_OUTPATH_TESS_EN = 0x01,
+ VGT_OUTPATH_PASSTHRU = 0x02,
+ VGT_OUTPATH_GS_BLOCK = 0x03,
+ VGT_HOS_CNTL = 0x00028a14,
+ TESS_MODE_mask = 0x03 << 0,
+ TESS_MODE_shift = 0,
+ VGT_HOS_MAX_TESS_LEVEL = 0x00028a18,
+ VGT_HOS_MIN_TESS_LEVEL = 0x00028a1c,
+ VGT_HOS_REUSE_DEPTH = 0x00028a20,
+ REUSE_DEPTH_mask = 0xff << 0,
+ REUSE_DEPTH_shift = 0,
+ VGT_GROUP_PRIM_TYPE = 0x00028a24,
+ VGT_GROUP_PRIM_TYPE__PRIM_TYPE_mask = 0x1f << 0,
+ VGT_GROUP_PRIM_TYPE__PRIM_TYPE_shift = 0,
+ VGT_GRP_3D_POINT = 0x00,
+ VGT_GRP_3D_LINE = 0x01,
+ VGT_GRP_3D_TRI = 0x02,
+ VGT_GRP_3D_RECT = 0x03,
+ VGT_GRP_3D_QUAD = 0x04,
+ VGT_GRP_2D_COPY_RECT_V0 = 0x05,
+ VGT_GRP_2D_COPY_RECT_V1 = 0x06,
+ VGT_GRP_2D_COPY_RECT_V2 = 0x07,
+ VGT_GRP_2D_COPY_RECT_V3 = 0x08,
+ VGT_GRP_2D_FILL_RECT = 0x09,
+ VGT_GRP_2D_LINE = 0x0a,
+ VGT_GRP_2D_TRI = 0x0b,
+ VGT_GRP_PRIM_INDEX_LINE = 0x0c,
+ VGT_GRP_PRIM_INDEX_TRI = 0x0d,
+ VGT_GRP_PRIM_INDEX_QUAD = 0x0e,
+ VGT_GRP_3D_LINE_ADJ = 0x0f,
+ VGT_GRP_3D_TRI_ADJ = 0x10,
+ RETAIN_ORDER_bit = 1 << 14,
+ RETAIN_QUADS_bit = 1 << 15,
+ PRIM_ORDER_mask = 0x07 << 16,
+ PRIM_ORDER_shift = 16,
+ VGT_GRP_LIST = 0x00,
+ VGT_GRP_STRIP = 0x01,
+ VGT_GRP_FAN = 0x02,
+ VGT_GRP_LOOP = 0x03,
+ VGT_GRP_POLYGON = 0x04,
+ VGT_GROUP_FIRST_DECR = 0x00028a28,
+ FIRST_DECR_mask = 0x0f << 0,
+ FIRST_DECR_shift = 0,
+ VGT_GROUP_DECR = 0x00028a2c,
+ DECR_mask = 0x0f << 0,
+ DECR_shift = 0,
+ VGT_GROUP_VECT_0_CNTL = 0x00028a30,
+ COMP_X_EN_bit = 1 << 0,
+ COMP_Y_EN_bit = 1 << 1,
+ COMP_Z_EN_bit = 1 << 2,
+ COMP_W_EN_bit = 1 << 3,
+ VGT_GROUP_VECT_0_CNTL__STRIDE_mask = 0xff << 8,
+ VGT_GROUP_VECT_0_CNTL__STRIDE_shift = 8,
+ SHIFT_mask = 0xff << 16,
+ SHIFT_shift = 16,
+ VGT_GROUP_VECT_1_CNTL = 0x00028a34,
+/* COMP_X_EN_bit = 1 << 0, */
+/* COMP_Y_EN_bit = 1 << 1, */
+/* COMP_Z_EN_bit = 1 << 2, */
+/* COMP_W_EN_bit = 1 << 3, */
+ VGT_GROUP_VECT_1_CNTL__STRIDE_mask = 0xff << 8,
+ VGT_GROUP_VECT_1_CNTL__STRIDE_shift = 8,
+/* SHIFT_mask = 0xff << 16, */
+/* SHIFT_shift = 16, */
+ VGT_GROUP_VECT_0_FMT_CNTL = 0x00028a38,
+ X_CONV_mask = 0x0f << 0,
+ X_CONV_shift = 0,
+ VGT_GRP_INDEX_16 = 0x00,
+ VGT_GRP_INDEX_32 = 0x01,
+ VGT_GRP_UINT_16 = 0x02,
+ VGT_GRP_UINT_32 = 0x03,
+ VGT_GRP_SINT_16 = 0x04,
+ VGT_GRP_SINT_32 = 0x05,
+ VGT_GRP_FLOAT_32 = 0x06,
+ VGT_GRP_AUTO_PRIM = 0x07,
+ VGT_GRP_FIX_1_23_TO_FLOAT = 0x08,
+ X_OFFSET_mask = 0x0f << 4,
+ X_OFFSET_shift = 4,
+ Y_CONV_mask = 0x0f << 8,
+ Y_CONV_shift = 8,
+/* VGT_GRP_INDEX_16 = 0x00, */
+/* VGT_GRP_INDEX_32 = 0x01, */
+/* VGT_GRP_UINT_16 = 0x02, */
+/* VGT_GRP_UINT_32 = 0x03, */
+/* VGT_GRP_SINT_16 = 0x04, */
+/* VGT_GRP_SINT_32 = 0x05, */
+/* VGT_GRP_FLOAT_32 = 0x06, */
+/* VGT_GRP_AUTO_PRIM = 0x07, */
+/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */
+ Y_OFFSET_mask = 0x0f << 12,
+ Y_OFFSET_shift = 12,
+ Z_CONV_mask = 0x0f << 16,
+ Z_CONV_shift = 16,
+/* VGT_GRP_INDEX_16 = 0x00, */
+/* VGT_GRP_INDEX_32 = 0x01, */
+/* VGT_GRP_UINT_16 = 0x02, */
+/* VGT_GRP_UINT_32 = 0x03, */
+/* VGT_GRP_SINT_16 = 0x04, */
+/* VGT_GRP_SINT_32 = 0x05, */
+/* VGT_GRP_FLOAT_32 = 0x06, */
+/* VGT_GRP_AUTO_PRIM = 0x07, */
+/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */
+ Z_OFFSET_mask = 0x0f << 20,
+ Z_OFFSET_shift = 20,
+ W_CONV_mask = 0x0f << 24,
+ W_CONV_shift = 24,
+/* VGT_GRP_INDEX_16 = 0x00, */
+/* VGT_GRP_INDEX_32 = 0x01, */
+/* VGT_GRP_UINT_16 = 0x02, */
+/* VGT_GRP_UINT_32 = 0x03, */
+/* VGT_GRP_SINT_16 = 0x04, */
+/* VGT_GRP_SINT_32 = 0x05, */
+/* VGT_GRP_FLOAT_32 = 0x06, */
+/* VGT_GRP_AUTO_PRIM = 0x07, */
+/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */
+ W_OFFSET_mask = 0x0f << 28,
+ W_OFFSET_shift = 28,
+ VGT_GROUP_VECT_1_FMT_CNTL = 0x00028a3c,
+/* X_CONV_mask = 0x0f << 0, */
+/* X_CONV_shift = 0, */
+/* VGT_GRP_INDEX_16 = 0x00, */
+/* VGT_GRP_INDEX_32 = 0x01, */
+/* VGT_GRP_UINT_16 = 0x02, */
+/* VGT_GRP_UINT_32 = 0x03, */
+/* VGT_GRP_SINT_16 = 0x04, */
+/* VGT_GRP_SINT_32 = 0x05, */
+/* VGT_GRP_FLOAT_32 = 0x06, */
+/* VGT_GRP_AUTO_PRIM = 0x07, */
+/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */
+/* X_OFFSET_mask = 0x0f << 4, */
+/* X_OFFSET_shift = 4, */
+/* Y_CONV_mask = 0x0f << 8, */
+/* Y_CONV_shift = 8, */
+/* VGT_GRP_INDEX_16 = 0x00, */
+/* VGT_GRP_INDEX_32 = 0x01, */
+/* VGT_GRP_UINT_16 = 0x02, */
+/* VGT_GRP_UINT_32 = 0x03, */
+/* VGT_GRP_SINT_16 = 0x04, */
+/* VGT_GRP_SINT_32 = 0x05, */
+/* VGT_GRP_FLOAT_32 = 0x06, */
+/* VGT_GRP_AUTO_PRIM = 0x07, */
+/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */
+/* Y_OFFSET_mask = 0x0f << 12, */
+/* Y_OFFSET_shift = 12, */
+/* Z_CONV_mask = 0x0f << 16, */
+/* Z_CONV_shift = 16, */
+/* VGT_GRP_INDEX_16 = 0x00, */
+/* VGT_GRP_INDEX_32 = 0x01, */
+/* VGT_GRP_UINT_16 = 0x02, */
+/* VGT_GRP_UINT_32 = 0x03, */
+/* VGT_GRP_SINT_16 = 0x04, */
+/* VGT_GRP_SINT_32 = 0x05, */
+/* VGT_GRP_FLOAT_32 = 0x06, */
+/* VGT_GRP_AUTO_PRIM = 0x07, */
+/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */
+/* Z_OFFSET_mask = 0x0f << 20, */
+/* Z_OFFSET_shift = 20, */
+/* W_CONV_mask = 0x0f << 24, */
+/* W_CONV_shift = 24, */
+/* VGT_GRP_INDEX_16 = 0x00, */
+/* VGT_GRP_INDEX_32 = 0x01, */
+/* VGT_GRP_UINT_16 = 0x02, */
+/* VGT_GRP_UINT_32 = 0x03, */
+/* VGT_GRP_SINT_16 = 0x04, */
+/* VGT_GRP_SINT_32 = 0x05, */
+/* VGT_GRP_FLOAT_32 = 0x06, */
+/* VGT_GRP_AUTO_PRIM = 0x07, */
+/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */
+/* W_OFFSET_mask = 0x0f << 28, */
+/* W_OFFSET_shift = 28, */
+ VGT_GS_MODE = 0x00028a40,
+ MODE_mask = 0x03 << 0,
+ MODE_shift = 0,
+ GS_OFF = 0x00,
+ GS_SCENARIO_A = 0x01,
+ GS_SCENARIO_B = 0x02,
+ GS_SCENARIO_G = 0x03,
+ ES_PASSTHRU_bit = 1 << 2,
+ CUT_MODE_mask = 0x03 << 3,
+ CUT_MODE_shift = 3,
+ GS_CUT_1024 = 0x00,
+ GS_CUT_512 = 0x01,
+ GS_CUT_256 = 0x02,
+ GS_CUT_128 = 0x03,
+ PA_SC_MPASS_PS_CNTL = 0x00028a48,
+ MPASS_PIX_VEC_PER_PASS_mask = 0xfffff << 0,
+ MPASS_PIX_VEC_PER_PASS_shift = 0,
+ MPASS_PS_ENA_bit = 1 << 31,
+ PA_SC_MODE_CNTL = 0x00028a4c,
+ MSAA_ENABLE_bit = 1 << 0,
+ CLIPRECT_ENABLE_bit = 1 << 1,
+ LINE_STIPPLE_ENABLE_bit = 1 << 2,
+ MULTI_CHIP_PRIM_DISCARD_ENAB_bit = 1 << 3,
+ WALK_ORDER_ENABLE_bit = 1 << 4,
+ HALVE_DETAIL_SAMPLE_PERF_bit = 1 << 5,
+ WALK_SIZE_bit = 1 << 6,
+ WALK_ALIGNMENT_bit = 1 << 7,
+ WALK_ALIGN8_PRIM_FITS_ST_bit = 1 << 8,
+ TILE_COVER_NO_SCISSOR_bit = 1 << 9,
+ KILL_PIX_POST_HI_Z_bit = 1 << 10,
+ KILL_PIX_POST_DETAIL_MASK_bit = 1 << 11,
+ MULTI_CHIP_SUPERTILE_ENABLE_bit = 1 << 12,
+ TILE_COVER_DISABLE_bit = 1 << 13,
+ FORCE_EOV_CNTDWN_ENABLE_bit = 1 << 14,
+ FORCE_EOV_TILE_ENABLE_bit = 1 << 15,
+ FORCE_EOV_REZ_ENABLE_bit = 1 << 16,
+ PS_ITER_SAMPLE_bit = 1 << 17,
+ VGT_ENHANCE = 0x00028a50,
+ VGT_ENHANCE__MI_TIMESTAMP_RES_mask = 0x03 << 0,
+ VGT_ENHANCE__MI_TIMESTAMP_RES_shift = 0,
+ X_0_992_CLOCKS_LATENCY_RANGE_IN_STEPS_OF_32 = 0x00,
+ X_0_496_CLOCKS_LATENCY_RANGE_IN_STEPS_OF_16 = 0x01,
+ X_0_248_CLOCKS_LATENCY_RANGE_IN_STEPS_OF_8 = 0x02,
+ X_0_124_CLOCKS_LATENCY_RANGE_IN_STEPS_OF_4 = 0x03,
+ MISC_mask = 0x3fffffff << 2,
+ MISC_shift = 2,
+ VGT_GS_OUT_PRIM_TYPE = 0x00028a6c,
+ OUTPRIM_TYPE_mask = 0x3f << 0,
+ OUTPRIM_TYPE_shift = 0,
+ POINTLIST = 0x00,
+ LINESTRIP = 0x01,
+ TRISTRIP = 0x02,
+ VGT_DMA_SIZE = 0x00028a74,
+ VGT_DMA_INDEX_TYPE = 0x00028a7c,
+/* INDEX_TYPE_mask = 0x03 << 0, */
+/* INDEX_TYPE_shift = 0, */
+ VGT_INDEX_16 = 0x00,
+ VGT_INDEX_32 = 0x01,
+ SWAP_MODE_mask = 0x03 << 2,
+ SWAP_MODE_shift = 2,
+ VGT_DMA_SWAP_NONE = 0x00,
+ VGT_DMA_SWAP_16_BIT = 0x01,
+ VGT_DMA_SWAP_32_BIT = 0x02,
+ VGT_DMA_SWAP_WORD = 0x03,
+ VGT_PRIMITIVEID_EN = 0x00028a84,
+ PRIMITIVEID_EN_bit = 1 << 0,
+ VGT_DMA_NUM_INSTANCES = 0x00028a88,
+ VGT_EVENT_INITIATOR = 0x00028a90,
+ EVENT_TYPE_mask = 0x3f << 0,
+ EVENT_TYPE_shift = 0,
+ CACHE_FLUSH_TS = 0x04,
+ CONTEXT_DONE = 0x05,
+ CACHE_FLUSH = 0x06,
+ VIZQUERY_START = 0x07,
+ VIZQUERY_END = 0x08,
+ SC_WAIT_WC = 0x09,
+ MPASS_PS_CP_REFETCH = 0x0a,
+ MPASS_PS_RST_START = 0x0b,
+ MPASS_PS_INCR_START = 0x0c,
+ RST_PIX_CNT = 0x0d,
+ RST_VTX_CNT = 0x0e,
+ VS_PARTIAL_FLUSH = 0x0f,
+ PS_PARTIAL_FLUSH = 0x10,
+ CACHE_FLUSH_AND_INV_TS_EVENT = 0x14,
+ ZPASS_DONE = 0x15,
+ CACHE_FLUSH_AND_INV_EVENT = 0x16,
+ PERFCOUNTER_START = 0x17,
+ PERFCOUNTER_STOP = 0x18,
+ PIPELINESTAT_START = 0x19,
+ PIPELINESTAT_STOP = 0x1a,
+ PERFCOUNTER_SAMPLE = 0x1b,
+ FLUSH_ES_OUTPUT = 0x1c,
+ FLUSH_GS_OUTPUT = 0x1d,
+ SAMPLE_PIPELINESTAT = 0x1e,
+ SO_VGTSTREAMOUT_FLUSH = 0x1f,
+ SAMPLE_STREAMOUTSTATS = 0x20,
+ RESET_VTX_CNT = 0x21,
+ BLOCK_CONTEXT_DONE = 0x22,
+ CR_CONTEXT_DONE = 0x23,
+ VGT_FLUSH = 0x24,
+ CR_DONE_TS = 0x25,
+ SQ_NON_EVENT = 0x26,
+ SC_SEND_DB_VPZ = 0x27,
+ BOTTOM_OF_PIPE_TS = 0x28,
+ DB_CACHE_FLUSH_AND_INV = 0x2a,
+ ADDRESS_HI_mask = 0xff << 19,
+ ADDRESS_HI_shift = 19,
+ EXTENDED_EVENT_bit = 1 << 27,
+ VGT_MULTI_PRIM_IB_RESET_EN = 0x00028a94,
+ RESET_EN_bit = 1 << 0,
+ VGT_INSTANCE_STEP_RATE_0 = 0x00028aa0,
+ VGT_INSTANCE_STEP_RATE_1 = 0x00028aa4,
+ VGT_STRMOUT_EN = 0x00028ab0,
+ STREAMOUT_bit = 1 << 0,
+ VGT_REUSE_OFF = 0x00028ab4,
+ REUSE_OFF_bit = 1 << 0,
+ VGT_VTX_CNT_EN = 0x00028ab8,
+ VTX_CNT_EN_bit = 1 << 0,
+ VGT_STRMOUT_BUFFER_SIZE_0 = 0x00028ad0,
+ VGT_STRMOUT_VTX_STRIDE_0 = 0x00028ad4,
+ VGT_STRMOUT_VTX_STRIDE_0__STRIDE_mask = 0x3ff << 0,
+ VGT_STRMOUT_VTX_STRIDE_0__STRIDE_shift = 0,
+ VGT_STRMOUT_BUFFER_BASE_0 = 0x00028ad8,
+ VGT_STRMOUT_BUFFER_OFFSET_0 = 0x00028adc,
+ VGT_STRMOUT_BUFFER_SIZE_1 = 0x00028ae0,
+ VGT_STRMOUT_VTX_STRIDE_1 = 0x00028ae4,
+ VGT_STRMOUT_VTX_STRIDE_1__STRIDE_mask = 0x3ff << 0,
+ VGT_STRMOUT_VTX_STRIDE_1__STRIDE_shift = 0,
+ VGT_STRMOUT_BUFFER_BASE_1 = 0x00028ae8,
+ VGT_STRMOUT_BUFFER_OFFSET_1 = 0x00028aec,
+ VGT_STRMOUT_BUFFER_SIZE_2 = 0x00028af0,
+ VGT_STRMOUT_VTX_STRIDE_2 = 0x00028af4,
+ VGT_STRMOUT_VTX_STRIDE_2__STRIDE_mask = 0x3ff << 0,
+ VGT_STRMOUT_VTX_STRIDE_2__STRIDE_shift = 0,
+ VGT_STRMOUT_BUFFER_BASE_2 = 0x00028af8,
+ VGT_STRMOUT_BUFFER_OFFSET_2 = 0x00028afc,
+ VGT_STRMOUT_BUFFER_SIZE_3 = 0x00028b00,
+ VGT_STRMOUT_VTX_STRIDE_3 = 0x00028b04,
+ VGT_STRMOUT_VTX_STRIDE_3__STRIDE_mask = 0x3ff << 0,
+ VGT_STRMOUT_VTX_STRIDE_3__STRIDE_shift = 0,
+ VGT_STRMOUT_BUFFER_BASE_3 = 0x00028b08,
+ VGT_STRMOUT_BUFFER_OFFSET_3 = 0x00028b0c,
+ VGT_STRMOUT_BASE_OFFSET_0 = 0x00028b10,
+ VGT_STRMOUT_BASE_OFFSET_1 = 0x00028b14,
+ VGT_STRMOUT_BASE_OFFSET_2 = 0x00028b18,
+ VGT_STRMOUT_BASE_OFFSET_3 = 0x00028b1c,
+ VGT_STRMOUT_BUFFER_EN = 0x00028b20,
+ BUFFER_0_EN_bit = 1 << 0,
+ BUFFER_1_EN_bit = 1 << 1,
+ BUFFER_2_EN_bit = 1 << 2,
+ BUFFER_3_EN_bit = 1 << 3,
+ VGT_STRMOUT_DRAW_OPAQUE_OFFSET = 0x00028b28,
+ VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE = 0x00028b2c,
+ VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE = 0x00028b30,
+ VGT_STRMOUT_BASE_OFFSET_HI_0 = 0x00028b44,
+ VGT_STRMOUT_BASE_OFFSET_HI_0__BASE_OFFSET_mask = 0x3f << 0,
+ VGT_STRMOUT_BASE_OFFSET_HI_0__BASE_OFFSET_shift = 0,
+ VGT_STRMOUT_BASE_OFFSET_HI_1 = 0x00028b48,
+ VGT_STRMOUT_BASE_OFFSET_HI_1__BASE_OFFSET_mask = 0x3f << 0,
+ VGT_STRMOUT_BASE_OFFSET_HI_1__BASE_OFFSET_shift = 0,
+ VGT_STRMOUT_BASE_OFFSET_HI_2 = 0x00028b4c,
+ VGT_STRMOUT_BASE_OFFSET_HI_2__BASE_OFFSET_mask = 0x3f << 0,
+ VGT_STRMOUT_BASE_OFFSET_HI_2__BASE_OFFSET_shift = 0,
+ VGT_STRMOUT_BASE_OFFSET_HI_3 = 0x00028b50,
+ VGT_STRMOUT_BASE_OFFSET_HI_3__BASE_OFFSET_mask = 0x3f << 0,
+ VGT_STRMOUT_BASE_OFFSET_HI_3__BASE_OFFSET_shift = 0,
+ PA_SC_LINE_CNTL = 0x00028c00,
+ BRES_CNTL_mask = 0xff << 0,
+ BRES_CNTL_shift = 0,
+ USE_BRES_CNTL_bit = 1 << 8,
+ EXPAND_LINE_WIDTH_bit = 1 << 9,
+ LAST_PIXEL_bit = 1 << 10,
+ PA_SC_AA_CONFIG = 0x00028c04,
+ MSAA_NUM_SAMPLES_mask = 0x03 << 0,
+ MSAA_NUM_SAMPLES_shift = 0,
+ AA_MASK_CENTROID_DTMN_bit = 1 << 4,
+ MAX_SAMPLE_DIST_mask = 0x0f << 13,
+ MAX_SAMPLE_DIST_shift = 13,
+ PA_SU_VTX_CNTL = 0x00028c08,
+ PIX_CENTER_bit = 1 << 0,
+ PA_SU_VTX_CNTL__ROUND_MODE_mask = 0x03 << 1,
+ PA_SU_VTX_CNTL__ROUND_MODE_shift = 1,
+ X_TRUNCATE = 0x00,
+ X_ROUND = 0x01,
+ X_ROUND_TO_EVEN = 0x02,
+ X_ROUND_TO_ODD = 0x03,
+ QUANT_MODE_mask = 0x07 << 3,
+ QUANT_MODE_shift = 3,
+ X_1_16TH = 0x00,
+ X_1_8TH = 0x01,
+ X_1_4TH = 0x02,
+ X_1_2 = 0x03,
+ X_1 = 0x04,
+ X_1_256TH = 0x05,
+ PA_CL_GB_VERT_CLIP_ADJ = 0x00028c0c,
+ PA_CL_GB_VERT_DISC_ADJ = 0x00028c10,
+ PA_CL_GB_HORZ_CLIP_ADJ = 0x00028c14,
+ PA_CL_GB_HORZ_DISC_ADJ = 0x00028c18,
+ PA_SC_AA_SAMPLE_LOCS_MCTX = 0x00028c1c,
+/* S0_X_mask = 0x0f << 0, */
+/* S0_X_shift = 0, */
+/* S0_Y_mask = 0x0f << 4, */
+/* S0_Y_shift = 4, */
+/* S1_X_mask = 0x0f << 8, */
+/* S1_X_shift = 8, */
+/* S1_Y_mask = 0x0f << 12, */
+/* S1_Y_shift = 12, */
+/* S2_X_mask = 0x0f << 16, */
+/* S2_X_shift = 16, */
+/* S2_Y_mask = 0x0f << 20, */
+/* S2_Y_shift = 20, */
+/* S3_X_mask = 0x0f << 24, */
+/* S3_X_shift = 24, */
+/* S3_Y_mask = 0x0f << 28, */
+/* S3_Y_shift = 28, */
+ PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX = 0x00028c20,
+/* S4_X_mask = 0x0f << 0, */
+/* S4_X_shift = 0, */
+/* S4_Y_mask = 0x0f << 4, */
+/* S4_Y_shift = 4, */
+/* S5_X_mask = 0x0f << 8, */
+/* S5_X_shift = 8, */
+/* S5_Y_mask = 0x0f << 12, */
+/* S5_Y_shift = 12, */
+/* S6_X_mask = 0x0f << 16, */
+/* S6_X_shift = 16, */
+/* S6_Y_mask = 0x0f << 20, */
+/* S6_Y_shift = 20, */
+/* S7_X_mask = 0x0f << 24, */
+/* S7_X_shift = 24, */
+/* S7_Y_mask = 0x0f << 28, */
+/* S7_Y_shift = 28, */
+ CB_CLRCMP_CONTROL = 0x00028c30,
+ CLRCMP_FCN_SRC_mask = 0x07 << 0,
+ CLRCMP_FCN_SRC_shift = 0,
+ CLRCMP_DRAW_ALWAYS = 0x00,
+ CLRCMP_DRAW_NEVER = 0x01,
+ CLRCMP_DRAW_ON_NEQ = 0x04,
+ CLRCMP_DRAW_ON_EQ = 0x05,
+ CLRCMP_FCN_DST_mask = 0x07 << 8,
+ CLRCMP_FCN_DST_shift = 8,
+/* CLRCMP_DRAW_ALWAYS = 0x00, */
+/* CLRCMP_DRAW_NEVER = 0x01, */
+/* CLRCMP_DRAW_ON_NEQ = 0x04, */
+/* CLRCMP_DRAW_ON_EQ = 0x05, */
+ CLRCMP_FCN_SEL_mask = 0x03 << 24,
+ CLRCMP_FCN_SEL_shift = 24,
+ CLRCMP_SEL_DST = 0x00,
+ CLRCMP_SEL_SRC = 0x01,
+ CLRCMP_SEL_AND = 0x02,
+ CB_CLRCMP_SRC = 0x00028c34,
+ CB_CLRCMP_DST = 0x00028c38,
+ CB_CLRCMP_MSK = 0x00028c3c,
+ PA_SC_AA_MASK = 0x00028c48,
+ VGT_VERTEX_REUSE_BLOCK_CNTL = 0x00028c58,
+ VTX_REUSE_DEPTH_mask = 0xff << 0,
+ VTX_REUSE_DEPTH_shift = 0,
+ VGT_OUT_DEALLOC_CNTL = 0x00028c5c,
+ DEALLOC_DIST_mask = 0x7f << 0,
+ DEALLOC_DIST_shift = 0,
+ DB_RENDER_CONTROL = 0x00028d0c,
+ DEPTH_CLEAR_ENABLE_bit = 1 << 0,
+ STENCIL_CLEAR_ENABLE_bit = 1 << 1,
+ DEPTH_COPY_bit = 1 << 2,
+ STENCIL_COPY_bit = 1 << 3,
+ RESUMMARIZE_ENABLE_bit = 1 << 4,
+ STENCIL_COMPRESS_DISABLE_bit = 1 << 5,
+ DEPTH_COMPRESS_DISABLE_bit = 1 << 6,
+ COPY_CENTROID_bit = 1 << 7,
+ COPY_SAMPLE_mask = 0x07 << 8,
+ COPY_SAMPLE_shift = 8,
+ ZPASS_INCREMENT_DISABLE_bit = 1 << 11,
+ DB_RENDER_OVERRIDE = 0x00028d10,
+ FORCE_HIZ_ENABLE_mask = 0x03 << 0,
+ FORCE_HIZ_ENABLE_shift = 0,
+ FORCE_OFF = 0x00,
+ FORCE_ENABLE = 0x01,
+ FORCE_DISABLE = 0x02,
+ FORCE_RESERVED = 0x03,
+ FORCE_HIS_ENABLE0_mask = 0x03 << 2,
+ FORCE_HIS_ENABLE0_shift = 2,
+/* FORCE_OFF = 0x00, */
+/* FORCE_ENABLE = 0x01, */
+/* FORCE_DISABLE = 0x02, */
+/* FORCE_RESERVED = 0x03, */
+ FORCE_HIS_ENABLE1_mask = 0x03 << 4,
+ FORCE_HIS_ENABLE1_shift = 4,
+/* FORCE_OFF = 0x00, */
+/* FORCE_ENABLE = 0x01, */
+/* FORCE_DISABLE = 0x02, */
+/* FORCE_RESERVED = 0x03, */
+ FORCE_SHADER_Z_ORDER_bit = 1 << 6,
+ FAST_Z_DISABLE_bit = 1 << 7,
+ FAST_STENCIL_DISABLE_bit = 1 << 8,
+ NOOP_CULL_DISABLE_bit = 1 << 9,
+ FORCE_COLOR_KILL_bit = 1 << 10,
+ FORCE_Z_READ_bit = 1 << 11,
+ FORCE_STENCIL_READ_bit = 1 << 12,
+ FORCE_FULL_Z_RANGE_mask = 0x03 << 13,
+ FORCE_FULL_Z_RANGE_shift = 13,
+/* FORCE_OFF = 0x00, */
+/* FORCE_ENABLE = 0x01, */
+/* FORCE_DISABLE = 0x02, */
+/* FORCE_RESERVED = 0x03, */
+ FORCE_QC_SMASK_CONFLICT_bit = 1 << 15,
+ DISABLE_VIEWPORT_CLAMP_bit = 1 << 16,
+ IGNORE_SC_ZRANGE_bit = 1 << 17,
+ DB_HTILE_SURFACE = 0x00028d24,
+ HTILE_WIDTH_bit = 1 << 0,
+ HTILE_HEIGHT_bit = 1 << 1,
+ LINEAR_bit = 1 << 2,
+ FULL_CACHE_bit = 1 << 3,
+ HTILE_USES_PRELOAD_WIN_bit = 1 << 4,
+ PRELOAD_bit = 1 << 5,
+ PREFETCH_WIDTH_mask = 0x3f << 6,
+ PREFETCH_WIDTH_shift = 6,
+ PREFETCH_HEIGHT_mask = 0x3f << 12,
+ PREFETCH_HEIGHT_shift = 12,
+ DB_SRESULTS_COMPARE_STATE1 = 0x00028d2c,
+ COMPAREFUNC1_mask = 0x07 << 0,
+ COMPAREFUNC1_shift = 0,
+/* REF_NEVER = 0x00, */
+/* REF_LESS = 0x01, */
+/* REF_EQUAL = 0x02, */
+/* REF_LEQUAL = 0x03, */
+/* REF_GREATER = 0x04, */
+/* REF_NOTEQUAL = 0x05, */
+/* REF_GEQUAL = 0x06, */
+/* REF_ALWAYS = 0x07, */
+ COMPAREVALUE1_mask = 0xff << 4,
+ COMPAREVALUE1_shift = 4,
+ COMPAREMASK1_mask = 0xff << 12,
+ COMPAREMASK1_shift = 12,
+ ENABLE1_bit = 1 << 24,
+ DB_PRELOAD_CONTROL = 0x00028d30,
+ START_X_mask = 0xff << 0,
+ START_X_shift = 0,
+ START_Y_mask = 0xff << 8,
+ START_Y_shift = 8,
+ MAX_X_mask = 0xff << 16,
+ MAX_X_shift = 16,
+ MAX_Y_mask = 0xff << 24,
+ MAX_Y_shift = 24,
+ DB_PREFETCH_LIMIT = 0x00028d34,
+ DEPTH_HEIGHT_TILE_MAX_mask = 0x3ff << 0,
+ DEPTH_HEIGHT_TILE_MAX_shift = 0,
+ PA_SU_POLY_OFFSET_DB_FMT_CNTL = 0x00028df8,
+ POLY_OFFSET_NEG_NUM_DB_BITS_mask = 0xff << 0,
+ POLY_OFFSET_NEG_NUM_DB_BITS_shift = 0,
+ POLY_OFFSET_DB_IS_FLOAT_FMT_bit = 1 << 8,
+ PA_SU_POLY_OFFSET_CLAMP = 0x00028dfc,
+ PA_SU_POLY_OFFSET_FRONT_SCALE = 0x00028e00,
+ PA_SU_POLY_OFFSET_FRONT_OFFSET = 0x00028e04,
+ PA_SU_POLY_OFFSET_BACK_SCALE = 0x00028e08,
+ PA_SU_POLY_OFFSET_BACK_OFFSET = 0x00028e0c,
+ PA_CL_POINT_X_RAD = 0x00028e10,
+ PA_CL_POINT_Y_RAD = 0x00028e14,
+ PA_CL_POINT_SIZE = 0x00028e18,
+ PA_CL_POINT_CULL_RAD = 0x00028e1c,
+ PA_CL_UCP_0_X = 0x00028e20,
+ PA_CL_UCP_0_X_num = 6,
+ PA_CL_UCP_0_X_offset = 16,
+ PA_CL_UCP_0_Y = 0x00028e24,
+ PA_CL_UCP_0_Y_num = 6,
+ PA_CL_UCP_0_Y_offset = 16,
+ PA_CL_UCP_0_Z = 0x00028e28,
+ PA_CL_UCP_0_Z_num = 6,
+ PA_CL_UCP_0_Z_offset = 16,
+ SQ_ALU_CONSTANT0_0 = 0x00030000,
+ SQ_ALU_CONSTANT1_0 = 0x00030004,
+ SQ_ALU_CONSTANT2_0 = 0x00030008,
+ SQ_ALU_CONSTANT3_0 = 0x0003000c,
+ SQ_VTX_CONSTANT_WORD0_0 = 0x00038000,
+ SQ_TEX_RESOURCE_WORD0_0 = 0x00038000,
+ DIM_mask = 0x07 << 0,
+ DIM_shift = 0,
+ SQ_TEX_DIM_1D = 0x00,
+ SQ_TEX_DIM_2D = 0x01,
+ SQ_TEX_DIM_3D = 0x02,
+ SQ_TEX_DIM_CUBEMAP = 0x03,
+ SQ_TEX_DIM_1D_ARRAY = 0x04,
+ SQ_TEX_DIM_2D_ARRAY = 0x05,
+ SQ_TEX_DIM_2D_MSAA = 0x06,
+ SQ_TEX_DIM_2D_ARRAY_MSAA = 0x07,
+ SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask = 0x0f << 3,
+ SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift = 3,
+ TILE_TYPE_bit = 1 << 7,
+ PITCH_mask = 0x7ff << 8,
+ PITCH_shift = 8,
+ TEX_WIDTH_mask = 0x1fff << 19,
+ TEX_WIDTH_shift = 19,
+ SQ_VTX_CONSTANT_WORD1_0 = 0x00038004,
+ SQ_TEX_RESOURCE_WORD1_0 = 0x00038004,
+ TEX_HEIGHT_mask = 0x1fff << 0,
+ TEX_HEIGHT_shift = 0,
+ TEX_DEPTH_mask = 0x1fff << 13,
+ TEX_DEPTH_shift = 13,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask = 0x3f << 26,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift = 26,
+ SQ_VTX_CONSTANT_WORD2_0 = 0x00038008,
+ BASE_ADDRESS_HI_mask = 0xff << 0,
+ BASE_ADDRESS_HI_shift = 0,
+ SQ_VTX_CONSTANT_WORD2_0__STRIDE_mask = 0x7ff << 8,
+ SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift = 8,
+ SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit = 1 << 19,
+ SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_mask = 0x3f << 20,
+ SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift = 20,
+ SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask = 0x03 << 26,
+ SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift = 26,
+/* SQ_NUM_FORMAT_NORM = 0x00, */
+/* SQ_NUM_FORMAT_INT = 0x01, */
+/* SQ_NUM_FORMAT_SCALED = 0x02, */
+ SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit = 1 << 28,
+ SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit = 1 << 29,
+ SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_mask = 0x03 << 30,
+ SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift = 30,
+/* SQ_ENDIAN_NONE = 0x00, */
+/* SQ_ENDIAN_8IN16 = 0x01, */
+/* SQ_ENDIAN_8IN32 = 0x02, */
+ SQ_TEX_RESOURCE_WORD2_0 = 0x00038008,
+ SQ_VTX_CONSTANT_WORD3_0 = 0x0003800c,
+ MEM_REQUEST_SIZE_mask = 0x03 << 0,
+ MEM_REQUEST_SIZE_shift = 0,
+ SQ_TEX_RESOURCE_WORD3_0 = 0x0003800c,
+ SQ_TEX_RESOURCE_WORD4_0 = 0x00038010,
+ FORMAT_COMP_X_mask = 0x03 << 0,
+ FORMAT_COMP_X_shift = 0,
+ SQ_FORMAT_COMP_UNSIGNED = 0x00,
+ SQ_FORMAT_COMP_SIGNED = 0x01,
+ SQ_FORMAT_COMP_UNSIGNED_BIASED = 0x02,
+ FORMAT_COMP_Y_mask = 0x03 << 2,
+ FORMAT_COMP_Y_shift = 2,
+/* SQ_FORMAT_COMP_UNSIGNED = 0x00, */
+/* SQ_FORMAT_COMP_SIGNED = 0x01, */
+/* SQ_FORMAT_COMP_UNSIGNED_BIASED = 0x02, */
+ FORMAT_COMP_Z_mask = 0x03 << 4,
+ FORMAT_COMP_Z_shift = 4,
+/* SQ_FORMAT_COMP_UNSIGNED = 0x00, */
+/* SQ_FORMAT_COMP_SIGNED = 0x01, */
+/* SQ_FORMAT_COMP_UNSIGNED_BIASED = 0x02, */
+ FORMAT_COMP_W_mask = 0x03 << 6,
+ FORMAT_COMP_W_shift = 6,
+/* SQ_FORMAT_COMP_UNSIGNED = 0x00, */
+/* SQ_FORMAT_COMP_SIGNED = 0x01, */
+/* SQ_FORMAT_COMP_UNSIGNED_BIASED = 0x02, */
+ SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_mask = 0x03 << 8,
+ SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift = 8,
+/* SQ_NUM_FORMAT_NORM = 0x00, */
+/* SQ_NUM_FORMAT_INT = 0x01, */
+/* SQ_NUM_FORMAT_SCALED = 0x02, */
+ SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit = 1 << 10,
+ SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit = 1 << 11,
+ SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_mask = 0x03 << 12,
+ SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift = 12,
+/* SQ_ENDIAN_NONE = 0x00, */
+/* SQ_ENDIAN_8IN16 = 0x01, */
+/* SQ_ENDIAN_8IN32 = 0x02, */
+ REQUEST_SIZE_mask = 0x03 << 14,
+ REQUEST_SIZE_shift = 14,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask = 0x07 << 16,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift = 16,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask = 0x07 << 19,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift = 19,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask = 0x07 << 22,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift = 22,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask = 0x07 << 25,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift = 25,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+ BASE_LEVEL_mask = 0x0f << 28,
+ BASE_LEVEL_shift = 28,
+ SQ_TEX_RESOURCE_WORD5_0 = 0x00038014,
+ LAST_LEVEL_mask = 0x0f << 0,
+ LAST_LEVEL_shift = 0,
+ BASE_ARRAY_mask = 0x1fff << 4,
+ BASE_ARRAY_shift = 4,
+ LAST_ARRAY_mask = 0x1fff << 17,
+ LAST_ARRAY_shift = 17,
+ SQ_TEX_RESOURCE_WORD6_0 = 0x00038018,
+ MPEG_CLAMP_mask = 0x03 << 0,
+ MPEG_CLAMP_shift = 0,
+ SQ_TEX_MPEG_CLAMP_OFF = 0x00,
+ SQ_TEX_MPEG_9 = 0x01,
+ SQ_TEX_MPEG_10 = 0x02,
+ PERF_MODULATION_mask = 0x07 << 5,
+ PERF_MODULATION_shift = 5,
+ INTERLACED_bit = 1 << 8,
+ SQ_TEX_RESOURCE_WORD6_0__TYPE_mask = 0x03 << 30,
+ SQ_TEX_RESOURCE_WORD6_0__TYPE_shift = 30,
+ SQ_TEX_VTX_INVALID_TEXTURE = 0x00,
+ SQ_TEX_VTX_INVALID_BUFFER = 0x01,
+ SQ_TEX_VTX_VALID_TEXTURE = 0x02,
+ SQ_TEX_VTX_VALID_BUFFER = 0x03,
+ SQ_VTX_CONSTANT_WORD6_0 = 0x00038018,
+ SQ_VTX_CONSTANT_WORD6_0__TYPE_mask = 0x03 << 30,
+ SQ_VTX_CONSTANT_WORD6_0__TYPE_shift = 30,
+/* SQ_TEX_VTX_INVALID_TEXTURE = 0x00, */
+/* SQ_TEX_VTX_INVALID_BUFFER = 0x01, */
+/* SQ_TEX_VTX_VALID_TEXTURE = 0x02, */
+/* SQ_TEX_VTX_VALID_BUFFER = 0x03, */
+ SQ_TEX_SAMPLER_WORD0_0 = 0x0003c000,
+ SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_mask = 0x07 << 0,
+ SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift = 0,
+ SQ_TEX_WRAP = 0x00,
+ SQ_TEX_MIRROR = 0x01,
+ SQ_TEX_CLAMP_LAST_TEXEL = 0x02,
+ SQ_TEX_MIRROR_ONCE_LAST_TEXEL = 0x03,
+ SQ_TEX_CLAMP_HALF_BORDER = 0x04,
+ SQ_TEX_MIRROR_ONCE_HALF_BORDER = 0x05,
+ SQ_TEX_CLAMP_BORDER = 0x06,
+ SQ_TEX_MIRROR_ONCE_BORDER = 0x07,
+ CLAMP_Y_mask = 0x07 << 3,
+ CLAMP_Y_shift = 3,
+/* SQ_TEX_WRAP = 0x00, */
+/* SQ_TEX_MIRROR = 0x01, */
+/* SQ_TEX_CLAMP_LAST_TEXEL = 0x02, */
+/* SQ_TEX_MIRROR_ONCE_LAST_TEXEL = 0x03, */
+/* SQ_TEX_CLAMP_HALF_BORDER = 0x04, */
+/* SQ_TEX_MIRROR_ONCE_HALF_BORDER = 0x05, */
+/* SQ_TEX_CLAMP_BORDER = 0x06, */
+/* SQ_TEX_MIRROR_ONCE_BORDER = 0x07, */
+ CLAMP_Z_mask = 0x07 << 6,
+ CLAMP_Z_shift = 6,
+/* SQ_TEX_WRAP = 0x00, */
+/* SQ_TEX_MIRROR = 0x01, */
+/* SQ_TEX_CLAMP_LAST_TEXEL = 0x02, */
+/* SQ_TEX_MIRROR_ONCE_LAST_TEXEL = 0x03, */
+/* SQ_TEX_CLAMP_HALF_BORDER = 0x04, */
+/* SQ_TEX_MIRROR_ONCE_HALF_BORDER = 0x05, */
+/* SQ_TEX_CLAMP_BORDER = 0x06, */
+/* SQ_TEX_MIRROR_ONCE_BORDER = 0x07, */
+ XY_MAG_FILTER_mask = 0x07 << 9,
+ XY_MAG_FILTER_shift = 9,
+ SQ_TEX_XY_FILTER_POINT = 0x00,
+ SQ_TEX_XY_FILTER_BILINEAR = 0x01,
+ SQ_TEX_XY_FILTER_BICUBIC = 0x02,
+ XY_MIN_FILTER_mask = 0x07 << 12,
+ XY_MIN_FILTER_shift = 12,
+/* SQ_TEX_XY_FILTER_POINT = 0x00, */
+/* SQ_TEX_XY_FILTER_BILINEAR = 0x01, */
+/* SQ_TEX_XY_FILTER_BICUBIC = 0x02, */
+ Z_FILTER_mask = 0x03 << 15,
+ Z_FILTER_shift = 15,
+ SQ_TEX_Z_FILTER_NONE = 0x00,
+ SQ_TEX_Z_FILTER_POINT = 0x01,
+ SQ_TEX_Z_FILTER_LINEAR = 0x02,
+ MIP_FILTER_mask = 0x03 << 17,
+ MIP_FILTER_shift = 17,
+/* SQ_TEX_Z_FILTER_NONE = 0x00, */
+/* SQ_TEX_Z_FILTER_POINT = 0x01, */
+/* SQ_TEX_Z_FILTER_LINEAR = 0x02, */
+ BORDER_COLOR_TYPE_mask = 0x03 << 22,
+ BORDER_COLOR_TYPE_shift = 22,
+ SQ_TEX_BORDER_COLOR_TRANS_BLACK = 0x00,
+ SQ_TEX_BORDER_COLOR_OPAQUE_BLACK = 0x01,
+ SQ_TEX_BORDER_COLOR_OPAQUE_WHITE = 0x02,
+ SQ_TEX_BORDER_COLOR_REGISTER = 0x03,
+ POINT_SAMPLING_CLAMP_bit = 1 << 24,
+ TEX_ARRAY_OVERRIDE_bit = 1 << 25,
+ DEPTH_COMPARE_FUNCTION_mask = 0x07 << 26,
+ DEPTH_COMPARE_FUNCTION_shift = 26,
+ SQ_TEX_DEPTH_COMPARE_NEVER = 0x00,
+ SQ_TEX_DEPTH_COMPARE_LESS = 0x01,
+ SQ_TEX_DEPTH_COMPARE_EQUAL = 0x02,
+ SQ_TEX_DEPTH_COMPARE_LESSEQUAL = 0x03,
+ SQ_TEX_DEPTH_COMPARE_GREATER = 0x04,
+ SQ_TEX_DEPTH_COMPARE_NOTEQUAL = 0x05,
+ SQ_TEX_DEPTH_COMPARE_GREATEREQUAL = 0x06,
+ SQ_TEX_DEPTH_COMPARE_ALWAYS = 0x07,
+ CHROMA_KEY_mask = 0x03 << 29,
+ CHROMA_KEY_shift = 29,
+ SQ_TEX_CHROMA_KEY_DISABLED = 0x00,
+ SQ_TEX_CHROMA_KEY_KILL = 0x01,
+ SQ_TEX_CHROMA_KEY_BLEND = 0x02,
+ LOD_USES_MINOR_AXIS_bit = 1 << 31,
+ SQ_TEX_SAMPLER_WORD1_0 = 0x0003c004,
+ MIN_LOD_mask = 0x3ff << 0,
+ MIN_LOD_shift = 0,
+ MAX_LOD_mask = 0x3ff << 10,
+ MAX_LOD_shift = 10,
+ SQ_TEX_SAMPLER_WORD1_0__LOD_BIAS_mask = 0xfff << 20,
+ SQ_TEX_SAMPLER_WORD1_0__LOD_BIAS_shift = 20,
+ SQ_TEX_SAMPLER_WORD2_0 = 0x0003c008,
+ LOD_BIAS_SEC_mask = 0xfff << 0,
+ LOD_BIAS_SEC_shift = 0,
+ MC_COORD_TRUNCATE_bit = 1 << 12,
+ SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit = 1 << 13,
+ HIGH_PRECISION_FILTER_bit = 1 << 14,
+ PERF_MIP_mask = 0x07 << 15,
+ PERF_MIP_shift = 15,
+ PERF_Z_mask = 0x03 << 18,
+ PERF_Z_shift = 18,
+ FETCH_4_bit = 1 << 26,
+ SAMPLE_IS_PCF_bit = 1 << 27,
+ SQ_TEX_SAMPLER_WORD2_0__TYPE_bit = 1 << 31,
+ SQ_VTX_BASE_VTX_LOC = 0x0003cff0,
+ SQ_VTX_START_INST_LOC = 0x0003cff4,
+ SQ_LOOP_CONST_DX10_0 = 0x0003e200,
+ SQ_LOOP_CONST_0 = 0x0003e200,
+ SQ_LOOP_CONST_0__COUNT_mask = 0xfff << 0,
+ SQ_LOOP_CONST_0__COUNT_shift = 0,
+ INIT_mask = 0xfff << 12,
+ INIT_shift = 12,
+ INC_mask = 0xff << 24,
+ INC_shift = 24,
+ SQ_BOOL_CONST_0 = 0x0003e380,
+ SQ_BOOL_CONST_0_num = 3,
+
+} ;
+
+#endif /* _AUTOREGS */
+
diff --git a/r600_reg_r6xx.h b/r600_reg_r6xx.h
new file mode 100644
index 0000000..f7702c4
--- /dev/null
+++ b/r600_reg_r6xx.h
@@ -0,0 +1,492 @@
+/*
+ * RadeonHD R6xx, R7xx Register documentation
+ *
+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
+ * Copyright (C) 2008-2009 Matthias Hopf
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _R600_REG_R6xx_H_
+#define _R600_REG_R6xx_H_
+
+/*
+ * Registers for R6xx chips that are not documented yet
+ */
+
+enum {
+
+ MM_INDEX = 0x0000,
+ MM_DATA = 0x0004,
+
+ SRBM_STATUS = 0x0e50,
+ RLC_RQ_PENDING_bit = 1 << 3,
+ RCU_RQ_PENDING_bit = 1 << 4,
+ GRBM_RQ_PENDING_bit = 1 << 5,
+ HI_RQ_PENDING_bit = 1 << 6,
+ IO_EXTERN_SIGNAL_bit = 1 << 7,
+ VMC_BUSY_bit = 1 << 8,
+ MCB_BUSY_bit = 1 << 9,
+ MCDZ_BUSY_bit = 1 << 10,
+ MCDY_BUSY_bit = 1 << 11,
+ MCDX_BUSY_bit = 1 << 12,
+ MCDW_BUSY_bit = 1 << 13,
+ SEM_BUSY_bit = 1 << 14,
+ SRBM_STATUS__RLC_BUSY_bit = 1 << 15,
+ PDMA_BUSY_bit = 1 << 16,
+ IH_BUSY_bit = 1 << 17,
+ CSC_BUSY_bit = 1 << 20,
+ CMC7_BUSY_bit = 1 << 21,
+ CMC6_BUSY_bit = 1 << 22,
+ CMC5_BUSY_bit = 1 << 23,
+ CMC4_BUSY_bit = 1 << 24,
+ CMC3_BUSY_bit = 1 << 25,
+ CMC2_BUSY_bit = 1 << 26,
+ CMC1_BUSY_bit = 1 << 27,
+ CMC0_BUSY_bit = 1 << 28,
+ BIF_BUSY_bit = 1 << 29,
+ IDCT_BUSY_bit = 1 << 30,
+
+ SRBM_READ_ERROR = 0x0e98,
+ READ_ADDRESS_mask = 0xffff << 2,
+ READ_ADDRESS_shift = 2,
+ READ_REQUESTER_HI_bit = 1 << 24,
+ READ_REQUESTER_GRBM_bit = 1 << 25,
+ READ_REQUESTER_RCU_bit = 1 << 26,
+ READ_REQUESTER_RLC_bit = 1 << 27,
+ READ_ERROR_bit = 1 << 31,
+
+ SRBM_INT_STATUS = 0x0ea4,
+ RDERR_INT_STAT_bit = 1 << 0,
+ GFX_CNTX_SWITCH_INT_STAT_bit = 1 << 1,
+ SRBM_INT_ACK = 0x0ea8,
+ RDERR_INT_ACK_bit = 1 << 0,
+ GFX_CNTX_SWITCH_INT_ACK_bit = 1 << 1,
+
+ R6XX_MC_VM_FB_LOCATION = 0x2180,
+
+ VENDOR_DEVICE_ID = 0x4000,
+
+ D1GRPH_PRIMARY_SURFACE_ADDRESS = 0x6110,
+ D1GRPH_PITCH = 0x6120,
+ D1GRPH_Y_END = 0x6138,
+
+ GRBM_STATUS = 0x8010,
+ CMDFIFO_AVAIL_mask = 0x1f << 0,
+ CMDFIFO_AVAIL_shift = 0,
+ SRBM_RQ_PENDING_bit = 1 << 5,
+ CP_RQ_PENDING_bit = 1 << 6,
+ CF_RQ_PENDING_bit = 1 << 7,
+ PF_RQ_PENDING_bit = 1 << 8,
+ GRBM_EE_BUSY_bit = 1 << 10,
+ GRBM_STATUS__VC_BUSY_bit = 1 << 11,
+ DB03_CLEAN_bit = 1 << 12,
+ CB03_CLEAN_bit = 1 << 13,
+ VGT_BUSY_NO_DMA_bit = 1 << 16,
+ GRBM_STATUS__VGT_BUSY_bit = 1 << 17,
+ TA03_BUSY_bit = 1 << 18,
+ GRBM_STATUS__TC_BUSY_bit = 1 << 19,
+ SX_BUSY_bit = 1 << 20,
+ SH_BUSY_bit = 1 << 21,
+ SPI03_BUSY_bit = 1 << 22,
+ SMX_BUSY_bit = 1 << 23,
+ SC_BUSY_bit = 1 << 24,
+ PA_BUSY_bit = 1 << 25,
+ DB03_BUSY_bit = 1 << 26,
+ CR_BUSY_bit = 1 << 27,
+ CP_COHERENCY_BUSY_bit = 1 << 28,
+ GRBM_STATUS__CP_BUSY_bit = 1 << 29,
+ CB03_BUSY_bit = 1 << 30,
+ GUI_ACTIVE_bit = 1 << 31,
+ GRBM_STATUS2 = 0x8014,
+ CR_CLEAN_bit = 1 << 0,
+ SMX_CLEAN_bit = 1 << 1,
+ SPI0_BUSY_bit = 1 << 8,
+ SPI1_BUSY_bit = 1 << 9,
+ SPI2_BUSY_bit = 1 << 10,
+ SPI3_BUSY_bit = 1 << 11,
+ TA0_BUSY_bit = 1 << 12,
+ TA1_BUSY_bit = 1 << 13,
+ TA2_BUSY_bit = 1 << 14,
+ TA3_BUSY_bit = 1 << 15,
+ DB0_BUSY_bit = 1 << 16,
+ DB1_BUSY_bit = 1 << 17,
+ DB2_BUSY_bit = 1 << 18,
+ DB3_BUSY_bit = 1 << 19,
+ CB0_BUSY_bit = 1 << 20,
+ CB1_BUSY_bit = 1 << 21,
+ CB2_BUSY_bit = 1 << 22,
+ CB3_BUSY_bit = 1 << 23,
+ GRBM_SOFT_RESET = 0x8020,
+ SOFT_RESET_CP_bit = 1 << 0,
+ SOFT_RESET_CB_bit = 1 << 1,
+ SOFT_RESET_CR_bit = 1 << 2,
+ SOFT_RESET_DB_bit = 1 << 3,
+ SOFT_RESET_PA_bit = 1 << 5,
+ SOFT_RESET_SC_bit = 1 << 6,
+ SOFT_RESET_SMX_bit = 1 << 7,
+ SOFT_RESET_SPI_bit = 1 << 8,
+ SOFT_RESET_SH_bit = 1 << 9,
+ SOFT_RESET_SX_bit = 1 << 10,
+ SOFT_RESET_TC_bit = 1 << 11,
+ SOFT_RESET_TA_bit = 1 << 12,
+ SOFT_RESET_VC_bit = 1 << 13,
+ SOFT_RESET_VGT_bit = 1 << 14,
+ SOFT_RESET_GRBM_GCA_bit = 1 << 15,
+
+ WAIT_UNTIL = 0x8040,
+ WAIT_CP_DMA_IDLE_bit = 1 << 8,
+ WAIT_CMDFIFO_bit = 1 << 10,
+ WAIT_2D_IDLE_bit = 1 << 14,
+ WAIT_3D_IDLE_bit = 1 << 15,
+ WAIT_2D_IDLECLEAN_bit = 1 << 16,
+ WAIT_3D_IDLECLEAN_bit = 1 << 17,
+ WAIT_EXTERN_SIG_bit = 1 << 19,
+ CMDFIFO_ENTRIES_mask = 0x1f << 20,
+ CMDFIFO_ENTRIES_shift = 20,
+
+ GRBM_READ_ERROR = 0x8058,
+/* READ_ADDRESS_mask = 0xffff << 2, */
+/* READ_ADDRESS_shift = 2, */
+ READ_REQUESTER_SRBM_bit = 1 << 28,
+ READ_REQUESTER_CP_bit = 1 << 29,
+ READ_REQUESTER_WU_POLL_bit = 1 << 30,
+/* READ_ERROR_bit = 1 << 31, */
+
+ SCRATCH_REG0 = 0x8500,
+ SCRATCH_REG1 = 0x8504,
+ SCRATCH_REG2 = 0x8508,
+ SCRATCH_REG3 = 0x850c,
+ SCRATCH_REG4 = 0x8510,
+ SCRATCH_REG5 = 0x8514,
+ SCRATCH_REG6 = 0x8518,
+ SCRATCH_REG7 = 0x851c,
+ SCRATCH_UMSK = 0x8540,
+ SCRATCH_ADDR = 0x8544,
+
+ CP_COHER_CNTL = 0x85f0,
+ DEST_BASE_0_ENA_bit = 1 << 0,
+ DEST_BASE_1_ENA_bit = 1 << 1,
+ SO0_DEST_BASE_ENA_bit = 1 << 2,
+ SO1_DEST_BASE_ENA_bit = 1 << 3,
+ SO2_DEST_BASE_ENA_bit = 1 << 4,
+ SO3_DEST_BASE_ENA_bit = 1 << 5,
+ CB0_DEST_BASE_ENA_bit = 1 << 6,
+ CB1_DEST_BASE_ENA_bit = 1 << 7,
+ CB2_DEST_BASE_ENA_bit = 1 << 8,
+ CB3_DEST_BASE_ENA_bit = 1 << 9,
+ CB4_DEST_BASE_ENA_bit = 1 << 10,
+ CB5_DEST_BASE_ENA_bit = 1 << 11,
+ CB6_DEST_BASE_ENA_bit = 1 << 12,
+ CB7_DEST_BASE_ENA_bit = 1 << 13,
+ DB_DEST_BASE_ENA_bit = 1 << 14,
+ CR_DEST_BASE_ENA_bit = 1 << 15,
+ TC_ACTION_ENA_bit = 1 << 23,
+ VC_ACTION_ENA_bit = 1 << 24,
+ CB_ACTION_ENA_bit = 1 << 25,
+ DB_ACTION_ENA_bit = 1 << 26,
+ SH_ACTION_ENA_bit = 1 << 27,
+ SMX_ACTION_ENA_bit = 1 << 28,
+ CR0_ACTION_ENA_bit = 1 << 29,
+ CR1_ACTION_ENA_bit = 1 << 30,
+ CR2_ACTION_ENA_bit = 1 << 31,
+ CP_COHER_SIZE = 0x85f4,
+ CP_COHER_BASE = 0x85f8,
+ CP_COHER_STATUS = 0x85fc,
+ MATCHING_GFX_CNTX_mask = 0xff << 0,
+ MATCHING_GFX_CNTX_shift = 0,
+ MATCHING_CR_CNTX_mask = 0xffff << 8,
+ MATCHING_CR_CNTX_shift = 8,
+ STATUS_bit = 1 << 31,
+
+ CP_STALLED_STAT1 = 0x8674,
+ RBIU_TO_DMA_NOT_RDY_TO_RCV_bit = 1 << 0,
+ RBIU_TO_IBS_NOT_RDY_TO_RCV_bit = 1 << 1,
+ RBIU_TO_SEM_NOT_RDY_TO_RCV_bit = 1 << 2,
+ RBIU_TO_2DREGS_NOT_RDY_TO_RCV_bit = 1 << 3,
+ RBIU_TO_MEMWR_NOT_RDY_TO_RCV_bit = 1 << 4,
+ RBIU_TO_MEMRD_NOT_RDY_TO_RCV_bit = 1 << 5,
+ RBIU_TO_EOPD_NOT_RDY_TO_RCV_bit = 1 << 6,
+ RBIU_TO_RECT_NOT_RDY_TO_RCV_bit = 1 << 7,
+ RBIU_TO_STRMO_NOT_RDY_TO_RCV_bit = 1 << 8,
+ RBIU_TO_PSTAT_NOT_RDY_TO_RCV_bit = 1 << 9,
+ MIU_WAITING_ON_RDREQ_FREE_bit = 1 << 16,
+ MIU_WAITING_ON_WRREQ_FREE_bit = 1 << 17,
+ MIU_NEEDS_AVAIL_WRREQ_PHASE_bit = 1 << 18,
+ RCIU_WAITING_ON_GRBM_FREE_bit = 1 << 24,
+ RCIU_WAITING_ON_VGT_FREE_bit = 1 << 25,
+ RCIU_STALLED_ON_ME_READ_bit = 1 << 26,
+ RCIU_STALLED_ON_DMA_READ_bit = 1 << 27,
+ RCIU_HALTED_BY_REG_VIOLATION_bit = 1 << 28,
+ CP_STALLED_STAT2 = 0x8678,
+ PFP_TO_CSF_NOT_RDY_TO_RCV_bit = 1 << 0,
+ PFP_TO_MEQ_NOT_RDY_TO_RCV_bit = 1 << 1,
+ PFP_TO_VGT_NOT_RDY_TO_RCV_bit = 1 << 2,
+ PFP_HALTED_BY_INSTR_VIOLATION_bit = 1 << 3,
+ MULTIPASS_IB_PENDING_IN_PFP_bit = 1 << 4,
+ ME_BRUSH_WC_NOT_RDY_TO_RCV_bit = 1 << 8,
+ ME_STALLED_ON_BRUSH_LOGIC_bit = 1 << 9,
+ CR_CNTX_NOT_AVAIL_TO_ME_bit = 1 << 10,
+ GFX_CNTX_NOT_AVAIL_TO_ME_bit = 1 << 11,
+ ME_RCIU_NOT_RDY_TO_RCV_bit = 1 << 12,
+ ME_TO_CONST_NOT_RDY_TO_RCV_bit = 1 << 13,
+ ME_WAITING_DATA_FROM_PFP_bit = 1 << 14,
+ ME_WAITING_ON_PARTIAL_FLUSH_bit = 1 << 15,
+ RECT_FIFO_NEEDS_CR_RECT_DONE_bit = 1 << 16,
+ RECT_FIFO_NEEDS_WR_CONFIRM_bit = 1 << 17,
+ EOPD_FIFO_NEEDS_SC_EOP_DONE_bit = 1 << 18,
+ EOPD_FIFO_NEEDS_SMX_EOP_DONE_bit = 1 << 19,
+ EOPD_FIFO_NEEDS_WR_CONFIRM_bit = 1 << 20,
+ EOPD_FIFO_NEEDS_SIGNAL_SEM_bit = 1 << 21,
+ SO_NUMPRIM_FIFO_NEEDS_SOADDR_bit = 1 << 22,
+ SO_NUMPRIM_FIFO_NEEDS_NUMPRIM_bit = 1 << 23,
+ PIPE_STATS_FIFO_NEEDS_SAMPLE_bit = 1 << 24,
+ SURF_SYNC_NEEDS_IDLE_CNTXS_bit = 1 << 30,
+ SURF_SYNC_NEEDS_ALL_CLEAN_bit = 1 << 31,
+ CP_BUSY_STAT = 0x867c,
+ REG_BUS_FIFO_BUSY_bit = 1 << 0,
+ RING_FETCHING_DATA_bit = 1 << 1,
+ INDR1_FETCHING_DATA_bit = 1 << 2,
+ INDR2_FETCHING_DATA_bit = 1 << 3,
+ STATE_FETCHING_DATA_bit = 1 << 4,
+ PRED_FETCHING_DATA_bit = 1 << 5,
+ COHER_CNTR_NEQ_ZERO_bit = 1 << 6,
+ PFP_PARSING_PACKETS_bit = 1 << 7,
+ ME_PARSING_PACKETS_bit = 1 << 8,
+ RCIU_PFP_BUSY_bit = 1 << 9,
+ RCIU_ME_BUSY_bit = 1 << 10,
+ OUTSTANDING_READ_TAGS_bit = 1 << 11,
+ SEM_CMDFIFO_NOT_EMPTY_bit = 1 << 12,
+ SEM_FAILED_AND_HOLDING_bit = 1 << 13,
+ SEM_POLLING_FOR_PASS_bit = 1 << 14,
+ _3D_BUSY_bit = 1 << 15,
+ _2D_BUSY_bit = 1 << 16,
+ CP_STAT = 0x8680,
+ CSF_RING_BUSY_bit = 1 << 0,
+ CSF_WPTR_POLL_BUSY_bit = 1 << 1,
+ CSF_INDIRECT1_BUSY_bit = 1 << 2,
+ CSF_INDIRECT2_BUSY_bit = 1 << 3,
+ CSF_STATE_BUSY_bit = 1 << 4,
+ CSF_PREDICATE_BUSY_bit = 1 << 5,
+ CSF_BUSY_bit = 1 << 6,
+ MIU_RDREQ_BUSY_bit = 1 << 7,
+ MIU_WRREQ_BUSY_bit = 1 << 8,
+ ROQ_RING_BUSY_bit = 1 << 9,
+ ROQ_INDIRECT1_BUSY_bit = 1 << 10,
+ ROQ_INDIRECT2_BUSY_bit = 1 << 11,
+ ROQ_STATE_BUSY_bit = 1 << 12,
+ ROQ_PREDICATE_BUSY_bit = 1 << 13,
+ ROQ_ALIGN_BUSY_bit = 1 << 14,
+ PFP_BUSY_bit = 1 << 15,
+ MEQ_BUSY_bit = 1 << 16,
+ ME_BUSY_bit = 1 << 17,
+ QUERY_BUSY_bit = 1 << 18,
+ SEMAPHORE_BUSY_bit = 1 << 19,
+ INTERRUPT_BUSY_bit = 1 << 20,
+ SURFACE_SYNC_BUSY_bit = 1 << 21,
+ DMA_BUSY_bit = 1 << 22,
+ RCIU_BUSY_bit = 1 << 23,
+ CP_STAT__CP_BUSY_bit = 1 << 31,
+
+ CP_ME_CNTL = 0x86d8,
+ ME_STATMUX_mask = 0xff << 0,
+ ME_STATMUX_shift = 0,
+ ME_HALT_bit = 1 << 28,
+ CP_ME_STATUS = 0x86dc,
+
+ CP_RB_RPTR = 0x8700,
+ RB_RPTR_mask = 0xfffff << 0,
+ RB_RPTR_shift = 0,
+ CP_RB_WPTR_DELAY = 0x8704,
+ PRE_WRITE_TIMER_mask = 0xfffffff << 0,
+ PRE_WRITE_TIMER_shift = 0,
+ PRE_WRITE_LIMIT_mask = 0x0f << 28,
+ PRE_WRITE_LIMIT_shift = 28,
+
+ CP_ROQ_RB_STAT = 0x8780,
+ ROQ_RPTR_PRIMARY_mask = 0x3ff << 0,
+ ROQ_RPTR_PRIMARY_shift = 0,
+ ROQ_WPTR_PRIMARY_mask = 0x3ff << 16,
+ ROQ_WPTR_PRIMARY_shift = 16,
+ CP_ROQ_IB1_STAT = 0x8784,
+ ROQ_RPTR_INDIRECT1_mask = 0x3ff << 0,
+ ROQ_RPTR_INDIRECT1_shift = 0,
+ ROQ_WPTR_INDIRECT1_mask = 0x3ff << 16,
+ ROQ_WPTR_INDIRECT1_shift = 16,
+ CP_ROQ_IB2_STAT = 0x8788,
+ ROQ_RPTR_INDIRECT2_mask = 0x3ff << 0,
+ ROQ_RPTR_INDIRECT2_shift = 0,
+ ROQ_WPTR_INDIRECT2_mask = 0x3ff << 16,
+ ROQ_WPTR_INDIRECT2_shift = 16,
+
+ CP_MEQ_STAT = 0x8794,
+ MEQ_RPTR_mask = 0x3ff << 0,
+ MEQ_RPTR_shift = 0,
+ MEQ_WPTR_mask = 0x3ff << 16,
+ MEQ_WPTR_shift = 16,
+
+ CC_GC_SHADER_PIPE_CONFIG = 0x8950,
+ INACTIVE_QD_PIPES_mask = 0xff << 8,
+ INACTIVE_QD_PIPES_shift = 8,
+ R6XX_MAX_QD_PIPES = 8,
+ INACTIVE_SIMDS_mask = 0xff << 16,
+ INACTIVE_SIMDS_shift = 16,
+ R6XX_MAX_SIMDS = 8,
+ GC_USER_SHADER_PIPE_CONFIG = 0x8954,
+
+ VC_ENHANCE = 0x9714,
+ DB_DEBUG = 0x9830,
+ PREZ_MUST_WAIT_FOR_POSTZ_DONE = 1 << 31,
+
+ DB_WATERMARKS = 0x00009838,
+ DEPTH_FREE_mask = 0x1f << 0,
+ DEPTH_FREE_shift = 0,
+ DEPTH_FLUSH_mask = 0x3f << 5,
+ DEPTH_FLUSH_shift = 5,
+ FORCE_SUMMARIZE_mask = 0x0f << 11,
+ FORCE_SUMMARIZE_shift = 11,
+ DEPTH_PENDING_FREE_mask = 0x1f << 15,
+ DEPTH_PENDING_FREE_shift = 15,
+ DEPTH_CACHELINE_FREE_mask = 0x1f << 20,
+ DEPTH_CACHELINE_FREE_shift = 20,
+ EARLY_Z_PANIC_DISABLE_bit = 1 << 25,
+ LATE_Z_PANIC_DISABLE_bit = 1 << 26,
+ RE_Z_PANIC_DISABLE_bit = 1 << 27,
+ DB_EXTRA_DEBUG_mask = 0x0f << 28,
+ DB_EXTRA_DEBUG_shift = 28,
+
+ CP_RB_BASE = 0xc100,
+ CP_RB_CNTL = 0xc104,
+ RB_BUFSZ_mask = 0x3f << 0,
+ CP_RB_WPTR = 0xc114,
+ RB_WPTR_mask = 0xfffff << 0,
+ RB_WPTR_shift = 0,
+ CP_RB_RPTR_WR = 0xc108,
+ RB_RPTR_WR_mask = 0xfffff << 0,
+ RB_RPTR_WR_shift = 0,
+
+ CP_INT_STATUS = 0xc128,
+ DISABLE_CNTX_SWITCH_INT_STAT_bit = 1 << 0,
+ ENABLE_CNTX_SWITCH_INT_STAT_bit = 1 << 1,
+ SEM_SIGNAL_INT_STAT_bit = 1 << 18,
+ CNTX_BUSY_INT_STAT_bit = 1 << 19,
+ CNTX_EMPTY_INT_STAT_bit = 1 << 20,
+ WAITMEM_SEM_INT_STAT_bit = 1 << 21,
+ PRIV_INSTR_INT_STAT_bit = 1 << 22,
+ PRIV_REG_INT_STAT_bit = 1 << 23,
+ OPCODE_ERROR_INT_STAT_bit = 1 << 24,
+ SCRATCH_INT_STAT_bit = 1 << 25,
+ TIME_STAMP_INT_STAT_bit = 1 << 26,
+ RESERVED_BIT_ERROR_INT_STAT_bit = 1 << 27,
+ DMA_INT_STAT_bit = 1 << 28,
+ IB2_INT_STAT_bit = 1 << 29,
+ IB1_INT_STAT_bit = 1 << 30,
+ RB_INT_STAT_bit = 1 << 31,
+
+// SX_ALPHA_TEST_CONTROL = 0x00028410,
+ ALPHA_FUNC__REF_NEVER = 0,
+ ALPHA_FUNC__REF_ALWAYS = 7,
+// DB_SHADER_CONTROL = 0x0002880c,
+ Z_ORDER__EARLY_Z_THEN_LATE_Z = 2,
+// PA_SU_SC_MODE_CNTL = 0x00028814,
+// POLY_MODE_mask = 0x03 << 3,
+ POLY_MODE__TRIANGLES = 0, POLY_MODE__DUAL_MODE,
+// POLYMODE_FRONT_PTYPE_mask = 0x07 << 5,
+ POLYMODE_PTYPE__POINTS = 0, POLYMODE_PTYPE__LINES, POLYMODE_PTYPE__TRIANGLES,
+ PA_SC_AA_SAMPLE_LOCS_8S_WD1_M = 0x00028c20,
+ DB_SRESULTS_COMPARE_STATE0 = 0x00028d28, /* See autoregs: DB_SRESULTS_COMPARE_STATE1 */
+// DB_SRESULTS_COMPARE_STATE1 = 0x00028d2c,
+ DB_ALPHA_TO_MASK = 0x00028d44, /* four 2-bit OFFSETn fields at bits 8..15 */
+ ALPHA_TO_MASK_ENABLE = 1 << 0,
+ ALPHA_TO_MASK_OFFSET0_mask = 0x03 << 8,
+ ALPHA_TO_MASK_OFFSET0_shift = 8,
+ ALPHA_TO_MASK_OFFSET1_mask = 0x03 << 10, /* was 0x03 << 8: mask must match its shift */
+ ALPHA_TO_MASK_OFFSET1_shift = 10,
+ ALPHA_TO_MASK_OFFSET2_mask = 0x03 << 12, /* was 0x03 << 8: mask must match its shift */
+ ALPHA_TO_MASK_OFFSET2_shift = 12,
+ ALPHA_TO_MASK_OFFSET3_mask = 0x03 << 14, /* was 0x03 << 8: mask must match its shift */
+ ALPHA_TO_MASK_OFFSET3_shift = 14,
+
+// SQ_VTX_CONSTANT_WORD2_0 = 0x00038008,
+// SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_mask = 0x3f << 20,
+ FMT_INVALID=0, FMT_8, FMT_4_4, FMT_3_3_2,
+ FMT_16=5, FMT_16_FLOAT, FMT_8_8,
+ FMT_5_6_5, FMT_6_5_5, FMT_1_5_5_5, FMT_4_4_4_4,
+ FMT_5_5_5_1, FMT_32, FMT_32_FLOAT, FMT_16_16,
+ FMT_16_16_FLOAT=16, FMT_8_24, FMT_8_24_FLOAT, FMT_24_8,
+ FMT_24_8_FLOAT, FMT_10_11_11, FMT_10_11_11_FLOAT, FMT_11_11_10,
+ FMT_11_11_10_FLOAT, FMT_2_10_10_10, FMT_8_8_8_8, FMT_10_10_10_2,
+ FMT_X24_8_32_FLOAT, FMT_32_32, FMT_32_32_FLOAT, FMT_16_16_16_16,
+ FMT_16_16_16_16_FLOAT=32, FMT_32_32_32_32=34, FMT_32_32_32_32_FLOAT,
+ FMT_1 = 37, FMT_GB_GR=39,
+ FMT_BG_RG, FMT_32_AS_8, FMT_32_AS_8_8, FMT_5_9_9_9_SHAREDEXP,
+ FMT_8_8_8, FMT_16_16_16, FMT_16_16_16_FLOAT, FMT_32_32_32,
+ FMT_32_32_32_FLOAT=48,
+
+// High level register file lengths
+ SQ_ALU_CONSTANT = SQ_ALU_CONSTANT0_0, /* 256 PS, 256 VS */
+ SQ_ALU_CONSTANT_ps_num = 256,
+ SQ_ALU_CONSTANT_vs_num = 256,
+ SQ_ALU_CONSTANT_all_num = 512,
+ SQ_ALU_CONSTANT_offset = 16,
+ SQ_ALU_CONSTANT_ps = 0,
+ SQ_ALU_CONSTANT_vs = SQ_ALU_CONSTANT_ps + SQ_ALU_CONSTANT_ps_num,
+ SQ_TEX_RESOURCE = SQ_TEX_RESOURCE_WORD0_0, /* 160 PS, 160 VS, 16 FS, 160 GS */
+ SQ_TEX_RESOURCE_ps_num = 160,
+ SQ_TEX_RESOURCE_vs_num = 160,
+ SQ_TEX_RESOURCE_fs_num = 16,
+ SQ_TEX_RESOURCE_gs_num = 160,
+ SQ_TEX_RESOURCE_all_num = 496,
+ SQ_TEX_RESOURCE_offset = 28,
+ SQ_TEX_RESOURCE_ps = 0,
+ SQ_TEX_RESOURCE_vs = SQ_TEX_RESOURCE_ps + SQ_TEX_RESOURCE_ps_num,
+ SQ_TEX_RESOURCE_fs = SQ_TEX_RESOURCE_vs + SQ_TEX_RESOURCE_vs_num,
+ SQ_TEX_RESOURCE_gs = SQ_TEX_RESOURCE_fs + SQ_TEX_RESOURCE_fs_num,
+ SQ_VTX_RESOURCE = SQ_VTX_CONSTANT_WORD0_0, /* 160 PS, 160 VS, 16 FS, 160 GS */
+ SQ_VTX_RESOURCE_ps_num = 160,
+ SQ_VTX_RESOURCE_vs_num = 160,
+ SQ_VTX_RESOURCE_fs_num = 16,
+ SQ_VTX_RESOURCE_gs_num = 160,
+ SQ_VTX_RESOURCE_all_num = 496,
+ SQ_VTX_RESOURCE_offset = 28,
+ SQ_VTX_RESOURCE_ps = 0,
+ SQ_VTX_RESOURCE_vs = SQ_VTX_RESOURCE_ps + SQ_VTX_RESOURCE_ps_num,
+ SQ_VTX_RESOURCE_fs = SQ_VTX_RESOURCE_vs + SQ_VTX_RESOURCE_vs_num,
+ SQ_VTX_RESOURCE_gs = SQ_VTX_RESOURCE_fs + SQ_VTX_RESOURCE_fs_num,
+ SQ_TEX_SAMPLER_WORD = SQ_TEX_SAMPLER_WORD0_0, /* 18 per PS, VS, GS */
+ SQ_TEX_SAMPLER_WORD_ps_num = 18,
+ SQ_TEX_SAMPLER_WORD_vs_num = 18,
+ SQ_TEX_SAMPLER_WORD_gs_num = 18,
+ SQ_TEX_SAMPLER_WORD_all_num = 54,
+ SQ_TEX_SAMPLER_WORD_offset = 12,
+ SQ_TEX_SAMPLER_WORD_ps = 0,
+ SQ_TEX_SAMPLER_WORD_vs = SQ_TEX_SAMPLER_WORD_ps + SQ_TEX_SAMPLER_WORD_ps_num,
+ SQ_TEX_SAMPLER_WORD_gs = SQ_TEX_SAMPLER_WORD_vs + SQ_TEX_SAMPLER_WORD_vs_num,
+ SQ_LOOP_CONST = SQ_LOOP_CONST_0, /* 32 per PS, VS, GS */
+ SQ_LOOP_CONST_ps_num = 32,
+ SQ_LOOP_CONST_vs_num = 32,
+ SQ_LOOP_CONST_gs_num = 32,
+ SQ_LOOP_CONST_all_num = 96,
+ SQ_LOOP_CONST_offset = 4,
+ SQ_LOOP_CONST_ps = 0,
+ SQ_LOOP_CONST_vs = SQ_LOOP_CONST_ps + SQ_LOOP_CONST_ps_num,
+ SQ_LOOP_CONST_gs = SQ_LOOP_CONST_vs + SQ_LOOP_CONST_vs_num,
+} ;
+
+
+#endif
diff --git a/r600_reg_r7xx.h b/r600_reg_r7xx.h
new file mode 100644
index 0000000..e5c01c8
--- /dev/null
+++ b/r600_reg_r7xx.h
@@ -0,0 +1,149 @@
+/*
+ * RadeonHD R6xx, R7xx Register documentation
+ *
+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
+ * Copyright (C) 2008-2009 Matthias Hopf
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _R600_REG_R7xx_H_
+#define _R600_REG_R7xx_H_
+
+/*
+ * Register update for R7xx chips
+ */
+
+enum {
+
+ R7XX_MC_VM_FB_LOCATION = 0x00002024,
+
+// GRBM_STATUS = 0x00008010,
+ R7XX_TA_BUSY_bit = 1 << 14,
+
+ R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ = 0x00008d8c,
+ RING0_OFFSET_mask = 0xff << 0,
+ RING0_OFFSET_shift = 0,
+ ISOLATE_ES_ENABLE_bit = 1 << 12,
+ ISOLATE_GS_ENABLE_bit = 1 << 13,
+ VS_PC_LIMIT_ENABLE_bit = 1 << 14,
+
+// SQ_ALU_WORD0 = 0x00008dfc,
+// SRC0_SEL_mask = 0x1ff << 0,
+// SRC1_SEL_mask = 0x1ff << 13,
+ R7xx_SQ_ALU_SRC_1_DBL_L = 0xf4,
+ R7xx_SQ_ALU_SRC_1_DBL_M = 0xf5,
+ R7xx_SQ_ALU_SRC_0_5_DBL_L = 0xf6,
+ R7xx_SQ_ALU_SRC_0_5_DBL_M = 0xf7,
+// INDEX_MODE_mask = 0x07 << 26,
+ R7xx_SQ_INDEX_GLOBAL = 0x05,
+ R7xx_SQ_INDEX_GLOBAL_AR_X = 0x06,
+ R6xx_SQ_ALU_WORD1_OP2 = 0x00008dfc,
+ R7xx_SQ_ALU_WORD1_OP2_V2 = 0x00008dfc,
+ R6xx_FOG_MERGE_bit = 1 << 5,
+ R6xx_OMOD_mask = 0x03 << 6,
+ R7xx_OMOD_mask = 0x03 << 5,
+ R6xx_OMOD_shift = 6,
+ R7xx_OMOD_shift = 5,
+ R6xx_SQ_ALU_WORD1_OP2__ALU_INST_mask = 0x3ff << 8,
+ R7xx_SQ_ALU_WORD1_OP2_V2__ALU_INST_mask = 0x7ff << 7,
+ R6xx_SQ_ALU_WORD1_OP2__ALU_INST_shift = 8,
+ R7xx_SQ_ALU_WORD1_OP2_V2__ALU_INST_shift = 7,
+ R7xx_SQ_OP2_INST_FREXP_64 = 0x07,
+ R7xx_SQ_OP2_INST_ADD_64 = 0x17,
+ R7xx_SQ_OP2_INST_MUL_64 = 0x1b,
+ R7xx_SQ_OP2_INST_FLT64_TO_FLT32 = 0x1c,
+ R7xx_SQ_OP2_INST_FLT32_TO_FLT64 = 0x1d,
+ R7xx_SQ_OP2_INST_LDEXP_64 = 0x7a,
+ R7xx_SQ_OP2_INST_FRACT_64 = 0x7b,
+ R7xx_SQ_OP2_INST_PRED_SETGT_64 = 0x7c,
+ R7xx_SQ_OP2_INST_PRED_SETE_64 = 0x7d,
+ R7xx_SQ_OP2_INST_PRED_SETGE_64 = 0x7e,
+// SQ_ALU_WORD1_OP3 = 0x00008dfc,
+// SRC2_SEL_mask = 0x1ff << 0,
+// R7xx_SQ_ALU_SRC_1_DBL_L = 0xf4,
+// R7xx_SQ_ALU_SRC_1_DBL_M = 0xf5,
+// R7xx_SQ_ALU_SRC_0_5_DBL_L = 0xf6,
+// R7xx_SQ_ALU_SRC_0_5_DBL_M = 0xf7,
+// SQ_ALU_WORD1_OP3__ALU_INST_mask = 0x1f << 13,
+ R7xx_SQ_OP3_INST_MULADD_64 = 0x08,
+ R7xx_SQ_OP3_INST_MULADD_64_M2 = 0x09,
+ R7xx_SQ_OP3_INST_MULADD_64_M4 = 0x0a,
+ R7xx_SQ_OP3_INST_MULADD_64_D2 = 0x0b,
+// SQ_CF_ALU_WORD1 = 0x00008dfc,
+ R6xx_USES_WATERFALL_bit = 1 << 25,
+ R7xx_SQ_CF_ALU_WORD1__ALT_CONST_bit = 1 << 25,
+// SQ_CF_ALLOC_EXPORT_WORD0 = 0x00008dfc,
+// ARRAY_BASE_mask = 0x1fff << 0,
+// TYPE_mask = 0x03 << 13,
+// SQ_EXPORT_PARAM = 0x02,
+// X_UNUSED_FOR_SX_EXPORTS = 0x03,
+// ELEM_SIZE_mask = 0x03 << 30,
+// SQ_CF_ALLOC_EXPORT_WORD1 = 0x00008dfc,
+// SQ_CF_ALLOC_EXPORT_WORD1__CF_INST_mask = 0x7f << 23,
+ R7xx_SQ_CF_INST_MEM_EXPORT = 0x3a,
+// SQ_CF_WORD1 = 0x00008dfc,
+// SQ_CF_WORD1__COUNT_mask = 0x07 << 10,
+ R7xx_COUNT_3_bit = 1 << 19,
+// SQ_CF_WORD1__CF_INST_mask = 0x7f << 23,
+ R7xx_SQ_CF_INST_END_PROGRAM = 0x19,
+ R7xx_SQ_CF_INST_WAIT_ACK = 0x1a,
+ R7xx_SQ_CF_INST_TEX_ACK = 0x1b,
+ R7xx_SQ_CF_INST_VTX_ACK = 0x1c,
+ R7xx_SQ_CF_INST_VTX_TC_ACK = 0x1d,
+// SQ_VTX_WORD0 = 0x00008dfc,
+// VTX_INST_mask = 0x1f << 0,
+ R7xx_SQ_VTX_INST_MEM = 0x02,
+// SQ_VTX_WORD2 = 0x00008dfc,
+ R7xx_SQ_VTX_WORD2__ALT_CONST_bit = 1 << 20,
+
+// SQ_TEX_WORD0 = 0x00008dfc,
+// TEX_INST_mask = 0x1f << 0,
+ R7xx_X_MEMORY_READ = 0x02,
+ R7xx_SQ_TEX_INST_KEEP_GRADIENTS = 0x0a,
+ R7xx_X_FETCH4_LOAD4_INSTRUCTION_FOR_DX10_1 = 0x0f,
+ R7xx_SQ_TEX_WORD0__ALT_CONST_bit = 1 << 24,
+
+ R7xx_PA_SC_EDGERULE = 0x00028230,
+ R7xx_SPI_THREAD_GROUPING = 0x000286c8,
+ PS_GROUPING_mask = 0x1f << 0,
+ PS_GROUPING_shift = 0,
+ VS_GROUPING_mask = 0x1f << 8,
+ VS_GROUPING_shift = 8,
+ GS_GROUPING_mask = 0x1f << 16,
+ GS_GROUPING_shift = 16,
+ ES_GROUPING_mask = 0x1f << 24,
+ ES_GROUPING_shift = 24,
+ R7xx_CB_SHADER_CONTROL = 0x000287a0,
+ RT0_ENABLE_bit = 1 << 0,
+ RT1_ENABLE_bit = 1 << 1,
+ RT2_ENABLE_bit = 1 << 2,
+ RT3_ENABLE_bit = 1 << 3,
+ RT4_ENABLE_bit = 1 << 4,
+ RT5_ENABLE_bit = 1 << 5,
+ RT6_ENABLE_bit = 1 << 6,
+ RT7_ENABLE_bit = 1 << 7,
+// DB_ALPHA_TO_MASK = 0x00028d44,
+ R7xx_OFFSET_ROUND_bit = 1 << 16,
+// SQ_TEX_SAMPLER_MISC_0 = 0x0003d03c,
+ R7xx_TRUNCATE_COORD_bit = 1 << 9,
+ R7xx_DISABLE_CUBE_WRAP_bit = 1 << 10,
+
+} ;
+
+#endif /* _R600_REG_R7xx_H_ */
diff --git a/r600_shader.h b/r600_shader.h
new file mode 100644
index 0000000..03f6416
--- /dev/null
+++ b/r600_shader.h
@@ -0,0 +1,347 @@
+/*
+ * RadeonHD R6xx, R7xx DRI driver
+ *
+ * Copyright (C) 2008-2009 Alexander Deucher
+ * Copyright (C) 2008-2009 Matthias Hopf
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Shader macros
+ */
+
+#ifndef __SHADER_H__
+#define __SHADER_H__
+
+
+/* Restrictions of ALU instructions
+ * order of scalar ops is always x,y,z,w,t(rans), last to be indicated by last==1.
+ * max of 3 different src GPRs per instr.
+ * max of 4 different cfile constant components per instr.
+ * max of 2 (different) constants (any type) for t.
+ * bank swizzle (see below).
+ * GPR write stalls read of same register. Auto-replaced by PV/PS, NOP needed if registers are relative to
+ * different indices (gpr,loop,nothing).
+ * may use constant registers or constant cache, but not both.
+ */
+
+/* Bank_swizzle: (pp. 297ff)
+ * Only one of each x,y,z,w GPR component can be loaded per cycle (3 cycles per instr, called 0-2).
+ * per scalar instruction bank_swizzle can select which cycle each operand comes from. e.g.:
+ * SRC0 SRC1 SRC2 SWIZZLE cycle0 cycle1 cycle2
+ * 1.x 2.x 012 1.x 2.x -
+ * 3.x 1.y 201 1.y - 3.x
+ * 2.x 1.y 102 (1.y) (2.x) -
+ * If data is read in a cycle, multiple scalar instructions can reference it.
+ * Special case: square() - i.e. same component in src0+src1 doesn't need read port -> ignores swizzle for src1.
+ * No restrictions for constants or PV/PS.
+ * t can load multiple components in a single cycle slot, but has to share cycles with xyzw.
+ * t with single constant may not load GPRs or PV/PS in cycle 0 (careful with ALU_TRANS_210).
+ * t with two constants may only load GPRs or PV/PS in cycle 2.
+ */
+
+
+/* Order of instructions: All CF, All ALU, All Tex/Vtx fetches */
+
+
+// CF insts
+// addr
+#define ADDR(x) (x)
+// pc
+#define POP_COUNT(x) (x)
+// const
+#define CF_CONST(x) (x)
+// cond
+#define COND(x) (x) // SQ_COND_*
+// count
+#define COUNT(x) ((x) ? ((x) - 1) : 0)
+//r7xx
+#define COUNT_3(x) (x)
+// call count
+#define CALL_COUNT(x) (x)
+// eop
+#define END_OF_PROGRAM(x) (x)
+// vpm
+#define VALID_PIXEL_MODE(x) (x)
+// cf inst
+#define CF_INST(x) (x) // SQ_CF_INST_*
+
+// wqm
+#define WHOLE_QUAD_MODE(x) (x)
+// barrier
+#define BARRIER(x) (x)
+//kb0
+#define KCACHE_BANK0(x) (x)
+//kb1
+#define KCACHE_BANK1(x) (x)
+// km0/1
+#define KCACHE_MODE0(x) (x)
+#define KCACHE_MODE1(x) (x) // SQ_CF_KCACHE_*
+//
+#define KCACHE_ADDR0(x) (x)
+#define KCACHE_ADDR1(x) (x)
+// uw
+#define USES_WATERFALL(x) (x)
+
+#define ARRAY_BASE(x) (x)
+// export pixel
+#define CF_PIXEL_MRT0 0
+#define CF_PIXEL_MRT1 1
+#define CF_PIXEL_MRT2 2
+#define CF_PIXEL_MRT3 3
+#define CF_PIXEL_MRT4 4
+#define CF_PIXEL_MRT5 5
+#define CF_PIXEL_MRT6 6
+#define CF_PIXEL_MRT7 7
+// *_FOG: r6xx only
+#define CF_PIXEL_MRT0_FOG 16
+#define CF_PIXEL_MRT1_FOG 17
+#define CF_PIXEL_MRT2_FOG 18
+#define CF_PIXEL_MRT3_FOG 19
+#define CF_PIXEL_MRT4_FOG 20
+#define CF_PIXEL_MRT5_FOG 21
+#define CF_PIXEL_MRT6_FOG 22
+#define CF_PIXEL_MRT7_FOG 23
+#define CF_PIXEL_Z 61
+// export pos
+#define CF_POS0 60
+#define CF_POS1 61
+#define CF_POS2 62
+#define CF_POS3 63
+// export param
+// 0...31
+#define TYPE(x) (x) // SQ_EXPORT_*
+#if 0
+// type export
+#define SQ_EXPORT_PIXEL 0
+#define SQ_EXPORT_POS 1
+#define SQ_EXPORT_PARAM 2
+// reserved 3
+// type mem
+#define SQ_EXPORT_WRITE 0
+#define SQ_EXPORT_WRITE_IND 1
+#define SQ_EXPORT_WRITE_ACK 2
+#define SQ_EXPORT_WRITE_IND_ACK 3
+#endif
+
+#define RW_GPR(x) (x)
+#define RW_REL(x) (x)
+#define ABSOLUTE 0
+#define RELATIVE 1
+#define INDEX_GPR(x) (x)
+#define ELEM_SIZE(x) ((x) ? ((x) - 1) : 0)
+#define COMP_MASK(x) (x)
+#define R6xx_ELEM_LOOP(x) (x)
+#define BURST_COUNT(x) ((x) ? ((x) - 1) : 0)
+
+// swiz
+#define SRC_SEL_X(x) (x) // SQ_SEL_* each
+#define SRC_SEL_Y(x) (x)
+#define SRC_SEL_Z(x) (x)
+#define SRC_SEL_W(x) (x)
+
+#define CF_DWORD0(addr) (addr)
+// R7xx has another entry (COUNT3), but that is only used for adding a bit to count.
+// We allow one more bit for count in the argument of the macro on R7xx instead.
+// Encoded count value: R6xx: [0,7] (3 bits at 10), R7xx: [0,15] (extra bit 3 goes to bit 19)
+#define CF_DWORD1(pc, cf_const, cond, count, call_count, eop, vpm, cf_inst, wqm, b) \
+ (((pc) << 0) | ((cf_const) << 3) | ((cond) << 8) | (((count) & 7) << 10) | (((count) >> 3) << 19) | \
+ ((call_count) << 13) | ((eop) << 21) | ((vpm) << 22) | ((cf_inst) << 23) | \
+ ((wqm) << 30) | ((b) << 31))
+
+#define CF_ALU_DWORD0(addr, kb0, kb1, km0) (((addr) << 0) | ((kb0) << 22) | ((kb1) << 26) | ((km0) << 30))
+#define CF_ALU_DWORD1(km1, kcache_addr0, kcache_addr1, count, uw, cf_inst, wqm, b) \
+ (((km1) << 0) | ((kcache_addr0) << 2) | ((kcache_addr1) << 10) | \
+ ((count) << 18) | ((uw) << 25) | ((cf_inst) << 26) | ((wqm) << 30) | ((b) << 31))
+
+#define CF_ALLOC_IMP_EXP_DWORD0(array_base, type, rw_gpr, rr, index_gpr, es) \
+ (((array_base) << 0) | ((type) << 13) | ((rw_gpr) << 15) | ((rr) << 22) | ((index_gpr) << 23) | \
+ ((es) << 30))
+// R7xx apparently doesn't have the ELEM_LOOP entry any more
+// We still expose it, but ELEM_LOOP is explicitly R6xx now.
+// TODO: is this just forgotten in the docs, or really not available any more?
+#define CF_ALLOC_IMP_EXP_DWORD1_BUF(array_size, comp_mask, el, bc, eop, vpm, cf_inst, wqm, b) \
+ (((array_size) << 0) | ((comp_mask) << 12) | ((el) << 16) | ((bc) << 17) | \
+ ((eop) << 21) | ((vpm) << 22) | ((cf_inst) << 23) | ((wqm) << 30) | ((b) << 31))
+#define CF_ALLOC_IMP_EXP_DWORD1_SWIZ(sel_x, sel_y, sel_z, sel_w, el, bc, eop, vpm, cf_inst, wqm, b) \
+ (((sel_x) << 0) | ((sel_y) << 3) | ((sel_z) << 6) | ((sel_w) << 9) | ((el) << 16) | \
+ ((bc) << 17) | ((eop) << 21) | ((vpm) << 22) | ((cf_inst) << 23) | \
+ ((wqm) << 30) | ((b) << 31))
+
+// ALU clause insts
+#define SRC0_SEL(x) (x)
+#define SRC1_SEL(x) (x)
+#define SRC2_SEL(x) (x)
+// src[0-2]_sel
+// 0-127 GPR
+// 128-159 kcache constants bank 0
+// 160-191 kcache constants bank 1
+// 248-255 special SQ_ALU_SRC_* (0, 1, etc.)
+
+#define SRC0_REL(x) (x)
+#define SRC1_REL(x) (x)
+#define SRC2_REL(x) (x)
+// elem
+#define SRC0_ELEM(x) (x)
+#define SRC1_ELEM(x) (x)
+#define SRC2_ELEM(x) (x)
+#define ELEM_X 0
+#define ELEM_Y 1
+#define ELEM_Z 2
+#define ELEM_W 3
+// neg
+#define SRC0_NEG(x) (x)
+#define SRC1_NEG(x) (x)
+#define SRC2_NEG(x) (x)
+// im
+#define INDEX_MODE(x) (x) // SQ_INDEX_*
+// ps
+#define PRED_SEL(x) (x) // SQ_PRED_SEL_*
+// last
+#define LAST(x) (x)
+// abs
+#define SRC0_ABS(x) (x)
+#define SRC1_ABS(x) (x)
+// uem
+#define UPDATE_EXECUTE_MASK(x) (x)
+// up
+#define UPDATE_PRED(x) (x)
+// wm
+#define WRITE_MASK(x) (x)
+// fm
+#define FOG_MERGE(x) (x)
+// omod
+#define OMOD(x) (x) // SQ_ALU_OMOD_*
+// alu inst
+#define ALU_INST(x) (x) // SQ_ALU_INST_*
+//bs
+#define BANK_SWIZZLE(x) (x) // SQ_ALU_VEC_*
+#define DST_GPR(x) (x)
+#define DST_REL(x) (x)
+#define DST_ELEM(x) (x)
+#define CLAMP(x) (x)
+
+#define ALU_DWORD0(src0_sel, s0r, s0e, s0n, src1_sel, s1r, s1e, s1n, im, ps, last) \
+ (((src0_sel) << 0) | ((s0r) << 9) | ((s0e) << 10) | ((s0n) << 12) | \
+ ((src1_sel) << 13) | ((s1r) << 22) | ((s1e) << 23) | ((s1n) << 25) | \
+ ((im) << 26) | ((ps) << 29) | ((last) << 31))
+// R7xx has alu_inst at a different slot, and no fog merge any more (no fixed-function fog any more)
+#define R6xx_ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, fm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) \
+ (((s0a) << 0) | ((s1a) << 1) | ((uem) << 2) | ((up) << 3) | ((wm) << 4) | \
+ ((fm) << 5) | ((omod) << 6) | ((alu_inst) << 8) | ((bs) << 18) | ((dst_gpr) << 21) | \
+ ((dr) << 28) | ((de) << 29) | ((clamp) << 31))
+#define R7xx_ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) \
+ (((s0a) << 0) | ((s1a) << 1) | ((uem) << 2) | ((up) << 3) | ((wm) << 4) | \
+ ((omod) << 5) | ((alu_inst) << 7) | ((bs) << 18) | ((dst_gpr) << 21) | \
+ ((dr) << 28) | ((de) << 29) | ((clamp) << 31))
+// This is a general chipset macro, but due to selection by chipid typically not usable in static arrays
+// Fog is NOT USED on R7xx, even if specified.
+#define ALU_DWORD1_OP2(chipid, s0a, s1a, uem, up, wm, fm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) \
+ ((chipid) <= CHIPSET_RV670 ? \
+ R6xx_ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, fm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) : \
+ R7xx_ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, omod, alu_inst, bs, dst_gpr, dr, de, clamp))
+#define ALU_DWORD1_OP3(src2_sel, s2r, s2e, s2n, alu_inst, bs, dst_gpr, dr, de, clamp) \
+ (((src2_sel) << 0) | ((s2r) << 9) | ((s2e) << 10) | ((s2n) << 12) | \
+ ((alu_inst) << 13) | ((bs) << 18) | ((dst_gpr) << 21) | ((dr) << 28) | \
+ ((de) << 29) | ((clamp) << 31))
+
+// VTX clause insts
+// vtx insts
+#define VTX_INST(x) (x) // SQ_VTX_INST_*
+
+// fetch type
+#define FETCH_TYPE(x) (x) // SQ_VTX_FETCH_*
+
+#define FETCH_WHOLE_QUAD(x) (x)
+#define BUFFER_ID(x) (x)
+#define SRC_GPR(x) (x)
+#define SRC_REL(x) (x)
+#define MEGA_FETCH_COUNT(x) ((x) ? ((x) - 1) : 0)
+
+#define SEMANTIC_ID(x) (x)
+#define DST_SEL_X(x) (x)
+#define DST_SEL_Y(x) (x)
+#define DST_SEL_Z(x) (x)
+#define DST_SEL_W(x) (x)
+#define USE_CONST_FIELDS(x) (x)
+#define DATA_FORMAT(x) (x)
+// num format
+#define NUM_FORMAT_ALL(x) (x) // SQ_NUM_FORMAT_*
+// format comp
+#define FORMAT_COMP_ALL(x) (x) // SQ_FORMAT_COMP_*
+// sma
+#define SRF_MODE_ALL(x) (x)
+#define SRF_MODE_ZERO_CLAMP_MINUS_ONE 0
+#define SRF_MODE_NO_ZERO 1
+#define OFFSET(x) (x)
+// endian swap
+#define ENDIAN_SWAP(x) (x) // SQ_ENDIAN_*
+#define CONST_BUF_NO_STRIDE(x) (x)
+// mf
+#define MEGA_FETCH(x) (x)
+
+#define VTX_DWORD0(vtx_inst, ft, fwq, buffer_id, src_gpr, sr, ssx, mfc) \
+ (((vtx_inst) << 0) | ((ft) << 5) | ((fwq) << 7) | ((buffer_id) << 8) | \
+ ((src_gpr) << 16) | ((sr) << 23) | ((ssx) << 24) | ((mfc) << 26))
+#define VTX_DWORD1_SEM(semantic_id, dsx, dsy, dsz, dsw, ucf, data_format, nfa, fca, sma) \
+ (((semantic_id) << 0) | ((dsx) << 9) | ((dsy) << 12) | ((dsz) << 15) | ((dsw) << 18) | \
+ ((ucf) << 21) | ((data_format) << 22) | ((nfa) << 28) | ((fca) << 30) | ((sma) << 31))
+#define VTX_DWORD1_GPR(dst_gpr, dr, dsx, dsy, dsz, dsw, ucf, data_format, nfa, fca, sma) \
+ (((dst_gpr) << 0) | ((dr) << 7) | ((dsx) << 9) | ((dsy) << 12) | ((dsz) << 15) | ((dsw) << 18) | \
+ ((ucf) << 21) | ((data_format) << 22) | ((nfa) << 28) | ((fca) << 30) | ((sma) << 31))
+#define VTX_DWORD2(offset, es, cbns, mf) \
+ (((offset) << 0) | ((es) << 16) | ((cbns) << 18) | ((mf) << 19))
+#define VTX_DWORD_PAD 0x00000000
+
+// TEX clause insts
+// tex insts
+#define TEX_INST(x) (x) // SQ_TEX_INST_*
+
+#define BC_FRAC_MODE(x) (x)
+#define FETCH_WHOLE_QUAD(x) (x)
+#define RESOURCE_ID(x) (x)
+#define R7xx_ALT_CONST(x) (x)
+
+#define LOD_BIAS(x) (x)
+//ct
+#define COORD_TYPE_X(x) (x)
+#define COORD_TYPE_Y(x) (x)
+#define COORD_TYPE_Z(x) (x)
+#define COORD_TYPE_W(x) (x)
+#define TEX_UNNORMALIZED 0
+#define TEX_NORMALIZED 1
+#define OFFSET_X(x) (x)
+#define OFFSET_Y(x) (x)
+#define OFFSET_Z(x) (x)
+#define SAMPLER_ID(x) (x)
+
+// R7xx has an additional parameter ALT_CONST. We always expose it, but ALT_CONST is R7xx only
+#define TEX_DWORD0(tex_inst, bfm, fwq, resource_id, src_gpr, sr, ac) \
+ (((tex_inst) << 0) | ((bfm) << 5) | ((fwq) << 7) | ((resource_id) << 8) | \
+ ((src_gpr) << 16) | ((sr) << 23) | ((ac) << 24))
+#define TEX_DWORD1(dst_gpr, dr, dsx, dsy, dsz, dsw, lod_bias, ctx, cty, ctz, ctw) \
+ (((dst_gpr) << 0) | ((dr) << 7) | ((dsx) << 9) | ((dsy) << 12) | ((dsz) << 15) | ((dsw) << 18) | \
+ ((lod_bias) << 21) | ((ctx) << 28) | ((cty) << 29) | ((ctz) << 30) | ((ctw) << 31))
+#define TEX_DWORD2(offset_x, offset_y, offset_z, sampler_id, ssx, ssy, ssz, ssw) \
+ (((offset_x) << 0) | ((offset_y) << 5) | ((offset_z) << 10) | ((sampler_id) << 15) | \
+ ((ssx) << 20) | ((ssy) << 23) | ((ssz) << 26) | ((ssw) << 29))
+#define TEX_DWORD_PAD 0x00000000
+
+
+#endif
diff --git a/r600_state.h b/r600_state.h
new file mode 100644
index 0000000..8acf4c8
--- /dev/null
+++ b/r600_state.h
@@ -0,0 +1,194 @@
+/*
+ * RadeonHD R6xx, R7xx DRI driver
+ *
+ * Copyright (C) 2008-2009 Alexander Deucher
+ * Copyright (C) 2008-2009 Matthias Hopf
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Subsystem state definitions
+ */
+
+#ifndef __R600_STATE_H__
+#define __R600_STATE_H__
+
+/* The structures below use uint32_t / uint64_t; include the header that
+ * defines them so this file does not depend on the includer's include order. */
+#include <stdint.h>
+
+/* Boolean flag type used by the state structures */
+typedef int bool_t;
+
+
+/*
+ * Sequencer / thread handling.
+ * Fields come in ps/vs/gs/es groups, one value per shader type.
+ */
+typedef struct {
+    /* priorities */
+    int ps_prio, vs_prio, gs_prio, es_prio;
+    /* GPR counts, plus the temp GPR count */
+    int num_ps_gprs, num_vs_gprs, num_gs_gprs, num_es_gprs;
+    int num_temp_gprs;
+    /* thread counts */
+    int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads;
+    /* stack entry counts */
+    int num_ps_stack_entries, num_vs_stack_entries;
+    int num_gs_stack_entries, num_es_stack_entries;
+    /* open question from the original author: does this belong here? */
+    int num_qd_pipes;
+} sq_config_t;
+
+/*
+ * Color buffer / render target description.
+ */
+typedef struct {
+    int id, w, h;
+    uint64_t base;
+    int format, endian;
+    int array_mode;     /* tiling */
+    int number_type, read_size, comp_swap, tile_mode;
+    int blend_clamp, clear_color, blend_bypass, blend_float32;
+    int simple_float, round_mode, tile_compact, source_format;
+} cb_config_t;
+
+/*
+ * Depth buffer description.
+ */
+typedef struct {
+    int w, h;
+    uint64_t base;
+    int format, read_size;
+    int array_mode;     /* tiling */
+    int tile_surface_en, tile_compact, zrange_precision;
+} db_config_t;
+
+/*
+ * Shader setup parameters (handed to vs_setup() / ps_setup() by the tests).
+ */
+typedef struct {
+    uint64_t shader_addr;
+    int num_gprs, stack_size;
+    int dx10_clamp, fetch_cache_lines, clamp_consts;
+    int export_mode, uncached_first_inst;
+} shader_config_t;
+
+/*
+ * Vertex buffer / vtx resource description.
+ */
+typedef struct {
+    int id;
+    uint64_t vb_addr;
+    uint32_t vtx_num_entries, vtx_size_dw;
+    int clamp_x, format;
+    int num_format_all, format_comp_all, srf_mode_all;
+    int endian, mem_req_size;
+} vtx_resource_t;
+
+/*
+ * Texture resource description.
+ */
+typedef struct {
+    int id;
+    int w, h, pitch, depth, dim;
+    int tile_mode, tile_type;
+    int format;
+    uint64_t base, mip_base;
+    int format_comp_x, format_comp_y, format_comp_z, format_comp_w;
+    int num_format_all, srf_mode_all, force_degamma;
+    int endian, request_size;
+    int dst_sel_x, dst_sel_y, dst_sel_z, dst_sel_w;
+    int base_level, last_level;
+    int base_array, last_array;
+    int mpeg_clamp, perf_modulation, interlaced;
+} tex_resource_t;
+
+/*
+ * Texture sampler description.
+ * Fields tagged "(?)" were marked as unconfirmed by the original author.
+ */
+typedef struct {
+    int id;
+
+    /* Clamping */
+    int clamp_x;
+    int clamp_y;
+    int clamp_z;
+    int border_color;
+
+    /* Filtering */
+    int xy_mag_filter;
+    int xy_min_filter;
+    int z_filter;
+    int mip_filter;
+    bool_t high_precision_filter;   /* (?) */
+    int perf_mip;                   /* (?) 0-7 */
+    int perf_z;                     /* (?) 3 */
+
+    /* LoD selection */
+    int min_lod;                    /* 0-0x3ff */
+    int max_lod;                    /* 0-0x3ff */
+    int lod_bias;                   /* 0-0xfff (signed?) */
+    int lod_bias2;                  /* (?) 0-0xfff (signed?) */
+    bool_t lod_uses_minor_axis;     /* (?) */
+
+    /* Other stuff, all (?) */
+    bool_t point_sampling_clamp, tex_array_override, mc_coord_truncate;
+    bool_t force_degamma, fetch_4, sample_is_pcf, type;
+    int depth_compare;              /* only depth textures? */
+    int chroma_key;
+} tex_sampler_t;
+
+/*
+ * Draw command parameters (handed to draw_immd() / draw_auto() by the tests).
+ */
+typedef struct {
+    uint32_t prim_type, vgt_draw_initiator, index_type;
+    uint32_t num_instances, num_indices;
+} draw_config_t;
+
+#endif
diff --git a/r600_texture.c b/r600_texture.c
new file mode 100644
index 0000000..7c5bb3f
--- /dev/null
+++ b/r600_texture.c
@@ -0,0 +1,407 @@
+/*
+ * r600_demo
+ *
+ * Copyright (C) 2008-2009 Matthias Hopf
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Texture test, derived from pm4 tests
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include "r600_reg.h"
+#include "r600_emit.h"
+#include "r600_lib.h"
+#include "r600_state.h"
+#include "r600_init.h"
+#include "r600_shader.h"
+
+
+/*
+ * Build a test pattern of 32-bit texels.
+ *
+ * width, height: size of the pattern in texels
+ * pitch:         distance between the starts of two rows, in texels
+ *
+ * Per texel, byte 0 ramps with x, byte 1 ramps with y, byte 2 is a mix of
+ * both taken modulo 0xff (so it never reaches 0xff), and byte 3 is 0xff.
+ * Texels between width and pitch are zero.
+ *
+ * Returns a buffer of pitch * height texels that the caller must free(), or
+ * NULL if the arguments are inconsistent or the allocation fails.
+ */
+uint32_t *create_sample_texture (int width, int height, int pitch)
+{
+    uint32_t *tex, *row;
+    int x, y;
+
+    /* A row of `width` texels must fit into `pitch` texels, otherwise the
+     * fill loop would write past the end of the last row. */
+    if (width < 0 || height < 0 || pitch < width)
+        return NULL;
+
+    /* Size is computed in size_t rather than int.  calloc() zero-fills, so
+     * the padding texels that get uploaded with the rest are well defined. */
+    tex = calloc ((size_t) pitch * (size_t) height, sizeof(uint32_t));
+    if (tex == NULL)
+        return NULL;
+
+    row = tex;
+    for (y = 0; y < height; y++) {
+        for (x = 0; x < width; x++) {
+            uint32_t byte0 = 0x100 * x / width;
+            uint32_t byte1 = 0x100 * y / height;
+            uint32_t byte2 = ((0x200 * x / width) + (0x200 * y / height)) % 0xff;
+
+            row[x] = byte0 | (byte1 << 8) | (byte2 << 16) | 0xff000000;
+        }
+        row += pitch;
+    }
+    return tex;
+}
+
+
+/*
+ * Simple textured quad test, scaling, float coords, explicit texture coords
+ *
+ * Draws one quad with corners (2,2)-(258,258) whose texture coordinates run
+ * from 0 to 1, sampling a generated TEX_WIDTH x TEX_HEIGHT pattern with the
+ * sampler filters set to 0.
+ */
+
+void quad_test_tex_scaled(adapter_t *adapt)
+{
+    /*
+     * Vertex shader: one fetch clause with two fetches (position -> GPR1,
+     * texture coordinate -> GPR0), then export GPR1 as position and GPR0 as
+     * parameter 0.
+     */
+    static uint32_t vs[] = {
+        // CF INST 0
+        CF_DWORD0(ADDR(4)),
+        CF_DWORD1(POP_COUNT(0),
+                  CF_CONST(0),
+                  COND(SQ_CF_COND_ACTIVE),
+                  COUNT(2),
+                  CALL_COUNT(0),
+                  END_OF_PROGRAM(0),
+                  VALID_PIXEL_MODE(0),
+                  CF_INST(SQ_CF_INST_VTX),
+                  WHOLE_QUAD_MODE(0),
+                  BARRIER(1)),
+        // CF INST 1
+        CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
+                                TYPE(SQ_EXPORT_POS),
+                                RW_GPR(1),
+                                RW_REL(ABSOLUTE),
+                                INDEX_GPR(0),
+                                ELEM_SIZE(0)),
+        CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+                                     SRC_SEL_Y(SQ_SEL_Y),
+                                     SRC_SEL_Z(SQ_SEL_Z),
+                                     SRC_SEL_W(SQ_SEL_W),
+                                     R6xx_ELEM_LOOP(0),
+                                     BURST_COUNT(0),
+                                     END_OF_PROGRAM(0),
+                                     VALID_PIXEL_MODE(0),
+                                     CF_INST(SQ_CF_INST_EXPORT_DONE),
+                                     WHOLE_QUAD_MODE(0),
+                                     BARRIER(1)),
+        // CF INST 2
+        CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
+                                TYPE(SQ_EXPORT_PARAM),
+                                RW_GPR(0),
+                                RW_REL(ABSOLUTE),
+                                INDEX_GPR(0),
+                                ELEM_SIZE(0)),
+        CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+                                     SRC_SEL_Y(SQ_SEL_Y),
+                                     SRC_SEL_Z(SQ_SEL_Z),
+                                     SRC_SEL_W(SQ_SEL_W),
+                                     R6xx_ELEM_LOOP(0),
+                                     BURST_COUNT(0),
+                                     END_OF_PROGRAM(1),
+                                     VALID_PIXEL_MODE(0),
+                                     CF_INST(SQ_CF_INST_EXPORT_DONE),
+                                     WHOLE_QUAD_MODE(0),
+                                     BARRIER(0)),
+        // padding vtx/tex inst are 128 bit aligned
+        0x00000000,
+        0x00000000,
+        // VTX INST 0
+        VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+                   FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+                   FETCH_WHOLE_QUAD(0),
+                   BUFFER_ID(0),
+                   SRC_GPR(0),
+                   SRC_REL(ABSOLUTE),
+                   SRC_SEL_X(SQ_SEL_X),
+                   MEGA_FETCH_COUNT(16)),
+        VTX_DWORD1_GPR(DST_GPR(1), DST_REL(0),
+                       DST_SEL_X(SQ_SEL_X),
+                       DST_SEL_Y(SQ_SEL_Y),
+                       DST_SEL_Z(SQ_SEL_Z),
+                       DST_SEL_W(SQ_SEL_1),
+                       USE_CONST_FIELDS(0),
+                       DATA_FORMAT(FMT_32_32_FLOAT),
+                       NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM),
+                       FORMAT_COMP_ALL(SQ_FORMAT_COMP_UNSIGNED),
+                       SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)),
+        VTX_DWORD2(OFFSET(0),
+                   ENDIAN_SWAP(ENDIAN_NONE),
+                   CONST_BUF_NO_STRIDE(0),
+                   MEGA_FETCH(1)),
+        VTX_DWORD_PAD,
+        // VTX INST 1
+        VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+                   FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+                   FETCH_WHOLE_QUAD(0),
+                   BUFFER_ID(0),
+                   SRC_GPR(0),
+                   SRC_REL(ABSOLUTE),
+                   SRC_SEL_X(SQ_SEL_X),
+                   MEGA_FETCH_COUNT(8)),
+        VTX_DWORD1_GPR(DST_GPR(0), DST_REL(0),
+                       DST_SEL_X(SQ_SEL_X),
+                       DST_SEL_Y(SQ_SEL_Y),
+                       DST_SEL_Z(SQ_SEL_0),
+                       DST_SEL_W(SQ_SEL_1),
+                       USE_CONST_FIELDS(0),
+                       DATA_FORMAT(FMT_32_32_FLOAT),
+                       NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM),
+                       FORMAT_COMP_ALL(SQ_FORMAT_COMP_UNSIGNED),
+                       SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)),
+        VTX_DWORD2(OFFSET(8),
+                   ENDIAN_SWAP(ENDIAN_NONE),
+                   CONST_BUF_NO_STRIDE(0),
+                   MEGA_FETCH(0)),
+        VTX_DWORD_PAD,
+    } ;
+
+    /*
+     * Pixel shader: one texture clause with a single sample (coordinates
+     * from GPR0, result into GPR0), then export GPR0 to MRT0.
+     */
+    static uint32_t ps[] = {
+        // CF INST 0
+        CF_DWORD0(ADDR(2)),
+        CF_DWORD1(POP_COUNT(0),
+                  CF_CONST(0),
+                  COND(SQ_CF_COND_ACTIVE),
+                  COUNT(1),
+                  CALL_COUNT(0),
+                  END_OF_PROGRAM(0),
+                  VALID_PIXEL_MODE(0),
+                  CF_INST(SQ_CF_INST_TEX),
+                  WHOLE_QUAD_MODE(0),
+                  BARRIER(1)),
+        // CF INST 1
+        CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+                                TYPE(SQ_EXPORT_PIXEL),
+                                RW_GPR(0),
+                                RW_REL(ABSOLUTE),
+                                INDEX_GPR(0),
+                                ELEM_SIZE(1)),
+        CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+                                     SRC_SEL_Y(SQ_SEL_Y),
+                                     SRC_SEL_Z(SQ_SEL_Z),
+                                     SRC_SEL_W(SQ_SEL_W),
+                                     R6xx_ELEM_LOOP(0),
+                                     BURST_COUNT(1),
+                                     END_OF_PROGRAM(1),
+                                     VALID_PIXEL_MODE(0),
+                                     CF_INST(SQ_CF_INST_EXPORT_DONE),
+                                     WHOLE_QUAD_MODE(0),
+                                     BARRIER(1)),
+        // TEX INST 0
+        TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+                   BC_FRAC_MODE(0),
+                   FETCH_WHOLE_QUAD(0),
+                   RESOURCE_ID(0),
+                   SRC_GPR(0),
+                   SRC_REL(ABSOLUTE),
+                   R7xx_ALT_CONST(0)),
+        TEX_DWORD1(DST_GPR(0),
+                   DST_REL(ABSOLUTE),
+                   DST_SEL_X(SQ_SEL_X),
+                   DST_SEL_Y(SQ_SEL_Y),
+                   DST_SEL_Z(SQ_SEL_Z),
+                   DST_SEL_W(SQ_SEL_W),
+                   LOD_BIAS(0),
+                   COORD_TYPE_X(TEX_NORMALIZED),
+                   COORD_TYPE_Y(TEX_NORMALIZED),
+                   COORD_TYPE_Z(TEX_NORMALIZED),
+                   COORD_TYPE_W(TEX_NORMALIZED)),
+        TEX_DWORD2(OFFSET_X(0),
+                   OFFSET_Y(0),
+                   OFFSET_Z(0),
+                   SAMPLER_ID(0),
+                   SRC_SEL_X(SQ_SEL_X),
+                   SRC_SEL_Y(SQ_SEL_Y),
+                   SRC_SEL_Z(SQ_SEL_0),
+                   SRC_SEL_W(SQ_SEL_1)),
+        /* tex insts are 128 bit wide (see padding note in vs[]); without the
+         * pad dword the uploaded instruction would be one dword short */
+        TEX_DWORD_PAD,
+    } ;
+
+    /* One vertex: 2D position followed by 2D texture coordinate (16 bytes) */
+    typedef struct {
+        float x, y;
+        float s, t;
+    } vertex_tex2d_t;
+    static vertex_tex2d_t vb[] = {
+        {   2,   2, 0, 0 },
+        { 258,   2, 1, 0 },
+        { 258, 258, 1, 1 },
+        {   2, 258, 0, 1 },
+    };
+
+    static uint32_t indices[] = { 0, 1, 2, 3 };
+
+
+    draw_config_t draw_conf;
+    cb_config_t cb_conf;
+    vtx_resource_t vtx_res;
+    tex_resource_t tex_res;
+    tex_sampler_t tex_samp;
+    shader_config_t vs_conf, ps_conf;
+
+    uint64_t vb_addr, vs_addr, ps_addr, tex_addr;
+    uint32_t *tex;
+
+    CLEAR (draw_conf);
+    CLEAR (cb_conf);
+    CLEAR (vtx_res);
+    CLEAR (tex_res);
+    CLEAR (tex_samp);
+    CLEAR (vs_conf);
+    CLEAR (ps_conf);
+
+
+    printf ("\n* Quad Test tex scaled\n\n");
+
+    /* Generate the texture before anything is emitted to the hardware, so a
+     * failure can simply abort the test. */
+    tex = create_sample_texture (TEX_WIDTH, TEX_HEIGHT, TEX_PITCH);
+    if (tex == NULL) {
+        fprintf (stderr, "quad_test_tex_scaled: cannot create sample texture\n");
+        return;
+    }
+
+    if (verbose) {
+        dump_shader (adapt, vs, sizeof(vs), "vertex");
+        dump_shader (adapt, ps, sizeof(ps), "pixel");
+        printf ("\n");
+    }
+
+
+    /* Init */
+    start_3d(adapt);
+    set_default_state(adapt);
+
+
+    /* Scissor / viewport */
+    ereg (PA_CL_VTE_CNTL, VTX_XY_FMT_bit);
+    /* Not necessary due to PA_CL_VTE_CNTL */
+//  pack0 (PA_CL_VPORT_XSCALE_0, 4);
+//  efloat (1.0);
+//  efloat (0.0);
+//  efloat (1.0);
+//  efloat (0.0);
+    ereg (PA_CL_CLIP_CNTL, CLIP_DISABLE_bit);
+
+
+    /* Upload: shaders, vertex buffer and texture at 4096 byte steps */
+    vs_addr = upload (adapt, vs, sizeof(vs), 0);
+    ps_addr = upload (adapt, ps, sizeof(ps), 4096);
+    vb_addr = upload (adapt, vb, sizeof(vb), 8192);
+    tex_addr= upload (adapt, tex, TEX_PITCH * TEX_HEIGHT * sizeof(uint32_t), 12288);
+    free (tex);
+
+
+    /* Shader */
+    vs_conf.shader_addr = vs_addr;
+    vs_conf.num_gprs = 4;
+    vs_conf.stack_size = 1;
+    vs_setup (adapt, &vs_conf);
+
+    ps_conf.shader_addr = ps_addr;
+    ps_conf.num_gprs = 1;
+    ps_conf.stack_size = 0;
+    ps_conf.uncached_first_inst = 1;
+    ps_conf.clamp_consts = 1;
+    ps_conf.export_mode = 2;
+    ps_setup (adapt, &ps_conf);
+
+
+    /* Texture */
+    tex_res.id = 0;
+    tex_res.w = TEX_WIDTH;
+    tex_res.h = TEX_HEIGHT;
+    tex_res.pitch = TEX_PITCH;
+    tex_res.depth = 0; /* ? */
+    tex_res.dim = 1; //2D
+    tex_res.base = tex_addr;
+    tex_res.mip_base = tex_addr;
+    tex_res.format = FMT_8_8_8_8;
+    tex_res.request_size = 0; /* 2 ? */
+    tex_res.dst_sel_x = 0;
+    tex_res.dst_sel_y = 1;
+    tex_res.dst_sel_z = 2;
+    tex_res.dst_sel_w = 3;
+    tex_res.base_level = 0;
+    tex_res.last_level = 0; /* 1 test */
+    tex_res.perf_modulation = 0; /* 1 ? */
+    set_tex_resource (adapt, &tex_res);
+
+    tex_samp.id = 0;
+    tex_samp.clamp_x = 0;
+    tex_samp.clamp_y = 0;
+    tex_samp.clamp_z = 0;
+    tex_samp.xy_mag_filter = 0; /* 0: point 1:bilinear 2:bicubic */
+    tex_samp.xy_min_filter = 0; /* 0: point 1:bilinear 2:bicubic */
+    tex_samp.z_filter = 0; /* 0: none 1: point 2: linear */
+    tex_samp.mip_filter = 0; /* no mipmap */
+    set_tex_sampler (adapt, &tex_samp);
+
+
+    /* Render setup */
+    ereg (CB_SHADER_MASK, (0x0f << OUTPUT0_ENABLE_shift));
+    ereg (R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit));
+    ereg (CB_COLOR_CONTROL, (0xcc << ROP3_shift)); /* copy */
+
+    cb_conf.id = 0;
+    cb_conf.w = adapt->color_pitch;
+    cb_conf.h = adapt->color_height;
+    cb_conf.base = adapt->color_gpu;
+    cb_conf.format = FMT_8_8_8_8;
+    cb_conf.comp_swap = 1;
+    cb_conf.source_format = 1;
+    cb_conf.blend_clamp = 1;
+    set_render_target(adapt, &cb_conf);
+
+    ereg (PA_SU_SC_MODE_CNTL, (FACE_bit |
+                               (POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift) |
+                               (POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift)));
+    ereg (DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */
+                              DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */
+
+
+    /* Interpolator setup */
+    /* Enabling flat shading needs both FLAT_SHADE_bit in SPI_PS_INPUT_CNTL_x
+     * *and* FLAT_SHADE_ENA_bit in SPI_INTERP_CONTROL_0 */
+    ereg (SPI_PS_IN_CONTROL_0, ((1 << NUM_INTERP_shift)));
+    ereg (SPI_PS_IN_CONTROL_1, 0);
+    ereg (SPI_PS_INPUT_CNTL_0 + (0 <<2), ((0 << SEMANTIC_shift) |
+                                          (0x03 << DEFAULT_VAL_shift) |
+                                          FLAT_SHADE_bit |
+                                          SEL_CENTROID_bit));
+    ereg (SPI_INTERP_CONTROL_0, /* FLAT_SHADE_ENA_bit | */ 0);
+
+
+    /* Vertex buffer setup; sizes are given in dwords */
+    vtx_res.id = SQ_VTX_RESOURCE_vs;
+    vtx_res.vtx_size_dw = sizeof(vertex_tex2d_t) / 4;
+    vtx_res.vtx_num_entries = sizeof(vb) / 4; /* Can overcommit if necessary */
+    vtx_res.mem_req_size = 1;
+    vtx_res.vb_addr = vb_addr;
+    set_vtx_resource (adapt, &vtx_res);
+
+
+    /* Draw */
+    draw_conf.prim_type = DI_PT_QUADLIST;
+    draw_conf.vgt_draw_initiator = DI_SRC_SEL_IMMEDIATE;
+    draw_conf.num_instances = 1;
+    /* buffer size in dwords / vertex size in dwords = number of vertices */
+    draw_conf.num_indices = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw;
+    draw_conf.index_type = DI_INDEX_SIZE_16_BIT;
+
+    ereg (VGT_INSTANCE_STEP_RATE_0, 0); /* ? */
+    ereg (VGT_INSTANCE_STEP_RATE_1, 0);
+
+    ereg (VGT_MAX_VTX_INDX, draw_conf.num_indices);
+    ereg (VGT_MIN_VTX_INDX, 0);
+    ereg (VGT_INDX_OFFSET, 0);
+
+    draw_immd(adapt, &draw_conf, indices);
+
+    wait_3d_idle_clean();
+
+}
+
+/*
+ * Temporary test: placeholder, currently does nothing.
+ */
+
+void tmp_test(adapter_t *adapt)
+{
+    (void) adapt;   /* not used yet */
+}
diff --git a/r600_triangles.c b/r600_triangles.c
new file mode 100644
index 0000000..fc6d0dd
--- /dev/null
+++ b/r600_triangles.c
@@ -0,0 +1,1348 @@
+/*
+ * RadeonHD R6xx, R7xx DRI driver
+ *
+ * Copyright (C) 2008-2009 Matthias Hopf
+ * Copyright (C) 2008-2009 Alexander Deucher
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Single triangle tests, derived from pm4 tests
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include "r600_reg.h"
+#include "r600_emit.h"
+#include "r600_lib.h"
+#include "r600_state.h"
+#include "r600_init.h"
+#include "r600_shader.h"
+
+
+/*
+ * Simple triangle test, massively edited
+ *
+ * Draws the six vertices of vb_orig as a triangle list (two triangles) with
+ * per-vertex colors.  How the x/y coordinates are stored in the vertex
+ * buffer and fetched by the shader is controlled by the globals
+ * vertex_format, vertex_load_as_int, vertex_unsigned and
+ * vertex_alu_scale_override.
+ */
+
+void tri_test_2d(adapter_t *adapt)
+{
+    draw_config_t draw_conf;
+    cb_config_t cb_conf;
+    vtx_resource_t vtx_res;
+    shader_config_t vs_conf, ps_conf;
+
+    uint64_t vb_addr, vs_addr, ps_addr;
+    int vs_len, vb_len;
+
+    /* x, y, color per vertex; 12 bytes each */
+    static int32_t vb_orig[][3] = {
+        { 128,   0, 0xFFFF0000 },
+        {   0, 196, 0xFF00FF00 },
+        { 256, 320, 0xFF0000FF },
+        { 192, -32, 0xFF00FFFF },
+        { 128, 128, 0xFFFF00FF },
+        { 320, 192, 0xFFFFFF00 },
+    } ;
+    char vb[256]; /* raw byte buffer, filled with memcpy() below */
+
+    uint32_t vs[40]; /* 20 dwords are written below */
+    /* Pixel shader: export GPR0 to MRT0 */
+    static uint32_t ps[] = {
+        // CF INST 0
+        CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+                                TYPE(SQ_EXPORT_PIXEL),
+                                RW_GPR(0),
+                                RW_REL(ABSOLUTE),
+                                INDEX_GPR(0),
+                                ELEM_SIZE(1)),
+        CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+                                     SRC_SEL_Y(SQ_SEL_Y),
+                                     SRC_SEL_Z(SQ_SEL_Z),
+                                     SRC_SEL_W(SQ_SEL_W),
+                                     R6xx_ELEM_LOOP(0),
+                                     BURST_COUNT(1),
+                                     END_OF_PROGRAM(1),
+                                     VALID_PIXEL_MODE(0),
+                                     CF_INST(SQ_CF_INST_EXPORT_DONE),
+                                     WHOLE_QUAD_MODE(0),
+                                     BARRIER(1)),
+    } ;
+
+
+    /*
+     * Vertex shader, built at run time because it depends on the vertex
+     * format globals.  Layout in dwords: 0-7 CF instructions, 8-11 ALU
+     * instructions, 12-19 vertex fetch instructions.
+     */
+    int i = 0;
+    vs[i++] = CF_DWORD0(ADDR(6)); /* CF INST 0 */
+    vs[i++] = CF_DWORD1(POP_COUNT(0),
+                        CF_CONST(0),
+                        COND(SQ_CF_COND_ACTIVE),
+                        COUNT(2),
+                        CALL_COUNT(0),
+                        END_OF_PROGRAM(0),
+                        VALID_PIXEL_MODE(0),
+                        CF_INST(SQ_CF_INST_VTX),
+                        WHOLE_QUAD_MODE(0),
+                        BARRIER(1));
+    /* Run the INT_TO_FLT ALU clause only when the coordinates are fetched as
+     * integers (or when forced by the override); otherwise emit a NOP so the
+     * shader layout stays the same. */
+    if ((vertex_alu_scale_override == 0 &&
+         (vertex_load_as_int > 0 && vertex_format > 0)) ||
+        vertex_alu_scale_override == 1) {
+        vs[i++] = CF_ALU_DWORD0(ADDR(4), /* CF INST 1a */
+                                KCACHE_BANK0(0),
+                                KCACHE_BANK1(0),
+                                KCACHE_MODE0(0));
+        vs[i++] = CF_ALU_DWORD1(KCACHE_MODE1(0),
+                                KCACHE_ADDR0(0),
+                                KCACHE_ADDR1(0),
+                                COUNT(2),
+                                USES_WATERFALL(0),
+                                CF_INST(SQ_CF_INST_ALU),
+                                WHOLE_QUAD_MODE(0),
+                                BARRIER(1));
+    } else {
+        vs[i++] = CF_ALU_DWORD0(0, 0, 0, 0); /* CF INST 1b */
+        vs[i++] = CF_ALU_DWORD1(0, 0, 0, 0, 0, CF_INST(SQ_CF_INST_NOP), 0, BARRIER(0));
+    }
+    vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), /* CF INST 2 */
+                                      TYPE(SQ_EXPORT_POS),
+                                      RW_GPR(1),
+                                      RW_REL(ABSOLUTE),
+                                      INDEX_GPR(0),
+                                      ELEM_SIZE(0));
+    vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+                                           SRC_SEL_Y(SQ_SEL_Y),
+                                           SRC_SEL_Z(SQ_SEL_Z),
+                                           SRC_SEL_W(SQ_SEL_W),
+                                           R6xx_ELEM_LOOP(0),
+                                           BURST_COUNT(0),
+                                           END_OF_PROGRAM(0),
+                                           VALID_PIXEL_MODE(0),
+                                           CF_INST(SQ_CF_INST_EXPORT_DONE),
+                                           WHOLE_QUAD_MODE(0),
+                                           BARRIER(1));
+    vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), /* CF INST 3 */
+                                      TYPE(SQ_EXPORT_PARAM),
+                                      RW_GPR(2),
+                                      RW_REL(ABSOLUTE),
+                                      INDEX_GPR(0),
+                                      ELEM_SIZE(0));
+    vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+                                           SRC_SEL_Y(SQ_SEL_Y),
+                                           SRC_SEL_Z(SQ_SEL_Z),
+                                           SRC_SEL_W(SQ_SEL_W),
+                                           R6xx_ELEM_LOOP(0),
+                                           BURST_COUNT(0),
+                                           END_OF_PROGRAM(1),
+                                           VALID_PIXEL_MODE(0),
+                                           CF_INST(SQ_CF_INST_EXPORT_DONE),
+                                           WHOLE_QUAD_MODE(0),
+                                           BARRIER(0));
+    // INT_TO_FLT is trans unit only, so both use individual slots
+    vs[i++] = ALU_DWORD0(SRC0_SEL(1), /* ALU inst 0 */
+                         SRC0_REL(ABSOLUTE),
+                         SRC0_ELEM(ELEM_X),
+                         SRC0_NEG(0),
+                         SRC1_SEL(0),
+                         SRC1_REL(ABSOLUTE),
+                         SRC1_ELEM(ELEM_X),
+                         SRC1_NEG(0),
+                         INDEX_MODE(SQ_INDEX_AR_X),
+                         PRED_SEL(SQ_PRED_SEL_OFF),
+                         LAST(1));
+    vs[i++] = ALU_DWORD1_OP2(adapt->chipset,
+                             SRC0_ABS(0),
+                             SRC1_ABS(0),
+                             UPDATE_EXECUTE_MASK(0),
+                             UPDATE_PRED(0),
+                             WRITE_MASK(1),
+                             FOG_MERGE(0),
+                             OMOD(SQ_ALU_OMOD_OFF),
+                             ALU_INST(SQ_OP2_INST_INT_TO_FLT),
+                             BANK_SWIZZLE(SQ_ALU_VEC_012),
+                             DST_GPR(1),
+                             DST_REL(ABSOLUTE),
+                             DST_ELEM(ELEM_X),
+                             CLAMP(0));
+    vs[i++] = ALU_DWORD0(SRC0_SEL(1), /* ALU inst 1 */
+                         SRC0_REL(ABSOLUTE),
+                         SRC0_ELEM(ELEM_Y),
+                         SRC0_NEG(0),
+                         SRC1_SEL(0),
+                         SRC1_REL(ABSOLUTE),
+                         SRC1_ELEM(ELEM_X),
+                         SRC1_NEG(0),
+                         INDEX_MODE(SQ_INDEX_AR_X),
+                         PRED_SEL(SQ_PRED_SEL_OFF),
+                         LAST(1));
+    vs[i++] = ALU_DWORD1_OP2(adapt->chipset,
+                             SRC0_ABS(0),
+                             SRC1_ABS(0),
+                             UPDATE_EXECUTE_MASK(0),
+                             UPDATE_PRED(0),
+                             WRITE_MASK(1),
+                             FOG_MERGE(0),
+                             OMOD(SQ_ALU_OMOD_OFF),
+                             ALU_INST(SQ_OP2_INST_INT_TO_FLT),
+                             BANK_SWIZZLE(SQ_ALU_VEC_012),
+                             DST_GPR(1),
+                             DST_REL(ABSOLUTE),
+                             DST_ELEM(ELEM_Y),
+                             CLAMP(0));
+    vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), /* VTX inst 0 */
+                         FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+                         FETCH_WHOLE_QUAD(0),
+                         BUFFER_ID(0),
+                         SRC_GPR(0),
+                         SRC_REL(ABSOLUTE),
+                         SRC_SEL_X(SQ_SEL_X),
+                         MEGA_FETCH_COUNT(12));
+    vs[i++] = VTX_DWORD1_GPR(DST_GPR(1), DST_REL(0),
+                             DST_SEL_X(SQ_SEL_X),
+                             DST_SEL_Y(SQ_SEL_Y),
+                             DST_SEL_Z(SQ_SEL_0),
+                             DST_SEL_W(SQ_SEL_1),
+                             USE_CONST_FIELDS(0),
+                             DATA_FORMAT(vertex_format == 0 ? FMT_32_32_FLOAT :
+                                         vertex_format == 1 ? FMT_32_32_FLOAT :
+                                         vertex_format == 2 ? FMT_16_16_FLOAT :
+                                         vertex_format == 3 ? FMT_32_32 :
+                                         vertex_format == 4 ? FMT_16_16 : -1),
+                             NUM_FORMAT_ALL(vertex_load_as_int == 0 ? SQ_NUM_FORMAT_SCALED :
+                                            vertex_load_as_int == 1 ? SQ_NUM_FORMAT_INT :
+                                            vertex_load_as_int == 2 ? SQ_NUM_FORMAT_NORM : -1),
+                             FORMAT_COMP_ALL(vertex_unsigned ? SQ_FORMAT_COMP_UNSIGNED :
+                                             SQ_FORMAT_COMP_SIGNED),
+                             SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+    vs[i++] = VTX_DWORD2(OFFSET(0),
+                         ENDIAN_SWAP(ENDIAN_NONE),
+                         CONST_BUF_NO_STRIDE(0),
+                         MEGA_FETCH(1));
+    vs[i++] = VTX_DWORD_PAD;
+    vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), /* VTX inst 1 */
+                         FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+                         FETCH_WHOLE_QUAD(0),
+                         BUFFER_ID(0),
+                         SRC_GPR(0),
+                         SRC_REL(ABSOLUTE),
+                         SRC_SEL_X(SQ_SEL_X),
+                         MEGA_FETCH_COUNT(4));
+    vs[i++] = VTX_DWORD1_GPR(DST_GPR(2), DST_REL(0),
+                             DST_SEL_X(SQ_SEL_Z),
+                             DST_SEL_Y(SQ_SEL_Y),
+                             DST_SEL_Z(SQ_SEL_X),
+                             DST_SEL_W(SQ_SEL_W),
+                             USE_CONST_FIELDS(0),
+                             DATA_FORMAT(FMT_8_8_8_8),
+                             NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM),
+                             FORMAT_COMP_ALL(SQ_FORMAT_COMP_UNSIGNED),
+                             SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+    vs[i++] = VTX_DWORD2(OFFSET(8),
+                         ENDIAN_SWAP(ENDIAN_NONE),
+                         CONST_BUF_NO_STRIDE(0),
+                         MEGA_FETCH(0));
+    vs[i++] = VTX_DWORD_PAD;
+    vs_len = i;
+
+    /*
+     * Fill the vertex buffer: 12 bytes per vertex, coordinates at offset 0
+     * (as float, int32 or int16 depending on vertex_format), color at
+     * offset 8.  The values are copied with memcpy() because vb is a plain
+     * char array: storing through casted int32_t / float / int16_t pointers
+     * would break alignment and aliasing rules.  The buffer is zeroed first
+     * so the bytes the 16 bit formats leave unused are defined when uploaded.
+     */
+    vb_len = sizeof(vb_orig) / 12;
+    memset (vb, 0, sizeof(vb));
+    for (i = 0; i < vb_len; i++) {
+        char *entry = &vb[i*12];
+        if (vertex_format == 0) {
+            float xy[2] = { vb_orig[i][0], vb_orig[i][1] };
+            memcpy (entry, xy, sizeof(xy));
+        } else if (vertex_format == 1 || vertex_format == 3) {
+            int32_t xy[2] = { vb_orig[i][0], vb_orig[i][1] };
+            memcpy (entry, xy, sizeof(xy));
+        } else {
+            int16_t xy[2] = { vb_orig[i][0], vb_orig[i][1] };
+            memcpy (entry, xy, sizeof(xy));
+        }
+        memcpy (entry + 8, &vb_orig[i][2], sizeof(vb_orig[i][2]));
+    }
+
+    CLEAR (draw_conf);
+    CLEAR (cb_conf);
+    CLEAR (vtx_res);
+    CLEAR (vs_conf);
+    CLEAR (ps_conf);
+
+
+    /* Never hand NULL to %s: unknown settings are reported as "unknown" */
+    printf ("\n* Tri Test 2d - Vertex format %s %s %s\n\n",
+            vertex_format == 0 ? "float" :
+            vertex_format == 1 ? "int32 (FMT_32_32_FLOAT)" :
+            vertex_format == 2 ? "int16 (FMT_16_16_FLOAT)" :
+            vertex_format == 3 ? "int32 (FMT_32_32)" :
+            vertex_format == 4 ? "int16 (FMT_16_16)" : "unknown",
+            vertex_load_as_int == 0 ? "_SCALED" :
+            vertex_load_as_int == 1 ? "_INT" :
+            vertex_load_as_int == 2 ? "_NORM" : "unknown",
+            vertex_unsigned ? "unsigned" : "signed"
+            );
+
+    if (verbose) {
+        dump_shader (adapt, vs, vs_len * 4, "vertex");
+        dump_shader (adapt, ps, sizeof(ps), "pixel");
+        printf ("\n");
+    }
+
+
+    /* Init */
+    start_3d(adapt);
+    set_default_state(adapt);
+
+
+    /* Scissor / viewport */
+    ereg (PA_CL_VTE_CNTL, VTX_XY_FMT_bit);
+    /* Not necessary due to PA_CL_VTE_CNTL */
+//  pack0 (PA_CL_VPORT_XSCALE_0, 4);
+//  efloat (1.0);
+//  efloat (0.0);
+//  efloat (1.0);
+//  efloat (0.0);
+    ereg (PA_CL_CLIP_CNTL, CLIP_DISABLE_bit);
+
+
+    /* Upload */
+    vs_addr = upload (adapt, vs, vs_len * 4, 0);
+    ps_addr = upload (adapt, ps, sizeof(ps), 4096);
+    vb_addr = upload (adapt, vb, vb_len * 12, 8192);
+
+
+    /* Shader */
+    vs_conf.shader_addr = vs_addr;
+    vs_conf.num_gprs = 4;
+    vs_conf.stack_size = 1;
+    vs_setup (adapt, &vs_conf);
+
+    ps_conf.shader_addr = ps_addr;
+    ps_conf.num_gprs = 1;
+    ps_conf.stack_size = 0;
+    ps_conf.uncached_first_inst = 1;
+    ps_conf.clamp_consts = 1;
+    ps_conf.export_mode = 2;
+    ps_setup (adapt, &ps_conf);
+
+
+    /* Render setup */
+    ereg (CB_SHADER_MASK, (0x0f << OUTPUT0_ENABLE_shift));
+    ereg (R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit));
+    ereg (CB_COLOR_CONTROL, (0xcc << ROP3_shift)); /* copy */
+
+    cb_conf.id = 0;
+    cb_conf.w = adapt->color_pitch;
+    cb_conf.h = adapt->color_height;
+    cb_conf.base = adapt->color_gpu;
+    cb_conf.format = FMT_8_8_8_8;
+    cb_conf.comp_swap = 1;
+    cb_conf.source_format = 1;
+    cb_conf.blend_clamp = 1;
+    set_render_target(adapt, &cb_conf);
+
+    ereg (PA_SU_SC_MODE_CNTL, (FACE_bit |
+                               (POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift) |
+                               (POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift)));
+    ereg (DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */
+                              DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */
+
+
+    /* Interpolator setup */
+    /* Enabling flat shading needs both FLAT_SHADE_bit in SPI_PS_INPUT_CNTL_x
+     * *and* FLAT_SHADE_ENA_bit in SPI_INTERP_CONTROL_0 */
+    ereg (SPI_PS_IN_CONTROL_0, (((2 - 1) << NUM_INTERP_shift)));
+    ereg (SPI_PS_IN_CONTROL_1, 0);
+    ereg (SPI_PS_INPUT_CNTL_0 + (0 <<2), ((0 << SEMANTIC_shift) |
+                                          (0x03 << DEFAULT_VAL_shift) |
+                                          FLAT_SHADE_bit |
+                                          SEL_CENTROID_bit));
+    ereg (SPI_INTERP_CONTROL_0, /* FLAT_SHADE_ENA_bit | */ 0);
+
+
+    /* Vertex buffer setup; sizes are given in dwords */
+    vtx_res.id = SQ_VTX_RESOURCE_vs;
+    vtx_res.vtx_size_dw = 12 / 4;
+    vtx_res.vtx_num_entries = vb_len * 12 / 4; /* Can overcommit if necessary */
+    vtx_res.mem_req_size = 1;
+    vtx_res.vb_addr = vb_addr;
+    /* these are only used if USE_CONST_FIELDS is set in the shader vtx fetch instruction */
+    vtx_res.format = (vertex_format == 0 ? FMT_32_32_FLOAT :
+                      vertex_format == 1 ? FMT_32_32_FLOAT :
+                      vertex_format == 2 ? FMT_16_16_FLOAT :
+                      vertex_format == 3 ? FMT_32_32 :
+                      vertex_format == 4 ? FMT_16_16 : -1);
+    vtx_res.num_format_all = (vertex_load_as_int == 0 ? SQ_NUM_FORMAT_SCALED :
+                              vertex_load_as_int == 1 ? SQ_NUM_FORMAT_INT :
+                              vertex_load_as_int == 2 ? SQ_NUM_FORMAT_NORM : -1);
+    vtx_res.format_comp_all = (vertex_unsigned ? SQ_FORMAT_COMP_UNSIGNED :
+                               SQ_FORMAT_COMP_SIGNED);
+
+    set_vtx_resource (adapt, &vtx_res);
+
+
+    /* Draw */
+    draw_conf.prim_type = DI_PT_TRILIST;
+    draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX;
+    draw_conf.num_instances = 1;
+    /* buffer size in dwords / vertex size in dwords = number of vertices */
+    draw_conf.num_indices = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw;
+    draw_conf.index_type = DI_INDEX_SIZE_16_BIT;
+
+    ereg (VGT_INSTANCE_STEP_RATE_0, 0); /* ? */
+    ereg (VGT_INSTANCE_STEP_RATE_1, 0);
+
+    ereg (VGT_MAX_VTX_INDX, draw_conf.num_indices);
+    ereg (VGT_MIN_VTX_INDX, 0);
+    ereg (VGT_INDX_OFFSET, 0);
+
+    draw_auto(adapt, &draw_conf);
+
+    wait_3d_idle_clean();
+
+}
+
+
+void tri_test_3d(adapter_t *adapt)
+{
+ // 4 DWs per const
+ static float vs_alu_consts_12[] = {
+ 2.0, 0.0, 0.0, -1.0,
+ 0.0, -2.0, 0.0, 1.0,
+ 0.0, 0.0, -1.0, -0.0,
+ 0.0, 0.0, 0.0, 1.0,
+ 0.9999999403953552, 0.0, 0.0, -0.0,
+ 0.0, 0.9999998807907104, 0.0, -0.0,
+ 0.0, 0.0, 0.9999999403953552, -0.0,
+ 0.0, 0.0, 0.0, 1.0,
+ };
+
+ uint32_t vs[100];
+
+ static uint32_t trivial_ps[] = {
+ // CF INST 0
+ CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+ TYPE(SQ_EXPORT_PIXEL),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(1)),
+ CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ R6xx_ELEM_LOOP(0),
+ BURST_COUNT(1),
+ END_OF_PROGRAM(1),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1)),
+ } ;
+
+ typedef struct {
+ float x, y, z;
+ uint32_t argb;
+ } vertex_3d_t;
+ static vertex_3d_t vb[] = {
+ { .5, 0, 0, 0xFFFF0000 },
+ { 0, .75, 0, 0xFF00FF00 },
+ { 1, 1.25, 0, 0xFF0000FF },
+ { .75, -.125, 0, 0xFF00FFFF },
+ { .5, .5, 0, 0xFFFF00FF },
+ { 1.25, .75, 0, 0xFFFFFF00 },
+ };
+
+ static uint32_t indices[] = { 0, 1, 2, 3, 4, 5 };
+ int vs_len;
+
+ int i = 0;
+ // CF INST 0
+ vs[i++] = CF_DWORD0(ADDR(28));
+ vs[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ COUNT(2),
+ CALL_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_VTX),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ // CF INST 1
+ vs[i++] = CF_ALU_DWORD0(ADDR(4),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(0));
+ vs[i++] = CF_ALU_DWORD1(KCACHE_MODE1(0),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ COUNT(24),
+ USES_WATERFALL(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ // CF INST 2
+ vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
+ TYPE(SQ_EXPORT_POS),
+ RW_GPR(1),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0));
+ vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ R6xx_ELEM_LOOP(0),
+ BURST_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ // CF INST 3
+ vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
+ TYPE(SQ_EXPORT_PARAM),
+ RW_GPR(2),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0));
+ vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ R6xx_ELEM_LOOP(0),
+ BURST_COUNT(0),
+ END_OF_PROGRAM(1),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ // ALU clause INST 0
+ // ALU 0: R1 dot C16 -> R127.X
+ vs[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(272), //cfile
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ vs[i++] = ALU_DWORD1_OP2(adapt->chipset,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(127),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+ // 1
+ vs[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(272), //cfile
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ vs[i++] = ALU_DWORD1_OP2(adapt->chipset,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+ // 2
+ vs[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(272), //cfile
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ vs[i++] = ALU_DWORD1_OP2(adapt->chipset,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+ // 3
+ vs[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(272),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ vs[i++] = ALU_DWORD1_OP2(adapt->chipset,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+ // ALU 4: R1 dot C17 -> R127.Y
+ vs[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(273),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_PRED_SEL_OFF),
+ LAST(0));
+ vs[i++] = ALU_DWORD1_OP2(adapt->chipset,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+ // 5
+ vs[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(273),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_PRED_SEL_OFF),
+ LAST(0));
+ vs[i++] = ALU_DWORD1_OP2(adapt->chipset,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(127),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+ // 6
+ vs[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(273),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_PRED_SEL_OFF),
+ LAST(0));
+ vs[i++] = ALU_DWORD1_OP2(adapt->chipset,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+ // 7
+ vs[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(273),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_PRED_SEL_OFF),
+ LAST(1));
+ vs[i++] = ALU_DWORD1_OP2(adapt->chipset,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+ // ALU 8: R1 dot C18 -> R127.Z
+ vs[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(274),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_PRED_SEL_OFF),
+ LAST(0));
+ vs[i++] = ALU_DWORD1_OP2(adapt->chipset,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+ // 9
+ vs[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(274),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_PRED_SEL_OFF),
+ LAST(0));
+ vs[i++] = ALU_DWORD1_OP2(adapt->chipset,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+ // 10
+ vs[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(274),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_PRED_SEL_OFF),
+ LAST(0));
+ vs[i++] = ALU_DWORD1_OP2(adapt->chipset,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(127),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+ // 11
+ vs[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(274),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_PRED_SEL_OFF),
+ LAST(1));
+ vs[i++] = ALU_DWORD1_OP2(adapt->chipset,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+ // ALU 12: R1 dot C19 -> R127.W
+ vs[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(275),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_PRED_SEL_OFF),
+ LAST(0));
+ vs[i++] = ALU_DWORD1_OP2(adapt->chipset,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+ // 13
+ vs[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(275),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_PRED_SEL_OFF),
+ LAST(0));
+ vs[i++] = ALU_DWORD1_OP2(adapt->chipset,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+ // 14
+ vs[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(275),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_PRED_SEL_OFF),
+ LAST(0));
+ vs[i++] = ALU_DWORD1_OP2(adapt->chipset,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+ // 15
+ vs[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(275),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_PRED_SEL_OFF),
+ LAST(1));
+ vs[i++] = ALU_DWORD1_OP2(adapt->chipset,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(127),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+ // ALU 16: R127 dot C12 -> R1.X
+ vs[i++] = ALU_DWORD0(SRC0_SEL(127),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(268),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_PRED_SEL_OFF),
+ LAST(0));
+ vs[i++] = ALU_DWORD1_OP2(adapt->chipset,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+ // 17
+ vs[i++] = ALU_DWORD0(SRC0_SEL(127),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(268),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_PRED_SEL_OFF),
+ LAST(0));
+ vs[i++] = ALU_DWORD1_OP2(adapt->chipset,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+ // 18
+ vs[i++] = ALU_DWORD0(SRC0_SEL(127),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(268),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_PRED_SEL_OFF),
+ LAST(0)
+ ),
+ vs[i++] = ALU_DWORD1_OP2(adapt->chipset,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+ // 19
+ vs[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_PV),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(268),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_PRED_SEL_OFF),
+ LAST(1));
+ vs[i++] = ALU_DWORD1_OP2(adapt->chipset,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+ // ALU 20: R127 dot C13 -> R1.Y
+ vs[i++] = ALU_DWORD0(SRC0_SEL(127),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(269),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_PRED_SEL_OFF),
+ LAST(0));
+ vs[i++] = ALU_DWORD1_OP2(adapt->chipset,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+ // 21
+ vs[i++] = ALU_DWORD0(SRC0_SEL(127),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(269),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_PRED_SEL_OFF),
+ LAST(0));
+ vs[i++] = ALU_DWORD1_OP2(adapt->chipset,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+ // 22
+ vs[i++] = ALU_DWORD0(SRC0_SEL(127),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(269),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_PRED_SEL_OFF),
+ LAST(0));
+ vs[i++] = ALU_DWORD1_OP2(adapt->chipset,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+ // 23
+ vs[i++] = ALU_DWORD0(SRC0_SEL(127),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(269),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_PRED_SEL_OFF),
+ LAST(1));
+ vs[i++] = ALU_DWORD1_OP2(adapt->chipset,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+ // VTX clause INST 0
+ vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ MEGA_FETCH_COUNT(16));
+ vs[i++] = VTX_DWORD1_GPR(DST_GPR(1), DST_REL(0),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_Z),
+ DST_SEL_W(SQ_SEL_1),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_32_32_32_FLOAT),
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM),
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+ vs[i++] = VTX_DWORD2(OFFSET(0),
+ ENDIAN_SWAP(ENDIAN_NONE),
+ CONST_BUF_NO_STRIDE(0),
+ MEGA_FETCH(1));
+ vs[i++] = VTX_DWORD_PAD,
+ // VTX clause INST 1
+ vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ MEGA_FETCH_COUNT(4));
+ vs[i++] = VTX_DWORD1_GPR(DST_GPR(2), DST_REL(0),
+ DST_SEL_X(SQ_SEL_Z),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_X),
+ DST_SEL_W(SQ_SEL_W),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_8_8_8_8),
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM),
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_UNSIGNED),
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+ vs[i++] = VTX_DWORD2(OFFSET(12),
+ ENDIAN_SWAP(ENDIAN_NONE),
+ CONST_BUF_NO_STRIDE(0),
+ MEGA_FETCH(0));
+ vs[i++] = VTX_DWORD_PAD;
+ vs_len = i;
+
+ draw_config_t draw_conf;
+ cb_config_t cb_conf;
+ vtx_resource_t vtx_res;
+ tex_resource_t tex_res;
+ tex_sampler_t tex_samp;
+ shader_config_t vs_conf, ps_conf;
+
+ uint64_t vb_addr, vs_addr, ps_addr;
+
+ CLEAR (draw_conf);
+ CLEAR (cb_conf);
+ CLEAR (vtx_res);
+ CLEAR (tex_res);
+ CLEAR (tex_samp);
+ CLEAR (vs_conf);
+ CLEAR (ps_conf);
+
+
+ printf ("\n* Tri Test 3d\n\n");
+
+ if (verbose) {
+ dump_shader (adapt, vs, vs_len * 4, "vertex");
+ dump_shader (adapt, trivial_ps, sizeof(trivial_ps), "pixel");
+ printf ("\n");
+ }
+
+
+ /* Init */
+ start_3d(adapt);
+ set_default_state(adapt);
+
+
+ /* Scissor / viewport */
+ set_generic_scissor(0, 0, 256, 256);
+ set_window_scissor(0, 0, 256, 256);
+ ereg (PA_CL_VTE_CNTL, (VPORT_X_SCALE_ENA_bit |
+ VPORT_X_OFFSET_ENA_bit |
+ VPORT_Y_SCALE_ENA_bit |
+ VPORT_Y_OFFSET_ENA_bit |
+ VPORT_Z_SCALE_ENA_bit |
+ VPORT_Z_OFFSET_ENA_bit |
+ //VTX_XY_FMT_bit |
+ //VTX_Z_FMT_bit |
+ VTX_W0_FMT_bit |
+ 0));
+ set_viewport(adapt, 256, 256, 0);
+ ereg (PA_CL_CLIP_CNTL, 0x00000000);
+
+
+ /* Upload */
+ vs_addr = upload (adapt, vs, vs_len * 4, 0);
+ ps_addr = upload (adapt, trivial_ps, sizeof(trivial_ps), 4096);
+ vb_addr = upload (adapt, vb, sizeof(vb), 8192);
+
+
+ /* Shader */
+ vs_conf.shader_addr = vs_addr;
+ vs_conf.num_gprs = 4;
+ vs_conf.stack_size = 1;
+ vs_setup (adapt, &vs_conf);
+
+ ps_conf.shader_addr = ps_addr;
+ ps_conf.num_gprs = 1;
+ ps_conf.stack_size = 0;
+ ps_conf.uncached_first_inst = 1;
+ ps_conf.clamp_consts = 1;
+ ps_conf.export_mode = 2;
+ ps_setup (adapt, &ps_conf);
+
+
+ /* Constants */
+ set_alu_consts(SQ_ALU_CONSTANT_vs + 12,
+ sizeof(vs_alu_consts_12) / SQ_ALU_CONSTANT_offset, vs_alu_consts_12);
+
+
+ /* Render setup */
+ ereg (CB_SHADER_MASK, (0x0f << OUTPUT0_ENABLE_shift));
+ ereg (0x287A0, 0x00000001);
+ ereg (CB_COLOR_CONTROL, (0xcc << ROP3_shift)); /* copy */
+
+ cb_conf.id = 0;
+ cb_conf.w = adapt->color_pitch;
+ cb_conf.h = adapt->color_height;
+ cb_conf.base = adapt->color_gpu;
+ cb_conf.format = FMT_8_8_8_8;
+ cb_conf.comp_swap = 1;
+ cb_conf.source_format = 1;
+ cb_conf.blend_clamp = 1;
+ set_render_target(adapt, &cb_conf);
+
+ ereg (PA_SU_SC_MODE_CNTL, (FACE_bit |
+ (POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift) |
+ (POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift)));
+ ereg (DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */
+ DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */
+
+
+ /* Interpolator setup */
+ ereg (SPI_PS_IN_CONTROL_0, (((2 - 1) << NUM_INTERP_shift) |
+ (1 << BARYC_SAMPLE_CNTL_shift) |
+ PERSP_GRADIENT_ENA_bit));
+ ereg (SPI_PS_IN_CONTROL_1, 0);
+ ereg (SPI_PS_INPUT_CNTL_0 + (0 <<2), ((0 << SEMANTIC_shift) |
+ (0x03 << DEFAULT_VAL_shift) |
+ SEL_CENTROID_bit));
+ ereg (SPI_INTERP_CONTROL_0, ((2 << PNT_SPRITE_OVRD_X_shift) |
+ (3 << PNT_SPRITE_OVRD_Y_shift) |
+ (0 << PNT_SPRITE_OVRD_Z_shift) |
+ (1 << PNT_SPRITE_OVRD_W_shift)));
+
+
+ /* Vertex buffer setup */
+ vtx_res.id = SQ_VTX_RESOURCE_vs;
+ vtx_res.vtx_size_dw = sizeof(vertex_3d_t) / 4;
+ vtx_res.vtx_num_entries = sizeof(vb) / 4; /* Can overcommit if necessary */
+ vtx_res.mem_req_size = 1;
+ vtx_res.vb_addr = vb_addr;
+ set_vtx_resource (adapt, &vtx_res);
+
+
+ /* Draw */
+ draw_conf.prim_type = DI_PT_TRILIST;
+ draw_conf.vgt_draw_initiator = DI_SRC_SEL_IMMEDIATE;
+ draw_conf.num_instances = 1;
+ draw_conf.num_indices = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw;
+ draw_conf.index_type = DI_INDEX_SIZE_16_BIT;
+
+ ereg (VGT_INSTANCE_STEP_RATE_0, 0); /* ? */
+ ereg (VGT_INSTANCE_STEP_RATE_1, 0);
+
+ ereg (VGT_MAX_VTX_INDX, draw_conf.num_indices);
+ ereg (VGT_MIN_VTX_INDX, 0);
+ ereg (VGT_INDX_OFFSET, 0);
+
+ draw_immd(adapt, &draw_conf, indices);
+
+ wait_3d_idle_clean();
+
+}
diff --git a/radeon_drm.h b/radeon_drm.h
new file mode 100644
index 0000000..c0d566c
--- /dev/null
+++ b/radeon_drm.h
@@ -0,0 +1,755 @@
+/* radeon_drm.h -- Public header for the radeon driver -*- linux-c -*-
+ *
+ * Copyright 2000 Precision Insight, Inc., Cedar Park, Texas.
+ * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
+ * Copyright 2002 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Kevin E. Martin <martin@valinux.com>
+ * Gareth Hughes <gareth@valinux.com>
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef __RADEON_DRM_H__
+#define __RADEON_DRM_H__
+
+/* WARNING: If you change any of these defines, make sure to change the
+ * defines in the X server file (radeon_sarea.h)
+ */
+#ifndef __RADEON_SAREA_DEFINES__
+#define __RADEON_SAREA_DEFINES__
+
+/* Old style state flags, required for sarea interface (1.1 and 1.2
+ * clears) and 1.2 drm_vertex2 ioctl.
+ */
+#define RADEON_UPLOAD_CONTEXT 0x00000001
+#define RADEON_UPLOAD_VERTFMT 0x00000002
+#define RADEON_UPLOAD_LINE 0x00000004
+#define RADEON_UPLOAD_BUMPMAP 0x00000008
+#define RADEON_UPLOAD_MASKS 0x00000010
+#define RADEON_UPLOAD_VIEWPORT 0x00000020
+#define RADEON_UPLOAD_SETUP 0x00000040
+#define RADEON_UPLOAD_TCL 0x00000080
+#define RADEON_UPLOAD_MISC 0x00000100
+#define RADEON_UPLOAD_TEX0 0x00000200
+#define RADEON_UPLOAD_TEX1 0x00000400
+#define RADEON_UPLOAD_TEX2 0x00000800
+#define RADEON_UPLOAD_TEX0IMAGES 0x00001000
+#define RADEON_UPLOAD_TEX1IMAGES 0x00002000
+#define RADEON_UPLOAD_TEX2IMAGES 0x00004000
+#define RADEON_UPLOAD_CLIPRECTS 0x00008000 /* handled client-side */
+#define RADEON_REQUIRE_QUIESCENCE 0x00010000
+#define RADEON_UPLOAD_ZBIAS 0x00020000 /* version 1.2 and newer */
+#define RADEON_UPLOAD_ALL 0x003effff
+#define RADEON_UPLOAD_CONTEXT_ALL 0x003e01ff
+
+/* New style per-packet identifiers for use in cmd_buffer ioctl with
+ * the RADEON_EMIT_PACKET command. Comments relate new packets to old
+ * state bits and the packet size:
+ */
+#define RADEON_EMIT_PP_MISC 0 /* context/7 */
+#define RADEON_EMIT_PP_CNTL 1 /* context/3 */
+#define RADEON_EMIT_RB3D_COLORPITCH 2 /* context/1 */
+#define RADEON_EMIT_RE_LINE_PATTERN 3 /* line/2 */
+#define RADEON_EMIT_SE_LINE_WIDTH 4 /* line/1 */
+#define RADEON_EMIT_PP_LUM_MATRIX 5 /* bumpmap/1 */
+#define RADEON_EMIT_PP_ROT_MATRIX_0 6 /* bumpmap/2 */
+#define RADEON_EMIT_RB3D_STENCILREFMASK 7 /* masks/3 */
+#define RADEON_EMIT_SE_VPORT_XSCALE 8 /* viewport/6 */
+#define RADEON_EMIT_SE_CNTL 9 /* setup/2 */
+#define RADEON_EMIT_SE_CNTL_STATUS 10 /* setup/1 */
+#define RADEON_EMIT_RE_MISC 11 /* misc/1 */
+#define RADEON_EMIT_PP_TXFILTER_0 12 /* tex0/6 */
+#define RADEON_EMIT_PP_BORDER_COLOR_0 13 /* tex0/1 */
+#define RADEON_EMIT_PP_TXFILTER_1 14 /* tex1/6 */
+#define RADEON_EMIT_PP_BORDER_COLOR_1 15 /* tex1/1 */
+#define RADEON_EMIT_PP_TXFILTER_2 16 /* tex2/6 */
+#define RADEON_EMIT_PP_BORDER_COLOR_2 17 /* tex2/1 */
+#define RADEON_EMIT_SE_ZBIAS_FACTOR 18 /* zbias/2 */
+#define RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT 19 /* tcl/11 */
+#define RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED 20 /* material/17 */
+#define R200_EMIT_PP_TXCBLEND_0 21 /* tex0/4 */
+#define R200_EMIT_PP_TXCBLEND_1 22 /* tex1/4 */
+#define R200_EMIT_PP_TXCBLEND_2 23 /* tex2/4 */
+#define R200_EMIT_PP_TXCBLEND_3 24 /* tex3/4 */
+#define R200_EMIT_PP_TXCBLEND_4 25 /* tex4/4 */
+#define R200_EMIT_PP_TXCBLEND_5 26 /* tex5/4 */
+#define R200_EMIT_PP_TXCBLEND_6 27 /* /4 */
+#define R200_EMIT_PP_TXCBLEND_7 28 /* /4 */
+#define R200_EMIT_TCL_LIGHT_MODEL_CTL_0 29 /* tcl/7 */
+#define R200_EMIT_TFACTOR_0 30 /* tf/7 */
+#define R200_EMIT_VTX_FMT_0 31 /* vtx/5 */
+#define R200_EMIT_VAP_CTL 32 /* vap/1 */
+#define R200_EMIT_MATRIX_SELECT_0 33 /* msl/5 */
+#define R200_EMIT_TEX_PROC_CTL_2 34 /* tcg/5 */
+#define R200_EMIT_TCL_UCP_VERT_BLEND_CTL 35 /* tcl/1 */
+#define R200_EMIT_PP_TXFILTER_0 36 /* tex0/6 */
+#define R200_EMIT_PP_TXFILTER_1 37 /* tex1/6 */
+#define R200_EMIT_PP_TXFILTER_2 38 /* tex2/6 */
+#define R200_EMIT_PP_TXFILTER_3 39 /* tex3/6 */
+#define R200_EMIT_PP_TXFILTER_4 40 /* tex4/6 */
+#define R200_EMIT_PP_TXFILTER_5 41 /* tex5/6 */
+#define R200_EMIT_PP_TXOFFSET_0 42 /* tex0/1 */
+#define R200_EMIT_PP_TXOFFSET_1 43 /* tex1/1 */
+#define R200_EMIT_PP_TXOFFSET_2 44 /* tex2/1 */
+#define R200_EMIT_PP_TXOFFSET_3 45 /* tex3/1 */
+#define R200_EMIT_PP_TXOFFSET_4 46 /* tex4/1 */
+#define R200_EMIT_PP_TXOFFSET_5 47 /* tex5/1 */
+#define R200_EMIT_VTE_CNTL 48 /* vte/1 */
+#define R200_EMIT_OUTPUT_VTX_COMP_SEL 49 /* vtx/1 */
+#define R200_EMIT_PP_TAM_DEBUG3 50 /* tam/1 */
+#define R200_EMIT_PP_CNTL_X 51 /* cst/1 */
+#define R200_EMIT_RB3D_DEPTHXY_OFFSET 52 /* cst/1 */
+#define R200_EMIT_RE_AUX_SCISSOR_CNTL 53 /* cst/1 */
+#define R200_EMIT_RE_SCISSOR_TL_0 54 /* cst/2 */
+#define R200_EMIT_RE_SCISSOR_TL_1 55 /* cst/2 */
+#define R200_EMIT_RE_SCISSOR_TL_2 56 /* cst/2 */
+#define R200_EMIT_SE_VAP_CNTL_STATUS 57 /* cst/1 */
+#define R200_EMIT_SE_VTX_STATE_CNTL 58 /* cst/1 */
+#define R200_EMIT_RE_POINTSIZE 59 /* cst/1 */
+#define R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0 60 /* cst/4 */
+#define R200_EMIT_PP_CUBIC_FACES_0 61
+#define R200_EMIT_PP_CUBIC_OFFSETS_0 62
+#define R200_EMIT_PP_CUBIC_FACES_1 63
+#define R200_EMIT_PP_CUBIC_OFFSETS_1 64
+#define R200_EMIT_PP_CUBIC_FACES_2 65
+#define R200_EMIT_PP_CUBIC_OFFSETS_2 66
+#define R200_EMIT_PP_CUBIC_FACES_3 67
+#define R200_EMIT_PP_CUBIC_OFFSETS_3 68
+#define R200_EMIT_PP_CUBIC_FACES_4 69
+#define R200_EMIT_PP_CUBIC_OFFSETS_4 70
+#define R200_EMIT_PP_CUBIC_FACES_5 71
+#define R200_EMIT_PP_CUBIC_OFFSETS_5 72
+#define RADEON_EMIT_PP_TEX_SIZE_0 73
+#define RADEON_EMIT_PP_TEX_SIZE_1 74
+#define RADEON_EMIT_PP_TEX_SIZE_2 75
+#define R200_EMIT_RB3D_BLENDCOLOR 76
+#define R200_EMIT_TCL_POINT_SPRITE_CNTL 77
+#define RADEON_EMIT_PP_CUBIC_FACES_0 78
+#define RADEON_EMIT_PP_CUBIC_OFFSETS_T0 79
+#define RADEON_EMIT_PP_CUBIC_FACES_1 80
+#define RADEON_EMIT_PP_CUBIC_OFFSETS_T1 81
+#define RADEON_EMIT_PP_CUBIC_FACES_2 82
+#define RADEON_EMIT_PP_CUBIC_OFFSETS_T2 83
+#define R200_EMIT_PP_TRI_PERF_CNTL 84
+#define R200_EMIT_PP_AFS_0 85
+#define R200_EMIT_PP_AFS_1 86
+#define R200_EMIT_ATF_TFACTOR 87
+#define R200_EMIT_PP_TXCTLALL_0 88
+#define R200_EMIT_PP_TXCTLALL_1 89
+#define R200_EMIT_PP_TXCTLALL_2 90
+#define R200_EMIT_PP_TXCTLALL_3 91
+#define R200_EMIT_PP_TXCTLALL_4 92
+#define R200_EMIT_PP_TXCTLALL_5 93
+#define R200_EMIT_VAP_PVS_CNTL 94
+#define RADEON_MAX_STATE_PACKETS 95
+
+/* Commands understood by cmd_buffer ioctl. More can be added but
+ * obviously these can't be removed or changed:
+ */
+#define RADEON_CMD_PACKET 1 /* emit one of the register packets above */
+#define RADEON_CMD_SCALARS 2 /* emit scalar data */
+#define RADEON_CMD_VECTORS 3 /* emit vector data */
+#define RADEON_CMD_DMA_DISCARD 4 /* discard current dma buf */
+#define RADEON_CMD_PACKET3 5 /* emit hw packet */
+#define RADEON_CMD_PACKET3_CLIP 6 /* emit hw packet wrapped in cliprects */
+#define RADEON_CMD_SCALARS2 7 /* r200 stopgap */
+#define RADEON_CMD_WAIT 8 /* emit hw wait commands -- note:
+ * doesn't make the cpu wait, just
+ * the graphics hardware */
+#define RADEON_CMD_VECLINEAR 9 /* another r200 stopgap */
+
+typedef union { /* One command header: an int overlaid with per-command views of four unsigned chars. */
+ int i; /* the header accessed as a single int */
+ struct {
+ unsigned char cmd_type, pad0, pad1, pad2;
+ } header; /* generic view: only cmd_type is named; presumably a RADEON_CMD_* value -- confirm */
+ struct {
+ unsigned char cmd_type, packet_id, pad0, pad1;
+ } packet; /* packet_id: presumably one of the *_EMIT_* packet ids -- confirm */
+ struct {
+ unsigned char cmd_type, offset, stride, count;
+ } scalars; /* NOTE(review): units of offset/stride/count are not visible here */
+ struct {
+ unsigned char cmd_type, offset, stride, count;
+ } vectors; /* same field layout as scalars */
+ struct {
+ unsigned char cmd_type, addr_lo, addr_hi, count;
+ } veclinear; /* presumably addr_lo/addr_hi are the low/high bytes of one address -- confirm */
+ struct {
+ unsigned char cmd_type, buf_idx, pad0, pad1;
+ } dma; /* buf_idx: presumably the dma buffer the command refers to -- confirm */
+ struct {
+ unsigned char cmd_type, flags, pad0, pad1;
+ } wait; /* flags: presumably RADEON_WAIT_2D and/or RADEON_WAIT_3D -- confirm */
+} drm_radeon_cmd_header_t; /* every view starts with cmd_type, so it can be read through any member */
+
+#define RADEON_WAIT_2D 0x1
+#define RADEON_WAIT_3D 0x2
+
+/* Allowed parameters for R300_CMD_PACKET3
+ */
+#define R300_CMD_PACKET3_CLEAR 0
+#define R300_CMD_PACKET3_RAW 1
+
+/* Commands understood by cmd_buffer ioctl for R300.
+ * The interface has not been stabilized, so some of these may be removed
+ * and eventually reordered before stabilization.
+ */
+#define R300_CMD_PACKET0 1
+#define R300_CMD_VPU 2 /* emit vertex program upload */
+#define R300_CMD_PACKET3 3 /* emit a packet3 */
+#define R300_CMD_END3D 4 /* emit sequence ending 3d rendering */
+#define R300_CMD_CP_DELAY 5
+#define R300_CMD_DMA_DISCARD 6
+#define R300_CMD_WAIT 7
+# define R300_WAIT_2D 0x1
+# define R300_WAIT_3D 0x2
+/* These two defines are DOING IT WRONG - however,
+ * we have userspace which relies on using these.
+ * The wait interface is backwards compatible; new
+ * code should use the NEW_WAIT defines below.
+ * THESE ARE NOT BIT FIELDS.
+ */
+# define R300_WAIT_2D_CLEAN 0x3
+# define R300_WAIT_3D_CLEAN 0x4
+
+# define R300_NEW_WAIT_2D_3D 0x3
+# define R300_NEW_WAIT_2D_2D_CLEAN 0x4
+# define R300_NEW_WAIT_3D_3D_CLEAN 0x6
+# define R300_NEW_WAIT_2D_2D_CLEAN_3D_3D_CLEAN 0x8
+
+#define R300_CMD_SCRATCH 8
+#define R300_CMD_R500FP 9
+
+typedef union { /* One R300 command header: an unsigned int overlaid with per-command views. */
+ unsigned int u; /* the header accessed as a single unsigned int */
+ struct {
+ unsigned char cmd_type, pad0, pad1, pad2;
+ } header; /* generic view: only cmd_type is named; presumably an R300_CMD_* value -- confirm */
+ struct {
+ unsigned char cmd_type, count, reglo, reghi;
+ } packet0; /* presumably reglo/reghi are the low/high bytes of a register address -- confirm */
+ struct {
+ unsigned char cmd_type, count, adrlo, adrhi;
+ } vpu; /* presumably adrlo/adrhi are the low/high bytes of an upload address -- confirm */
+ struct {
+ unsigned char cmd_type, packet, pad0, pad1;
+ } packet3; /* packet: presumably R300_CMD_PACKET3_CLEAR or R300_CMD_PACKET3_RAW -- confirm */
+ struct {
+ unsigned char cmd_type, packet;
+ unsigned short count; /* amount of packet2 to emit */
+ } delay; /* the only view that uses an unsigned short instead of two unsigned chars */
+ struct {
+ unsigned char cmd_type, buf_idx, pad0, pad1;
+ } dma; /* same field layout as the dma view of drm_radeon_cmd_header_t */
+ struct {
+ unsigned char cmd_type, flags, pad0, pad1;
+ } wait; /* flags: presumably one of the R300_WAIT_* / R300_NEW_WAIT_* values -- confirm */
+ struct {
+ unsigned char cmd_type, reg, n_bufs, flags;
+ } scratch; /* NOTE(review): meaning of reg/n_bufs/flags is not visible here -- see the R300_CMD_SCRATCH handler */
+ struct {
+ unsigned char cmd_type, count, adrlo, adrhi_flags;
+ } r500fp; /* adrhi_flags: presumably high address bits combined with R500FP_CONSTANT_* flags -- confirm */
+} drm_r300_cmd_header_t; /* every view starts with cmd_type, so it can be read through any member */
+
+#define RADEON_FRONT 0x1
+#define RADEON_BACK 0x2
+#define RADEON_DEPTH 0x4
+#define RADEON_STENCIL 0x8
+#define RADEON_CLEAR_FASTZ 0x80000000
+#define RADEON_USE_HIERZ 0x40000000
+#define RADEON_USE_COMP_ZBUF 0x20000000
+
+#define R500FP_CONSTANT_TYPE (1 << 1)
+#define R500FP_CONSTANT_CLAMP (1 << 2)
+
+/* Primitive types
+ */
+#define RADEON_POINTS 0x1
+#define RADEON_LINES 0x2
+#define RADEON_LINE_STRIP 0x3
+#define RADEON_TRIANGLES 0x4
+#define RADEON_TRIANGLE_FAN 0x5
+#define RADEON_TRIANGLE_STRIP 0x6
+
+/* Vertex/indirect buffer size
+ */
+#define RADEON_BUFFER_SIZE 65536
+
+/* Byte offsets for indirect buffer data
+ */
+#define RADEON_INDEX_PRIM_OFFSET 20
+
+#define RADEON_SCRATCH_REG_OFFSET 32
+#define R600_SCRATCH_REG_OFFSET 256
+
+#define RADEON_NR_SAREA_CLIPRECTS 12
+
+/* There are 2 heaps (local/GART). Each region within a heap is a
+ * minimum of 64k, and there are at most 64 of them per heap.
+ */
+#define RADEON_LOCAL_TEX_HEAP 0
+#define RADEON_GART_TEX_HEAP 1
+#define RADEON_NR_TEX_HEAPS 2
+#define RADEON_NR_TEX_REGIONS 64
+#define RADEON_LOG_TEX_GRANULARITY 16
+
+#define RADEON_MAX_TEXTURE_LEVELS 12
+#define RADEON_MAX_TEXTURE_UNITS 3
+
+#define RADEON_MAX_SURFACES 8
+
+/* Blits have strict offset rules. All blit offset must be aligned on
+ * a 1K-byte boundary.
+ */
+#define RADEON_OFFSET_SHIFT 10
+#define RADEON_OFFSET_ALIGN (1 << RADEON_OFFSET_SHIFT)
+#define RADEON_OFFSET_MASK (RADEON_OFFSET_ALIGN - 1)
+
+#endif /* __RADEON_SAREA_DEFINES__ */
+
+typedef struct { /* Four unsigned int values, one per color channel. */
+ unsigned int red;
+ unsigned int green;
+ unsigned int blue;
+ unsigned int alpha;
+} radeon_color_regs_t; /* NOTE(review): value range/encoding of each channel is not visible here */
+
+typedef struct { /* Register values grouped by state category; embedded in drm_radeon_state_t and drm_radeon_sarea_t. */
+ /* Context state */
+ unsigned int pp_misc; /* 0x1c14 -- NOTE(review): the hex values look like the register offset of the first field of each run -- confirm */
+ unsigned int pp_fog_color;
+ unsigned int re_solid_color;
+ unsigned int rb3d_blendcntl;
+ unsigned int rb3d_depthoffset;
+ unsigned int rb3d_depthpitch;
+ unsigned int rb3d_zstencilcntl;
+
+ unsigned int pp_cntl; /* 0x1c38 */
+ unsigned int rb3d_cntl;
+ unsigned int rb3d_coloroffset;
+ unsigned int re_width_height;
+ unsigned int rb3d_colorpitch;
+ unsigned int se_cntl;
+
+ /* Vertex format state */
+ unsigned int se_coord_fmt; /* 0x1c50 */
+
+ /* Line state */
+ unsigned int re_line_pattern; /* 0x1cd0 */
+ unsigned int re_line_state;
+
+ unsigned int se_line_width; /* 0x1db8 */
+
+ /* Bumpmap state */
+ unsigned int pp_lum_matrix; /* 0x1d00 */
+
+ unsigned int pp_rot_matrix_0; /* 0x1d58 */
+ unsigned int pp_rot_matrix_1;
+
+ /* Mask state */
+ unsigned int rb3d_stencilrefmask; /* 0x1d7c */
+ unsigned int rb3d_ropcntl;
+ unsigned int rb3d_planemask;
+
+ /* Viewport state */
+ unsigned int se_vport_xscale; /* 0x1d98 */
+ unsigned int se_vport_xoffset;
+ unsigned int se_vport_yscale;
+ unsigned int se_vport_yoffset;
+ unsigned int se_vport_zscale;
+ unsigned int se_vport_zoffset;
+
+ /* Setup state */
+ unsigned int se_cntl_status; /* 0x2140 */
+
+ /* Misc state */
+ unsigned int re_top_left; /* 0x26c0 */
+ unsigned int re_misc;
+} drm_radeon_context_regs_t; /* all fields are plain unsigned int; the field order is part of the layout */
+
+typedef struct { /* Zbias register pair; stored as the context2 member of drm_radeon_state_t. */
+ /* Zbias state */
+ unsigned int se_zbias_factor; /* 0x1dac */
+ unsigned int se_zbias_constant;
+} drm_radeon_context2_regs_t; /* NOTE(review): presumably covered by RADEON_UPLOAD_ZBIAS (version 1.2 and newer) -- confirm */
+
+/* Setup registers for each texture unit
+ */
+typedef struct { /* Register values for one texture unit; used in arrays of RADEON_MAX_TEXTURE_UNITS entries. */
+ unsigned int pp_txfilter;
+ unsigned int pp_txformat;
+ unsigned int pp_txoffset;
+ unsigned int pp_txcblend;
+ unsigned int pp_txablend;
+ unsigned int pp_tfactor;
+ unsigned int pp_border_color;
+} drm_radeon_texture_regs_t; /* seven unsigned int fields; NOTE(review): register offsets are not given in this header */
+
+typedef struct { /* Description of one primitive; NOTE(review): the ioctl that consumes it is outside this view. */
+ unsigned int start; /* NOTE(review): start/finish look like the bounds of a range; units not visible here -- confirm */
+ unsigned int finish;
+ unsigned int prim:8; /* presumably one of the primitive type values (RADEON_POINTS ...) -- confirm */
+ unsigned int stateidx:8; /* NOTE(review): looks like an index selecting a state record -- confirm */
+ unsigned int numverts:16; /* overloaded as offset/64 for elt prims */
+ unsigned int vc_format; /* vertex format */
+} drm_radeon_prim_t; /* prim, stateidx and numverts are bit-fields totalling 32 bits */
+
+typedef struct { /* One complete state record: context registers, per-unit texture registers and zbias registers. */
+ drm_radeon_context_regs_t context;
+ drm_radeon_texture_regs_t tex[RADEON_MAX_TEXTURE_UNITS]; /* one entry per texture unit */
+ drm_radeon_context2_regs_t context2;
+ unsigned int dirty; /* presumably a mask of RADEON_UPLOAD_* flags -- confirm */
+} drm_radeon_state_t;
+
+typedef struct { /* State, cliprects, throttling counters and texture bookkeeping grouped in one record. */
+ /* The channel for communication of state information to the
+ * kernel on firing a vertex buffer with either of the
+ * obsoleted vertex/index ioctls.
+ */
+ drm_radeon_context_regs_t context_state;
+ drm_radeon_texture_regs_t tex_state[RADEON_MAX_TEXTURE_UNITS]; /* one entry per texture unit */
+ unsigned int dirty; /* presumably a mask of RADEON_UPLOAD_* flags -- confirm */
+ unsigned int vertsize;
+ unsigned int vc_format;
+
+ /* The current cliprects, or a subset thereof.
+ */
+ struct drm_clip_rect boxes[RADEON_NR_SAREA_CLIPRECTS]; /* struct drm_clip_rect is not declared in this header */
+ unsigned int nbox; /* presumably the number of valid entries in boxes -- confirm */
+
+ /* Counters for client-side throttling of rendering clients.
+ */
+ unsigned int last_frame;
+ unsigned int last_dispatch;
+ unsigned int last_clear;
+
+ struct drm_tex_region tex_list[RADEON_NR_TEX_HEAPS][RADEON_NR_TEX_REGIONS +
+ 1]; /* per heap: RADEON_NR_TEX_REGIONS + 1 entries; struct drm_tex_region is not declared in this header */
+ unsigned int tex_age[RADEON_NR_TEX_HEAPS]; /* one value per heap */
+ int ctx_owner;
+ int pfState; /* number of 3d windows (0, 1, 2 or more) */
+ int pfCurrentPage; /* which buffer is being displayed? */
+ int crtc2_base; /* CRTC2 frame offset */
+ int tiling_enabled; /* set by drm, read by 2d + 3d clients */
+} drm_radeon_sarea_t;
+
+/* WARNING: If you change any of these defines, make sure to change the
+ * defines in the Xserver file (xf86drmRadeon.h)
+ *
+ * KW: actually it's illegal to change any of this (backwards compatibility).
+ */
+
+/* Radeon specific ioctls (numbers are added to DRM_COMMAND_BASE by DRM_IOCTL_RADEON_* below)
+ * The device specific ioctl range is 0x40 to 0x79.
+ */
+#define DRM_RADEON_CP_INIT 0x00
+#define DRM_RADEON_CP_START 0x01
+#define DRM_RADEON_CP_STOP 0x02
+#define DRM_RADEON_CP_RESET 0x03
+#define DRM_RADEON_CP_IDLE 0x04
+#define DRM_RADEON_RESET 0x05
+#define DRM_RADEON_FULLSCREEN 0x06
+#define DRM_RADEON_SWAP 0x07
+#define DRM_RADEON_CLEAR 0x08
+#define DRM_RADEON_VERTEX 0x09
+#define DRM_RADEON_INDICES 0x0A
+#define DRM_RADEON_NOT_USED /* defined with no value; number 0x0B is skipped in this list */
+#define DRM_RADEON_STIPPLE 0x0C
+#define DRM_RADEON_INDIRECT 0x0D
+#define DRM_RADEON_TEXTURE 0x0E
+#define DRM_RADEON_VERTEX2 0x0F
+#define DRM_RADEON_CMDBUF 0x10
+#define DRM_RADEON_GETPARAM 0x11
+#define DRM_RADEON_FLIP 0x12
+#define DRM_RADEON_ALLOC 0x13
+#define DRM_RADEON_FREE 0x14
+#define DRM_RADEON_INIT_HEAP 0x15
+#define DRM_RADEON_IRQ_EMIT 0x16
+#define DRM_RADEON_IRQ_WAIT 0x17
+#define DRM_RADEON_CP_RESUME 0x18
+#define DRM_RADEON_SETPARAM 0x19
+#define DRM_RADEON_SURF_ALLOC 0x1a
+#define DRM_RADEON_SURF_FREE 0x1b
+
+#define DRM_IOCTL_RADEON_CP_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_CP_INIT, drm_radeon_init_t) /* DRM_IOW/DRM_IOWR entries name their argument struct; DRM_IO entries name none */
+#define DRM_IOCTL_RADEON_CP_START DRM_IO( DRM_COMMAND_BASE + DRM_RADEON_CP_START)
+#define DRM_IOCTL_RADEON_CP_STOP DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_CP_STOP, drm_radeon_cp_stop_t)
+#define DRM_IOCTL_RADEON_CP_RESET DRM_IO( DRM_COMMAND_BASE + DRM_RADEON_CP_RESET)
+#define DRM_IOCTL_RADEON_CP_IDLE DRM_IO( DRM_COMMAND_BASE + DRM_RADEON_CP_IDLE)
+#define DRM_IOCTL_RADEON_RESET DRM_IO( DRM_COMMAND_BASE + DRM_RADEON_RESET)
+#define DRM_IOCTL_RADEON_FULLSCREEN DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_FULLSCREEN, drm_radeon_fullscreen_t)
+#define DRM_IOCTL_RADEON_SWAP DRM_IO( DRM_COMMAND_BASE + DRM_RADEON_SWAP)
+#define DRM_IOCTL_RADEON_CLEAR DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_CLEAR, drm_radeon_clear_t)
+#define DRM_IOCTL_RADEON_VERTEX DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_VERTEX, drm_radeon_vertex_t)
+#define DRM_IOCTL_RADEON_INDICES DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_INDICES, drm_radeon_indices_t) /* no request is defined for DRM_RADEON_NOT_USED */
+#define DRM_IOCTL_RADEON_STIPPLE DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_STIPPLE, drm_radeon_stipple_t)
+#define DRM_IOCTL_RADEON_INDIRECT DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_INDIRECT, drm_radeon_indirect_t)
+#define DRM_IOCTL_RADEON_TEXTURE DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_TEXTURE, drm_radeon_texture_t)
+#define DRM_IOCTL_RADEON_VERTEX2 DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_VERTEX2, drm_radeon_vertex2_t)
+#define DRM_IOCTL_RADEON_CMDBUF DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_CMDBUF, drm_radeon_cmd_buffer_t)
+#define DRM_IOCTL_RADEON_GETPARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GETPARAM, drm_radeon_getparam_t)
+#define DRM_IOCTL_RADEON_FLIP DRM_IO( DRM_COMMAND_BASE + DRM_RADEON_FLIP)
+#define DRM_IOCTL_RADEON_ALLOC DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_ALLOC, drm_radeon_mem_alloc_t)
+#define DRM_IOCTL_RADEON_FREE DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_FREE, drm_radeon_mem_free_t)
+#define DRM_IOCTL_RADEON_INIT_HEAP DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_INIT_HEAP, drm_radeon_mem_init_heap_t)
+#define DRM_IOCTL_RADEON_IRQ_EMIT DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_IRQ_EMIT, drm_radeon_irq_emit_t)
+#define DRM_IOCTL_RADEON_IRQ_WAIT DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_IRQ_WAIT, drm_radeon_irq_wait_t)
+#define DRM_IOCTL_RADEON_CP_RESUME DRM_IO( DRM_COMMAND_BASE + DRM_RADEON_CP_RESUME)
+#define DRM_IOCTL_RADEON_SETPARAM DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_SETPARAM, drm_radeon_setparam_t)
+#define DRM_IOCTL_RADEON_SURF_ALLOC DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_SURF_ALLOC, drm_radeon_surface_alloc_t)
+#define DRM_IOCTL_RADEON_SURF_FREE DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_SURF_FREE, drm_radeon_surface_free_t)
+
+typedef struct drm_radeon_init { /* argument struct of DRM_IOCTL_RADEON_CP_INIT */
+ enum {
+ RADEON_INIT_CP = 0x01,
+ RADEON_CLEANUP_CP = 0x02,
+ RADEON_INIT_R200_CP = 0x03,
+ RADEON_INIT_R300_CP = 0x04,
+ RADEON_INIT_R600_CP = 0x05,
+ } func; /* NOTE(review): presumably selects which init/cleanup operation is requested - confirm */
+ unsigned long sarea_priv_offset;
+ int is_pci; /* for overriding only */
+ int cp_mode;
+ int gart_size;
+ int ring_size;
+ int usec_timeout;
+
+ unsigned int fb_bpp;
+ unsigned int front_offset, front_pitch;
+ unsigned int back_offset, back_pitch;
+ unsigned int depth_bpp;
+ unsigned int depth_offset, depth_pitch;
+
+ unsigned long fb_offset DEPRECATED; /* deprecated, driver asks hardware */
+ unsigned long mmio_offset DEPRECATED; /* deprecated, driver asks hardware */
+ unsigned long ring_offset;
+ unsigned long ring_rptr_offset;
+ unsigned long buffers_offset;
+ unsigned long gart_textures_offset;
+} drm_radeon_init_t;
+
+typedef struct drm_radeon_cp_stop { /* argument struct of DRM_IOCTL_RADEON_CP_STOP */
+ int flush; /* NOTE(review): presumably a boolean request to flush before stopping - confirm */
+ int idle; /* NOTE(review): presumably a boolean request to wait for idle - confirm */
+} drm_radeon_cp_stop_t;
+
+typedef struct drm_radeon_fullscreen { /* argument struct of DRM_IOCTL_RADEON_FULLSCREEN */
+ enum {
+ RADEON_INIT_FULLSCREEN = 0x01,
+ RADEON_CLEANUP_FULLSCREEN = 0x02
+ } func; /* the only member: one of the two enumerators above */
+} drm_radeon_fullscreen_t;
+
+#define CLEAR_X1 0 /* NOTE(review): CLEAR_* are presumably indices into drm_radeon_clear_rect_t.f[] / .ui[] - confirm */
+#define CLEAR_Y1 1
+#define CLEAR_X2 2
+#define CLEAR_Y2 3
+#define CLEAR_DEPTH 4
+
+typedef union drm_radeon_clear_rect { /* the same 5 slots viewed either as float or as unsigned int */
+ float f[5];
+ unsigned int ui[5];
+} drm_radeon_clear_rect_t;
+
+typedef struct drm_radeon_clear { /* argument struct of DRM_IOCTL_RADEON_CLEAR */
+ unsigned int flags;
+ unsigned int clear_color;
+ unsigned int clear_depth;
+ unsigned int color_mask;
+ unsigned int depth_mask; /* misnamed field: should be stencil */
+ drm_radeon_clear_rect_t __user *depth_boxes; /* pointer marked __user; element count is not carried in this struct */
+} drm_radeon_clear_t;
+
+typedef struct drm_radeon_vertex { /* argument struct of DRM_IOCTL_RADEON_VERTEX; obsoleted by drm_radeon_vertex2 (v1.2) */
+ int prim;
+ int idx; /* Index of vertex buffer */
+ int count; /* Number of vertices in buffer */
+ int discard; /* Client finished with buffer? */
+} drm_radeon_vertex_t;
+
+typedef struct drm_radeon_indices { /* argument struct of DRM_IOCTL_RADEON_INDICES; obsoleted by drm_radeon_vertex2 (v1.2) */
+ int prim;
+ int idx; /* NOTE(review): presumably a buffer index as in drm_radeon_vertex.idx - confirm */
+ int start;
+ int end;
+ int discard; /* Client finished with buffer? */
+} drm_radeon_indices_t;
+
+/* v1.2 - obsoletes drm_radeon_vertex and drm_radeon_indices
+ * - allows multiple primitives and state changes in a single ioctl
+ * - supports driver change to emit native primitives
+ */
+typedef struct drm_radeon_vertex2 { /* argument struct of DRM_IOCTL_RADEON_VERTEX2 */
+ int idx; /* Index of vertex buffer */
+ int discard; /* Client finished with buffer? */
+ int nr_states; /* NOTE(review): presumably the number of entries behind 'state' - confirm */
+ drm_radeon_state_t __user *state; /* pointer marked __user */
+ int nr_prims; /* NOTE(review): presumably the number of entries behind 'prim' - confirm */
+ drm_radeon_prim_t __user *prim; /* pointer marked __user */
+} drm_radeon_vertex2_t;
+
+/* v1.3 - obsoletes drm_radeon_vertex2
+ * - allows arbitrarily large cliprect list
+ * - allows updating of tcl packet, vector and scalar state
+ * - allows memory-efficient description of state updates
+ * - allows state to be emitted without a primitive
+ * (for clears, ctx switches)
+ * - allows more than one dma buffer to be referenced per ioctl
+ * - supports tcl driver
+ * - may be extended in future versions with new cmd types, packets
+ */
+typedef struct drm_radeon_cmd_buffer { /* argument struct of DRM_IOCTL_RADEON_CMDBUF */
+ int bufsz; /* NOTE(review): presumably the size of 'buf' in bytes - confirm */
+ char __user *buf; /* pointer marked __user */
+ int nbox; /* NOTE(review): presumably the number of entries behind 'boxes' - confirm */
+ struct drm_clip_rect __user *boxes; /* pointer marked __user */
+} drm_radeon_cmd_buffer_t;
+
+typedef struct drm_radeon_tex_image { /* image descriptor; referenced by drm_radeon_texture_t.image */
+ unsigned int x, y; /* Blit coordinates */
+ unsigned int width, height;
+ const void __user *data; /* read-only (const) pointer marked __user */
+} drm_radeon_tex_image_t;
+
+typedef struct drm_radeon_texture { /* argument struct of DRM_IOCTL_RADEON_TEXTURE (declared with DRM_IOWR) */
+ unsigned int offset;
+ int pitch;
+ int format;
+ int width; /* Texture image coordinates */
+ int height;
+ drm_radeon_tex_image_t __user *image; /* pointer marked __user */
+} drm_radeon_texture_t;
+
+typedef struct drm_radeon_stipple { /* argument struct of DRM_IOCTL_RADEON_STIPPLE */
+ unsigned int __user *mask; /* pointer marked __user; length is not carried in this struct */
+} drm_radeon_stipple_t;
+
+typedef struct drm_radeon_indirect { /* argument struct of DRM_IOCTL_RADEON_INDIRECT (declared with DRM_IOWR) */
+ int idx;
+ int start;
+ int end;
+ int discard;
+} drm_radeon_indirect_t;
+
+#define RADEON_INDIRECT_DISCARD (1 << 0) /* bit 0; NOTE(review): where these flags are passed is not visible here - confirm */
+#define RADEON_INDIRECT_NOFLUSH (1 << 1) /* bit 1 */
+
+/* Card type values. NOTE(review): presumably what RADEON_PARAM_CARD_TYPE reports - confirm */
+#define RADEON_CARD_PCI 0
+#define RADEON_CARD_AGP 1
+#define RADEON_CARD_PCIE 2
+
+/* 1.3: An ioctl to get parameters that aren't available to the 3d
+ * client any other way.
+ */
+#define RADEON_PARAM_GART_BUFFER_OFFSET 1 /* card offset of 1st GART buffer */
+#define RADEON_PARAM_LAST_FRAME 2 /* NOTE(review): LAST_FRAME/DISPATCH/CLEAR presumably match the same-named drm_radeon_sarea_t counters - confirm */
+#define RADEON_PARAM_LAST_DISPATCH 3
+#define RADEON_PARAM_LAST_CLEAR 4
+/* Added with DRM version 1.6. */
+#define RADEON_PARAM_IRQ_NR 5
+#define RADEON_PARAM_GART_BASE 6 /* card offset of GART base */
+/* Added with DRM version 1.8. */
+#define RADEON_PARAM_REGISTER_HANDLE 7 /* for drmMap() */
+#define RADEON_PARAM_STATUS_HANDLE 8
+#define RADEON_PARAM_SAREA_HANDLE 9
+#define RADEON_PARAM_GART_TEX_HANDLE 10
+#define RADEON_PARAM_SCRATCH_OFFSET 11
+#define RADEON_PARAM_CARD_TYPE 12
+#define RADEON_PARAM_VBLANK_CRTC 13 /* VBLANK CRTC */
+#define RADEON_PARAM_FB_LOCATION 14 /* FB location */
+#define RADEON_PARAM_NUM_GB_PIPES 15 /* num GB pipes */
+
+typedef struct drm_radeon_getparam { /* argument struct of DRM_IOCTL_RADEON_GETPARAM (declared with DRM_IOWR) */
+ int param; /* NOTE(review): presumably one of the RADEON_PARAM_* values - confirm */
+ void __user *value; /* pointer marked __user; pointee type is not fixed here (void) */
+} drm_radeon_getparam_t;
+
+/* 1.6: Set up a memory manager for regions of shared memory:
+ */
+#define RADEON_MEM_REGION_GART 1
+#define RADEON_MEM_REGION_FB 2
+
+typedef struct drm_radeon_mem_alloc { /* argument struct of DRM_IOCTL_RADEON_ALLOC (declared with DRM_IOWR) */
+ int region; /* NOTE(review): presumably RADEON_MEM_REGION_GART or RADEON_MEM_REGION_FB - confirm */
+ int alignment;
+ int size;
+ int __user *region_offset; /* offset from start of fb or GART */
+} drm_radeon_mem_alloc_t;
+
+typedef struct drm_radeon_mem_free { /* argument struct of DRM_IOCTL_RADEON_FREE */
+ int region; /* NOTE(review): presumably RADEON_MEM_REGION_GART or RADEON_MEM_REGION_FB - confirm */
+ int region_offset; /* plain int here, unlike the __user pointer in drm_radeon_mem_alloc */
+} drm_radeon_mem_free_t;
+
+typedef struct drm_radeon_mem_init_heap { /* argument struct of DRM_IOCTL_RADEON_INIT_HEAP */
+ int region; /* NOTE(review): presumably RADEON_MEM_REGION_GART or RADEON_MEM_REGION_FB - confirm */
+ int size;
+ int start;
+} drm_radeon_mem_init_heap_t;
+
+/* 1.6: Userspace can request and wait on IRQs:
+ */
+typedef struct drm_radeon_irq_emit { /* argument struct of DRM_IOCTL_RADEON_IRQ_EMIT (declared with DRM_IOWR) */
+ int __user *irq_seq; /* pointer marked __user; NOTE(review): presumably receives the emitted sequence number - confirm */
+} drm_radeon_irq_emit_t;
+
+typedef struct drm_radeon_irq_wait { /* argument struct of DRM_IOCTL_RADEON_IRQ_WAIT */
+ int irq_seq; /* passed by value here, unlike the __user pointer in drm_radeon_irq_emit */
+} drm_radeon_irq_wait_t;
+
+/* 1.10: Clients tell the DRM where they think the framebuffer is located in
+ * the card's address space, via a new generic ioctl to set parameters
+ */
+
+typedef struct drm_radeon_setparam { /* argument struct of DRM_IOCTL_RADEON_SETPARAM */
+ unsigned int param; /* NOTE(review): presumably one of the RADEON_SETPARAM_* values below - confirm */
+ int64_t value; /* int64_t is not declared in the visible part of this header */
+} drm_radeon_setparam_t;
+
+#define RADEON_SETPARAM_FB_LOCATION 1 /* determined framebuffer location */
+#define RADEON_SETPARAM_SWITCH_TILING 2 /* enable/disable color tiling */
+#define RADEON_SETPARAM_PCIGART_LOCATION 3 /* PCI Gart Location */
+
+#define RADEON_SETPARAM_NEW_MEMMAP 4 /* Use new memory map */
+#define RADEON_SETPARAM_PCIGART_TABLE_SIZE 5 /* PCI GART Table Size */
+#define RADEON_SETPARAM_VBLANK_CRTC 6 /* VBLANK CRTC */
+/* 1.14: Clients can allocate/free a surface
+ */
+typedef struct drm_radeon_surface_alloc { /* argument struct of DRM_IOCTL_RADEON_SURF_ALLOC */
+ unsigned int address;
+ unsigned int size;
+ unsigned int flags;
+} drm_radeon_surface_alloc_t;
+
+typedef struct drm_radeon_surface_free { /* argument struct of DRM_IOCTL_RADEON_SURF_FREE */
+ unsigned int address; /* NOTE(review): presumably the address given to drm_radeon_surface_alloc - confirm */
+} drm_radeon_surface_free_t;
+
+#define DRM_RADEON_VBLANK_CRTC1 1 /* NOTE(review): presumably values for RADEON_PARAM_VBLANK_CRTC / RADEON_SETPARAM_VBLANK_CRTC - confirm */
+#define DRM_RADEON_VBLANK_CRTC2 2
+
+#endif